Fix typos in 'string-join' docs: 'string-infix' -> 'strict-infix'.
[bpt/guile.git] / libguile / strings.c
CommitLineData
a7e392c1 1/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
0f2d19dd 2 *
73be1d9e 3 * This library is free software; you can redistribute it and/or
53befeb7
NJ
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
0f2d19dd 7 *
53befeb7
NJ
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
73be1d9e
MV
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
0f2d19dd 12 *
73be1d9e
MV
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
53befeb7
NJ
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
73be1d9e 17 */
1bbd0b84 18
1bbd0b84 19
0f2d19dd 20\f
dbb605f5
LC
21#ifdef HAVE_CONFIG_H
22# include <config.h>
23#endif
0f2d19dd 24
cdd47ec7 25#include <alloca.h>
faf2c9d7 26#include <string.h>
3ee86942 27#include <stdio.h>
9c44cd45 28#include <ctype.h>
edb7bb47 29#include <uninorm.h>
9c44cd45 30#include <unistr.h>
eca29b02 31#include <uniconv.h>
faf2c9d7 32
889975e5
MG
33#include "striconveh.h"
34
a0599745
MD
35#include "libguile/_scm.h"
36#include "libguile/chars.h"
7c33806a 37#include "libguile/root.h"
a0599745 38#include "libguile/strings.h"
a3d7d5d5 39#include "libguile/error.h"
f45eccff 40#include "libguile/generalized-vectors.h"
1afff620 41#include "libguile/deprecation.h"
a0599745 42#include "libguile/validate.h"
d31b9519 43#include "libguile/private-options.h"
1afff620 44
0f2d19dd
JB
45\f
46
47/* {Strings}
48 */
49
3ee86942
MV
50
51/* Stringbufs
52 *
53 * XXX - keeping an accurate refcount during GC seems to be quite
54 * tricky, so we just keep score of whether a stringbuf might be
50b1996f 55 * shared, not whether it definitely is.
3ee86942
MV
56 *
57 * The scheme I (mvo) tried to keep an accurate reference count would
58 * recount all strings that point to a stringbuf during the mark-phase
59 * of the GC. This was done since one cannot access the stringbuf of
60 * a string when that string is freed (in order to decrease the
61 * reference count). The memory of the stringbuf might have been
62 * reused already for something completely different.
63 *
64 * This recounted worked for a small number of threads beating on
65 * cow-strings, but it failed randomly with more than 10 threads, say.
66 * I couldn't figure out what went wrong, so I used the conservative
67 * approach implemented below.
50b1996f 68 *
ba54a202
LC
69 * There are 2 storage strategies for stringbufs: 8-bit and wide. 8-bit
70 * strings are ISO-8859-1-encoded strings; wide strings are 32-bit (UCS-4)
71 * strings.
3ee86942
MV
72 */
73
ba54a202
LC
74/* The size in words of the stringbuf header (type tag + size). */
75#define STRINGBUF_HEADER_SIZE 2U
76
77#define STRINGBUF_HEADER_BYTES (STRINGBUF_HEADER_SIZE * sizeof (SCM))
78
35920c00 79#define STRINGBUF_F_SHARED SCM_I_STRINGBUF_F_SHARED
5f236208 80#define STRINGBUF_F_WIDE SCM_I_STRINGBUF_F_WIDE
3ee86942
MV
81
82#define STRINGBUF_TAG scm_tc7_stringbuf
83#define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
9c44cd45 84#define STRINGBUF_WIDE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_WIDE)
3ee86942 85
100e20c7 86#define STRINGBUF_CONTENTS(buf) ((void *) \
ba54a202
LC
87 SCM_CELL_OBJECT_LOC (buf, \
88 STRINGBUF_HEADER_SIZE))
100e20c7
LC
89#define STRINGBUF_CHARS(buf) ((unsigned char *) STRINGBUF_CONTENTS (buf))
90#define STRINGBUF_WIDE_CHARS(buf) ((scm_t_wchar *) STRINGBUF_CONTENTS (buf))
3ee86942 91
100e20c7 92#define STRINGBUF_LENGTH(buf) (SCM_CELL_WORD_1 (buf))
3ee86942 93
9b41542f
LC
94#define SET_STRINGBUF_SHARED(buf) \
95 do \
96 { \
97 /* Don't modify BUF if it's already marked as shared since it might be \
98 a read-only, statically allocated stringbuf. */ \
99 if (SCM_LIKELY (!STRINGBUF_SHARED (buf))) \
100 SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED); \
101 } \
102 while (0)
3ee86942 103
56a3dcd4 104#ifdef SCM_STRING_LENGTH_HISTOGRAM
3ee86942
MV
105static size_t lenhist[1001];
106#endif
107
50b1996f
MG
108/* Make a stringbuf with space for LEN 8-bit Latin-1-encoded
109 characters. */
3ee86942
MV
110static SCM
111make_stringbuf (size_t len)
0f2d19dd 112{
3ee86942
MV
113 /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and
114 scm_i_symbol_chars, all stringbufs are null-terminated. Once
115 SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code
116 has been changed for scm_i_symbol_chars, this null-termination
117 can be dropped.
118 */
119
ba54a202
LC
120 SCM buf;
121
56a3dcd4 122#ifdef SCM_STRING_LENGTH_HISTOGRAM
3ee86942
MV
123 if (len < 1000)
124 lenhist[len]++;
125 else
126 lenhist[1000]++;
127#endif
0f2d19dd 128
ba54a202
LC
129 buf = PTR2SCM (scm_gc_malloc_pointerless (STRINGBUF_HEADER_BYTES + len + 1,
130 "string"));
131
132 SCM_SET_CELL_TYPE (buf, STRINGBUF_TAG);
133 SCM_SET_CELL_WORD_1 (buf, (scm_t_bits) len);
134
135 STRINGBUF_CHARS (buf)[len] = 0;
136
137 return buf;
3ee86942 138}
e53cc817 139
50b1996f
MG
140/* Make a stringbuf with space for LEN 32-bit UCS-4-encoded
141 characters. */
9c44cd45
MG
142static SCM
143make_wide_stringbuf (size_t len)
144{
ba54a202
LC
145 SCM buf;
146 size_t raw_len;
147
56a3dcd4 148#ifdef SCM_STRING_LENGTH_HISTOGRAM
9c44cd45
MG
149 if (len < 1000)
150 lenhist[len]++;
151 else
152 lenhist[1000]++;
153#endif
154
ba54a202
LC
155 raw_len = (len + 1) * sizeof (scm_t_wchar);
156 buf = PTR2SCM (scm_gc_malloc_pointerless (STRINGBUF_HEADER_BYTES + raw_len,
157 "string"));
158
159 SCM_SET_CELL_TYPE (buf, STRINGBUF_TAG | STRINGBUF_F_WIDE);
160 SCM_SET_CELL_WORD_1 (buf, (scm_t_bits) len);
161
162 STRINGBUF_WIDE_CHARS (buf)[len] = 0;
163
164 return buf;
9c44cd45
MG
165}
166
ba54a202
LC
167/* Return a UCS-4-encoded stringbuf containing the (possibly Latin-1-encoded)
168 characters from BUF. */
169static SCM
170wide_stringbuf (SCM buf)
9c44cd45 171{
ba54a202 172 SCM new_buf;
9c44cd45
MG
173
174 if (STRINGBUF_WIDE (buf))
ba54a202
LC
175 new_buf = buf;
176 else
9c44cd45 177 {
ba54a202
LC
178 size_t i, len;
179 scm_t_wchar *mem;
9c44cd45 180
ba54a202 181 len = STRINGBUF_LENGTH (buf);
9c44cd45 182
ba54a202 183 new_buf = make_wide_stringbuf (len);
9c44cd45 184
ba54a202 185 mem = STRINGBUF_WIDE_CHARS (new_buf);
9c44cd45 186 for (i = 0; i < len; i++)
ba54a202 187 mem[i] = (scm_t_wchar) STRINGBUF_CHARS (buf)[i];
9c44cd45 188 mem[len] = 0;
9c44cd45 189 }
ba54a202
LC
190
191 return new_buf;
3ee86942 192}
bd9e24b3 193
ba54a202
LC
194/* Return a Latin-1-encoded stringbuf containing the (possibly UCS-4-encoded)
195 characters from BUF, if possible. */
196static SCM
587a3355
MG
197narrow_stringbuf (SCM buf)
198{
ba54a202 199 SCM new_buf;
587a3355
MG
200
201 if (!STRINGBUF_WIDE (buf))
ba54a202
LC
202 new_buf = buf;
203 else
204 {
205 size_t i, len;
206 scm_t_wchar *wmem;
207 unsigned char *mem;
587a3355 208
ba54a202
LC
209 len = STRINGBUF_LENGTH (buf);
210 wmem = STRINGBUF_WIDE_CHARS (buf);
587a3355 211
ba54a202
LC
212 for (i = 0; i < len; i++)
213 if (wmem[i] > 0xFF)
214 /* BUF cannot be narrowed. */
215 return buf;
587a3355 216
ba54a202 217 new_buf = make_stringbuf (len);
587a3355 218
ba54a202
LC
219 mem = STRINGBUF_CHARS (new_buf);
220 for (i = 0; i < len; i++)
221 mem[i] = (unsigned char) wmem[i];
222 mem[len] = 0;
223 }
224
225 return new_buf;
587a3355
MG
226}
227
9de87eea 228scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
bd9e24b3 229
ba54a202 230\f
3ee86942
MV
231/* Copy-on-write strings.
232 */
bd9e24b3 233
3ee86942 234#define STRING_TAG scm_tc7_string
bd9e24b3 235
3ee86942
MV
236#define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str))
237#define STRING_START(str) ((size_t)SCM_CELL_WORD_2(str))
238#define STRING_LENGTH(str) ((size_t)SCM_CELL_WORD_3(str))
bd9e24b3 239
3ee86942
MV
240#define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf))
241#define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start))
242
243#define IS_STRING(str) (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG)
244
ed35de72
MV
245/* Read-only strings.
246 */
247
35920c00 248#define RO_STRING_TAG scm_tc7_ro_string
ed35de72
MV
249#define IS_RO_STRING(str) (SCM_CELL_TYPE(str)==RO_STRING_TAG)
250
e1b29f6a
MV
251/* Mutation-sharing substrings
252 */
253
254#define SH_STRING_TAG (scm_tc7_string + 0x100)
255
256#define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh))
257/* START and LENGTH as for STRINGs. */
258
259#define IS_SH_STRING(str) (SCM_CELL_TYPE(str)==SH_STRING_TAG)
260
e7efe8e7
AW
261SCM scm_nullstr;
262
50b1996f
MG
263/* Create a scheme string with space for LEN 8-bit Latin-1-encoded
264 characters. CHARSP, if not NULL, will be set to location of the
190d4b0d
LC
265 char array. If READ_ONLY_P, the returned string is read-only;
266 otherwise it is writable. */
3ee86942 267SCM
190d4b0d 268scm_i_make_string (size_t len, char **charsp, int read_only_p)
3ee86942 269{
69cd5299
MW
270 static SCM null_stringbuf = SCM_BOOL_F;
271 SCM buf;
3ee86942 272 SCM res;
69cd5299
MW
273
274 if (len == 0)
275 {
276 if (SCM_UNLIKELY (scm_is_false (null_stringbuf)))
277 {
278 null_stringbuf = make_stringbuf (0);
279 SET_STRINGBUF_SHARED (null_stringbuf);
280 }
281 buf = null_stringbuf;
282 }
283 else
284 buf = make_stringbuf (len);
285
3ee86942 286 if (charsp)
f59cf998 287 *charsp = (char *) STRINGBUF_CHARS (buf);
190d4b0d
LC
288 res = scm_double_cell (read_only_p ? RO_STRING_TAG : STRING_TAG,
289 SCM_UNPACK (buf),
290 (scm_t_bits) 0, (scm_t_bits) len);
3ee86942 291 return res;
0f2d19dd
JB
292}
293
50b1996f
MG
294/* Create a scheme string with space for LEN 32-bit UCS-4-encoded
295 characters. CHARSP, if not NULL, will be set to location of the
190d4b0d
LC
296 character array. If READ_ONLY_P, the returned string is read-only;
297 otherwise it is writable. */
9c44cd45 298SCM
190d4b0d 299scm_i_make_wide_string (size_t len, scm_t_wchar **charsp, int read_only_p)
9c44cd45
MG
300{
301 SCM buf = make_wide_stringbuf (len);
302 SCM res;
303 if (charsp)
304 *charsp = STRINGBUF_WIDE_CHARS (buf);
190d4b0d
LC
305 res = scm_double_cell (read_only_p ? RO_STRING_TAG : STRING_TAG,
306 SCM_UNPACK (buf),
9c44cd45
MG
307 (scm_t_bits) 0, (scm_t_bits) len);
308 return res;
309}
310
3ee86942
MV
311static void
312validate_substring_args (SCM str, size_t start, size_t end)
313{
314 if (!IS_STRING (str))
315 scm_wrong_type_arg_msg (NULL, 0, str, "string");
316 if (start > STRING_LENGTH (str))
317 scm_out_of_range (NULL, scm_from_size_t (start));
318 if (end > STRING_LENGTH (str) || end < start)
319 scm_out_of_range (NULL, scm_from_size_t (end));
320}
0f2d19dd 321
e1b29f6a
MV
322static inline void
323get_str_buf_start (SCM *str, SCM *buf, size_t *start)
324{
325 *start = STRING_START (*str);
326 if (IS_SH_STRING (*str))
327 {
328 *str = SH_STRING_STRING (*str);
329 *start += STRING_START (*str);
330 }
331 *buf = STRING_STRINGBUF (*str);
332}
333
3ee86942
MV
334SCM
335scm_i_substring (SCM str, size_t start, size_t end)
0f2d19dd 336{
17bec545
MW
337 if (start == end)
338 return scm_i_make_string (0, NULL, 0);
339 else
340 {
341 SCM buf;
342 size_t str_start;
343 get_str_buf_start (&str, &buf, &str_start);
344 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
345 SET_STRINGBUF_SHARED (buf);
346 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
347 return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
348 (scm_t_bits)str_start + start,
349 (scm_t_bits) end - start);
350 }
0f2d19dd
JB
351}
352
ed35de72
MV
353SCM
354scm_i_substring_read_only (SCM str, size_t start, size_t end)
355{
17bec545
MW
356 if (start == end)
357 return scm_i_make_string (0, NULL, 1);
358 else
359 {
360 SCM buf;
361 size_t str_start;
362 get_str_buf_start (&str, &buf, &str_start);
363 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
364 SET_STRINGBUF_SHARED (buf);
365 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
366 return scm_double_cell (RO_STRING_TAG, SCM_UNPACK(buf),
367 (scm_t_bits)str_start + start,
368 (scm_t_bits) end - start);
369 }
ed35de72
MV
370}
371
3ee86942
MV
372SCM
373scm_i_substring_copy (SCM str, size_t start, size_t end)
374{
d5b75b6c
MW
375 if (start == end)
376 return scm_i_make_string (0, NULL, 0);
9c44cd45
MG
377 else
378 {
d5b75b6c
MW
379 size_t len = end - start;
380 SCM buf, my_buf, substr;
381 size_t str_start;
382 int wide = 0;
383 get_str_buf_start (&str, &buf, &str_start);
384 if (scm_i_is_narrow_string (str))
385 {
386 my_buf = make_stringbuf (len);
387 memcpy (STRINGBUF_CHARS (my_buf),
388 STRINGBUF_CHARS (buf) + str_start + start, len);
389 }
390 else
391 {
392 my_buf = make_wide_stringbuf (len);
393 u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (my_buf),
394 (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + str_start
395 + start), len);
396 wide = 1;
397 }
398 scm_remember_upto_here_1 (buf);
399 substr = scm_double_cell (STRING_TAG, SCM_UNPACK (my_buf),
400 (scm_t_bits) 0, (scm_t_bits) len);
401 if (wide)
402 scm_i_try_narrow_string (substr);
403 return substr;
9c44cd45 404 }
3ee86942 405}
0f2d19dd 406
e1b29f6a
MV
407SCM
408scm_i_substring_shared (SCM str, size_t start, size_t end)
409{
410 if (start == 0 && end == STRING_LENGTH (str))
411 return str;
17bec545
MW
412 else if (start == end)
413 return scm_i_make_string (0, NULL, 0);
414 else
e1b29f6a
MV
415 {
416 size_t len = end - start;
417 if (IS_SH_STRING (str))
418 {
419 start += STRING_START (str);
420 str = SH_STRING_STRING (str);
421 }
422 return scm_double_cell (SH_STRING_TAG, SCM_UNPACK(str),
423 (scm_t_bits)start, (scm_t_bits) len);
424 }
425}
426
3ee86942
MV
427SCM
428scm_c_substring (SCM str, size_t start, size_t end)
429{
430 validate_substring_args (str, start, end);
431 return scm_i_substring (str, start, end);
432}
ee149d03 433
ed35de72
MV
434SCM
435scm_c_substring_read_only (SCM str, size_t start, size_t end)
436{
437 validate_substring_args (str, start, end);
438 return scm_i_substring_read_only (str, start, end);
439}
440
0f2d19dd 441SCM
3ee86942 442scm_c_substring_copy (SCM str, size_t start, size_t end)
0f2d19dd 443{
3ee86942
MV
444 validate_substring_args (str, start, end);
445 return scm_i_substring_copy (str, start, end);
446}
447
3ee86942
MV
448SCM
449scm_c_substring_shared (SCM str, size_t start, size_t end)
450{
451 validate_substring_args (str, start, end);
452 return scm_i_substring_shared (str, start, end);
453}
0f2d19dd 454
d6c74168 455\f
3ee86942
MV
456/* Internal accessors
457 */
458
50b1996f
MG
459/* Returns the number of characters in STR. This may be different
460 than the memory size of the string storage. */
3ee86942
MV
461size_t
462scm_i_string_length (SCM str)
0f2d19dd 463{
3ee86942 464 return STRING_LENGTH (str);
0f2d19dd
JB
465}
466
50b1996f
MG
467/* True if the string is 'narrow', meaning it has a 8-bit Latin-1
468 encoding. False if it is 'wide', having a 32-bit UCS-4
469 encoding. */
9c44cd45
MG
470int
471scm_i_is_narrow_string (SCM str)
472{
49d09292
MW
473 if (IS_SH_STRING (str))
474 str = SH_STRING_STRING (str);
475
9c44cd45
MG
476 return !STRINGBUF_WIDE (STRING_STRINGBUF (str));
477}
478
587a3355
MG
479/* Try to coerce a string to be narrow. It if is narrow already, do
480 nothing. If it is wide, shrink it to narrow if none of its
481 characters are above 0xFF. Return true if the string is narrow or
482 was made to be narrow. */
483int
484scm_i_try_narrow_string (SCM str)
485{
49d09292
MW
486 if (IS_SH_STRING (str))
487 str = SH_STRING_STRING (str);
488
ba54a202 489 SET_STRING_STRINGBUF (str, narrow_stringbuf (STRING_STRINGBUF (str)));
587a3355
MG
490
491 return scm_i_is_narrow_string (str);
492}
493
100e20c7
LC
494/* Return a pointer to the raw data of the string, which can be either Latin-1
495 or UCS-4 encoded data, depending on `scm_i_is_narrow_string (STR)'. */
496const void *
497scm_i_string_data (SCM str)
498{
499 SCM buf;
500 size_t start;
501 const char *data;
502
503 get_str_buf_start (&str, &buf, &start);
504
505 data = STRINGBUF_CONTENTS (buf);
506 data += start * (scm_i_is_narrow_string (str) ? 1 : 4);
507
508 return data;
509}
510
50b1996f
MG
511/* Returns a pointer to the 8-bit Latin-1 encoded character array of
512 STR. */
3ee86942
MV
513const char *
514scm_i_string_chars (SCM str)
515{
516 SCM buf;
e1b29f6a
MV
517 size_t start;
518 get_str_buf_start (&str, &buf, &start);
9c44cd45 519 if (scm_i_is_narrow_string (str))
f59cf998 520 return (const char *) STRINGBUF_CHARS (buf) + start;
9c44cd45
MG
521 else
522 scm_misc_error (NULL, "Invalid read access of chars of wide string: ~s",
523 scm_list_1 (str));
524 return NULL;
3ee86942 525}
b00418df 526
50b1996f
MG
527/* Returns a pointer to the 32-bit UCS-4 encoded character array of
528 STR. */
9c44cd45
MG
529const scm_t_wchar *
530scm_i_string_wide_chars (SCM str)
531{
532 SCM buf;
533 size_t start;
534
535 get_str_buf_start (&str, &buf, &start);
536 if (!scm_i_is_narrow_string (str))
f59cf998 537 return (const scm_t_wchar *) STRINGBUF_WIDE_CHARS (buf) + start;
9c44cd45
MG
538 else
539 scm_misc_error (NULL, "Invalid read access of chars of narrow string: ~s",
540 scm_list_1 (str));
541}
542
543/* If the buffer in ORIG_STR is shared, copy ORIG_STR's characters to
544 a new string buffer, so that it can be modified without modifying
50b1996f
MG
545 other strings. Also, lock the string mutex. Later, one must call
546 scm_i_string_stop_writing to unlock the mutex. */
9c44cd45
MG
547SCM
548scm_i_string_start_writing (SCM orig_str)
b00418df 549{
ed35de72 550 SCM buf, str = orig_str;
e1b29f6a 551 size_t start;
ed35de72 552
e1b29f6a 553 get_str_buf_start (&str, &buf, &start);
ed35de72
MV
554 if (IS_RO_STRING (str))
555 scm_misc_error (NULL, "string is read-only: ~s", scm_list_1 (orig_str));
556
9de87eea 557 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942
MV
558 if (STRINGBUF_SHARED (buf))
559 {
9c44cd45 560 /* Clone the stringbuf. */
3ee86942
MV
561 size_t len = STRING_LENGTH (str);
562 SCM new_buf;
563
9de87eea 564 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942 565
9c44cd45
MG
566 if (scm_i_is_narrow_string (str))
567 {
568 new_buf = make_stringbuf (len);
569 memcpy (STRINGBUF_CHARS (new_buf),
570 STRINGBUF_CHARS (buf) + STRING_START (str), len);
571
572 }
573 else
574 {
575 new_buf = make_wide_stringbuf (len);
576 u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
577 (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf)
578 + STRING_START (str)), len);
579 }
3ee86942 580
3ee86942
MV
581 SET_STRING_STRINGBUF (str, new_buf);
582 start -= STRING_START (str);
902578f1
LC
583
584 /* FIXME: The following operations are not atomic, so other threads
585 looking at STR may see an inconsistent state. Nevertheless it can't
586 hurt much since (i) accessing STR while it is being mutated can't
587 yield a crash, and (ii) concurrent accesses to STR should be
588 protected by a mutex at the application level. The latter may not
589 apply when STR != ORIG_STR, though. */
3ee86942 590 SET_STRING_START (str, 0);
902578f1 591 SET_STRING_STRINGBUF (str, new_buf);
3ee86942
MV
592
593 buf = new_buf;
594
9de87eea 595 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 596 }
9c44cd45
MG
597 return orig_str;
598}
3ee86942 599
50b1996f 600/* Return a pointer to the 8-bit Latin-1 chars of a string. */
9c44cd45
MG
601char *
602scm_i_string_writable_chars (SCM str)
603{
604 SCM buf;
605 size_t start;
3ee86942 606
9c44cd45
MG
607 get_str_buf_start (&str, &buf, &start);
608 if (scm_i_is_narrow_string (str))
f59cf998 609 return (char *) STRINGBUF_CHARS (buf) + start;
9c44cd45
MG
610 else
611 scm_misc_error (NULL, "Invalid write access of chars of wide string: ~s",
612 scm_list_1 (str));
613 return NULL;
b00418df
DH
614}
615
50b1996f 616/* Return a pointer to the UCS-4 codepoints of a string. */
9c44cd45
MG
617static scm_t_wchar *
618scm_i_string_writable_wide_chars (SCM str)
619{
620 SCM buf;
621 size_t start;
622
623 get_str_buf_start (&str, &buf, &start);
624 if (!scm_i_is_narrow_string (str))
625 return STRINGBUF_WIDE_CHARS (buf) + start;
626 else
1c7b216f 627 scm_misc_error (NULL, "Invalid write access of chars of narrow string: ~s",
9c44cd45 628 scm_list_1 (str));
b00418df
DH
629}
630
50b1996f
MG
631/* Unlock the string mutex that was locked when
632 scm_i_string_start_writing was called. */
3ee86942
MV
633void
634scm_i_string_stop_writing (void)
635{
9de87eea 636 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942 637}
b00418df 638
50b1996f 639/* Return the Xth character of STR as a UCS-4 codepoint. */
9c44cd45
MG
640scm_t_wchar
641scm_i_string_ref (SCM str, size_t x)
642{
643 if (scm_i_is_narrow_string (str))
644 return (scm_t_wchar) (unsigned char) (scm_i_string_chars (str)[x]);
645 else
646 return scm_i_string_wide_chars (str)[x];
647}
648
889975e5
MG
649/* Returns index+1 of the first char in STR that matches C, or
650 0 if the char is not found. */
651int
652scm_i_string_contains_char (SCM str, char ch)
653{
654 size_t i;
655 size_t len = scm_i_string_length (str);
656
657 i = 0;
658 if (scm_i_is_narrow_string (str))
659 {
660 while (i < len)
661 {
662 if (scm_i_string_chars (str)[i] == ch)
663 return i+1;
664 i++;
665 }
666 }
667 else
668 {
669 while (i < len)
670 {
671 if (scm_i_string_wide_chars (str)[i]
672 == (unsigned char) ch)
673 return i+1;
674 i++;
675 }
676 }
677 return 0;
678}
679
3f47e526
MG
680int
681scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr)
682{
683 if (scm_i_is_narrow_string (sstr))
684 {
685 const char *a = scm_i_string_chars (sstr) + start_x;
686 const char *b = cstr;
687 return strncmp (a, b, strlen(b));
688 }
689 else
690 {
691 size_t i;
692 const scm_t_wchar *a = scm_i_string_wide_chars (sstr) + start_x;
693 const char *b = cstr;
694 for (i = 0; i < strlen (b); i++)
695 {
696 if (a[i] != (unsigned char) b[i])
697 return 1;
698 }
699 }
700 return 0;
701}
702
50b1996f 703/* Set the Pth character of STR to UCS-4 codepoint CHR. */
9c44cd45
MG
704void
705scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr)
706{
49d09292
MW
707 if (IS_SH_STRING (str))
708 {
709 p += STRING_START (str);
710 str = SH_STRING_STRING (str);
711 }
712
9c44cd45 713 if (chr > 0xFF && scm_i_is_narrow_string (str))
ba54a202 714 SET_STRING_STRINGBUF (str, wide_stringbuf (STRING_STRINGBUF (str)));
9c44cd45
MG
715
716 if (scm_i_is_narrow_string (str))
717 {
718 char *dst = scm_i_string_writable_chars (str);
587a3355 719 dst[p] = chr;
9c44cd45
MG
720 }
721 else
722 {
723 scm_t_wchar *dst = scm_i_string_writable_wide_chars (str);
724 dst[p] = chr;
725 }
726}
727
ba54a202 728\f
3ee86942 729/* Symbols.
587a3355 730
3ee86942
MV
731 Basic symbol creation and accessing is done here, the rest is in
732 symbols.[hc]. This has been done to keep stringbufs and the
733 internals of strings and string-like objects confined to this file.
734*/
735
736#define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
737
738SCM
6869328b
MV
739scm_i_make_symbol (SCM name, scm_t_bits flags,
740 unsigned long hash, SCM props)
3ee86942
MV
741{
742 SCM buf;
743 size_t start = STRING_START (name);
744 size_t length = STRING_LENGTH (name);
745
746 if (IS_SH_STRING (name))
747 {
748 name = SH_STRING_STRING (name);
749 start += STRING_START (name);
750 }
1a4d7653 751 buf = STRING_STRINGBUF (name);
3ee86942
MV
752
753 if (start == 0 && length == STRINGBUF_LENGTH (buf))
754 {
755 /* reuse buf. */
9de87eea 756 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 757 SET_STRINGBUF_SHARED (buf);
9de87eea 758 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942
MV
759 }
760 else
761 {
762 /* make new buf. */
9c44cd45
MG
763 if (scm_i_is_narrow_string (name))
764 {
765 SCM new_buf = make_stringbuf (length);
766 memcpy (STRINGBUF_CHARS (new_buf),
767 STRINGBUF_CHARS (buf) + start, length);
768 buf = new_buf;
769 }
770 else
771 {
772 SCM new_buf = make_wide_stringbuf (length);
773 u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
774 (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf) + start,
775 length);
776 buf = new_buf;
777 }
3ee86942 778 }
6869328b 779 return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
3ee86942
MV
780 (scm_t_bits) hash, SCM_UNPACK (props));
781}
782
fd0a5bbc
HWN
783SCM
784scm_i_c_make_symbol (const char *name, size_t len,
785 scm_t_bits flags, unsigned long hash, SCM props)
786{
787 SCM buf = make_stringbuf (len);
788 memcpy (STRINGBUF_CHARS (buf), name, len);
789
65619ebe
AW
790 return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
791 (scm_t_bits) hash, SCM_UNPACK (props));
fd0a5bbc
HWN
792}
793
50b1996f
MG
794/* Returns the number of characters in SYM. This may be different
795 from the memory size of SYM. */
3ee86942
MV
796size_t
797scm_i_symbol_length (SCM sym)
0f2d19dd 798{
3ee86942 799 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
0f2d19dd
JB
800}
801
071bb6a8
LC
802size_t
803scm_c_symbol_length (SCM sym)
804#define FUNC_NAME "scm_c_symbol_length"
805{
806 SCM_VALIDATE_SYMBOL (1, sym);
807
808 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
809}
810#undef FUNC_NAME
811
50b1996f
MG
812/* True if the name of SYM is stored as a Latin-1 encoded string.
813 False if it is stored as a 32-bit UCS-4-encoded string. */
9c44cd45
MG
814int
815scm_i_is_narrow_symbol (SCM sym)
816{
817 SCM buf;
818
819 buf = SYMBOL_STRINGBUF (sym);
820 return !STRINGBUF_WIDE (buf);
821}
822
50b1996f
MG
823/* Returns a pointer to the 8-bit Latin-1 encoded character array that
824 contains the name of SYM. */
3ee86942
MV
825const char *
826scm_i_symbol_chars (SCM sym)
827{
9c44cd45
MG
828 SCM buf;
829
830 buf = SYMBOL_STRINGBUF (sym);
831 if (!STRINGBUF_WIDE (buf))
f59cf998 832 return (const char *) STRINGBUF_CHARS (buf);
9c44cd45
MG
833 else
834 scm_misc_error (NULL, "Invalid access of chars of a wide symbol ~S",
835 scm_list_1 (sym));
836}
837
50b1996f
MG
838/* Return a pointer to the 32-bit UCS-4-encoded character array of a
839 symbol's name. */
9c44cd45
MG
840const scm_t_wchar *
841scm_i_symbol_wide_chars (SCM sym)
842{
843 SCM buf;
844
845 buf = SYMBOL_STRINGBUF (sym);
846 if (STRINGBUF_WIDE (buf))
f59cf998 847 return (const scm_t_wchar *) STRINGBUF_WIDE_CHARS (buf);
9c44cd45
MG
848 else
849 scm_misc_error (NULL, "Invalid access of chars of a narrow symbol ~S",
850 scm_list_1 (sym));
3ee86942 851}
1cc91f1b 852
be54b15d 853SCM
3ee86942 854scm_i_symbol_substring (SCM sym, size_t start, size_t end)
be54b15d 855{
3ee86942 856 SCM buf = SYMBOL_STRINGBUF (sym);
9de87eea 857 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 858 SET_STRINGBUF_SHARED (buf);
9de87eea 859 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
fd2b17b9 860 return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
3ee86942
MV
861 (scm_t_bits)start, (scm_t_bits) end - start);
862}
be54b15d 863
50b1996f 864/* Returns the Xth character of symbol SYM as a UCS-4 codepoint. */
9c44cd45
MG
865scm_t_wchar
866scm_i_symbol_ref (SCM sym, size_t x)
867{
868 if (scm_i_is_narrow_symbol (sym))
869 return (scm_t_wchar) (unsigned char) (scm_i_symbol_chars (sym)[x]);
870 else
871 return scm_i_symbol_wide_chars (sym)[x];
872}
873
3ee86942
MV
874/* Debugging
875 */
be54b15d 876
6ce6923b
MG
877SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, (SCM str),
878 "Returns an association list containing debugging information\n"
879 "for @var{str}. The association list has the following entries."
880 "@table @code\n"
881 "@item string\n"
882 "The string itself.\n"
883 "@item start\n"
884 "The start index of the string into its stringbuf\n"
885 "@item length\n"
886 "The length of the string\n"
887 "@item shared\n"
888 "If this string is a substring, it returns its parent string.\n"
889 "Otherwise, it returns @code{#f}\n"
88ed5759
MG
890 "@item read-only\n"
891 "@code{#t} if the string is read-only\n"
6ce6923b
MG
892 "@item stringbuf-chars\n"
893 "A new string containing this string's stringbuf's characters\n"
894 "@item stringbuf-length\n"
895 "The number of characters in this stringbuf\n"
896 "@item stringbuf-shared\n"
897 "@code{#t} if this stringbuf is shared\n"
6ce6923b
MG
898 "@item stringbuf-wide\n"
899 "@code{#t} if this stringbuf's characters are stored in a\n"
900 "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
901 "buffer\n"
902 "@end table")
3ee86942
MV
903#define FUNC_NAME s_scm_sys_string_dump
904{
ba54a202 905 SCM e1, e2, e3, e4, e5, e6, e7, e8, e9;
6ce6923b 906 SCM buf;
3ee86942 907 SCM_VALIDATE_STRING (1, str);
6ce6923b
MG
908
909 /* String info */
4a655e50 910 e1 = scm_cons (scm_from_latin1_symbol ("string"),
6ce6923b 911 str);
4a655e50 912 e2 = scm_cons (scm_from_latin1_symbol ("start"),
6ce6923b 913 scm_from_size_t (STRING_START (str)));
4a655e50 914 e3 = scm_cons (scm_from_latin1_symbol ("length"),
6ce6923b
MG
915 scm_from_size_t (STRING_LENGTH (str)));
916
3ee86942
MV
917 if (IS_SH_STRING (str))
918 {
4a655e50 919 e4 = scm_cons (scm_from_latin1_symbol ("shared"),
6ce6923b
MG
920 SH_STRING_STRING (str));
921 buf = STRING_STRINGBUF (SH_STRING_STRING (str));
3ee86942
MV
922 }
923 else
924 {
4a655e50 925 e4 = scm_cons (scm_from_latin1_symbol ("shared"),
6ce6923b
MG
926 SCM_BOOL_F);
927 buf = STRING_STRINGBUF (str);
3ee86942 928 }
9c44cd45 929
88ed5759 930 if (IS_RO_STRING (str))
4a655e50 931 e5 = scm_cons (scm_from_latin1_symbol ("read-only"),
88ed5759
MG
932 SCM_BOOL_T);
933 else
4a655e50 934 e5 = scm_cons (scm_from_latin1_symbol ("read-only"),
88ed5759 935 SCM_BOOL_F);
587a3355 936
6ce6923b 937 /* Stringbuf info */
6ce6923b
MG
938 if (!STRINGBUF_WIDE (buf))
939 {
940 size_t len = STRINGBUF_LENGTH (buf);
941 char *cbuf;
190d4b0d 942 SCM sbc = scm_i_make_string (len, &cbuf, 0);
6ce6923b 943 memcpy (cbuf, STRINGBUF_CHARS (buf), len);
4a655e50 944 e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-chars"),
6ce6923b 945 sbc);
3ee86942 946 }
6ce6923b
MG
947 else
948 {
949 size_t len = STRINGBUF_LENGTH (buf);
950 scm_t_wchar *cbuf;
190d4b0d 951 SCM sbc = scm_i_make_wide_string (len, &cbuf, 0);
6ce6923b
MG
952 u32_cpy ((scm_t_uint32 *) cbuf,
953 (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf), len);
4a655e50 954 e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-chars"),
6ce6923b
MG
955 sbc);
956 }
4a655e50 957 e7 = scm_cons (scm_from_latin1_symbol ("stringbuf-length"),
6ce6923b
MG
958 scm_from_size_t (STRINGBUF_LENGTH (buf)));
959 if (STRINGBUF_SHARED (buf))
4a655e50 960 e8 = scm_cons (scm_from_latin1_symbol ("stringbuf-shared"),
6ce6923b
MG
961 SCM_BOOL_T);
962 else
4a655e50 963 e8 = scm_cons (scm_from_latin1_symbol ("stringbuf-shared"),
6ce6923b 964 SCM_BOOL_F);
6ce6923b 965 if (STRINGBUF_WIDE (buf))
4a655e50 966 e9 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"),
ba54a202 967 SCM_BOOL_T);
6ce6923b 968 else
4a655e50 969 e9 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"),
ba54a202 970 SCM_BOOL_F);
6ce6923b 971
ba54a202 972 return scm_list_n (e1, e2, e3, e4, e5, e6, e7, e8, e9, SCM_UNDEFINED);
3ee86942
MV
973}
974#undef FUNC_NAME
975
6ce6923b
MG
976SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, (SCM sym),
977 "Returns an association list containing debugging information\n"
978 "for @var{sym}. The association list has the following entries."
979 "@table @code\n"
980 "@item symbol\n"
981 "The symbol itself\n"
982 "@item hash\n"
983 "Its hash value\n"
88ed5759
MG
984 "@item interned\n"
985 "@code{#t} if it is an interned symbol\n"
6ce6923b
MG
986 "@item stringbuf-chars\n"
987 "A new string containing this symbols's stringbuf's characters\n"
988 "@item stringbuf-length\n"
989 "The number of characters in this stringbuf\n"
990 "@item stringbuf-shared\n"
991 "@code{#t} if this stringbuf is shared\n"
6ce6923b
MG
992 "@item stringbuf-wide\n"
993 "@code{#t} if this stringbuf's characters are stored in a\n"
994 "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
995 "buffer\n"
996 "@end table")
3ee86942
MV
997#define FUNC_NAME s_scm_sys_symbol_dump
998{
ba54a202 999 SCM e1, e2, e3, e4, e5, e6, e7;
6ce6923b 1000 SCM buf;
3ee86942 1001 SCM_VALIDATE_SYMBOL (1, sym);
4a655e50 1002 e1 = scm_cons (scm_from_latin1_symbol ("symbol"),
6ce6923b 1003 sym);
4a655e50 1004 e2 = scm_cons (scm_from_latin1_symbol ("hash"),
6ce6923b 1005 scm_from_ulong (scm_i_symbol_hash (sym)));
4a655e50 1006 e3 = scm_cons (scm_from_latin1_symbol ("interned"),
88ed5759 1007 scm_symbol_interned_p (sym));
6ce6923b
MG
1008 buf = SYMBOL_STRINGBUF (sym);
1009
1010 /* Stringbuf info */
6ce6923b
MG
1011 if (!STRINGBUF_WIDE (buf))
1012 {
1013 size_t len = STRINGBUF_LENGTH (buf);
1014 char *cbuf;
190d4b0d 1015 SCM sbc = scm_i_make_string (len, &cbuf, 0);
6ce6923b 1016 memcpy (cbuf, STRINGBUF_CHARS (buf), len);
4a655e50 1017 e4 = scm_cons (scm_from_latin1_symbol ("stringbuf-chars"),
6ce6923b
MG
1018 sbc);
1019 }
9c44cd45 1020 else
6ce6923b
MG
1021 {
1022 size_t len = STRINGBUF_LENGTH (buf);
1023 scm_t_wchar *cbuf;
190d4b0d 1024 SCM sbc = scm_i_make_wide_string (len, &cbuf, 0);
6ce6923b
MG
1025 u32_cpy ((scm_t_uint32 *) cbuf,
1026 (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf), len);
4a655e50 1027 e4 = scm_cons (scm_from_latin1_symbol ("stringbuf-chars"),
6ce6923b
MG
1028 sbc);
1029 }
4a655e50 1030 e5 = scm_cons (scm_from_latin1_symbol ("stringbuf-length"),
6ce6923b
MG
1031 scm_from_size_t (STRINGBUF_LENGTH (buf)));
1032 if (STRINGBUF_SHARED (buf))
4a655e50 1033 e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-shared"),
6ce6923b
MG
1034 SCM_BOOL_T);
1035 else
4a655e50 1036 e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-shared"),
6ce6923b 1037 SCM_BOOL_F);
6ce6923b 1038 if (STRINGBUF_WIDE (buf))
4a655e50 1039 e7 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"),
6ce6923b
MG
1040 SCM_BOOL_T);
1041 else
4a655e50 1042 e7 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"),
6ce6923b 1043 SCM_BOOL_F);
ba54a202 1044 return scm_list_n (e1, e2, e3, e4, e5, e6, e7, SCM_UNDEFINED);
6ce6923b 1045
3ee86942
MV
1046}
1047#undef FUNC_NAME
1048
56a3dcd4 1049#ifdef SCM_STRING_LENGTH_HISTOGRAM
6ce6923b 1050
9c44cd45 1051SCM_DEFINE (scm_sys_stringbuf_hist, "%stringbuf-hist", 0, 0, 0, (void), "")
e1b29f6a 1052#define FUNC_NAME s_scm_sys_stringbuf_hist
3ee86942
MV
1053{
1054 int i;
1055 for (i = 0; i < 1000; i++)
1056 if (lenhist[i])
1057 fprintf (stderr, " %3d: %u\n", i, lenhist[i]);
1058 fprintf (stderr, ">999: %u\n", lenhist[1000]);
1059 return SCM_UNSPECIFIED;
be54b15d
DH
1060}
1061#undef FUNC_NAME
1062
3ee86942
MV
1063#endif
1064
1065\f
1066
1067SCM_DEFINE (scm_string_p, "string?", 1, 0, 0,
1068 (SCM obj),
1069 "Return @code{#t} if @var{obj} is a string, else @code{#f}.")
1070#define FUNC_NAME s_scm_string_p
1071{
1072 return scm_from_bool (IS_STRING (obj));
1073}
1074#undef FUNC_NAME
1075
1076
1077SCM_REGISTER_PROC (s_scm_list_to_string, "list->string", 1, 0, 0, scm_string);
1078
1079SCM_DEFINE (scm_string, "string", 0, 0, 1,
1080 (SCM chrs),
1081 "@deffnx {Scheme Procedure} list->string chrs\n"
1082 "Return a newly allocated string composed of the arguments,\n"
1083 "@var{chrs}.")
1084#define FUNC_NAME s_scm_string
1085{
9aa27c1a 1086 SCM result = SCM_BOOL_F;
9c44cd45 1087 SCM rest;
3ee86942 1088 size_t len;
9c44cd45
MG
1089 size_t p = 0;
1090 long i;
9aa27c1a 1091 int wide = 0;
3ee86942 1092
9c44cd45
MG
1093 /* Verify that this is a list of chars. */
1094 i = scm_ilength (chrs);
3c7cf7f5 1095 SCM_ASSERT (i >= 0, chrs, SCM_ARG1, FUNC_NAME);
3ee86942 1096
9c44cd45
MG
1097 len = (size_t) i;
1098 rest = chrs;
3ee86942 1099
9c44cd45 1100 while (len > 0 && scm_is_pair (rest))
3ee86942 1101 {
9c44cd45 1102 SCM elt = SCM_CAR (rest);
3ee86942 1103 SCM_VALIDATE_CHAR (SCM_ARGn, elt);
9aa27c1a
MG
1104 if (SCM_CHAR (elt) > 0xFF)
1105 wide = 1;
9c44cd45
MG
1106 rest = SCM_CDR (rest);
1107 len--;
1108 scm_remember_upto_here_1 (elt);
1109 }
1110
1111 /* Construct a string containing this list of chars. */
1112 len = (size_t) i;
1113 rest = chrs;
1114
9aa27c1a 1115 if (wide == 0)
9c44cd45 1116 {
56a3dcd4
LC
1117 char *buf;
1118
190d4b0d 1119 result = scm_i_make_string (len, NULL, 0);
9aa27c1a 1120 result = scm_i_string_start_writing (result);
56a3dcd4 1121 buf = scm_i_string_writable_chars (result);
9aa27c1a
MG
1122 while (len > 0 && scm_is_pair (rest))
1123 {
1124 SCM elt = SCM_CAR (rest);
1125 buf[p] = (unsigned char) SCM_CHAR (elt);
1126 p++;
1127 rest = SCM_CDR (rest);
1128 len--;
1129 scm_remember_upto_here_1 (elt);
1130 }
1131 }
1132 else
1133 {
56a3dcd4
LC
1134 scm_t_wchar *buf;
1135
190d4b0d 1136 result = scm_i_make_wide_string (len, NULL, 0);
9aa27c1a 1137 result = scm_i_string_start_writing (result);
56a3dcd4 1138 buf = scm_i_string_writable_wide_chars (result);
9aa27c1a
MG
1139 while (len > 0 && scm_is_pair (rest))
1140 {
1141 SCM elt = SCM_CAR (rest);
1142 buf[p] = SCM_CHAR (elt);
1143 p++;
1144 rest = SCM_CDR (rest);
1145 len--;
1146 scm_remember_upto_here_1 (elt);
1147 }
3ee86942 1148 }
9c44cd45
MG
1149 scm_i_string_stop_writing ();
1150
3ee86942
MV
1151 if (len > 0)
1152 scm_misc_error (NULL, "list changed while constructing string", SCM_EOL);
9c44cd45 1153 if (!scm_is_null (rest))
3ee86942
MV
1154 scm_wrong_type_arg_msg (NULL, 0, chrs, "proper list");
1155
1156 return result;
1157}
1158#undef FUNC_NAME
be54b15d 1159
3b3b36dd 1160SCM_DEFINE (scm_make_string, "make-string", 1, 1, 0,
6fa73e72 1161 (SCM k, SCM chr),
0d26a824
MG
1162 "Return a newly allocated string of\n"
1163 "length @var{k}. If @var{chr} is given, then all elements of\n"
1164 "the string are initialized to @var{chr}, otherwise the contents\n"
b7e64f8b 1165 "of the string are all set to @code{#\nul}.")
1bbd0b84 1166#define FUNC_NAME s_scm_make_string
0f2d19dd 1167{
3ee86942
MV
1168 return scm_c_make_string (scm_to_size_t (k), chr);
1169}
1170#undef FUNC_NAME
1171
1172SCM
1173scm_c_make_string (size_t len, SCM chr)
1174#define FUNC_NAME NULL
1175{
9c44cd45 1176 size_t p;
3ef6650d 1177 char *contents = NULL;
190d4b0d 1178 SCM res = scm_i_make_string (len, &contents, 0);
cb0d8be2 1179
3ef6650d
AW
1180 /* If no char is given, initialize string contents to NULL. */
1181 if (SCM_UNBNDP (chr))
1182 memset (contents, 0, len);
1183 else
e11e83f3 1184 {
3ee86942 1185 SCM_VALIDATE_CHAR (0, chr);
9c44cd45
MG
1186 res = scm_i_string_start_writing (res);
1187 for (p = 0; p < len; p++)
1188 scm_i_string_set_x (res, p, SCM_CHAR (chr));
1189 scm_i_string_stop_writing ();
0f2d19dd 1190 }
e11e83f3
MV
1191
1192 return res;
0f2d19dd 1193}
1bbd0b84 1194#undef FUNC_NAME
0f2d19dd 1195
3b3b36dd 1196SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0,
0d26a824
MG
1197 (SCM string),
1198 "Return the number of characters in @var{string}.")
1bbd0b84 1199#define FUNC_NAME s_scm_string_length
0f2d19dd 1200{
d1ca2c64 1201 SCM_VALIDATE_STRING (1, string);
3ee86942 1202 return scm_from_size_t (STRING_LENGTH (string));
0f2d19dd 1203}
1bbd0b84 1204#undef FUNC_NAME
0f2d19dd 1205
f8ba2bb9 1206SCM_DEFINE (scm_string_bytes_per_char, "string-bytes-per-char", 1, 0, 0,
9c44cd45
MG
1207 (SCM string),
1208 "Return the bytes used to represent a character in @var{string}."
1209 "This will return 1 or 4.")
f8ba2bb9 1210#define FUNC_NAME s_scm_string_bytes_per_char
9c44cd45
MG
1211{
1212 SCM_VALIDATE_STRING (1, string);
1213 if (!scm_i_is_narrow_string (string))
1214 return scm_from_int (4);
1215
1216 return scm_from_int (1);
1217}
1218#undef FUNC_NAME
1219
3ee86942
MV
1220size_t
1221scm_c_string_length (SCM string)
1222{
1223 if (!IS_STRING (string))
1224 scm_wrong_type_arg_msg (NULL, 0, string, "string");
1225 return STRING_LENGTH (string);
1226}
1227
bd9e24b3 1228SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0,
6fa73e72 1229 (SCM str, SCM k),
9c44cd45
MG
1230 "Return character @var{k} of @var{str} using zero-origin\n"
1231 "indexing. @var{k} must be a valid index of @var{str}.")
1bbd0b84 1232#define FUNC_NAME s_scm_string_ref
0f2d19dd 1233{
3ae3166b 1234 size_t len;
a55c2b68 1235 unsigned long idx;
bd9e24b3 1236
d1ca2c64 1237 SCM_VALIDATE_STRING (1, str);
3ae3166b
LC
1238
1239 len = scm_i_string_length (str);
1240 if (SCM_LIKELY (len > 0))
1241 idx = scm_to_unsigned_integer (k, 0, len - 1);
1242 else
1243 scm_out_of_range (NULL, k);
1244
9c44cd45
MG
1245 if (scm_i_is_narrow_string (str))
1246 return SCM_MAKE_CHAR (scm_i_string_chars (str)[idx]);
1247 else
1248 return SCM_MAKE_CHAR (scm_i_string_wide_chars (str)[idx]);
0f2d19dd 1249}
1bbd0b84 1250#undef FUNC_NAME
0f2d19dd 1251
3ee86942
MV
1252SCM
1253scm_c_string_ref (SCM str, size_t p)
1254{
1255 if (p >= scm_i_string_length (str))
1256 scm_out_of_range (NULL, scm_from_size_t (p));
9c44cd45
MG
1257 if (scm_i_is_narrow_string (str))
1258 return SCM_MAKE_CHAR (scm_i_string_chars (str)[p]);
1259 else
1260 return SCM_MAKE_CHAR (scm_i_string_wide_chars (str)[p]);
1261
3ee86942 1262}
f0942910 1263
3b3b36dd 1264SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
6fa73e72 1265 (SCM str, SCM k, SCM chr),
9c44cd45
MG
1266 "Store @var{chr} in element @var{k} of @var{str} and return\n"
1267 "an unspecified value. @var{k} must be a valid index of\n"
1268 "@var{str}.")
1bbd0b84 1269#define FUNC_NAME s_scm_string_set_x
0f2d19dd 1270{
3ae3166b 1271 size_t len;
a55c2b68
MV
1272 unsigned long idx;
1273
f0942910 1274 SCM_VALIDATE_STRING (1, str);
3ae3166b
LC
1275
1276 len = scm_i_string_length (str);
1277 if (SCM_LIKELY (len > 0))
1278 idx = scm_to_unsigned_integer (k, 0, len - 1);
1279 else
1280 scm_out_of_range (NULL, k);
1281
34d19ef6 1282 SCM_VALIDATE_CHAR (3, chr);
9c44cd45
MG
1283 str = scm_i_string_start_writing (str);
1284 scm_i_string_set_x (str, idx, SCM_CHAR (chr));
1285 scm_i_string_stop_writing ();
1286
0f2d19dd
JB
1287 return SCM_UNSPECIFIED;
1288}
1bbd0b84 1289#undef FUNC_NAME
0f2d19dd 1290
3ee86942
MV
1291void
1292scm_c_string_set_x (SCM str, size_t p, SCM chr)
1293{
1294 if (p >= scm_i_string_length (str))
1295 scm_out_of_range (NULL, scm_from_size_t (p));
9c44cd45
MG
1296 str = scm_i_string_start_writing (str);
1297 scm_i_string_set_x (str, p, SCM_CHAR (chr));
1298 scm_i_string_stop_writing ();
3ee86942 1299}
0f2d19dd 1300
3b3b36dd 1301SCM_DEFINE (scm_substring, "substring", 2, 1, 0,
0d26a824
MG
1302 (SCM str, SCM start, SCM end),
1303 "Return a newly allocated string formed from the characters\n"
1304 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1305 "ending with index @var{end} (exclusive).\n"
1306 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1307 "exact integers satisfying:\n\n"
1308 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1bbd0b84 1309#define FUNC_NAME s_scm_substring
0f2d19dd 1310{
3ee86942 1311 size_t len, from, to;
685c0d71 1312
d1ca2c64 1313 SCM_VALIDATE_STRING (1, str);
3ee86942
MV
1314 len = scm_i_string_length (str);
1315 from = scm_to_unsigned_integer (start, 0, len);
a55c2b68 1316 if (SCM_UNBNDP (end))
3ee86942 1317 to = len;
a55c2b68 1318 else
3ee86942
MV
1319 to = scm_to_unsigned_integer (end, from, len);
1320 return scm_i_substring (str, from, to);
0f2d19dd 1321}
1bbd0b84 1322#undef FUNC_NAME
0f2d19dd 1323
ed35de72
MV
1324SCM_DEFINE (scm_substring_read_only, "substring/read-only", 2, 1, 0,
1325 (SCM str, SCM start, SCM end),
1326 "Return a newly allocated string formed from the characters\n"
1327 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1328 "ending with index @var{end} (exclusive).\n"
1329 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1330 "exact integers satisfying:\n"
1331 "\n"
1332 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).\n"
1333 "\n"
1334 "The returned string is read-only.\n")
1335#define FUNC_NAME s_scm_substring_read_only
1336{
1337 size_t len, from, to;
1338
1339 SCM_VALIDATE_STRING (1, str);
1340 len = scm_i_string_length (str);
1341 from = scm_to_unsigned_integer (start, 0, len);
1342 if (SCM_UNBNDP (end))
1343 to = len;
1344 else
1345 to = scm_to_unsigned_integer (end, from, len);
1346 return scm_i_substring_read_only (str, from, to);
1347}
1348#undef FUNC_NAME
1349
3ee86942
MV
1350SCM_DEFINE (scm_substring_copy, "substring/copy", 2, 1, 0,
1351 (SCM str, SCM start, SCM end),
1352 "Return a newly allocated string formed from the characters\n"
1353 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1354 "ending with index @var{end} (exclusive).\n"
1355 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1356 "exact integers satisfying:\n\n"
1357 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1358#define FUNC_NAME s_scm_substring_copy
1359{
e1b29f6a
MV
1360 /* For the Scheme version, START is mandatory, but for the C
1361 version, it is optional. See scm_string_copy in srfi-13.c for a
1362 rationale.
1363 */
1364
1365 size_t from, to;
3ee86942
MV
1366
1367 SCM_VALIDATE_STRING (1, str);
e1b29f6a
MV
1368 scm_i_get_substring_spec (scm_i_string_length (str),
1369 start, &from, end, &to);
3ee86942
MV
1370 return scm_i_substring_copy (str, from, to);
1371}
1372#undef FUNC_NAME
1373
1374SCM_DEFINE (scm_substring_shared, "substring/shared", 2, 1, 0,
1375 (SCM str, SCM start, SCM end),
1376 "Return string that indirectly refers to the characters\n"
1377 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1378 "ending with index @var{end} (exclusive).\n"
1379 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1380 "exact integers satisfying:\n\n"
1381 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1382#define FUNC_NAME s_scm_substring_shared
1383{
1384 size_t len, from, to;
1385
1386 SCM_VALIDATE_STRING (1, str);
1387 len = scm_i_string_length (str);
1388 from = scm_to_unsigned_integer (start, 0, len);
1389 if (SCM_UNBNDP (end))
1390 to = len;
1391 else
1392 to = scm_to_unsigned_integer (end, from, len);
1393 return scm_i_substring_shared (str, from, to);
1394}
1395#undef FUNC_NAME
685c0d71 1396
3b3b36dd 1397SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
6fa73e72 1398 (SCM args),
9c44cd45 1399 "Return a newly allocated string whose characters form the\n"
0d26a824 1400 "concatenation of the given strings, @var{args}.")
1bbd0b84 1401#define FUNC_NAME s_scm_string_append
0f2d19dd
JB
1402{
1403 SCM res;
9c44cd45
MG
1404 size_t len = 0;
1405 int wide = 0;
c829a427 1406 SCM l, s;
bd4911ef 1407 size_t i;
9909c395
MG
1408 union
1409 {
1410 char *narrow;
1411 scm_t_wchar *wide;
1412 } data;
af45e3b0
DH
1413
1414 SCM_VALIDATE_REST_ARGUMENT (args);
9c44cd45 1415 for (l = args; !scm_is_null (l); l = SCM_CDR (l))
c829a427
MV
1416 {
1417 s = SCM_CAR (l);
1418 SCM_VALIDATE_STRING (SCM_ARGn, s);
9c44cd45
MG
1419 len += scm_i_string_length (s);
1420 if (!scm_i_is_narrow_string (s))
1421 wide = 1;
c829a427 1422 }
9909c395 1423 data.narrow = NULL;
9c44cd45 1424 if (!wide)
190d4b0d 1425 res = scm_i_make_string (len, &data.narrow, 0);
9c44cd45 1426 else
190d4b0d 1427 res = scm_i_make_wide_string (len, &data.wide, 0);
9c44cd45
MG
1428
1429 for (l = args; !scm_is_null (l); l = SCM_CDR (l))
c829a427 1430 {
edea856c 1431 size_t len;
c829a427 1432 s = SCM_CAR (l);
3ee86942 1433 SCM_VALIDATE_STRING (SCM_ARGn, s);
edea856c 1434 len = scm_i_string_length (s);
9c44cd45
MG
1435 if (!wide)
1436 {
9909c395
MG
1437 memcpy (data.narrow, scm_i_string_chars (s), len);
1438 data.narrow += len;
9c44cd45
MG
1439 }
1440 else
1441 {
1442 if (scm_i_is_narrow_string (s))
1443 {
1444 for (i = 0; i < scm_i_string_length (s); i++)
9909c395 1445 data.wide[i] = (unsigned char) scm_i_string_chars (s)[i];
9c44cd45
MG
1446 }
1447 else
9909c395 1448 u32_cpy ((scm_t_uint32 *) data.wide,
9c44cd45 1449 (scm_t_uint32 *) scm_i_string_wide_chars (s), len);
9909c395 1450 data.wide += len;
9c44cd45 1451 }
c829a427
MV
1452 scm_remember_upto_here_1 (s);
1453 }
0f2d19dd
JB
1454 return res;
1455}
1bbd0b84 1456#undef FUNC_NAME
0f2d19dd 1457
24933780 1458
a3d7d5d5 1459\f
c62da8f8 1460/* Charset conversion error handling. */
a3d7d5d5
LC
1461
1462SCM_SYMBOL (scm_encoding_error_key, "encoding-error");
c62da8f8
LC
1463SCM_SYMBOL (scm_decoding_error_key, "decoding-error");
1464
6851d3be
LC
1465/* Raise an exception informing that character CHR could not be written
1466 to PORT in its current encoding. */
d14418a5 1467void
ef7e4ba3 1468scm_encoding_error (const char *subr, int err, const char *message,
6851d3be 1469 SCM port, SCM chr)
ef7e4ba3 1470{
ef7e4ba3 1471 scm_throw (scm_encoding_error_key,
d050ef66
AW
1472 scm_list_n (scm_from_latin1_string (subr),
1473 scm_from_latin1_string (message),
ef7e4ba3 1474 scm_from_int (err),
6851d3be 1475 port, chr,
ef7e4ba3 1476 SCM_UNDEFINED));
a3d7d5d5
LC
1477}
1478
c62da8f8
LC
1479/* Raise an exception informing of an encoding error on PORT. This
1480 means that a character could not be written in PORT's encoding. */
1481void
1482scm_decoding_error (const char *subr, int err, const char *message, SCM port)
1483{
1484 scm_throw (scm_decoding_error_key,
d050ef66
AW
1485 scm_list_n (scm_from_latin1_string (subr),
1486 scm_from_latin1_string (message),
c62da8f8
LC
1487 scm_from_int (err),
1488 port,
1489 SCM_UNDEFINED));
1490}
1491
1492\f
1493/* String conversion to/from C. */
1494
fac32b51 1495SCM
587a3355
MG
1496scm_from_stringn (const char *str, size_t len, const char *encoding,
1497 scm_t_string_failed_conversion_handler handler)
1498{
1499 size_t u32len, i;
1500 scm_t_wchar *u32;
1501 int wide = 0;
1502 SCM res;
1503
d40e1ca8 1504 /* The order of these checks is important. */
a574564c 1505 if (!str && len != 0)
d40e1ca8
AW
1506 scm_misc_error ("scm_from_stringn", "NULL string pointer", SCM_EOL);
1507 if (len == (size_t) -1)
1508 len = strlen (str);
fac32b51 1509
75321259 1510 if (encoding == NULL || len == 0)
889975e5 1511 {
75321259 1512 /* If encoding is null (or the string is empty), use Latin-1. */
889975e5 1513 char *buf;
190d4b0d 1514 res = scm_i_make_string (len, &buf, 0);
889975e5
MG
1515 memcpy (buf, str, len);
1516 return res;
1517 }
1518
587a3355
MG
1519 u32len = 0;
1520 u32 = (scm_t_wchar *) u32_conv_from_encoding (encoding,
1521 (enum iconv_ilseq_handler)
1522 handler,
1523 str, len,
1524 NULL,
1525 NULL, &u32len);
1526
ef7e4ba3 1527 if (SCM_UNLIKELY (u32 == NULL))
587a3355 1528 {
ef7e4ba3
LC
1529 /* Raise an error and pass the raw C string as a bytevector to the `throw'
1530 handler. */
1531 SCM bv;
1532 signed char *buf;
1533
1534 buf = scm_gc_malloc_pointerless (len, "bytevector");
1535 memcpy (buf, str, len);
fb031aba 1536 bv = scm_c_take_gc_bytevector (buf, len);
ef7e4ba3 1537
c62da8f8
LC
1538 scm_decoding_error (__func__, errno,
1539 "input locale conversion error", bv);
587a3355
MG
1540 }
1541
1542 i = 0;
1543 while (i < u32len)
1544 if (u32[i++] > 0xFF)
1545 {
1546 wide = 1;
1547 break;
1548 }
1549
1550 if (!wide)
1551 {
1552 char *dst;
190d4b0d 1553 res = scm_i_make_string (u32len, &dst, 0);
587a3355
MG
1554 for (i = 0; i < u32len; i ++)
1555 dst[i] = (unsigned char) u32[i];
1556 dst[u32len] = '\0';
1557 }
1558 else
1559 {
1560 scm_t_wchar *wdst;
190d4b0d 1561 res = scm_i_make_wide_string (u32len, &wdst, 0);
587a3355
MG
1562 u32_cpy ((scm_t_uint32 *) wdst, (scm_t_uint32 *) u32, u32len);
1563 wdst[u32len] = 0;
1564 }
1565
1566 free (u32);
1567 return res;
1568}
1569
cf313a94 1570SCM
d40e1ca8 1571scm_from_locale_string (const char *str)
cf313a94 1572{
d40e1ca8 1573 return scm_from_locale_stringn (str, -1);
cf313a94
MG
1574}
1575
c829a427
MV
1576SCM
1577scm_from_locale_stringn (const char *str, size_t len)
1578{
95f5e303 1579 return scm_from_stringn (str, len, locale_charset (),
b22e94db 1580 scm_i_default_port_conversion_handler ());
c829a427 1581}
4d4528e7 1582
c829a427 1583SCM
d40e1ca8 1584scm_from_latin1_string (const char *str)
4d4528e7 1585{
d40e1ca8
AW
1586 return scm_from_latin1_stringn (str, -1);
1587}
9c44cd45 1588
d40e1ca8
AW
1589SCM
1590scm_from_latin1_stringn (const char *str, size_t len)
1591{
e9a35a96
LC
1592 char *buf;
1593 SCM result;
1594
1595 if (len == (size_t) -1)
1596 len = strlen (str);
1597
1598 /* Make a narrow string and copy STR as is. */
190d4b0d 1599 result = scm_i_make_string (len, &buf, 0);
e9a35a96
LC
1600 memcpy (buf, str, len);
1601
1602 return result;
c829a427 1603}
4d4528e7 1604
587a3355 1605SCM
d40e1ca8 1606scm_from_utf8_string (const char *str)
587a3355 1607{
d40e1ca8
AW
1608 return scm_from_utf8_stringn (str, -1);
1609}
1610
1611SCM
1612scm_from_utf8_stringn (const char *str, size_t len)
1613{
1614 return scm_from_stringn (str, len, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
587a3355
MG
1615}
1616
647dc1ac
LC
1617SCM
1618scm_from_utf32_string (const scm_t_wchar *str)
1619{
1620 return scm_from_utf32_stringn (str, -1);
1621}
1622
1623SCM
1624scm_from_utf32_stringn (const scm_t_wchar *str, size_t len)
1625{
1626 SCM result;
1627 scm_t_wchar *buf;
1628
1629 if (len == (size_t) -1)
1630 len = u32_strlen ((uint32_t *) str);
1631
190d4b0d 1632 result = scm_i_make_wide_string (len, &buf, 0);
647dc1ac
LC
1633 memcpy (buf, str, len * sizeof (scm_t_wchar));
1634 scm_i_try_narrow_string (result);
1635
1636 return result;
1637}
1638
50b1996f
MG
1639/* Create a new scheme string from the C string STR. The memory of
1640 STR may be used directly as storage for the new string. */
13a94556
LC
1641/* FIXME: GC-wise, the only way to use the memory area pointed to by STR
1642 would be to register a finalizer to eventually free(3) STR, which isn't
1643 worth it. Should we just deprecate the `scm_take_' functions? */
c829a427
MV
1644SCM
1645scm_take_locale_stringn (char *str, size_t len)
1646{
13a94556 1647 SCM res;
48ddf0d9 1648
13a94556
LC
1649 res = scm_from_locale_stringn (str, len);
1650 free (str);
c829a427 1651
c829a427
MV
1652 return res;
1653}
1654
48ddf0d9
KR
1655SCM
1656scm_take_locale_string (char *str)
1657{
1658 return scm_take_locale_stringn (str, -1);
1659}
1660
f1ee6d54
LC
1661/* Change libunistring escapes (`\uXXXX' and `\UXXXXXXXX') in BUF, a
1662 *LENP-byte locale-encoded string, to `\xXX', `\uXXXX', or `\UXXXXXX'.
31d4d02b
LC
1663 Set *LENP to the size of the resulting string.
1664
1665 FIXME: This is a hack we should get rid of. See
1666 <http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00004.html>
1667 for details. */
1668static void
1669unistring_escapes_to_guile_escapes (char *buf, size_t *lenp)
9c44cd45
MG
1670{
1671 char *before, *after;
1672 size_t i, j;
1673
4ff2b9f4
LC
1674 before = buf;
1675 after = buf;
9c44cd45
MG
1676 i = 0;
1677 j = 0;
1678 while (i < *lenp)
1679 {
1680 if ((i <= *lenp - 6)
1681 && before[i] == '\\'
1682 && before[i + 1] == 'u'
1683 && before[i + 2] == '0' && before[i + 3] == '0')
1684 {
1685 /* Convert \u00NN to \xNN */
1686 after[j] = '\\';
1687 after[j + 1] = 'x';
30a6b9ca
MG
1688 after[j + 2] = tolower ((int) before[i + 4]);
1689 after[j + 3] = tolower ((int) before[i + 5]);
9c44cd45
MG
1690 i += 6;
1691 j += 4;
1692 }
1693 else if ((i <= *lenp - 10)
1694 && before[i] == '\\'
1695 && before[i + 1] == 'U'
1696 && before[i + 2] == '0' && before[i + 3] == '0')
1697 {
1698 /* Convert \U00NNNNNN to \UNNNNNN */
1699 after[j] = '\\';
1700 after[j + 1] = 'U';
30a6b9ca
MG
1701 after[j + 2] = tolower ((int) before[i + 4]);
1702 after[j + 3] = tolower ((int) before[i + 5]);
1703 after[j + 4] = tolower ((int) before[i + 6]);
1704 after[j + 5] = tolower ((int) before[i + 7]);
1705 after[j + 6] = tolower ((int) before[i + 8]);
1706 after[j + 7] = tolower ((int) before[i + 9]);
9c44cd45
MG
1707 i += 10;
1708 j += 8;
1709 }
1710 else
1711 {
1712 after[j] = before[i];
1713 i++;
1714 j++;
1715 }
1716 }
1717 *lenp = j;
9c44cd45
MG
1718}
1719
f1ee6d54
LC
1720/* Change libunistring escapes (`\uXXXX' and `\UXXXXXXXX') in BUF, a
1721 *LENP-byte locale-encoded string, to `\xXXXX;'. Set *LEN to the size
1722 of the resulting string. BUF must be large enough to handle the
1723 worst case when `\uXXXX' escapes (6 characters) are replaced by
1724 `\xXXXX;' (7 characters). */
31d4d02b
LC
1725static void
1726unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
d31b9519
MG
1727{
1728 char *before, *after;
1729 size_t i, j;
1730 /* The worst case is if the input string contains all 4-digit hex escapes.
1731 "\uXXXX" (six characters) becomes "\xXXXX;" (seven characters) */
1732 size_t max_out_len = (*lenp * 7) / 6 + 1;
1733 size_t nzeros, ndigits;
1734
4ff2b9f4 1735 before = buf;
d31b9519
MG
1736 after = alloca (max_out_len);
1737 i = 0;
1738 j = 0;
1739 while (i < *lenp)
1740 {
1741 if (((i <= *lenp - 6) && before[i] == '\\' && before[i + 1] == 'u')
1742 || ((i <= *lenp - 10) && before[i] == '\\' && before[i + 1] == 'U'))
1743 {
1744 if (before[i + 1] == 'u')
1745 ndigits = 4;
1746 else if (before[i + 1] == 'U')
1747 ndigits = 8;
1748 else
1749 abort ();
1750
1751 /* Add the R6RS hex escape initial sequence. */
1752 after[j] = '\\';
1753 after[j + 1] = 'x';
1754
1755 /* Move string positions to the start of the hex numbers. */
1756 i += 2;
1757 j += 2;
1758
1759 /* Find the number of initial zeros in this hex number. */
1760 nzeros = 0;
1761 while (before[i + nzeros] == '0' && nzeros < ndigits)
1762 nzeros++;
1763
1764 /* Copy the number, skipping initial zeros, and then move the string
1765 positions. */
1766 if (nzeros == ndigits)
1767 {
1768 after[j] = '0';
1769 i += ndigits;
1770 j += 1;
1771 }
1772 else
1773 {
1774 int pos;
1775 for (pos = 0; pos < ndigits - nzeros; pos++)
1776 after[j + pos] = tolower ((int) before[i + nzeros + pos]);
1777 i += ndigits;
1778 j += (ndigits - nzeros);
1779 }
1780
1781 /* Add terminating semicolon. */
1782 after[j] = ';';
1783 j++;
1784 }
1785 else
1786 {
1787 after[j] = before[i];
1788 i++;
1789 j++;
1790 }
1791 }
1792 *lenp = j;
d31b9519
MG
1793 memcpy (before, after, j);
1794}
1795
cf313a94 1796char *
d40e1ca8 1797scm_to_locale_string (SCM str)
cf313a94 1798{
d40e1ca8 1799 return scm_to_locale_stringn (str, NULL);
cf313a94 1800}
d31b9519 1801
c829a427 1802char *
fac32b51 1803scm_to_locale_stringn (SCM str, size_t *lenp)
c829a427 1804{
b22e94db 1805 return scm_to_stringn (str, lenp,
95f5e303 1806 locale_charset (),
b22e94db 1807 scm_i_default_port_conversion_handler ());
9c44cd45
MG
1808}
1809
d40e1ca8
AW
1810char *
1811scm_to_latin1_string (SCM str)
1812{
1813 return scm_to_latin1_stringn (str, NULL);
1814}
1815
1816char *
1817scm_to_latin1_stringn (SCM str, size_t *lenp)
e9a35a96 1818#define FUNC_NAME "scm_to_latin1_stringn"
d40e1ca8 1819{
e9a35a96
LC
1820 char *result;
1821
1822 SCM_VALIDATE_STRING (1, str);
1823
1824 if (scm_i_is_narrow_string (str))
1825 {
fe133640
AW
1826 size_t len = scm_i_string_length (str);
1827
e9a35a96 1828 if (lenp)
fe133640 1829 *lenp = len;
e9a35a96 1830
fe133640 1831 result = scm_strndup (scm_i_string_data (str), len);
e9a35a96
LC
1832 }
1833 else
1834 result = scm_to_stringn (str, lenp, NULL,
fe133640 1835 SCM_FAILED_CONVERSION_ERROR);
e9a35a96
LC
1836
1837 return result;
d40e1ca8 1838}
e9a35a96 1839#undef FUNC_NAME
d40e1ca8
AW
1840
1841char *
1842scm_to_utf8_string (SCM str)
1843{
1844 return scm_to_utf8_stringn (str, NULL);
1845}
1846
e3d45974
AW
1847static size_t
1848latin1_u8_strlen (const scm_t_uint8 *str, size_t len)
1849{
1850 size_t ret, i;
1851 for (i = 0, ret = 0; i < len; i++)
1852 ret += (str[i] < 128) ? 1 : 2;
1853 return ret;
1854}
1855
1856static scm_t_uint8*
1857latin1_to_u8 (const scm_t_uint8 *str, size_t latin_len,
1858 scm_t_uint8 *u8_result, size_t *u8_lenp)
1859{
1860 size_t i, n;
1861 size_t u8_len = latin1_u8_strlen (str, latin_len);
1862
1863 if (!(u8_result && u8_lenp && *u8_lenp > u8_len))
1864 u8_result = scm_malloc (u8_len + 1);
1865 if (u8_lenp)
1866 *u8_lenp = u8_len;
1867
1868 for (i = 0, n = 0; i < latin_len; i++)
1869 n += u8_uctomb (u8_result + n, str[i], u8_len - n);
1870 if (n != u8_len)
1871 abort ();
1872 u8_result[n] = 0;
1873
1874 return u8_result;
1875}
1876
e26da7a2
MW
1877/* UTF-8 code table
1878
1879 (Note that this includes code points that are not allowed by Unicode,
1880 but since this function has no way to report an error, and its
1881 purpose is to determine the size of destination buffers for
1882 libunicode conversion functions, we err on the safe side and handle
1883 everything that libunicode might conceivably handle, now or in the
1884 future.)
2c1b7951
AW
1885
1886 Char. number range | UTF-8 octet sequence
1887 (hexadecimal) | (binary)
e26da7a2 1888 --------------------+------------------------------------------------------
2c1b7951
AW
1889 0000 0000-0000 007F | 0xxxxxxx
1890 0000 0080-0000 07FF | 110xxxxx 10xxxxxx
1891 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
e26da7a2
MW
1892 0001 0000-001F FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
1893 0020 0000-03FF FFFF | 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
1894 0400 0000-7FFF FFFF | 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
2c1b7951
AW
1895*/
1896
1897static size_t
e26da7a2 1898u32_u8_length_in_bytes (const scm_t_uint32 *str, size_t len)
2c1b7951
AW
1899{
1900 size_t ret, i;
1901
1902 for (i = 0, ret = 0; i < len; i++)
1903 {
1904 scm_t_uint32 c = str[i];
1905
1906 if (c <= 0x7f)
1907 ret += 1;
1908 else if (c <= 0x7ff)
1909 ret += 2;
1910 else if (c <= 0xffff)
1911 ret += 3;
e26da7a2 1912 else if (c <= 0x1fffff)
2c1b7951 1913 ret += 4;
e26da7a2
MW
1914 else if (c <= 0x3ffffff)
1915 ret += 5;
1916 else
1917 ret += 6;
2c1b7951
AW
1918 }
1919
1920 return ret;
1921}
1922
d40e1ca8
AW
1923char *
1924scm_to_utf8_stringn (SCM str, size_t *lenp)
ee26a9eb 1925#define FUNC_NAME "scm_to_utf8_stringn"
d40e1ca8 1926{
ee26a9eb
AW
1927 SCM_VALIDATE_STRING (1, str);
1928
e3d45974
AW
1929 if (scm_i_is_narrow_string (str))
1930 return (char *) latin1_to_u8 ((scm_t_uint8 *) scm_i_string_chars (str),
1931 scm_i_string_length (str),
1932 NULL, lenp);
1933 else
2c1b7951 1934 {
bbb9f000 1935 scm_t_uint32 *chars = (scm_t_uint32 *) scm_i_string_wide_chars (str);
2c1b7951 1936 scm_t_uint8 *buf, *ret;
bbb9f000
MW
1937 size_t num_chars = scm_i_string_length (str);
1938 size_t num_bytes_predicted, num_bytes_actual;
2c1b7951 1939
bbb9f000 1940 num_bytes_predicted = u32_u8_length_in_bytes (chars, num_chars);
2c1b7951 1941
e26da7a2 1942 if (lenp)
2c1b7951 1943 {
bbb9f000
MW
1944 *lenp = num_bytes_predicted;
1945 buf = scm_malloc (num_bytes_predicted);
2c1b7951 1946 }
e26da7a2
MW
1947 else
1948 {
bbb9f000
MW
1949 buf = scm_malloc (num_bytes_predicted + 1);
1950 buf[num_bytes_predicted] = 0;
e26da7a2
MW
1951 }
1952
bbb9f000
MW
1953 num_bytes_actual = num_bytes_predicted;
1954 ret = u32_to_u8 (chars, num_chars, buf, &num_bytes_actual);
e26da7a2 1955
bbb9f000 1956 if (SCM_LIKELY (ret == buf && num_bytes_actual == num_bytes_predicted))
e26da7a2 1957 return (char *) ret;
2c1b7951
AW
1958
1959 /* An error: a bad codepoint. */
1960 {
1961 int saved_errno = errno;
1962
1963 free (buf);
1964 if (!saved_errno)
1965 abort ();
1966
1967 scm_decoding_error ("scm_to_utf8_stringn", errno,
1968 "invalid codepoint in string", str);
1969
1970 /* Not reached. */
1971 return NULL;
1972 }
1973 }
d40e1ca8 1974}
ee26a9eb 1975#undef FUNC_NAME
d40e1ca8 1976
647dc1ac
LC
1977scm_t_wchar *
1978scm_to_utf32_string (SCM str)
1979{
1980 return scm_to_utf32_stringn (str, NULL);
1981}
1982
1983scm_t_wchar *
1984scm_to_utf32_stringn (SCM str, size_t *lenp)
1985#define FUNC_NAME "scm_to_utf32_stringn"
1986{
1987 scm_t_wchar *result;
1988
1989 SCM_VALIDATE_STRING (1, str);
1990
1991 if (scm_i_is_narrow_string (str))
e3d45974
AW
1992 {
1993 scm_t_uint8 *codepoints;
1994 size_t i, len;
1995
1996 codepoints = (scm_t_uint8*) scm_i_string_chars (str);
1997 len = scm_i_string_length (str);
1998 if (lenp)
1999 *lenp = len;
2000
2001 result = scm_malloc ((len + 1) * sizeof (scm_t_wchar));
2002 for (i = 0; i < len; i++)
2003 result[i] = codepoints[i];
2004 result[len] = 0;
2005 }
647dc1ac
LC
2006 else
2007 {
2008 size_t len;
2009
2010 len = scm_i_string_length (str);
2011 if (lenp)
2012 *lenp = len;
2013
2014 result = scm_malloc ((len + 1) * sizeof (scm_t_wchar));
2015 memcpy (result, scm_i_string_wide_chars (str),
2016 len * sizeof (scm_t_wchar));
2017 result[len] = 0;
2018 }
2019
2020 return result;
2021}
2022#undef FUNC_NAME
2023
29bcdbb0
LC
2024/* Return a malloc(3)-allocated buffer containing the contents of STR encoded
2025 according to ENCODING. If LENP is non-NULL, set it to the size in bytes of
2026 the returned buffer. If the conversion to ENCODING fails, apply the strategy
2027 defined by HANDLER. */
9c44cd45 2028char *
587a3355 2029scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
eca29b02 2030 scm_t_string_failed_conversion_handler handler)
9c44cd45 2031{
9c44cd45
MG
2032 char *buf;
2033 size_t ilen, len, i;
889975e5
MG
2034 int ret;
2035 const char *enc;
4d4528e7 2036
3ee86942 2037 if (!scm_is_string (str))
c829a427 2038 scm_wrong_type_arg_msg (NULL, 0, str, "string");
9c44cd45
MG
2039 ilen = scm_i_string_length (str);
2040
2041 if (ilen == 0)
2042 {
2043 buf = scm_malloc (1);
2044 buf[0] = '\0';
2045 if (lenp)
2046 *lenp = 0;
2047 return buf;
2048 }
587a3355 2049
c829a427 2050 if (lenp == NULL)
9c44cd45
MG
2051 for (i = 0; i < ilen; i++)
2052 if (scm_i_string_ref (str, i) == '\0')
2053 scm_misc_error (NULL,
2054 "string contains #\\nul character: ~S",
2055 scm_list_1 (str));
2056
889975e5 2057 if (scm_i_is_narrow_string (str) && (encoding == NULL))
c829a427 2058 {
889975e5
MG
2059 /* If using native Latin-1 encoding, just copy the string
2060 contents. */
9c44cd45
MG
2061 if (lenp)
2062 {
2063 buf = scm_malloc (ilen);
2064 memcpy (buf, scm_i_string_chars (str), ilen);
2065 *lenp = ilen;
2066 return buf;
2067 }
2068 else
2069 {
2070 buf = scm_malloc (ilen + 1);
2071 memcpy (buf, scm_i_string_chars (str), ilen);
2072 buf[ilen] = '\0';
2073 return buf;
2074 }
c829a427 2075 }
9c44cd45 2076
587a3355 2077
9c44cd45
MG
2078 buf = NULL;
2079 len = 0;
889975e5
MG
2080 enc = encoding;
2081 if (enc == NULL)
2082 enc = "ISO-8859-1";
2083 if (scm_i_is_narrow_string (str))
2084 {
2085 ret = mem_iconveh (scm_i_string_chars (str), ilen,
2086 "ISO-8859-1", enc,
2087 (enum iconv_ilseq_handler) handler, NULL,
2088 &buf, &len);
9c44cd45 2089
889975e5 2090 if (ret != 0)
ef7e4ba3 2091 scm_encoding_error (__func__, errno,
6851d3be
LC
2092 "cannot convert narrow string to output locale",
2093 SCM_BOOL_F,
2094 /* FIXME: Faulty character unknown. */
2095 SCM_BOOL_F);
889975e5
MG
2096 }
2097 else
2098 {
d31b9519 2099 buf = u32_conv_to_encoding (enc,
889975e5 2100 (enum iconv_ilseq_handler) handler,
d31b9519 2101 (scm_t_uint32 *) scm_i_string_wide_chars (str),
889975e5
MG
2102 ilen,
2103 NULL,
2104 NULL, &len);
2105 if (buf == NULL)
ef7e4ba3 2106 scm_encoding_error (__func__, errno,
6851d3be
LC
2107 "cannot convert wide string to output locale",
2108 SCM_BOOL_F,
2109 /* FIXME: Faulty character unknown. */
2110 SCM_BOOL_F);
d31b9519
MG
2111 }
2112 if (handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
2113 {
2114 if (SCM_R6RS_ESCAPES_P)
f1ee6d54
LC
2115 {
2116 /* The worst case is if the input string contains all 4-digit
2117 hex escapes. "\uXXXX" (six characters) becomes "\xXXXX;"
2118 (seven characters). Make BUF large enough to hold
2119 that. */
2120 buf = scm_realloc (buf, (len * 7) / 6 + 1);
31d4d02b 2121 unistring_escapes_to_r6rs_escapes (buf, &len);
f1ee6d54 2122 }
d31b9519 2123 else
31d4d02b 2124 unistring_escapes_to_guile_escapes (buf, &len);
4ff2b9f4
LC
2125
2126 buf = scm_realloc (buf, len);
889975e5 2127 }
9c44cd45 2128 if (lenp)
4d4528e7 2129 *lenp = len;
9c44cd45
MG
2130 else
2131 {
2132 buf = scm_realloc (buf, len + 1);
2133 buf[len] = '\0';
2134 }
24933780 2135
c829a427 2136 scm_remember_upto_here_1 (str);
9c44cd45 2137 return buf;
4d4528e7 2138}
af68e5e5 2139
c829a427
MV
2140size_t
2141scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
2142{
2143 size_t len;
9c44cd45 2144 char *result = NULL;
3ee86942 2145 if (!scm_is_string (str))
c829a427 2146 scm_wrong_type_arg_msg (NULL, 0, str, "string");
9c44cd45
MG
2147 result = scm_to_locale_stringn (str, &len);
2148
2149 memcpy (buf, result, (len > max_len) ? max_len : len);
2150 free (result);
2151
c829a427
MV
2152 scm_remember_upto_here_1 (str);
2153 return len;
2154}
af68e5e5 2155
a3d7d5d5
LC
2156\f
2157/* Unicode string normalization. */
2158
edb7bb47
JG
2159/* This function is a partial clone of SCM_STRING_TO_U32_BUF from
2160 libguile/i18n.c. It would be useful to have this factored out into a more
2161 convenient location, but its use of alloca makes that tricky to do. */
2162
2163static SCM
2164normalize_str (SCM string, uninorm_t form)
2165{
2166 SCM ret;
2167 scm_t_uint32 *w_str;
2168 scm_t_wchar *cbuf;
2169 size_t rlen, len = scm_i_string_length (string);
2170
2171 if (scm_i_is_narrow_string (string))
2172 {
2173 size_t i;
2174 const char *buf = scm_i_string_chars (string);
2175
2176 w_str = alloca (sizeof (scm_t_wchar) * (len + 1));
2177
2178 for (i = 0; i < len; i ++)
2179 w_str[i] = (unsigned char) buf[i];
2180 w_str[len] = 0;
2181 }
d8164b04
JG
2182 else
2183 w_str = (scm_t_uint32 *) scm_i_string_wide_chars (string);
2184
edb7bb47
JG
2185 w_str = u32_normalize (form, w_str, len, NULL, &rlen);
2186
190d4b0d 2187 ret = scm_i_make_wide_string (rlen, &cbuf, 0);
edb7bb47
JG
2188 u32_cpy ((scm_t_uint32 *) cbuf, w_str, rlen);
2189 free (w_str);
d8164b04
JG
2190
2191 scm_i_try_narrow_string (ret);
2192
edb7bb47
JG
2193 return ret;
2194}
2195
2196SCM_DEFINE (scm_string_normalize_nfc, "string-normalize-nfc", 1, 0, 0,
2197 (SCM string),
2198 "Returns the NFC normalized form of @var{string}.")
2199#define FUNC_NAME s_scm_string_normalize_nfc
2200{
2201 SCM_VALIDATE_STRING (1, string);
2202 return normalize_str (string, UNINORM_NFC);
2203}
2204#undef FUNC_NAME
2205
2206SCM_DEFINE (scm_string_normalize_nfd, "string-normalize-nfd", 1, 0, 0,
2207 (SCM string),
2208 "Returns the NFD normalized form of @var{string}.")
2209#define FUNC_NAME s_scm_string_normalize_nfd
2210{
2211 SCM_VALIDATE_STRING (1, string);
2212 return normalize_str (string, UNINORM_NFD);
2213}
2214#undef FUNC_NAME
2215
2216SCM_DEFINE (scm_string_normalize_nfkc, "string-normalize-nfkc", 1, 0, 0,
2217 (SCM string),
2218 "Returns the NFKC normalized form of @var{string}.")
2219#define FUNC_NAME s_scm_string_normalize_nfkc
2220{
2221 SCM_VALIDATE_STRING (1, string);
2222 return normalize_str (string, UNINORM_NFKC);
2223}
2224#undef FUNC_NAME
2225
2226SCM_DEFINE (scm_string_normalize_nfkd, "string-normalize-nfkd", 1, 0, 0,
2227 (SCM string),
2228 "Returns the NFKD normalized form of @var{string}.")
2229#define FUNC_NAME s_scm_string_normalize_nfkd
2230{
2231 SCM_VALIDATE_STRING (1, string);
2232 return normalize_str (string, UNINORM_NFKD);
2233}
2234#undef FUNC_NAME
2235
7505c6e0
MW
2236/* converts C scm_array of strings to SCM scm_list of strings.
2237 If argc < 0, a null terminated scm_array is assumed.
2238 The current locale encoding is assumed */
9c44cd45 2239SCM
3ee86942
MV
2240scm_makfromstrs (int argc, char **argv)
2241{
2242 int i = argc;
2243 SCM lst = SCM_EOL;
2244 if (0 > i)
2245 for (i = 0; argv[i]; i++);
2246 while (i--)
2247 lst = scm_cons (scm_from_locale_string (argv[i]), lst);
2248 return lst;
2249}
2250
c829a427 2251/* Return a newly allocated array of char pointers to each of the strings
7505c6e0
MW
2252 in args, with a terminating NULL pointer. The strings are encoded using
2253 the current locale. */
c829a427
MV
2254
2255char **
2256scm_i_allocate_string_pointers (SCM list)
2a776823 2257#define FUNC_NAME "scm_i_allocate_string_pointers"
af68e5e5 2258{
c829a427 2259 char **result;
7505c6e0 2260 int list_len = scm_ilength (list);
c829a427
MV
2261 int i;
2262
7505c6e0 2263 if (list_len < 0)
c829a427
MV
2264 scm_wrong_type_arg_msg (NULL, 0, list, "proper list");
2265
7505c6e0 2266 result = scm_gc_malloc ((list_len + 1) * sizeof (char *),
2a776823 2267 "string pointers");
7505c6e0 2268 result[list_len] = NULL;
c829a427 2269
7505c6e0 2270 /* The list might have been modified in another thread, so
c829a427
MV
2271 we check LIST before each access.
2272 */
7505c6e0 2273 for (i = 0; i < list_len && scm_is_pair (list); i++)
c829a427 2274 {
7505c6e0
MW
2275 SCM str = SCM_CAR (list);
2276 size_t len; /* String length in bytes */
2277 char *c_str = scm_to_locale_stringn (str, &len);
2278
2279 /* OPTIMIZE-ME: Right now, scm_to_locale_stringn always uses
2280 scm_malloc to allocate the returned string, which must be
2281 explicitly deallocated. This forces us to copy the string a
2282 second time into a new buffer. Ideally there would be variants
2283 of scm_to_*_stringn that can return garbage-collected buffers. */
2284
2285 result[i] = scm_gc_malloc_pointerless (len + 1, "string");
2286 memcpy (result[i], c_str, len);
2a776823 2287 result[i][len] = '\0';
7505c6e0 2288 free (c_str);
2a776823 2289
c829a427
MV
2290 list = SCM_CDR (list);
2291 }
2292
c829a427 2293 return result;
af68e5e5 2294}
2a776823 2295#undef FUNC_NAME
24933780 2296
6f14f578
MV
2297void
2298scm_i_get_substring_spec (size_t len,
2299 SCM start, size_t *cstart,
2300 SCM end, size_t *cend)
2301{
2302 if (SCM_UNBNDP (start))
2303 *cstart = 0;
2304 else
2305 *cstart = scm_to_unsigned_integer (start, 0, len);
2306
2307 if (SCM_UNBNDP (end))
2308 *cend = len;
2309 else
2310 *cend = scm_to_unsigned_integer (end, *cstart, len);
2311}
2312
3ee86942
MV
2313#if SCM_ENABLE_DEPRECATED
2314
556d75db
MV
2315/* When these definitions are removed, it becomes reasonable to use
2316 read-only strings for string literals. For that, change the reader
2317 to create string literals with scm_c_substring_read_only instead of
2318 with scm_c_substring_copy.
2319*/
2320
3ee86942 2321int
fe78c51a 2322scm_i_deprecated_stringp (SCM str)
3ee86942
MV
2323{
2324 scm_c_issue_deprecation_warning
2325 ("SCM_STRINGP is deprecated. Use scm_is_string instead.");
2326
2616f0e0 2327 return scm_is_string (str);
3ee86942
MV
2328}
2329
2330char *
fe78c51a 2331scm_i_deprecated_string_chars (SCM str)
3ee86942
MV
2332{
2333 char *chars;
2334
2335 scm_c_issue_deprecation_warning
2336 ("SCM_STRING_CHARS is deprecated. See the manual for alternatives.");
2337
2616f0e0
MV
2338 /* We don't accept shared substrings here since they are not
2339 null-terminated.
2340 */
2341 if (IS_SH_STRING (str))
c291b588
LC
2342 scm_misc_error (NULL,
2343 "SCM_STRING_CHARS does not work with shared substrings",
2616f0e0
MV
2344 SCM_EOL);
2345
877f06c3 2346 /* We explicitly test for read-only strings to produce a better
556d75db
MV
2347 error message.
2348 */
2349
2350 if (IS_RO_STRING (str))
c291b588
LC
2351 scm_misc_error (NULL,
2352 "SCM_STRING_CHARS does not work with read-only strings",
556d75db 2353 SCM_EOL);
c291b588 2354
2616f0e0 2355 /* The following is still wrong, of course...
3ee86942 2356 */
9c44cd45 2357 str = scm_i_string_start_writing (str);
3ee86942
MV
2358 chars = scm_i_string_writable_chars (str);
2359 scm_i_string_stop_writing ();
2360 return chars;
2361}
2362
2363size_t
fe78c51a 2364scm_i_deprecated_string_length (SCM str)
3ee86942
MV
2365{
2366 scm_c_issue_deprecation_warning
2367 ("SCM_STRING_LENGTH is deprecated. Use scm_c_string_length instead.");
2368 return scm_c_string_length (str);
2369}
2370
2371#endif
2372
2a610be5
AW
2373static SCM
2374string_handle_ref (scm_t_array_handle *h, size_t index)
2375{
2376 return scm_c_string_ref (h->array, index);
2377}
2378
2379static void
2380string_handle_set (scm_t_array_handle *h, size_t index, SCM val)
2381{
2382 scm_c_string_set_x (h->array, index, val);
2383}
2384
2385static void
2386string_get_handle (SCM v, scm_t_array_handle *h)
2387{
2388 h->array = v;
2389 h->ndims = 1;
2390 h->dims = &h->dim0;
2391 h->dim0.lbnd = 0;
2392 h->dim0.ubnd = scm_c_string_length (v) - 1;
2393 h->dim0.inc = 1;
2394 h->element_type = SCM_ARRAY_ELEMENT_TYPE_CHAR;
2395 h->elements = h->writable_elements = NULL;
2396}
2397
c5f17102 2398SCM_ARRAY_IMPLEMENTATION (scm_tc7_string, 0x7f,
2a610be5 2399 string_handle_ref, string_handle_set,
f65e0168
LC
2400 string_get_handle)
2401SCM_VECTOR_IMPLEMENTATION (SCM_ARRAY_ELEMENT_TYPE_CHAR, scm_make_string)
2a610be5 2402
0f2d19dd
JB
2403void
2404scm_init_strings ()
0f2d19dd 2405{
a7e392c1 2406 scm_nullstr = scm_i_make_string (0, NULL, 0);
7c33806a 2407
a0599745 2408#include "libguile/strings.x"
0f2d19dd
JB
2409}
2410
89e00824
ML
2411
2412/*
2413 Local Variables:
2414 c-file-style: "gnu"
2415 End:
2416*/