maintainer changed: was lord, now jimb; first import
[bpt/guile.git] / libguile / mbstrings.c
1
2
3 /* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
8 * any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this software; see the file COPYING. If not, write to
17 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
18 *
19 * As a special exception, the Free Software Foundation gives permission
20 * for additional uses of the text contained in its release of GUILE.
21 *
22 * The exception is that, if you link the GUILE library with other files
23 * to produce an executable, this does not by itself cause the
24 * resulting executable to be covered by the GNU General Public License.
25 * Your use of that executable is in no way restricted on account of
26 * linking the GUILE library code into it.
27 *
28 * This exception does not however invalidate any other reasons why
29 * the executable file might be covered by the GNU General Public License.
30 *
31 * This exception applies only to the code released by the
32 * Free Software Foundation under the name GUILE. If you copy
33 * code from other Free Software Foundation releases into a copy of
34 * GUILE, as the General Public License permits, the exception does
35 * not apply to the code that you add in this way. To avoid misleading
36 * anyone as to the status of such modified files, you must delete
37 * this exception notice from them.
38 *
39 * If you write modifications of your own for GUILE, it is your choice
40 * whether to permit this exception to apply to your modifications.
41 * If you do not wish that, delete this exception notice.
42 */
43 \f
44
45 \f
46 #include "extchrs.h"
47 #include <stdio.h>
48 #include "_scm.h"
49
50 \f
51
52 SCM_PROC(s_multi_byte_string_p, "multi-byte-string?", 1, 0, 0, scm_multi_byte_string_p);
53 #ifdef __STDC__
54 SCM
55 scm_multi_byte_string_p (SCM obj)
56 #else
57 SCM
58 scm_multi_byte_string_p (obj)
59 SCM obj;
60 #endif
61 {
62 return (SCM_MB_STRINGP (obj)
63 ? SCM_BOOL_T
64 : SCM_BOOL_F);
65 }
66
67
68 #ifdef __STDC__
69 SCM
70 scm_regular_string_p (SCM obj)
71 #else
72 SCM
73 scm_regular_string_p (obj)
74 SCM obj;
75 #endif
76 {
77 return (SCM_REGULAR_STRINGP (obj)
78 ? SCM_BOOL_T
79 : SCM_BOOL_F);
80 }
81
82 SCM_PROC(s_list_to_multi_byte_string, "list->multi-byte-string", 1, 0, 0, scm_multi_byte_string);
83 SCM_PROC(s_multi_byte_string, "multi-byte-string", 0, 0, 1, scm_multi_byte_string);
84 #ifdef __STDC__
85 SCM
86 scm_multi_byte_string (SCM chrs)
87 #else
88 SCM
89 scm_multi_byte_string (chrs)
90 SCM chrs;
91 #endif
92 {
93 SCM res;
94 register char *data;
95 long i;
96 long byte_len;
97
98 i = scm_ilength (chrs);
99 SCM_ASSERT (i >= 0, chrs, SCM_ARG1, s_multi_byte_string);
100 i = i * XMB_CUR_MAX;
101 res = scm_makstr (i, 0);
102 SCM_SETLENGTH (res, SCM_LENGTH (res), scm_tc7_mb_string);
103 data = SCM_CHARS (res);
104 byte_len = 0;
105 xwctomb (0, 0);
106 while (i && SCM_NNULLP (chrs))
107 {
108 int used;
109 SCM ch;
110
111 ch = SCM_CAR (chrs);
112 SCM_ASSERT (SCM_ICHRP (ch), chrs, SCM_ARG1, s_multi_byte_string);
113 used = xwctomb (data + byte_len, SCM_ICHR (ch));
114 SCM_ASSERT (used >= 0, chrs, SCM_ARG1, s_multi_byte_string);
115 byte_len += (used ? used : 1);
116 chrs = SCM_CDR (chrs);
117 --i;
118 }
119 res = scm_vector_set_length_x (res, SCM_MAKINUM (byte_len));
120 return res;
121 }
122
123 #ifdef __STDC__
124 int
125 scm_mb_ilength (unsigned char * data, int size)
126 #else
127 int
128 scm_mb_ilength (data, size)
129 unsigned char * data;
130 int size;
131 #endif
132 {
133 int pos;
134 int len;
135
136 len = 0;
137 pos = 0;
138 xmblen (0, 0);
139 while (pos < size)
140 {
141 int inc;
142
143 inc = xmblen (data + pos, size - pos);
144 if (inc == 0)
145 ++inc;
146
147 if (inc < 0)
148 return -1;
149
150 ++len;
151 pos += inc;
152 }
153
154 return len;
155 }
156
157 SCM_PROC(s_multi_byte_string_length, "multi-byte-string-length", 1, 0, 0, scm_multi_byte_string_length);
158 #ifdef __STDC__
159 SCM
160 scm_multi_byte_string_length (SCM str)
161 #else
162 SCM
163 scm_multi_byte_string_length (str)
164 SCM str;
165 #endif
166 {
167 int size;
168 int len;
169 unsigned char * data;
170
171 SCM_ASSERT (SCM_NIMP (str) && SCM_ROSTRINGP (str), str, SCM_ARG1, s_multi_byte_string_length);
172
173 data = SCM_ROCHARS (str);
174 size = SCM_ROLENGTH (str);
175 len = scm_mb_ilength (data, size);
176 SCM_ASSERT (len >= 0, str, SCM_ARG1, s_multi_byte_string_length);
177 return SCM_MAKINUM (len);
178 }
179
180
181 SCM_PROC(s_symbol_multi_byte_p, "symbol-multi-byte?", 1, 0, 0, scm_symbol_multi_byte_p);
182 #ifdef __STDC__
183 SCM
184 scm_symbol_multi_byte_p (SCM symbol)
185 #else
186 SCM
187 scm_symbol_multi_byte_p (symbol)
188 SCM symbol;
189 #endif
190 {
191 return SCM_SYMBOL_MULTI_BYTE_SCM_STRINGP(symbol);
192 }
193
194 SCM_PROC(s_set_symbol_multi_byte_x, "set-symbol-multi-byte!", 2, 0, 0, scm_set_symbol_multi_byte_x);
195 #ifdef __STDC__
196 SCM
197 scm_set_symbol_multi_byte_x (SCM symbol, SCM val)
198 #else
199 SCM
200 scm_set_symbol_multi_byte_x (symbol, val)
201 SCM symbol;
202 SCM val;
203 #endif
204 {
205 if (SCM_TYP7 (symbol) == scm_tc7_msymbol)
206 {
207 SCM_SYMBOL_MULTI_BYTE_SCM_STRINGP(symbol) = (SCM_FALSEP (val)
208 ? SCM_BOOL_F
209 : SCM_BOOL_T);
210 }
211 return SCM_UNSPECIFIED;
212 }
213
214
215 SCM_PROC(s_regular_port_p, "regular-port?", 1, 0, 0, scm_regular_port_p);
216 #ifdef __STDC__
217 SCM
218 scm_regular_port_p (SCM p)
219 #else
220 SCM
221 scm_regular_port_p (p)
222 SCM p;
223 #endif
224 {
225 return (SCM_PORT_REPRESENTATION(p) == scm_regular_port
226 ? SCM_BOOL_T
227 : SCM_BOOL_F);
228 }
229
230 SCM_PROC(s_regular_port_x, "regular-port!", 1, 0, 0, scm_regular_port_x);
231 #ifdef __STDC__
232 SCM
233 scm_regular_port_x (SCM p)
234 #else
235 SCM
236 scm_regular_port_x (p)
237 SCM p;
238 #endif
239 {
240 SCM_PORT_REPRESENTATION(p) = scm_regular_port;
241 return SCM_UNSPECIFIED;
242 }
243
244 SCM_PROC(s_multi_byte_port_p, "multi-byte-port?", 1, 0, 0, scm_multi_byte_port_p);
245 #ifdef __STDC__
246 SCM
247 scm_multi_byte_port_p (SCM p)
248 #else
249 SCM
250 scm_multi_byte_port_p (p)
251 SCM p;
252 #endif
253 {
254 return (SCM_PORT_REPRESENTATION(p) == scm_mb_port
255 ? SCM_BOOL_T
256 : SCM_BOOL_F);
257 }
258
259 SCM_PROC(s_multi_byte_port_x, "multi-byte-port!", 1, 0, 0, scm_multi_byte_port_x);
260 #ifdef __STDC__
261 SCM
262 scm_multi_byte_port_x (SCM p)
263 #else
264 SCM
265 scm_multi_byte_port_x (p)
266 SCM p;
267 #endif
268 {
269 SCM_PORT_REPRESENTATION(p) = scm_mb_port;
270 return SCM_UNSPECIFIED;
271 }
272
273
274 SCM_PROC(s_wide_character_port_p, "wide-character-port?", 1, 0, 0, scm_wide_character_port_p);
275 #ifdef __STDC__
276 SCM
277 scm_wide_character_port_p (SCM p)
278 #else
279 SCM
280 scm_wide_character_port_p (p)
281 SCM p;
282 #endif
283 {
284 return (SCM_PORT_REPRESENTATION(p) == scm_wchar_port
285 ? SCM_BOOL_T
286 : SCM_BOOL_F);
287 }
288
289 SCM_PROC(s_wide_character_port_x, "wide-character-port!", 1, 0, 0, scm_wide_character_port_x);
290 #ifdef __STDC__
291 SCM
292 scm_wide_character_port_x (SCM p)
293 #else
294 SCM
295 scm_wide_character_port_x (p)
296 SCM p;
297 #endif
298 {
299 SCM_PORT_REPRESENTATION(p) = scm_wchar_port;
300 return SCM_UNSPECIFIED;
301 }
302
303
304
305 \f
306
307 #ifdef __STDC__
308 void
309 scm_put_wchar (int c, SCM port, int writing)
310 #else
311 void
312 scm_put_wchar (c, port, writing)
313 int c;
314 SCM port;
315 int writing;
316 #endif
317 {
318 if (writing)
319 scm_gen_puts (scm_regular_string, "#\\", port);
320 switch (SCM_PORT_REPRESENTATION (port))
321 {
322 case scm_regular_port:
323 {
324 if (c < 256)
325 {
326 if (!writing)
327 scm_gen_putc ((unsigned char)c, port);
328 else if ((c <= ' ') && scm_charnames[c])
329 scm_gen_puts (scm_regular_string, scm_charnames[c], port);
330 else if (c > '\177')
331 scm_intprint (c, 8, port);
332 else
333 scm_gen_putc ((int) c, port);
334 }
335 else
336 {
337 print_octal:
338 if (!writing)
339 scm_gen_putc ('\\', port);
340 scm_intprint (c, 8, port);
341 }
342 break;
343 }
344
345 case scm_mb_port:
346 {
347 char buf[256];
348 int len;
349
350 if (XMB_CUR_MAX > sizeof (buf))
351 goto print_octal;
352
353 len = xwctomb (buf, c);
354
355 if (len < 0)
356 goto print_octal;
357
358 if (len == 0)
359 scm_gen_putc (0, port);
360 else
361 scm_gen_putc (c, port);
362 break;
363 }
364
365 case scm_wchar_port:
366 {
367 scm_gen_putc (c, port);
368 break;
369 }
370 }
371 }
372
373
374
375
376
377 #ifdef __STDC__
378 void
379 scm_print_mb_string (SCM exp, SCM port, int writing)
380 #else
381 void
382 scm_print_mb_string (exp, port, writing)
383 SCM exp;
384 SCM port;
385 int writing;
386 #endif
387 {
388 if (writing)
389 {
390 int i;
391 int len;
392 char * data;
393
394 scm_gen_putc ('\"', port);
395 i = 0;
396 len = SCM_ROLENGTH (exp);
397 data = SCM_ROCHARS (exp);
398
399 while (i < len)
400 {
401 xwchar_t c;
402 int inc;
403
404 inc = xmbtowc (&c, data + i, len - i);
405 if (inc == 0)
406 inc = 1;
407 if (inc < 0)
408 {
409 inc = 1;
410 c = data[i];
411 }
412 i += inc;
413 switch (c)
414 {
415 case '\"':
416 case '\\':
417 scm_gen_putc ('\\', port);
418 default:
419 scm_gen_putc (c, port);
420 }
421 }
422 scm_gen_putc ('\"', port);
423 }
424 else
425 scm_gen_write (scm_mb_string, SCM_ROCHARS (exp), SCM_ROLENGTH (exp), port);
426 }
427
428
429 #ifdef __STDC__
430 void
431 scm_print_mb_symbol (SCM exp, SCM port)
432 #else
433 void
434 scm_print_mb_symbol (exp, port)
435 SCM exp;
436 SCM port;
437 #endif
438 {
439 int pos;
440 int end;
441 int len;
442 char * str;
443 int weird;
444 int maybe_weird;
445 int mw_pos;
446 int inc;
447 xwchar_t c;
448
449 len = SCM_LENGTH (exp);
450 str = SCM_CHARS (exp);
451 scm_remember (&exp);
452 pos = 0;
453 weird = 0;
454 maybe_weird = 0;
455
456 for (end = pos; end < len; end += inc)
457 {
458 inc = xmbtowc (&c, str + end, len - end);
459 if (inc < 0)
460 {
461 inc = 1;
462 c = str[end];
463 goto weird_handler;
464 }
465 if (inc == 0)
466 {
467 inc = 1;
468 goto weird_handler;
469 }
470 switch (c)
471 {
472 #ifdef BRACKETS_AS_PARENS
473 case '[':
474 case ']':
475 #endif
476 case '(':
477 case ')':
478 case '\"':
479 case ';':
480 case SCM_WHITE_SPACES:
481 case SCM_LINE_INCREMENTORS:
482 weird_handler:
483 if (maybe_weird)
484 {
485 end = mw_pos;
486 maybe_weird = 0;
487 }
488 if (!weird)
489 {
490 scm_gen_write (scm_regular_string, "#{", 2, port);
491 weird = 1;
492 }
493 if (pos < end)
494 {
495 int q;
496 int qinc;
497
498 q = pos;
499 while (q < end)
500 {
501 qinc = xmbtowc (&c, str + q, end - q);
502 if (inc <= 0)
503 {
504 inc = 1;
505 c = str[q];
506 }
507 scm_gen_putc (c, port);
508 q += qinc;
509 }
510 }
511 {
512 char buf[2];
513 buf[0] = '\\';
514 buf[1] = str[end];
515 scm_gen_write (scm_regular_string, buf, 2, port);
516 }
517 pos = end + 1;
518 break;
519 case '\\':
520 if (weird)
521 goto weird_handler;
522 if (!maybe_weird)
523 {
524 maybe_weird = 1;
525 mw_pos = pos;
526 }
527 break;
528 case '}':
529 case '#':
530 if (weird)
531 goto weird_handler;
532 break;
533 default:
534 break;
535 }
536 }
537 if (pos < end)
538 {
539 int q;
540 int qinc;
541 q = pos;
542 while (q < end)
543 {
544 qinc = xmbtowc (&c, str + q, end - q);
545 if (inc <= 0)
546 inc = 1;
547 scm_gen_putc (c, port);
548 q += qinc;
549 }
550 }
551 if (weird)
552 scm_gen_write (scm_regular_string, "}#", 2, port);
553 }
554
555
556 \f
557
558 #ifdef __STDC__
559 void
560 scm_init_mbstrings (void)
561 #else
562 void
563 scm_init_mbstrings ()
564 #endif
565 {
566 #include "mbstrings.x"
567 }
568