* ioext.c (scm_setfileno): throw a runtime error if SET_FILE_FD_FIELD
[bpt/guile.git] / libguile / mbstrings.c
1
2
3 /* Copyright (C) 1995, 1996 Free Software Foundation, Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
8 * any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this software; see the file COPYING. If not, write to
17 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
18 *
19 * As a special exception, the Free Software Foundation gives permission
20 * for additional uses of the text contained in its release of GUILE.
21 *
22 * The exception is that, if you link the GUILE library with other files
23 * to produce an executable, this does not by itself cause the
24 * resulting executable to be covered by the GNU General Public License.
25 * Your use of that executable is in no way restricted on account of
26 * linking the GUILE library code into it.
27 *
28 * This exception does not however invalidate any other reasons why
29 * the executable file might be covered by the GNU General Public License.
30 *
31 * This exception applies only to the code released by the
32 * Free Software Foundation under the name GUILE. If you copy
33 * code from other Free Software Foundation releases into a copy of
34 * GUILE, as the General Public License permits, the exception does
35 * not apply to the code that you add in this way. To avoid misleading
36 * anyone as to the status of such modified files, you must delete
37 * this exception notice from them.
38 *
39 * If you write modifications of your own for GUILE, it is your choice
40 * whether to permit this exception to apply to your modifications.
41 * If you do not wish that, delete this exception notice.
42 */
43 \f
44
45 \f
46 #include "extchrs.h"
47 #include <stdio.h>
48 #include "_scm.h"
49 #include "chars.h"
50 #include "unif.h"
51 #include "genio.h"
52 #include "read.h"
53
54 #include "mbstrings.h"
55 \f
56
57 SCM_PROC(s_multi_byte_string_p, "multi-byte-string?", 1, 0, 0, scm_multi_byte_string_p);
58
59 SCM
60 scm_multi_byte_string_p (obj)
61 SCM obj;
62 {
63 return (SCM_MB_STRINGP (obj)
64 ? SCM_BOOL_T
65 : SCM_BOOL_F);
66 }
67
68
69
70 SCM
71 scm_regular_string_p (obj)
72 SCM obj;
73 {
74 return (SCM_REGULAR_STRINGP (obj)
75 ? SCM_BOOL_T
76 : SCM_BOOL_F);
77 }
78
79 SCM_PROC(s_list_to_multi_byte_string, "list->multi-byte-string", 1, 0, 0, scm_multi_byte_string);
80 SCM_PROC(s_multi_byte_string, "multi-byte-string", 0, 0, 1, scm_multi_byte_string);
81
82 SCM
83 scm_multi_byte_string (chrs)
84 SCM chrs;
85 {
86 SCM res;
87 register char *data;
88 long i;
89 long byte_len;
90
91 i = scm_ilength (chrs);
92 SCM_ASSERT (i >= 0, chrs, SCM_ARG1, s_multi_byte_string);
93 i = i * XMB_CUR_MAX;
94 res = scm_makstr (i, 0);
95 SCM_SETLENGTH (res, SCM_LENGTH (res), scm_tc7_mb_string);
96 data = SCM_CHARS (res);
97 byte_len = 0;
98 xwctomb (0, 0);
99 while (i && SCM_NNULLP (chrs))
100 {
101 int used;
102 SCM ch;
103
104 ch = SCM_CAR (chrs);
105 SCM_ASSERT (SCM_ICHRP (ch), chrs, SCM_ARG1, s_multi_byte_string);
106 used = xwctomb (data + byte_len, SCM_ICHR (ch));
107 SCM_ASSERT (used >= 0, chrs, SCM_ARG1, s_multi_byte_string);
108 byte_len += (used ? used : 1);
109 chrs = SCM_CDR (chrs);
110 --i;
111 }
112 res = scm_vector_set_length_x (res, SCM_MAKINUM (byte_len));
113 return res;
114 }
115
116
/* Count the characters encoded in the SIZE bytes starting at DATA.

   Each step asks xmblen for the length of the character at the current
   offset.  A zero result is stepped over as a single byte; a negative
   result aborts the scan.

   Returns the number of characters, or -1 if xmblen reports a negative
   length anywhere in the buffer.  */
int
scm_mb_ilength (data, size)
     unsigned char * data;
     int size;
{
  int offset;
  int count = 0;

  /* Presumably resets xmblen's conversion state, as mblen (NULL, 0)
     does; confirm against its definition.  */
  xmblen (0, 0);

  for (offset = 0; offset < size; ++count)
    {
      int step = xmblen (data + offset, size - offset);

      if (step < 0)
        return -1;

      offset += (step == 0) ? 1 : step;
    }

  return count;
}
145
146 SCM_PROC(s_multi_byte_string_length, "multi-byte-string-length", 1, 0, 0, scm_multi_byte_string_length);
147
148 SCM
149 scm_multi_byte_string_length (str)
150 SCM str;
151 {
152 int size;
153 int len;
154 unsigned char * data;
155
156 SCM_ASSERT (SCM_NIMP (str) && SCM_ROSTRINGP (str), str, SCM_ARG1, s_multi_byte_string_length);
157
158 data = SCM_ROCHARS (str);
159 size = SCM_ROLENGTH (str);
160 len = scm_mb_ilength (data, size);
161 SCM_ASSERT (len >= 0, str, SCM_ARG1, s_multi_byte_string_length);
162 return SCM_MAKINUM (len);
163 }
164
165
166 SCM_PROC(s_symbol_multi_byte_p, "symbol-multi-byte?", 1, 0, 0, scm_symbol_multi_byte_p);
167
168 SCM
169 scm_symbol_multi_byte_p (symbol)
170 SCM symbol;
171 {
172 return SCM_SYMBOL_MULTI_BYTE_STRINGP(symbol);
173 }
174
175 SCM_PROC(s_set_symbol_multi_byte_x, "set-symbol-multi-byte!", 2, 0, 0, scm_set_symbol_multi_byte_x);
176
177 SCM
178 scm_set_symbol_multi_byte_x (symbol, val)
179 SCM symbol;
180 SCM val;
181 {
182 if (SCM_TYP7 (symbol) == scm_tc7_msymbol)
183 {
184 SCM_SYMBOL_MULTI_BYTE_STRINGP(symbol) = (SCM_FALSEP (val)
185 ? SCM_BOOL_F
186 : SCM_BOOL_T);
187 }
188 return SCM_UNSPECIFIED;
189 }
190
191
192 SCM_PROC(s_regular_port_p, "regular-port?", 1, 0, 0, scm_regular_port_p);
193
194 SCM
195 scm_regular_port_p (p)
196 SCM p;
197 {
198 return (SCM_PORT_REPRESENTATION(p) == scm_regular_port
199 ? SCM_BOOL_T
200 : SCM_BOOL_F);
201 }
202
203 SCM_PROC(s_regular_port_x, "regular-port!", 1, 0, 0, scm_regular_port_x);
204
205 SCM
206 scm_regular_port_x (p)
207 SCM p;
208 {
209 SCM_PORT_REPRESENTATION(p) = scm_regular_port;
210 return SCM_UNSPECIFIED;
211 }
212
213 SCM_PROC(s_multi_byte_port_p, "multi-byte-port?", 1, 0, 0, scm_multi_byte_port_p);
214
215 SCM
216 scm_multi_byte_port_p (p)
217 SCM p;
218 {
219 return (SCM_PORT_REPRESENTATION(p) == scm_mb_port
220 ? SCM_BOOL_T
221 : SCM_BOOL_F);
222 }
223
224 SCM_PROC(s_multi_byte_port_x, "multi-byte-port!", 1, 0, 0, scm_multi_byte_port_x);
225
226 SCM
227 scm_multi_byte_port_x (p)
228 SCM p;
229 {
230 SCM_PORT_REPRESENTATION(p) = scm_mb_port;
231 return SCM_UNSPECIFIED;
232 }
233
234
235 SCM_PROC(s_wide_character_port_p, "wide-character-port?", 1, 0, 0, scm_wide_character_port_p);
236
237 SCM
238 scm_wide_character_port_p (p)
239 SCM p;
240 {
241 return (SCM_PORT_REPRESENTATION(p) == scm_wchar_port
242 ? SCM_BOOL_T
243 : SCM_BOOL_F);
244 }
245
246 SCM_PROC(s_wide_character_port_x, "wide-character-port!", 1, 0, 0, scm_wide_character_port_x);
247
248 SCM
249 scm_wide_character_port_x (p)
250 SCM p;
251 {
252 SCM_PORT_REPRESENTATION(p) = scm_wchar_port;
253 return SCM_UNSPECIFIED;
254 }
255
256
257
258 \f
259
260
261 void
262 scm_put_wchar (c, port, writing)
263 int c;
264 SCM port;
265 int writing;
266 {
267 if (writing)
268 scm_gen_puts (scm_regular_string, "#\\", port);
269 switch (SCM_PORT_REPRESENTATION (port))
270 {
271 case scm_regular_port:
272 {
273 if (c < 256)
274 {
275 if (!writing)
276 scm_gen_putc ((unsigned char)c, port);
277 else if ((c <= ' ') && scm_charnames[c])
278 scm_gen_puts (scm_regular_string, scm_charnames[c], port);
279 else if (c > '\177')
280 scm_intprint (c, 8, port);
281 else
282 scm_gen_putc ((int) c, port);
283 }
284 else
285 {
286 print_octal:
287 if (!writing)
288 scm_gen_putc ('\\', port);
289 scm_intprint (c, 8, port);
290 }
291 break;
292 }
293
294 case scm_mb_port:
295 {
296 char buf[256];
297 int len;
298
299 if (XMB_CUR_MAX > sizeof (buf))
300 goto print_octal;
301
302 len = xwctomb (buf, c);
303
304 if (len < 0)
305 goto print_octal;
306
307 if (len == 0)
308 scm_gen_putc (0, port);
309 else
310 scm_gen_putc (c, port);
311 break;
312 }
313
314 case scm_wchar_port:
315 {
316 scm_gen_putc (c, port);
317 break;
318 }
319 }
320 }
321
322
323
324
325
326
327 void
328 scm_print_mb_string (exp, port, writing)
329 SCM exp;
330 SCM port;
331 int writing;
332 {
333 if (writing)
334 {
335 int i;
336 int len;
337 char * data;
338
339 scm_gen_putc ('\"', port);
340 i = 0;
341 len = SCM_ROLENGTH (exp);
342 data = SCM_ROCHARS (exp);
343
344 while (i < len)
345 {
346 xwchar_t c;
347 int inc;
348
349 inc = xmbtowc (&c, data + i, len - i);
350 if (inc == 0)
351 inc = 1;
352 if (inc < 0)
353 {
354 inc = 1;
355 c = data[i];
356 }
357 i += inc;
358 switch (c)
359 {
360 case '\"':
361 case '\\':
362 scm_gen_putc ('\\', port);
363 default:
364 scm_gen_putc (c, port);
365 }
366 }
367 scm_gen_putc ('\"', port);
368 }
369 else
370 scm_gen_write (scm_mb_string, SCM_ROCHARS (exp), SCM_ROLENGTH (exp), port);
371 }
372
373
374
375 void
376 scm_print_mb_symbol (exp, port)
377 SCM exp;
378 SCM port;
379 {
380 int pos;
381 int end;
382 int len;
383 char * str;
384 int weird;
385 int maybe_weird;
386 int mw_pos = 0; /* initialized to placate compiler */
387 int inc = 0; /* same */
388 xwchar_t c;
389
390 len = SCM_LENGTH (exp);
391 str = SCM_CHARS (exp);
392 scm_remember (&exp);
393 pos = 0;
394 weird = 0;
395 maybe_weird = 0;
396
397 for (end = pos; end < len; end += inc)
398 {
399 inc = xmbtowc (&c, str + end, len - end);
400 if (inc < 0)
401 {
402 inc = 1;
403 c = str[end];
404 goto weird_handler;
405 }
406 if (inc == 0)
407 {
408 inc = 1;
409 goto weird_handler;
410 }
411 switch (c)
412 {
413 #ifdef BRACKETS_AS_PARENS
414 case '[':
415 case ']':
416 #endif
417 case '(':
418 case ')':
419 case '\"':
420 case ';':
421 case SCM_WHITE_SPACES:
422 case SCM_LINE_INCREMENTORS:
423 weird_handler:
424 if (maybe_weird)
425 {
426 end = mw_pos;
427 maybe_weird = 0;
428 }
429 if (!weird)
430 {
431 scm_gen_write (scm_regular_string, "#{", 2, port);
432 weird = 1;
433 }
434 if (pos < end)
435 {
436 int q;
437 int qinc;
438
439 q = pos;
440 while (q < end)
441 {
442 qinc = xmbtowc (&c, str + q, end - q);
443 if (inc <= 0)
444 {
445 inc = 1;
446 c = str[q];
447 }
448 scm_gen_putc (c, port);
449 q += qinc;
450 }
451 }
452 {
453 char buf[2];
454 buf[0] = '\\';
455 buf[1] = str[end];
456 scm_gen_write (scm_regular_string, buf, 2, port);
457 }
458 pos = end + 1;
459 break;
460 case '\\':
461 if (weird)
462 goto weird_handler;
463 if (!maybe_weird)
464 {
465 maybe_weird = 1;
466 mw_pos = pos;
467 }
468 break;
469 case '}':
470 case '#':
471 if (weird)
472 goto weird_handler;
473 break;
474 default:
475 break;
476 }
477 }
478 if (pos < end)
479 {
480 int q;
481 int qinc;
482 q = pos;
483 while (q < end)
484 {
485 qinc = xmbtowc (&c, str + q, end - q);
486 if (inc <= 0)
487 inc = 1;
488 scm_gen_putc (c, port);
489 q += qinc;
490 }
491 }
492 if (weird)
493 scm_gen_write (scm_regular_string, "}#", 2, port);
494 }
495
496
497 \f
498
499
/* Initialization entry point for this file.  The whole body is the
   textual inclusion of "mbstrings.x".
   NOTE(review): that file is presumably generated from the SCM_PROC
   declarations in this file and registers the procedures they name --
   confirm against the build rules.  */
void
scm_init_mbstrings ()
{
#include "mbstrings.x"
}
505