degenerate let forms
[bpt/guile.git] / lib / iconv.c
1 /* Character set conversion.
2 Copyright (C) 1999-2001, 2007, 2009-2014 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU Lesser General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public License along
15 with this program; if not, see <http://www.gnu.org/licenses/>. */
16
17 #include <config.h>
18
19 /* Specification. */
20 #include <iconv.h>
21
22 #include <stddef.h>
23
24 #if REPLACE_ICONV_UTF
25 # include <errno.h>
26 # include <stdint.h>
27 # include <stdlib.h>
28 # include "unistr.h"
29 # ifndef uintptr_t
30 # define uintptr_t unsigned long
31 # endif
32 #endif
33
34 #if REPLACE_ICONV_UTF
35
36 /* UTF-{16,32}{BE,LE} converters taken from GNU libiconv 1.11. */
37
/* Result of xxx_mbtowc when the input bytes do not form a valid
   character.  */
# define RET_ILSEQ (-1)
/* Result of xxx_mbtowc when the input is too short to hold a complete
   character; no bytes were read.  */
# define RET_TOOFEW (-2)

/* Result of xxx_wctomb when the character cannot be represented in the
   target encoding.  */
# define RET_ILUNI (-1)
/* Result of xxx_wctomb (and xxx_reset) when the output buffer is too
   small.  */
# define RET_TOOSMALL (-2)
47
48 /*
49 * UTF-16BE
50 */
51
52 /* Specification: RFC 2781 */
53
54 static int
55 utf16be_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
56 {
57 if (n >= 2)
58 {
59 ucs4_t wc = (s[0] << 8) + s[1];
60 if (wc >= 0xd800 && wc < 0xdc00)
61 {
62 if (n >= 4)
63 {
64 ucs4_t wc2 = (s[2] << 8) + s[3];
65 if (!(wc2 >= 0xdc00 && wc2 < 0xe000))
66 return RET_ILSEQ;
67 *pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00);
68 return 4;
69 }
70 }
71 else if (wc >= 0xdc00 && wc < 0xe000)
72 {
73 return RET_ILSEQ;
74 }
75 else
76 {
77 *pwc = wc;
78 return 2;
79 }
80 }
81 return RET_TOOFEW;
82 }
83
84 static int
85 utf16be_wctomb (unsigned char *r, ucs4_t wc, size_t n)
86 {
87 if (!(wc >= 0xd800 && wc < 0xe000))
88 {
89 if (wc < 0x10000)
90 {
91 if (n >= 2)
92 {
93 r[0] = (unsigned char) (wc >> 8);
94 r[1] = (unsigned char) wc;
95 return 2;
96 }
97 else
98 return RET_TOOSMALL;
99 }
100 else if (wc < 0x110000)
101 {
102 if (n >= 4)
103 {
104 ucs4_t wc1 = 0xd800 + ((wc - 0x10000) >> 10);
105 ucs4_t wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff);
106 r[0] = (unsigned char) (wc1 >> 8);
107 r[1] = (unsigned char) wc1;
108 r[2] = (unsigned char) (wc2 >> 8);
109 r[3] = (unsigned char) wc2;
110 return 4;
111 }
112 else
113 return RET_TOOSMALL;
114 }
115 }
116 return RET_ILUNI;
117 }
118
119 /*
120 * UTF-16LE
121 */
122
123 /* Specification: RFC 2781 */
124
125 static int
126 utf16le_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
127 {
128 if (n >= 2)
129 {
130 ucs4_t wc = s[0] + (s[1] << 8);
131 if (wc >= 0xd800 && wc < 0xdc00)
132 {
133 if (n >= 4)
134 {
135 ucs4_t wc2 = s[2] + (s[3] << 8);
136 if (!(wc2 >= 0xdc00 && wc2 < 0xe000))
137 return RET_ILSEQ;
138 *pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00);
139 return 4;
140 }
141 }
142 else if (wc >= 0xdc00 && wc < 0xe000)
143 {
144 return RET_ILSEQ;
145 }
146 else
147 {
148 *pwc = wc;
149 return 2;
150 }
151 }
152 return RET_TOOFEW;
153 }
154
155 static int
156 utf16le_wctomb (unsigned char *r, ucs4_t wc, size_t n)
157 {
158 if (!(wc >= 0xd800 && wc < 0xe000))
159 {
160 if (wc < 0x10000)
161 {
162 if (n >= 2)
163 {
164 r[0] = (unsigned char) wc;
165 r[1] = (unsigned char) (wc >> 8);
166 return 2;
167 }
168 else
169 return RET_TOOSMALL;
170 }
171 else if (wc < 0x110000)
172 {
173 if (n >= 4)
174 {
175 ucs4_t wc1 = 0xd800 + ((wc - 0x10000) >> 10);
176 ucs4_t wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff);
177 r[0] = (unsigned char) wc1;
178 r[1] = (unsigned char) (wc1 >> 8);
179 r[2] = (unsigned char) wc2;
180 r[3] = (unsigned char) (wc2 >> 8);
181 return 4;
182 }
183 else
184 return RET_TOOSMALL;
185 }
186 }
187 return RET_ILUNI;
188 }
189
190 /*
191 * UTF-32BE
192 */
193
194 /* Specification: Unicode 3.1 Standard Annex #19 */
195
196 static int
197 utf32be_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
198 {
199 if (n >= 4)
200 {
201 ucs4_t wc = (s[0] << 24) + (s[1] << 16) + (s[2] << 8) + s[3];
202 if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
203 {
204 *pwc = wc;
205 return 4;
206 }
207 else
208 return RET_ILSEQ;
209 }
210 return RET_TOOFEW;
211 }
212
213 static int
214 utf32be_wctomb (unsigned char *r, ucs4_t wc, size_t n)
215 {
216 if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
217 {
218 if (n >= 4)
219 {
220 r[0] = 0;
221 r[1] = (unsigned char) (wc >> 16);
222 r[2] = (unsigned char) (wc >> 8);
223 r[3] = (unsigned char) wc;
224 return 4;
225 }
226 else
227 return RET_TOOSMALL;
228 }
229 return RET_ILUNI;
230 }
231
232 /*
233 * UTF-32LE
234 */
235
236 /* Specification: Unicode 3.1 Standard Annex #19 */
237
238 static int
239 utf32le_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
240 {
241 if (n >= 4)
242 {
243 ucs4_t wc = s[0] + (s[1] << 8) + (s[2] << 16) + (s[3] << 24);
244 if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
245 {
246 *pwc = wc;
247 return 4;
248 }
249 else
250 return RET_ILSEQ;
251 }
252 return RET_TOOFEW;
253 }
254
255 static int
256 utf32le_wctomb (unsigned char *r, ucs4_t wc, size_t n)
257 {
258 if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
259 {
260 if (n >= 4)
261 {
262 r[0] = (unsigned char) wc;
263 r[1] = (unsigned char) (wc >> 8);
264 r[2] = (unsigned char) (wc >> 16);
265 r[3] = 0;
266 return 4;
267 }
268 else
269 return RET_TOOSMALL;
270 }
271 return RET_ILUNI;
272 }
273
274 #endif
275
276 size_t
277 rpl_iconv (iconv_t cd,
278 ICONV_CONST char **inbuf, size_t *inbytesleft,
279 char **outbuf, size_t *outbytesleft)
280 #undef iconv
281 {
282 #if REPLACE_ICONV_UTF
283 switch ((uintptr_t) cd)
284 {
285 {
286 int (*xxx_wctomb) (unsigned char *, ucs4_t, size_t);
287
288 case (uintptr_t) _ICONV_UTF8_UTF16BE:
289 xxx_wctomb = utf16be_wctomb;
290 goto loop_from_utf8;
291 case (uintptr_t) _ICONV_UTF8_UTF16LE:
292 xxx_wctomb = utf16le_wctomb;
293 goto loop_from_utf8;
294 case (uintptr_t) _ICONV_UTF8_UTF32BE:
295 xxx_wctomb = utf32be_wctomb;
296 goto loop_from_utf8;
297 case (uintptr_t) _ICONV_UTF8_UTF32LE:
298 xxx_wctomb = utf32le_wctomb;
299 goto loop_from_utf8;
300
301 loop_from_utf8:
302 if (inbuf == NULL || *inbuf == NULL)
303 return 0;
304 {
305 ICONV_CONST char *inptr = *inbuf;
306 size_t inleft = *inbytesleft;
307 char *outptr = *outbuf;
308 size_t outleft = *outbytesleft;
309 size_t res = 0;
310 while (inleft > 0)
311 {
312 ucs4_t uc;
313 int m = u8_mbtoucr (&uc, (const uint8_t *) inptr, inleft);
314 if (m <= 0)
315 {
316 if (m == -1)
317 {
318 errno = EILSEQ;
319 res = (size_t)(-1);
320 break;
321 }
322 if (m == -2)
323 {
324 errno = EINVAL;
325 res = (size_t)(-1);
326 break;
327 }
328 abort ();
329 }
330 else
331 {
332 int n = xxx_wctomb ((uint8_t *) outptr, uc, outleft);
333 if (n < 0)
334 {
335 if (n == RET_ILUNI)
336 {
337 errno = EILSEQ;
338 res = (size_t)(-1);
339 break;
340 }
341 if (n == RET_TOOSMALL)
342 {
343 errno = E2BIG;
344 res = (size_t)(-1);
345 break;
346 }
347 abort ();
348 }
349 else
350 {
351 inptr += m;
352 inleft -= m;
353 outptr += n;
354 outleft -= n;
355 }
356 }
357 }
358 *inbuf = inptr;
359 *inbytesleft = inleft;
360 *outbuf = outptr;
361 *outbytesleft = outleft;
362 return res;
363 }
364 }
365
366 {
367 int (*xxx_mbtowc) (ucs4_t *, const unsigned char *, size_t);
368
369 case (uintptr_t) _ICONV_UTF16BE_UTF8:
370 xxx_mbtowc = utf16be_mbtowc;
371 goto loop_to_utf8;
372 case (uintptr_t) _ICONV_UTF16LE_UTF8:
373 xxx_mbtowc = utf16le_mbtowc;
374 goto loop_to_utf8;
375 case (uintptr_t) _ICONV_UTF32BE_UTF8:
376 xxx_mbtowc = utf32be_mbtowc;
377 goto loop_to_utf8;
378 case (uintptr_t) _ICONV_UTF32LE_UTF8:
379 xxx_mbtowc = utf32le_mbtowc;
380 goto loop_to_utf8;
381
382 loop_to_utf8:
383 if (inbuf == NULL || *inbuf == NULL)
384 return 0;
385 {
386 ICONV_CONST char *inptr = *inbuf;
387 size_t inleft = *inbytesleft;
388 char *outptr = *outbuf;
389 size_t outleft = *outbytesleft;
390 size_t res = 0;
391 while (inleft > 0)
392 {
393 ucs4_t uc;
394 int m = xxx_mbtowc (&uc, (const uint8_t *) inptr, inleft);
395 if (m <= 0)
396 {
397 if (m == RET_ILSEQ)
398 {
399 errno = EILSEQ;
400 res = (size_t)(-1);
401 break;
402 }
403 if (m == RET_TOOFEW)
404 {
405 errno = EINVAL;
406 res = (size_t)(-1);
407 break;
408 }
409 abort ();
410 }
411 else
412 {
413 int n = u8_uctomb ((uint8_t *) outptr, uc, outleft);
414 if (n < 0)
415 {
416 if (n == -1)
417 {
418 errno = EILSEQ;
419 res = (size_t)(-1);
420 break;
421 }
422 if (n == -2)
423 {
424 errno = E2BIG;
425 res = (size_t)(-1);
426 break;
427 }
428 abort ();
429 }
430 else
431 {
432 inptr += m;
433 inleft -= m;
434 outptr += n;
435 outleft -= n;
436 }
437 }
438 }
439 *inbuf = inptr;
440 *inbytesleft = inleft;
441 *outbuf = outptr;
442 *outbytesleft = outleft;
443 return res;
444 }
445 }
446 }
447 #endif
448 return iconv (cd, inbuf, inbytesleft, outbuf, outbytesleft);
449 }