Commit | Line | Data |
---|---|---|
b0322a85 CE |
1 | /* |
2 | ** Copyright 2000-2011 Double Precision, Inc. | |
3 | ** See COPYING for distribution information. | |
4 | ** | |
5 | */ | |
6 | ||
7 | #include "unicode_config.h" | |
8 | #include "unicode.h" | |
9 | #include "../rfc822/rfc822hdr.h" | |
10 | #include <string.h> | |
11 | #include <ctype.h> | |
12 | #include <stdlib.h> | |
13 | #include <iconv.h> | |
14 | #include <errno.h> | |
15 | #if HAVE_LOCALE_H | |
16 | #if HAVE_SETLOCALE | |
17 | #include <locale.h> | |
18 | #if USE_LIBCHARSET | |
19 | #if HAVE_LOCALCHARSET_H | |
20 | #include <localcharset.h> | |
21 | #elif HAVE_LIBCHARSET_H | |
22 | #include <libcharset.h> | |
23 | #endif /* HAVE_LOCALCHARSET_H */ | |
24 | #elif HAVE_LANGINFO_CODESET | |
25 | #include <langinfo.h> | |
26 | #endif /* USE_LIBCHARSET */ | |
27 | #endif /* HAVE_SETLOCALE */ | |
28 | #endif /* HAVE_LOCALE_H */ | |
29 | ||
/*
** Cached name of the default character set. An empty string means that the
** default has not been determined yet; unicode_default_chset() fills it in
** on first use.
*/
static char default_chset_buf[32];

/*
** Determine the default character set and store it in default_chset_buf.
**
** Sources are tried in this order, skipping any that is unset or empty:
**
** 1. The MM_CHARSET environment variable.
** 2. The CHARSET environment variable.
** 3. The character set of the environment's locale (when the build has
**    setlocale() and either libcharset or nl_langinfo(CODESET)).
** 4. The part of the LANG environment variable between '.' and '@'
**    (LANG is xx_yy.CHARSET@modifier).
** 5. "US-ASCII".
**
** Names longer than the buffer are truncated. The process locale is switched
** to the environment's locale only for the duration of the lookup.
*/
static void init_default_chset()
{
	const char *chset;
	char buf[sizeof(default_chset_buf)];
#if HAVE_LOCALE_H
#if HAVE_SETLOCALE
	const char *old_locale=NULL;
	char *locale_cpy=NULL;
#endif
#endif

	chset=getenv("MM_CHARSET");

	if (chset == NULL || *chset == 0)
		chset=getenv("CHARSET");

	if (chset == NULL || *chset == 0)
	{
		chset=NULL;

#if HAVE_LOCALE_H
#if HAVE_SETLOCALE
		/*
		** setlocale(LC_ALL, "") returns the name of the NEW locale,
		** so the current one must be queried (NULL argument) and
		** copied before switching, otherwise there is nothing to
		** restore afterwards.
		*/
		old_locale=setlocale(LC_ALL, NULL);
		locale_cpy=old_locale ? strdup(old_locale):NULL;
		setlocale(LC_ALL, "");
#if USE_LIBCHARSET
		chset = locale_charset();
#elif HAVE_LANGINFO_CODESET
		chset=nl_langinfo(CODESET);
#endif
#endif
#endif
		if (chset && *chset == 0)
			chset=NULL;
	}

	memset(buf, 0, sizeof(buf));

	/* Map GNU libc iconv oddity to us-ascii */

	if (chset &&
	    (strcmp(chset, "ANSI_X3.4") == 0 ||
	     strncmp(chset, "ANSI_X3.4-", 10) == 0))
		chset="US-ASCII";

	if (chset)
	{
		/* buf is zeroed, so this is a bounded, terminated copy */
		strncat(buf, chset, sizeof(buf)-1);
	}
	else
	{
		const char *p=getenv("LANG");

		/* LANG is xx_yy.CHARSET@modifier */

		if (p && *p && (p=strchr(p, '.')) != NULL)
		{
			const char *q=strchr(++p, '@');
			size_t n;

			if (!q)
				q=p+strlen(p);

			n=(size_t)(q-p);

			if (n > sizeof(buf)-1)
				n=sizeof(buf)-1;

			memcpy(buf, p, n);
			buf[n]=0;
		}
	}

	/*
	** Never cache an empty name: unicode_default_chset() treats an empty
	** buffer as "not initialized" and would redo the lookup on every
	** call, handing the caller an empty character set name.
	*/
	if (buf[0] == 0)
		strcpy(buf, "US-ASCII");

	memcpy(default_chset_buf, buf, sizeof(buf));

#if HAVE_LOCALE_H
#if HAVE_SETLOCALE
	/*
	** Restore only after the name has been copied; the string returned
	** by the locale query may not survive another setlocale() call.
	*/
	if (locale_cpy)
	{
		setlocale(LC_ALL, locale_cpy);
		free(locale_cpy);
	}
#endif
#endif

}

/*
** Return the name of the default character set, determining it on the first
** call. The returned pointer refers to a static buffer.
*/
const char *unicode_default_chset()
{
	if (default_chset_buf[0] == 0)
		init_default_chset();

	return default_chset_buf;
}
117 | ||
118 | ||
119 | /*****************************************************************************/ | |
120 | ||
/*
** Character set name for UCS-4 in this build's byte order, selected at
** compile time by WORDS_BIGENDIAN.
*/
const char libmail_u_ucs4_native[]=
#if WORDS_BIGENDIAN
	"UCS-4BE"
#else
	"UCS-4LE"
#endif
	;
128 | ||
/*
** Character set name for UCS-2 in this build's byte order, selected at
** compile time by WORDS_BIGENDIAN. The modified-UTF7 modules in this file
** use it as their intermediate encoding.
*/
const char libmail_u_ucs2_native[]=
#if WORDS_BIGENDIAN
	"UCS-2BE"
#else
	"UCS-2LE"
#endif
	;
136 | ||
/*
** A stack of conversion modules.
**
** Every module structure in this file starts with this header, and the
** address of the header is what gets handed out as the conversion handle.
*/

struct libmail_u_convert_hdr {

	/* Feed cnt bytes of text into the module; nonzero return is an error */
	int (*convert_handler)(void *ptr,
			       const char *text, size_t cnt);

	/*
	** Finish the conversion and release the module. When errptr is not
	** NULL the modules in this file use it to report a conversion error
	** flag.
	*/
	int (*deinit_handler)(void *ptr, int *errptr);

	/* Passed as the first argument to both handlers */
	void *ptr;

	/* Another module that this one is stacked on, or NULL */
	struct libmail_u_convert_hdr *next;
};
148 | ||
/*
** Decoding table for modified UTF7-encoding as used in imap.
**
** Indexed by an octet converted to unsigned char. Each entry is the 6 bit
** value of that character in the modified base64 alphabet, or -1 if the
** character is not part of the alphabet.
**
** The element type is explicitly signed char: the decoder detects invalid
** characters by testing for a negative value, which would never be true with
** a plain char table on platforms where plain char is unsigned.
*/

static const signed char mbase64_lookup[]={
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,63,-1,-1,-1,
	52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1,
	-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
	15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1,
	-1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
	41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1};
168 | ||
/*
** Encoding alphabet for modified UTF7: the character at index N represents
** the 6 bit value N. The last two characters are '+' and ','.
*/
static const char mbase64[]=
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
171 | ||
/*
** Conversion wrapper for converting to modified-utf7 IMAP encoding.
**
** This is done by converting to UCS2, then stacking on a module that
** takes that and converts UCS2 to modified-UTF7.
**
** init_nottoimaputf7() builds the stack for a destination that is not
** modified-utf7. The encoder calls it with UCS-2 as the destination to obtain
** an opaque stack for converting to ucs2. It returns NULL on failure.
*/

static libmail_u_convert_handle_t
init_nottoimaputf7(const char *src_chset,
		   const char *dst_chset,
		   int (*output_func)(const char *, size_t, void *),
		   void *convert_arg);
186 | ||
/*
** The to modified UTF7 module.
**
** Holds the encoder's state: a buffer of encoded output that has not been
** handed to the output function yet, the base64 bit accumulator, and the
** caller's output function.
*/

struct libmail_u_convert_toimaputf7 {

	/* Common module header; hdr.next is the stack that produces UCS-2 */
	struct libmail_u_convert_hdr hdr;

	/* Accumulated output buffer */

	char utf7encodebuf[1024];
	size_t utf7encodebuf_cnt;

	/* Accumulated bits for base64 encoding */
	uint32_t utf7bits;

	/* How many bits in utf7bits */
	uint16_t utf7bitcount;

	/* Flag: in base64mode */
	uint16_t utfmode;

	/* Nonzero return code from the output function, once one was seen */
	int errflag;

	/*
	** Any extra characters that should be munged: characters listed here
	** are base64-encoded even though they are in the directly
	** representable range.
	*/

	char smapmunge[16];

	/* Remembered output function */

	int (*output_func)(const char *, size_t, void *);

	/* Remembered arg to the output function */
	void *convert_arg;
};
222 | ||
/*
** Macro - flush the output buffer.
**
** Passes the accumulated output to the remembered output function and
** empties the buffer. NOTE: if the output function returns nonzero, the
** macro stores that code in (p)->errflag and executes a return statement
** with it, so it can only be used inside a function that returns int.
*/
#define toimaputf7_encode_flush(p) do {					\
		int rc;							\
									\
		rc=(*(p)->output_func)((p)->utf7encodebuf,		\
				       (p)->utf7encodebuf_cnt,		\
				       (p)->convert_arg);		\
		if (rc)							\
			return ((p)->errflag=(rc));			\
									\
		(p)->utf7encodebuf_cnt=0;				\
	} while (0)
235 | ||
236 | static int toimaputf7_encode_flushfinal(struct libmail_u_convert_toimaputf7 *p) | |
237 | { | |
238 | if (p->utf7encodebuf_cnt > 0) | |
239 | toimaputf7_encode_flush(p); | |
240 | return 0; | |
241 | } | |
242 | ||
/*
** Macro - add one char to the output buffer.
**
** When the buffer is full it gets flushed first. The flush macro executes a
** return statement if the output function fails, so this macro can only be
** used inside a function that returns int.
**
** The expansion deliberately does not end with a semicolon; the caller
** supplies it. This keeps the macro usable as the body of an if that is
** followed by an else.
*/

#define toimaputf7_encode_add(p,c) do {					\
		if ((p)->utf7encodebuf_cnt >= sizeof((p)->utf7encodebuf)) \
			toimaputf7_encode_flush((p));			\
									\
		(p)->utf7encodebuf[(p)->utf7encodebuf_cnt++]=(c);	\
	} while (0)
251 | ||
/* Handlers and output callback of the to-modified-UTF7 module, defined below */

static int deinit_toimaputf7(void *ptr, int *errptr);

static int do_convert_toutf7(const char *text, size_t cnt, void *arg);
static int convert_utf7_handler(void *ptr, const char *text, size_t cnt);
256 | ||
257 | /* | |
258 | ** Create a conversion module stack | |
259 | */ | |
260 | ||
261 | libmail_u_convert_handle_t | |
262 | libmail_u_convert_init(const char *src_chset, | |
263 | const char *dst_chset, | |
264 | int (*output_func)(const char *, size_t, void *), | |
265 | void *convert_arg) | |
266 | { | |
267 | struct libmail_u_convert_toimaputf7 *toutf7; | |
268 | libmail_u_convert_handle_t h; | |
269 | const char *smapmunge; | |
270 | size_t l=strlen(unicode_x_imap_modutf7); | |
271 | ||
272 | if (strncmp(dst_chset, unicode_x_imap_modutf7, l) == 0 && | |
273 | (dst_chset[l] == 0 || dst_chset[l] == ' ')) | |
274 | { | |
275 | smapmunge=dst_chset + l; | |
276 | ||
277 | if (*smapmunge) | |
278 | ++smapmunge; | |
279 | } | |
280 | else | |
281 | return init_nottoimaputf7(src_chset, dst_chset, | |
282 | output_func, | |
283 | convert_arg); | |
284 | ||
285 | toutf7=malloc(sizeof(struct libmail_u_convert_toimaputf7)); | |
286 | ||
287 | if (!toutf7) | |
288 | return NULL; | |
289 | ||
290 | memset(toutf7, 0, sizeof(*toutf7)); | |
291 | ||
292 | h=init_nottoimaputf7(src_chset, libmail_u_ucs2_native, | |
293 | do_convert_toutf7, toutf7); | |
294 | if (!h) | |
295 | { | |
296 | free(toutf7); | |
297 | return (NULL); | |
298 | } | |
299 | ||
300 | toutf7->output_func=output_func; | |
301 | toutf7->convert_arg=convert_arg; | |
302 | ||
303 | strncat(toutf7->smapmunge, smapmunge, sizeof(toutf7->smapmunge)-1); | |
304 | ||
305 | toutf7->hdr.convert_handler=convert_utf7_handler; | |
306 | toutf7->hdr.deinit_handler=deinit_toimaputf7; | |
307 | toutf7->hdr.ptr=toutf7; | |
308 | toutf7->hdr.next=h; | |
309 | return &toutf7->hdr; | |
310 | } | |
311 | ||
312 | /* Passthrough to the wrapped stack */ | |
313 | ||
314 | static int convert_utf7_handler(void *ptr, const char *text, size_t cnt) | |
315 | { | |
316 | struct libmail_u_convert_toimaputf7 *toutf7= | |
317 | (struct libmail_u_convert_toimaputf7 *)ptr; | |
318 | ||
319 | return (*toutf7->hdr.next->convert_handler)(toutf7->hdr.next->ptr, | |
320 | text, cnt); | |
321 | } | |
322 | ||
323 | static int utf7off(struct libmail_u_convert_toimaputf7 *toutf7) | |
324 | { | |
325 | if (!toutf7->utfmode) | |
326 | return 0; | |
327 | toutf7->utfmode=0; | |
328 | ||
329 | if (toutf7->utf7bitcount > 0) | |
330 | toimaputf7_encode_add(toutf7, | |
331 | mbase64[(toutf7->utf7bits | |
332 | << (6-toutf7->utf7bitcount)) | |
333 | & 63]); | |
334 | toimaputf7_encode_add(toutf7, '-'); | |
335 | return 0; | |
336 | } | |
337 | ||
338 | ||
339 | static int do_convert_toutf7(const char *text, size_t cnt, void *arg) | |
340 | { | |
341 | struct libmail_u_convert_toimaputf7 *toutf7= | |
342 | (struct libmail_u_convert_toimaputf7 *)arg; | |
343 | ||
344 | /* We better be getting UCS-2 here! */ | |
345 | ||
346 | const uint16_t *utext=(const uint16_t *)text; | |
347 | cnt /= 2; | |
348 | ||
349 | while (cnt) | |
350 | { | |
351 | if (toutf7->errflag) | |
352 | return toutf7->errflag; | |
353 | ||
354 | if (*utext >= 0x20 && *utext <= 0x7F | |
355 | && strchr( toutf7->smapmunge, (char)*utext) == NULL) | |
356 | ||
357 | /* | |
358 | && (!toutf7->smapmunge || (*utext != '.' && *utext != '/' && | |
359 | *utext != '~' && *utext != ':'))) | |
360 | */ | |
361 | { | |
362 | if (utf7off(toutf7)) | |
363 | return toutf7->errflag; | |
364 | ||
365 | toimaputf7_encode_add(toutf7, *utext); | |
366 | ||
367 | if (*utext == '&') | |
368 | toimaputf7_encode_add(toutf7, '-'); | |
369 | ||
370 | ++utext; | |
371 | --cnt; | |
372 | continue; | |
373 | } | |
374 | ||
375 | if (!toutf7->utfmode) | |
376 | { | |
377 | toutf7->utfmode=1; | |
378 | toutf7->utf7bitcount=0; | |
379 | toimaputf7_encode_add(toutf7, '&'); | |
380 | continue; | |
381 | } | |
382 | ||
383 | toutf7->utf7bits = (toutf7->utf7bits << 16) | | |
384 | (((uint32_t)*utext) & 0xFFFF); | |
385 | toutf7->utf7bitcount += 16; | |
386 | ||
387 | ++utext; | |
388 | --cnt; | |
389 | ||
390 | /* If there's at least 6 bits, output base64-encoded char */ | |
391 | ||
392 | while (toutf7->utf7bitcount >= 6) | |
393 | { | |
394 | uint32_t v; | |
395 | int n; | |
396 | ||
397 | if (toutf7->errflag) | |
398 | return toutf7->errflag; | |
399 | ||
400 | v=toutf7->utf7bits; | |
401 | n=toutf7->utf7bitcount-6; | |
402 | toutf7->utf7bitcount -= 6; | |
403 | ||
404 | if (n > 0) | |
405 | v >>= n; | |
406 | ||
407 | toimaputf7_encode_add(toutf7, mbase64[v & 63]); | |
408 | } | |
409 | } | |
410 | ||
411 | return 0; | |
412 | } | |
413 | ||
414 | static int deinit_toimaputf7(void *ptr, int *errptr) | |
415 | { | |
416 | int rc; | |
417 | ||
418 | struct libmail_u_convert_toimaputf7 *toutf7= | |
419 | (struct libmail_u_convert_toimaputf7 *)ptr; | |
420 | ||
421 | /* Flush out the downstream stack */ | |
422 | rc=(*toutf7->hdr.next->deinit_handler)(toutf7->hdr.next->ptr, errptr); | |
423 | ||
424 | /* Make sure we're out of modified base64 */ | |
425 | ||
426 | if (rc == 0) | |
427 | rc=utf7off(toutf7); | |
428 | ||
429 | if (rc == 0 && toutf7->utf7encodebuf_cnt > 0) | |
430 | rc=toimaputf7_encode_flushfinal(toutf7); | |
431 | ||
432 | free(toutf7); | |
433 | return rc; | |
434 | } | |
435 | ||
436 | /************/ | |
437 | ||
438 | /* | |
439 | ** Convert from modified-utf7 IMAP encoding. | |
440 | ** | |
441 | ** This module converts it to UCS-2, then this is attached to a stack that | |
442 | ** converts UCS-2 to the requested charset. | |
443 | */ | |
444 | ||
/*
** Builds a stack whose source is not modified-utf7; defined below, where it
** creates a single iconv module. Returns NULL on failure.
*/
static libmail_u_convert_handle_t
init_notfromimaputf7(const char *src_chset,
		     const char *dst_chset,
		     int (*output_func)(const char *, size_t, void *),
		     void *convert_arg);
450 | ||
/*
** The from modified UTF7 module: decoder state plus a buffer of decoded
** UCS-2 characters that have not been passed to the next module yet.
*/
struct libmail_u_convert_fromimaputf7 {

	/* Common module header; hdr.next converts UCS-2 to the destination */
	struct libmail_u_convert_hdr hdr;

	/* Accumulated UCS-2 stream */
	uint16_t convbuf[512];
	size_t convbuf_cnt;

	/* Accumulated base64 bits */
	uint32_t modbits;

	/* How many bits extracted from a base64 stream */

	short modcnt;

	/* Flag: seen the & */
	char seenamp;

	/* Flag: seen the &, and the next char wasn't - */

	char inmod;

	/* Nonzero once an error occurred; returned by the handlers */
	int errflag;

	/*
	** Reported through *errptr by deinit_fromutf7() when nonzero. Nothing
	** in the visible part of this file sets it.
	*/
	int converr;
};
475 | ||
/*
** Flush the accumulated UCS-2 stream: pass it to the next module's convert
** handler and empty the buffer.
**
** NOTE: the handler's return code is assigned to (p)->errflag
** unconditionally, so a successful flush resets a previously recorded error.
*/

#define convert_fromutf7_flush(p) do {					\
		(p)->errflag=(*(p)->hdr.next->convert_handler)		\
			((p)->hdr.next->ptr,				\
			 (const char *)(p)->convbuf,			\
			 (p)->convbuf_cnt *				\
			 sizeof((p)->convbuf[0]));			\
		(p)->convbuf_cnt=0;					\
	} while (0)
486 | ||
/*
** Accumulate a UCS-2 char, flushing the buffer first when it is full.
** The character is stored even if that flush reported an error; callers
** check (p)->errflag afterwards.
*/

#define convert_fromutf7_add(p,c) do {					\
		if ((p)->convbuf_cnt >=					\
		    sizeof((p)->convbuf)/sizeof((p)->convbuf[0]))	\
			convert_fromutf7_flush((p));			\
		(p)->convbuf[(p)->convbuf_cnt++]=(c);			\
	} while (0)
495 | ||
496 | ||
/* Handlers of the from-modified-UTF7 module, defined below */
static int convert_fromutf7(void *ptr,
			    const char *text, size_t cnt);
static int deinit_fromutf7(void *ptr, int *errptr);
500 | ||
501 | static libmail_u_convert_handle_t | |
502 | init_nottoimaputf7(const char *src_chset, | |
503 | const char *dst_chset, | |
504 | int (*output_func)(const char *, size_t, void *), | |
505 | void *convert_arg) | |
506 | { | |
507 | struct libmail_u_convert_fromimaputf7 *fromutf7; | |
508 | libmail_u_convert_handle_t h; | |
509 | size_t l=strlen(unicode_x_imap_modutf7); | |
510 | ||
511 | if (strncmp(src_chset, unicode_x_imap_modutf7, l) == 0 && | |
512 | (src_chset[l] == 0 || src_chset[l] == ' ')) | |
513 | ; | |
514 | else | |
515 | return init_notfromimaputf7(src_chset, dst_chset, | |
516 | output_func, | |
517 | convert_arg); | |
518 | ||
519 | fromutf7=(struct libmail_u_convert_fromimaputf7 *) | |
520 | malloc(sizeof(struct libmail_u_convert_fromimaputf7)); | |
521 | ||
522 | if (!fromutf7) | |
523 | return NULL; | |
524 | ||
525 | memset(fromutf7, 0, sizeof(*fromutf7)); | |
526 | ||
527 | /* Create a stack for converting UCS-2 to the dest charset */ | |
528 | ||
529 | h=init_notfromimaputf7(libmail_u_ucs2_native, dst_chset, | |
530 | output_func, convert_arg); | |
531 | ||
532 | if (!h) | |
533 | { | |
534 | free(fromutf7); | |
535 | return (NULL); | |
536 | } | |
537 | ||
538 | fromutf7->hdr.next=h; | |
539 | fromutf7->hdr.convert_handler=convert_fromutf7; | |
540 | fromutf7->hdr.deinit_handler=deinit_fromutf7; | |
541 | fromutf7->hdr.ptr=fromutf7; | |
542 | return &fromutf7->hdr; | |
543 | } | |
544 | ||
545 | static int convert_fromutf7(void *ptr, | |
546 | const char *text, size_t cnt) | |
547 | { | |
548 | struct libmail_u_convert_fromimaputf7 *fromutf7= | |
549 | (struct libmail_u_convert_fromimaputf7 *)ptr; | |
550 | int bits; | |
551 | ||
552 | while (cnt) | |
553 | { | |
554 | if (fromutf7->errflag) | |
555 | return fromutf7->errflag; | |
556 | ||
557 | if (!fromutf7->seenamp && *text == '&') | |
558 | { | |
559 | fromutf7->seenamp=1; | |
560 | fromutf7->inmod=0; | |
561 | fromutf7->modcnt=0; | |
562 | ++text; | |
563 | --cnt; | |
564 | continue; | |
565 | } | |
566 | ||
567 | if (fromutf7->seenamp) | |
568 | { | |
569 | if (*text == '-') | |
570 | { | |
571 | convert_fromutf7_add(fromutf7, '&'); | |
572 | ++text; | |
573 | --cnt; | |
574 | fromutf7->seenamp=0; | |
575 | continue; | |
576 | } | |
577 | fromutf7->seenamp=0; | |
578 | fromutf7->inmod=1; | |
579 | } | |
580 | ||
581 | if (!fromutf7->inmod) | |
582 | { | |
583 | /* Not in the base64 encoded stream */ | |
584 | ||
585 | convert_fromutf7_add(fromutf7, | |
586 | ((uint16_t)*text) & 0xFFFF); | |
587 | ++text; | |
588 | --cnt; | |
589 | continue; | |
590 | } | |
591 | ||
592 | if (*text == '-') | |
593 | { | |
594 | /* End of the base64 encoded stream */ | |
595 | fromutf7->inmod=0; | |
596 | ++text; | |
597 | --cnt; | |
598 | continue; | |
599 | } | |
600 | ||
601 | /* Got 6 more bits */ | |
602 | ||
603 | bits=mbase64_lookup[(unsigned char)*text]; | |
604 | ||
605 | ++text; | |
606 | --cnt; | |
607 | ||
608 | if (bits < 0) | |
609 | { | |
610 | errno=EILSEQ; | |
611 | return fromutf7->errflag=-1; | |
612 | } | |
613 | ||
614 | fromutf7->modbits = (fromutf7->modbits << 6) | bits; | |
615 | fromutf7->modcnt += 6; | |
616 | ||
617 | if (fromutf7->modcnt >= 16) | |
618 | { | |
619 | /* Got a UCS-2 char */ | |
620 | ||
621 | int shiftcnt=fromutf7->modcnt - 16; | |
622 | uint32_t v=fromutf7->modbits; | |
623 | ||
624 | if (shiftcnt) | |
625 | v >>= shiftcnt; | |
626 | ||
627 | fromutf7->modcnt -= 16; | |
628 | ||
629 | convert_fromutf7_add(fromutf7, v); | |
630 | } | |
631 | } | |
632 | return 0; | |
633 | } | |
634 | ||
635 | static int deinit_fromutf7(void *ptr, int *errptr) | |
636 | { | |
637 | struct libmail_u_convert_fromimaputf7 *fromutf7= | |
638 | (struct libmail_u_convert_fromimaputf7 *)ptr; | |
639 | int rc; | |
640 | ||
641 | if (fromutf7->seenamp || fromutf7->inmod) | |
642 | { | |
643 | if (fromutf7->errflag == 0) | |
644 | { | |
645 | fromutf7->errflag= -1; | |
646 | errno=EILSEQ; | |
647 | } | |
648 | } | |
649 | ||
650 | if (fromutf7->convbuf_cnt) | |
651 | convert_fromutf7_flush(fromutf7); | |
652 | ||
653 | rc=fromutf7->hdr.next->deinit_handler(fromutf7->hdr.next->ptr, errptr); | |
654 | ||
655 | if (fromutf7->errflag && rc == 0) | |
656 | rc=fromutf7->errflag; | |
657 | ||
658 | if (errptr && fromutf7->converr) | |
659 | *errptr=1; | |
660 | ||
661 | free(fromutf7); | |
662 | return rc; | |
663 | } | |
664 | ||
665 | /************/ | |
666 | ||
/*
** A real conversion module, via iconv.
**
** Input is collected in buffer and converted in batches; converted text is
** handed to output_func.
*/

struct libmail_u_convert_iconv {

	struct libmail_u_convert_hdr hdr;

	iconv_t h; /* The iconv conversion descriptor */
	int errflag; /* Accumulated errors */

	/* Receives converted text; convert_arg is passed as its last argument */
	int (*output_func)(const char *, size_t, void *);
	void *convert_arg;

	char buffer[1024]; /* Input buffer */
	size_t bufcnt; /* Accumulated input in buffer */
	char skipcnt; /* Skip this many bytes upon encountering EILSEQ */
	char skipleft; /* How many bytes are currently left to skip */
	char converr; /* Flag - an EILSEQ was encountered */
} ;
685 | ||
/*
** Sets up an already allocated and zeroed iconv module; defined below.
** Returns 0 on success, -1 if the conversion descriptor cannot be opened.
*/
static int init_iconv(struct libmail_u_convert_iconv *h,
		      const char *src_chset,
		      const char *dst_chset,
		      int (*output_func)(const char *, size_t, void *),
		      void *convert_arg);
691 | ||
692 | static libmail_u_convert_handle_t | |
693 | init_notfromimaputf7(const char *src_chset, | |
694 | const char *dst_chset, | |
695 | int (*output_func)(const char *, size_t, void *), | |
696 | void *convert_arg) | |
697 | { | |
698 | ||
699 | ||
700 | struct libmail_u_convert_iconv *h= | |
701 | malloc(sizeof(struct libmail_u_convert_iconv)); | |
702 | ||
703 | if (!h) | |
704 | return NULL; | |
705 | ||
706 | memset(h, 0, sizeof(*h)); | |
707 | ||
708 | if (init_iconv(h, src_chset, dst_chset, output_func, convert_arg)) | |
709 | { | |
710 | free(h); | |
711 | return NULL; | |
712 | } | |
713 | return &h->hdr; | |
714 | } | |
715 | ||
716 | /* Run the stack */ | |
717 | ||
718 | int libmail_u_convert(libmail_u_convert_handle_t h, | |
719 | const char *text, size_t cnt) | |
720 | { | |
721 | return (*h->convert_handler)(h->ptr, text, cnt); | |
722 | } | |
723 | ||
724 | /* Destroy the stack */ | |
725 | ||
726 | int libmail_u_convert_deinit(libmail_u_convert_handle_t h, int *errptr) | |
727 | { | |
728 | return (*h->deinit_handler)(h, errptr); | |
729 | } | |
730 | ||
/* Handlers of the iconv module, defined below */
static int deinit_iconv(void *ptr, int *errptr);
static int convert_iconv(void *ptr,
			 const char *text, size_t cnt);
734 | ||
735 | /* Initialize a single conversion module, in the stack */ | |
736 | ||
737 | static int init_iconv(struct libmail_u_convert_iconv *h, | |
738 | const char *src_chset, | |
739 | const char *dst_chset, | |
740 | int (*output_func)(const char *, size_t, void *), | |
741 | void *convert_arg) | |
742 | { | |
743 | if ((h->h=iconv_open(dst_chset, src_chset)) == (iconv_t)-1) | |
744 | return -1; | |
745 | ||
746 | h->hdr.convert_handler=convert_iconv; | |
747 | h->hdr.deinit_handler=deinit_iconv; | |
748 | h->hdr.ptr=h; | |
749 | ||
750 | h->output_func=output_func; | |
751 | h->convert_arg=convert_arg; | |
752 | ||
753 | /* Heuristically determine how many octets to skip upon an EILSEQ */ | |
754 | ||
755 | h->skipcnt=1; | |
756 | switch (src_chset[0]) { | |
757 | case 'u': | |
758 | case 'U': | |
759 | switch (src_chset[1]) { | |
760 | case 'c': | |
761 | case 'C': | |
762 | switch (src_chset[2]) { | |
763 | case 's': | |
764 | case 'S': | |
765 | if (src_chset[3] == '-') | |
766 | switch (src_chset[4]) { | |
767 | case '4': | |
768 | /* UCS-4 */ | |
769 | h->skipcnt=4; | |
770 | break; | |
771 | case '2': | |
772 | /* UCS-2 */ | |
773 | h->skipcnt=2; | |
774 | break; | |
775 | } | |
776 | } | |
777 | break; | |
778 | case 't': | |
779 | case 'T': | |
780 | switch (src_chset[2]) { | |
781 | case 'f': | |
782 | case 'F': | |
783 | if (src_chset[3] == '-') | |
784 | switch (src_chset[4]) { | |
785 | case '3': | |
786 | /* UTF-32 */ | |
787 | h->skipcnt=4; | |
788 | break; | |
789 | case '1': | |
790 | /* UTF-16 */ | |
791 | h->skipcnt=2; | |
792 | break; | |
793 | } | |
794 | } | |
795 | } | |
796 | } | |
797 | ||
798 | return 0; | |
799 | } | |
800 | ||
/* Helpers of the iconv module that run the buffered input through iconv */
static void convert_flush(struct libmail_u_convert_iconv *);
static void convert_flush_iconv(struct libmail_u_convert_iconv *, const char **,
				size_t *);
804 | ||
805 | /* | |
806 | ** iconv conversion module. Accumulate input in an input buffer. When the | |
807 | ** input buffer is full, invoke convert_flush(). | |
808 | */ | |
809 | ||
810 | static int convert_iconv(void *ptr, | |
811 | const char *text, size_t cnt) | |
812 | { | |
813 | struct libmail_u_convert_iconv *h=(struct libmail_u_convert_iconv *)ptr; | |
814 | ||
815 | while (cnt && h->errflag == 0) | |
816 | { | |
817 | if (h->bufcnt >= sizeof(h->buffer)-1) | |
818 | { | |
819 | convert_flush(h); | |
820 | ||
821 | if (h->errflag) | |
822 | break; | |
823 | } | |
824 | ||
825 | h->buffer[h->bufcnt++]= *text++; | |
826 | --cnt; | |
827 | } | |
828 | ||
829 | return h->errflag; | |
830 | } | |
831 | ||
832 | /* | |
833 | ** Finish an iconv conversion module. Invoke convert_flush() to flush any | |
834 | ** buffered input. Invoke convert_flush_iconv() to return state to the initial | |
835 | ** conversion state. | |
836 | */ | |
837 | ||
838 | static int deinit_iconv(void *ptr, int *errptr) | |
839 | { | |
840 | int rc; | |
841 | int converr; | |
842 | struct libmail_u_convert_iconv *h=(struct libmail_u_convert_iconv *)ptr; | |
843 | libmail_u_convert_handle_t next; | |
844 | ||
845 | if (h->errflag == 0) | |
846 | convert_flush(h); | |
847 | ||
848 | if (h->bufcnt && h->errflag == 0) | |
849 | h->converr=1; | |
850 | ||
851 | if (h->errflag == 0) | |
852 | convert_flush_iconv(h, NULL, NULL); | |
853 | ||
854 | rc=h->errflag; | |
855 | converr=h->converr != 0; | |
856 | iconv_close(h->h); | |
857 | next=h->hdr.next; | |
858 | free(h); | |
859 | if (errptr) | |
860 | *errptr=converr; | |
861 | ||
862 | /* If there's another module in the stack, clean that up */ | |
863 | ||
864 | if (next) | |
865 | { | |
866 | int converrnext; | |
867 | int rcnext=libmail_u_convert_deinit(next, &converrnext); | |
868 | ||
869 | if (converrnext && errptr && *errptr == 0) | |
870 | *errptr=converr; | |
871 | ||
872 | if (rcnext && rc == 0) | |
873 | rc=rcnext; | |
874 | } | |
875 | return rc; | |
876 | } | |
877 | ||
878 | /* | |
879 | ** Invoke convert_flush_iconv() to flush the input buffer. If there's | |
880 | ** unconverted text remaining, reposition it at the beginning of the input | |
881 | ** buffer. | |
882 | */ | |
883 | ||
884 | static void convert_flush(struct libmail_u_convert_iconv *h) | |
885 | { | |
886 | const char *p; | |
887 | size_t n; | |
888 | ||
889 | if (h->bufcnt == 0 || h->errflag) | |
890 | return; | |
891 | ||
892 | p=h->buffer; | |
893 | n=h->bufcnt; | |
894 | ||
895 | convert_flush_iconv(h, &p, &n); | |
896 | ||
897 | if (h->errflag) | |
898 | return; | |
899 | ||
900 | if (h->bufcnt == n) | |
901 | n=0; /* Unexpected error, dunno what to do, punt */ | |
902 | ||
903 | h->bufcnt=0; | |
904 | ||
905 | while (n) | |
906 | { | |
907 | h->buffer[h->bufcnt]= *p; | |
908 | ||
909 | ++h->bufcnt; | |
910 | ++p; | |
911 | --n; | |
912 | } | |
913 | } | |
914 | ||
/*
** Convert text via iconv.
**
** h - the iconv module.
** inbuf, inbytesleft - position and size of the input still to be converted;
** both are updated as input gets converted or skipped. deinit_iconv() passes
** NULL for both, so that iconv() is invoked without input (the shift sequence
** case described further down).
**
** Everything that iconv() produces is handed to the module's output
** function. A nonzero return code from the output function is stored in
** h->errflag, and the conversion stops. h->converr gets set when input had to
** be skipped, or an invalid sequence was encountered.
**
** On return, input that is left over (*inbytesleft is not zero) is an
** incomplete multibyte sequence at the end of the input.
*/

static void convert_flush_iconv(struct libmail_u_convert_iconv *h,
				const char **inbuf, size_t *inbytesleft)
{
	int save_errno;

	while (1)
	{
		char outbuf[1024];
		char *outp;
		size_t outleft;
		size_t n;
		size_t origin=0; /* Input size before this round */

		if (inbytesleft)
		{
			if ((origin=*inbytesleft) == 0)
				return;

			if (inbuf && h->skipleft && origin)
			{
				/*
				** Skipping input, one octet per round, after
				** iconv() failed without consuming anything.
				*/

				--h->skipleft;
				--*inbytesleft;
				++*inbuf;
				continue;
			}

		}

		if (h->errflag)
		{
			/* Quietly eat everything after a previous error */

			if (inbytesleft)
				*inbytesleft=0;

			return;
		}

		outp=outbuf;
		outleft=sizeof(outbuf);

		n=iconv(h->h, (char **)inbuf, inbytesleft, &outp, &outleft);

		/* The output function, below, may change errno */
		save_errno=errno;

		/* Anything produced by iconv() gets pushed down the stack */

		if (outp > outbuf)
		{
			int rc=(*h->output_func)(outbuf, outp-outbuf,
						 h->convert_arg);
			if (rc)
			{
				h->errflag=rc;
				return;
			}
		}

		if (n != (size_t)-1)
		{
			/* iconv(3) reason #2: iconv() succeeded, all done */

			break;
		}

		if (inbytesleft == 0)
		{
			/*
			** An error when generating the shift sequence to
			** return to the initial state. We don't know what to
			** do, now.
			*/

			errno=EINVAL;
			h->errflag= -1;
			return;
		}

		/*
		** convert_flush() gets invoked when the 1024 char input buffer
		** fills, or to convert input that is still buffered when the
		** module gets deinitialized.
		**
		** A return code of EINVAL from iconv() is iconv() encountering
		** an incomplete multibyte sequence.
		**
		** If iconv() failed without consuming any input:
		**
		** - iconv(3) reason #1, EILSEQ, invalid multibyte sequence
		** that starts at the beginning of the string we wish to
		** convert. Discard one character, and try again.
		**
		** - iconv(3) reason #3, EINVAL, incomplete multibyte sequence.
		** If it's possible to have an incomplete 1024 character long
		** multibyte sequence, we're in trouble. Or we've encountered
		** an EINVAL when flushing out the remaining buffered input,
		** when the module gets deinitialized. In either case, it's ok
		** to discard one character at a time, until we either reach
		** the end, or get some other result.
		**
		** - iconv(3) reason #4, E2BIG. If the 1024 character output
		** buffer, above, is insufficient to produce the output from a
		** single converted character, we're in trouble.
		*/

		if (*inbytesleft == origin)
		{
			/* Nothing consumed: skip ahead, flag the error */
			h->skipleft=h->skipcnt;
			h->converr=1;
		}

		/*
		** Stopped at an incomplete multibyte sequence, try again on
		** the next round.
		*/
		else if (save_errno == EINVAL)
			break;

		if (save_errno == EILSEQ)
			h->converr=1; /* Another possibility this can happen */

		/*
		** If we get here because of iconv(3) reason #4, filled out
		** the output buffer, we should continue with the conversion.
		** Otherwise, upon encountering any other error condition,
		** reset the conversion state.
		*/
		if (save_errno != E2BIG)
			iconv(h->h, NULL, NULL, NULL, NULL);
	}
}
1052 | ||
1053 | /*****************************************************************************/ | |
1054 | ||
1055 | /* | |
1056 | ** A wrapper for libmail_u_convert() that collects the converted character | |
1057 | ** text into a buffer. This is done by passing an output function to | |
1058 | ** libmail_u_convert() that saves converted text in a linked-list | |
1059 | ** of buffers. | |
1060 | ** | |
1061 | ** Then, in the deinitialization function, the buffers get concatenated into | |
1062 | ** the final character buffer. | |
1063 | */ | |
1064 | ||
/*
** One piece of converted text, in a singly-linked list of pieces. The text
** is stored in the same allocation, right after this structure.
*/
struct libmail_u_convert_cbuf {
	struct libmail_u_convert_cbuf *next; /* Next piece, or NULL */
	char *fragment; /* The saved text; not null-terminated */
	size_t fragment_size; /* Number of bytes in fragment */
};
1070 | ||
/*
** The module that collects the output of a conversion stack in a list of
** fragments.
*/
struct libmail_u_convert_tocbuf {
	/* Common module header; hdr.next is the conversion stack */
	struct libmail_u_convert_hdr hdr;

	/* Caller-provided locations, as passed to the init function */
	char **cbufptr_ret;
	size_t *cbufsize_ret;

	/* Set to 1 when a fragment could not be saved */
	int errflag;

	/* Total size of all saved fragments */
	size_t tot_size;

	/* Caller's nullterminate flag, as passed to the init function */
	int nullterminate;

	/* Head of the fragment list, and where to link the next fragment */
	struct libmail_u_convert_cbuf *first, **last;
};
1082 | ||
/* Output function and handlers of the collecting module */
static int save_tocbuf(const char *, size_t, void *);
static int convert_tocbuf(void *ptr,
			  const char *text, size_t cnt);
static int deinit_tocbuf(void *ptr, int *errptr);
1087 | ||
1088 | libmail_u_convert_handle_t | |
1089 | libmail_u_convert_tocbuf_init(const char *src_chset, | |
1090 | const char *dst_chset, | |
1091 | char **cbufptr_ret, | |
1092 | size_t *cbufsize_ret, | |
1093 | int nullterminate | |
1094 | ) | |
1095 | { | |
1096 | struct libmail_u_convert_tocbuf *p= | |
1097 | malloc(sizeof(struct libmail_u_convert_tocbuf)); | |
1098 | libmail_u_convert_handle_t h; | |
1099 | ||
1100 | if (!p) | |
1101 | return NULL; | |
1102 | ||
1103 | memset(p, 0, sizeof(*p)); | |
1104 | ||
1105 | h=libmail_u_convert_init(src_chset, dst_chset, save_tocbuf, p); | |
1106 | ||
1107 | if (!h) | |
1108 | { | |
1109 | free(p); | |
1110 | return NULL; | |
1111 | } | |
1112 | ||
1113 | p->cbufptr_ret=cbufptr_ret; | |
1114 | p->cbufsize_ret=cbufsize_ret; | |
1115 | p->last= &p->first; | |
1116 | p->nullterminate=nullterminate; | |
1117 | p->hdr.next=h; | |
1118 | p->hdr.convert_handler=convert_tocbuf; | |
1119 | p->hdr.deinit_handler=deinit_tocbuf; | |
1120 | p->hdr.ptr=p; | |
1121 | return &p->hdr; | |
1122 | } | |
1123 | ||
1124 | /* Capture the output of the conversion stack */ | |
1125 | ||
1126 | static int save_tocbuf(const char *text, size_t cnt, void *ptr) | |
1127 | { | |
1128 | struct libmail_u_convert_tocbuf *p= | |
1129 | (struct libmail_u_convert_tocbuf *)ptr; | |
1130 | struct libmail_u_convert_cbuf *fragment= | |
1131 | malloc(sizeof(struct libmail_u_convert_cbuf)+cnt); | |
1132 | size_t tot_size; | |
1133 | ||
1134 | if (!fragment) | |
1135 | { | |
1136 | p->errflag=1; | |
1137 | return 1; | |
1138 | } | |
1139 | ||
1140 | fragment->next=NULL; | |
1141 | fragment->fragment=(char *)(fragment+1); | |
1142 | if ((fragment->fragment_size=cnt) > 0) | |
1143 | memcpy(fragment->fragment, text, cnt); | |
1144 | ||
1145 | *(p->last)=fragment; | |
1146 | p->last=&fragment->next; | |
1147 | ||
1148 | tot_size=p->tot_size + cnt; /* Keep track of the total size saved */ | |
1149 | ||
1150 | if (tot_size < p->tot_size) /* Overflow? */ | |
1151 | { | |
1152 | errno=E2BIG; | |
1153 | return 1; | |
1154 | } | |
1155 | p->tot_size=tot_size; | |
1156 | return 0; | |
1157 | } | |
1158 | ||
1159 | /* Punt converted text down the stack */ | |
1160 | ||
1161 | static int convert_tocbuf(void *ptr, const char *text, size_t cnt) | |
1162 | { | |
1163 | struct libmail_u_convert_tocbuf *p= | |
1164 | (struct libmail_u_convert_tocbuf *)ptr; | |
1165 | ||
1166 | return libmail_u_convert(p->hdr.next, text, cnt); | |
1167 | } | |
1168 | ||
1169 | /* | |
1170 | ** Destroy the conversion stack. Destroy the downstream, then assemble the | |
1171 | ** final array. | |
1172 | */ | |
1173 | ||
1174 | static int deinit_tocbuf(void *ptr, int *errptr) | |
1175 | { | |
1176 | struct libmail_u_convert_tocbuf *p= | |
1177 | (struct libmail_u_convert_tocbuf *)ptr; | |
1178 | int rc=libmail_u_convert_deinit(p->hdr.next, errptr); | |
1179 | struct libmail_u_convert_cbuf *bufptr; | |
1180 | ||
1181 | if (rc == 0 && p->nullterminate) | |
1182 | { | |
1183 | char zero=0; | |
1184 | ||
1185 | rc=save_tocbuf( &zero, sizeof(zero), p->hdr.ptr); | |
1186 | } | |
1187 | ||
1188 | if (rc == 0) | |
1189 | { | |
1190 | if (((*p->cbufptr_ret)=malloc(p->tot_size ? p->tot_size:1)) != | |
1191 | NULL) | |
1192 | { | |
1193 | size_t i=0; | |
1194 | ||
1195 | for (bufptr=p->first; bufptr; bufptr=bufptr->next) | |
1196 | { | |
1197 | if (bufptr->fragment_size) | |
1198 | memcpy(&(*p->cbufptr_ret)[i], | |
1199 | bufptr->fragment, | |
1200 | bufptr->fragment_size); | |
1201 | i += bufptr->fragment_size; | |
1202 | } | |
1203 | (*p->cbufsize_ret)=i; | |
1204 | } | |
1205 | else | |
1206 | { | |
1207 | rc= -1; | |
1208 | } | |
1209 | } | |
1210 | ||
1211 | for (bufptr=p->first; bufptr; ) | |
1212 | { | |
1213 | struct libmail_u_convert_cbuf *b=bufptr; | |
1214 | ||
1215 | bufptr=bufptr->next; | |
1216 | ||
1217 | free(b); | |
1218 | } | |
1219 | free(p); | |
1220 | ||
1221 | return rc; | |
1222 | } | |
1223 | ||
1224 | libmail_u_convert_handle_t | |
1225 | libmail_u_convert_tocbuf_toutf8_init(const char *src_chset, | |
1226 | char **cbufptr_ret, | |
1227 | size_t *cbufsize_ret, | |
1228 | int nullterminate | |
1229 | ) | |
1230 | { | |
1231 | return libmail_u_convert_tocbuf_init(src_chset, "utf-8", | |
1232 | cbufptr_ret, cbufsize_ret, | |
1233 | nullterminate); | |
1234 | } | |
1235 | ||
1236 | libmail_u_convert_handle_t | |
1237 | libmail_u_convert_tocbuf_fromutf8_init(const char *dst_chset, | |
1238 | char **cbufptr_ret, | |
1239 | size_t *cbufsize_ret, | |
1240 | int nullterminate | |
1241 | ) | |
1242 | { | |
1243 | return libmail_u_convert_tocbuf_init("utf-8", dst_chset, | |
1244 | cbufptr_ret, cbufsize_ret, | |
1245 | nullterminate); | |
1246 | } | |
1247 | ||
1248 | char *libmail_u_convert_toutf8(const char *text, | |
1249 | const char *charset, | |
1250 | int *error) | |
1251 | { | |
1252 | char *cbufptr; | |
1253 | size_t cbufsize; | |
1254 | libmail_u_convert_handle_t h= | |
1255 | libmail_u_convert_tocbuf_toutf8_init(charset, | |
1256 | &cbufptr, | |
1257 | &cbufsize, 1); | |
1258 | ||
1259 | if (!h) | |
1260 | return NULL; | |
1261 | ||
1262 | libmail_u_convert(h, text, strlen(text)); | |
1263 | ||
1264 | if (libmail_u_convert_deinit(h, error) == 0) | |
1265 | return cbufptr; | |
1266 | ||
1267 | return NULL; | |
1268 | } | |
1269 | ||
1270 | char *libmail_u_convert_fromutf8(const char *text, | |
1271 | const char *charset, | |
1272 | int *error) | |
1273 | { | |
1274 | char *cbufptr; | |
1275 | size_t cbufsize; | |
1276 | libmail_u_convert_handle_t h= | |
1277 | libmail_u_convert_tocbuf_fromutf8_init(charset, | |
1278 | &cbufptr, | |
1279 | &cbufsize, 1); | |
1280 | ||
1281 | if (!h) | |
1282 | return NULL; | |
1283 | ||
1284 | libmail_u_convert(h, text, strlen(text)); | |
1285 | ||
1286 | if (libmail_u_convert_deinit(h, error) == 0) | |
1287 | return cbufptr; | |
1288 | ||
1289 | return NULL; | |
1290 | } | |
1291 | ||
1292 | char *libmail_u_convert_tobuf(const char *text, | |
1293 | const char *charset, | |
1294 | const char *dstcharset, | |
1295 | int *error) | |
1296 | { | |
1297 | char *cbufptr; | |
1298 | size_t cbufsize; | |
1299 | libmail_u_convert_handle_t h= | |
1300 | libmail_u_convert_tocbuf_init(charset, | |
1301 | dstcharset, | |
1302 | &cbufptr, | |
1303 | &cbufsize, 1); | |
1304 | ||
1305 | if (!h) | |
1306 | return NULL; | |
1307 | ||
1308 | libmail_u_convert(h, text, strlen(text)); | |
1309 | ||
1310 | if (libmail_u_convert_deinit(h, error) == 0) | |
1311 | return cbufptr; | |
1312 | ||
1313 | return NULL; | |
1314 | } | |
1315 | ||
1316 | /*****************************************************************************/ | |
1317 | ||
1318 | /* | |
1319 | ** Convert text to unicode_chars. Same basic approach as | |
1320 | ** libmail_u_convert_tocbuf_init(). The output character set gets specified | |
1321 | ** as UCS-4, the final output size is divided by 4, and the output buffer gets | |
1322 | ** typed as a unicode_char array. | |
1323 | */ | |
1324 | ||
1325 | struct libmail_u_convert_buf { | |
1326 | struct libmail_u_convert_buf *next; | |
1327 | unicode_char *fragment; | |
1328 | size_t fragment_size; | |
1329 | size_t max_fragment_size; | |
1330 | }; | |
1331 | ||
1332 | struct libmail_u_convert_tou { | |
1333 | struct libmail_u_convert_hdr hdr; | |
1334 | ||
1335 | unicode_char **ucptr_ret; | |
1336 | size_t *ucsize_ret; | |
1337 | int errflag; | |
1338 | size_t tot_size; | |
1339 | int nullterminate; | |
1340 | ||
1341 | struct libmail_u_convert_buf *first, *tail, **last; | |
1342 | }; | |
1343 | ||
/* Handlers of the unicode_char collector, defined further down. */
static int save_unicode(const char *text, size_t cnt, void *ptr);
static int convert_tounicode(void *ptr, const char *text, size_t cnt);
static int deinit_tounicode(void *ptr, int *errptr);
1348 | ||
1349 | libmail_u_convert_handle_t | |
1350 | libmail_u_convert_tou_init(const char *src_chset, | |
1351 | unicode_char **ucptr_ret, | |
1352 | size_t *ucsize_ret, | |
1353 | int nullterminate | |
1354 | ) | |
1355 | { | |
1356 | struct libmail_u_convert_tou *p= | |
1357 | malloc(sizeof(struct libmail_u_convert_tou)); | |
1358 | libmail_u_convert_handle_t h; | |
1359 | ||
1360 | if (!p) | |
1361 | return NULL; | |
1362 | ||
1363 | memset(p, 0, sizeof(*p)); | |
1364 | ||
1365 | h=libmail_u_convert_init(src_chset, libmail_u_ucs4_native, | |
1366 | save_unicode, p); | |
1367 | ||
1368 | if (!h) | |
1369 | { | |
1370 | free(p); | |
1371 | return NULL; | |
1372 | } | |
1373 | ||
1374 | p->ucptr_ret=ucptr_ret; | |
1375 | p->ucsize_ret=ucsize_ret; | |
1376 | p->last= &p->first; | |
1377 | p->nullterminate=nullterminate; | |
1378 | p->hdr.next=h; | |
1379 | p->hdr.convert_handler=convert_tounicode; | |
1380 | p->hdr.deinit_handler=deinit_tounicode; | |
1381 | p->hdr.ptr=p; | |
1382 | return &p->hdr; | |
1383 | } | |
1384 | ||
1385 | libmail_u_convert_handle_t | |
1386 | libmail_u_convert_fromu_init(const char *dst_chset, | |
1387 | char **cbufptr_ret, | |
1388 | size_t *csize_ret, | |
1389 | int nullterminate | |
1390 | ) | |
1391 | { | |
1392 | return libmail_u_convert_tocbuf_init(libmail_u_ucs4_native, | |
1393 | dst_chset, | |
1394 | cbufptr_ret, | |
1395 | csize_ret, | |
1396 | nullterminate); | |
1397 | } | |
1398 | ||
1399 | int libmail_u_convert_uc(libmail_u_convert_handle_t handle, | |
1400 | const unicode_char *text, | |
1401 | size_t cnt) | |
1402 | { | |
1403 | return libmail_u_convert(handle, (const char *)text, | |
1404 | cnt * sizeof(*text)); | |
1405 | } | |
1406 | ||
1407 | /* Capture the output of the conversion stack */ | |
1408 | ||
1409 | static int save_unicode(const char *text, size_t cnt, void *ptr) | |
1410 | { | |
1411 | struct libmail_u_convert_tou *p= | |
1412 | (struct libmail_u_convert_tou *)ptr; | |
1413 | struct libmail_u_convert_buf *fragment; | |
1414 | size_t tot_size; | |
1415 | ||
1416 | cnt /= sizeof(unicode_char); | |
1417 | ||
1418 | tot_size=p->tot_size + cnt*sizeof(unicode_char); | |
1419 | /* Keep track of the total size saved */ | |
1420 | ||
1421 | if (p->tail) | |
1422 | { | |
1423 | size_t n=p->tail->max_fragment_size-p->tail->fragment_size; | |
1424 | ||
1425 | if (n > cnt) | |
1426 | n=cnt; | |
1427 | ||
1428 | if (n) | |
1429 | { | |
1430 | memcpy(p->tail->fragment+p->tail->fragment_size, | |
1431 | text, n*sizeof(unicode_char)); | |
1432 | ||
1433 | cnt -= n; | |
1434 | text += n*sizeof(unicode_char); | |
1435 | p->tail->fragment_size += n; | |
1436 | } | |
1437 | } | |
1438 | ||
1439 | if (cnt > 0) | |
1440 | { | |
1441 | size_t cnt_alloc=cnt; | |
1442 | ||
1443 | if (cnt_alloc < 16) | |
1444 | cnt_alloc=16; | |
1445 | ||
1446 | if ((fragment=malloc(sizeof(struct libmail_u_convert_buf) | |
1447 | +cnt_alloc*sizeof(unicode_char))) | |
1448 | == NULL) | |
1449 | { | |
1450 | p->errflag=1; | |
1451 | return 1; | |
1452 | } | |
1453 | ||
1454 | fragment->next=NULL; | |
1455 | fragment->fragment=(unicode_char *)(fragment+1); | |
1456 | fragment->max_fragment_size=cnt_alloc; | |
1457 | fragment->fragment_size=cnt; | |
1458 | memcpy(fragment->fragment, text, cnt*sizeof(unicode_char)); | |
1459 | ||
1460 | *(p->last)=fragment; | |
1461 | p->last=&fragment->next; | |
1462 | p->tail=fragment; | |
1463 | } | |
1464 | ||
1465 | if (tot_size < p->tot_size) /* Overflow? */ | |
1466 | { | |
1467 | errno=E2BIG; | |
1468 | return 1; | |
1469 | } | |
1470 | p->tot_size=tot_size; | |
1471 | return 0; | |
1472 | } | |
1473 | ||
1474 | /* Punt converted text down the stack */ | |
1475 | ||
1476 | static int convert_tounicode(void *ptr, | |
1477 | const char *text, size_t cnt) | |
1478 | { | |
1479 | struct libmail_u_convert_tou *p= | |
1480 | (struct libmail_u_convert_tou *)ptr; | |
1481 | ||
1482 | return libmail_u_convert(p->hdr.next, text, cnt); | |
1483 | } | |
1484 | ||
1485 | /* | |
1486 | ** Destroy the conversion stack. Destroy the downstream, then assemble the | |
1487 | ** final array. | |
1488 | */ | |
1489 | ||
1490 | static int deinit_tounicode(void *ptr, int *errptr) | |
1491 | { | |
1492 | struct libmail_u_convert_tou *p= | |
1493 | (struct libmail_u_convert_tou *)ptr; | |
1494 | int rc=libmail_u_convert_deinit(p->hdr.next, errptr); | |
1495 | struct libmail_u_convert_buf *bufptr; | |
1496 | ||
1497 | if (rc == 0 && p->nullterminate) | |
1498 | { | |
1499 | unicode_char zero=0; | |
1500 | ||
1501 | rc=save_unicode( (const char *)&zero, sizeof(zero), | |
1502 | p->hdr.ptr); | |
1503 | } | |
1504 | ||
1505 | if (rc == 0) | |
1506 | { | |
1507 | if (((*p->ucptr_ret)=malloc(p->tot_size ? p->tot_size:1)) != | |
1508 | NULL) | |
1509 | { | |
1510 | size_t i=0; | |
1511 | ||
1512 | for (bufptr=p->first; bufptr; bufptr=bufptr->next) | |
1513 | { | |
1514 | if (bufptr->fragment_size) | |
1515 | memcpy(&(*p->ucptr_ret)[i], | |
1516 | bufptr->fragment, | |
1517 | bufptr->fragment_size | |
1518 | *sizeof(*bufptr->fragment)); | |
1519 | i += bufptr->fragment_size; | |
1520 | } | |
1521 | (*p->ucsize_ret)=i; | |
1522 | } | |
1523 | else | |
1524 | { | |
1525 | rc= -1; | |
1526 | } | |
1527 | } | |
1528 | ||
1529 | for (bufptr=p->first; bufptr; ) | |
1530 | { | |
1531 | struct libmail_u_convert_buf *b=bufptr; | |
1532 | ||
1533 | bufptr=bufptr->next; | |
1534 | ||
1535 | free(b); | |
1536 | } | |
1537 | free(p); | |
1538 | ||
1539 | return rc; | |
1540 | } | |
1541 | ||
1542 | int libmail_u_convert_tou_tobuf(const char *text, | |
1543 | size_t text_l, | |
1544 | const char *charset, | |
1545 | unicode_char **uc, | |
1546 | size_t *ucsize, | |
1547 | int *err) | |
1548 | { | |
1549 | libmail_u_convert_handle_t h; | |
1550 | ||
1551 | if ((h=libmail_u_convert_tou_init(charset, uc, ucsize, 0)) == NULL) | |
1552 | return -1; | |
1553 | ||
1554 | if (libmail_u_convert(h, text, text_l) < 0) | |
1555 | { | |
1556 | libmail_u_convert_deinit(h, NULL); | |
1557 | return -1; | |
1558 | } | |
1559 | ||
1560 | if (libmail_u_convert_deinit(h, err)) | |
1561 | return -1; | |
1562 | ||
1563 | return 0; | |
1564 | } | |
1565 | ||
1566 | int libmail_u_convert_fromu_tobuf(const unicode_char *utext, | |
1567 | size_t utext_l, | |
1568 | const char *charset, | |
1569 | char **c, | |
1570 | size_t *csize, | |
1571 | int *err) | |
1572 | { | |
1573 | libmail_u_convert_handle_t h; | |
1574 | ||
1575 | if (utext_l == (size_t)-1) | |
1576 | { | |
1577 | for (utext_l=0; utext[utext_l]; ++utext_l) | |
1578 | ; | |
1579 | } | |
1580 | ||
1581 | if ((h=libmail_u_convert_fromu_init(charset, c, csize, 1)) == NULL) | |
1582 | return -1; | |
1583 | ||
1584 | if (libmail_u_convert_uc(h, utext, utext_l) < 0) | |
1585 | { | |
1586 | libmail_u_convert_deinit(h, NULL); | |
1587 | return -1; | |
1588 | } | |
1589 | ||
1590 | if (libmail_u_convert_deinit(h, err)) | |
1591 | return -1; | |
1592 | ||
1593 | return 0; | |
1594 | } | |
1595 | ||
1596 | char *libmail_u_convert_tocase(const char *str, | |
1597 | const char *charset, | |
1598 | unicode_char (*first_char_func)(unicode_char), | |
1599 | unicode_char (*char_func)(unicode_char)) | |
1600 | { | |
1601 | unicode_char *uc; | |
1602 | size_t ucsize; | |
1603 | size_t i; | |
1604 | int err; | |
1605 | char *c; | |
1606 | size_t csize; | |
1607 | ||
1608 | if (libmail_u_convert_tou_tobuf(str, strlen(str), | |
1609 | charset, &uc, &ucsize, &err)) | |
1610 | return NULL; | |
1611 | ||
1612 | if (err) | |
1613 | { | |
1614 | free(uc); | |
1615 | return NULL; | |
1616 | } | |
1617 | ||
1618 | for (i=0; i<ucsize; ++i) | |
1619 | { | |
1620 | uc[i]=(*first_char_func)(uc[i]); | |
1621 | ||
1622 | if (char_func) | |
1623 | first_char_func=char_func; | |
1624 | } | |
1625 | ||
1626 | if (libmail_u_convert_fromu_tobuf(uc, ucsize, | |
1627 | charset, | |
1628 | &c, &csize, &err)) | |
1629 | { | |
1630 | free(uc); | |
1631 | return NULL; | |
1632 | } | |
1633 | ||
1634 | free(uc); | |
1635 | ||
1636 | if (err) | |
1637 | { | |
1638 | free(c); | |
1639 | return NULL; | |
1640 | } | |
1641 | ||
1642 | return c; | |
1643 | } |