2 ** Copyright 2000-2011 Double Precision, Inc.
3 ** See COPYING for distribution information.
7 #include "unicode_config.h"
9 #include "../rfc822/rfc822hdr.h"
19 #if HAVE_LOCALCHARSET_H
20 #include <localcharset.h>
21 #elif HAVE_LIBCHARSET_H
22 #include <libcharset.h>
23 #endif /* HAVE_LOCALCHARSET_H */
24 #elif HAVE_LANGINFO_CODESET
26 #endif /* USE_LIBCHARSET */
27 #endif /* HAVE_SETLOCALE */
28 #endif /* HAVE_LOCALE_H */
30 static char default_chset_buf
[32];
32 static void init_default_chset()
34 const char *old_locale
=NULL
;
35 const char *chset
=NULL
;
36 char *locale_cpy
=NULL
;
37 char buf
[sizeof(default_chset_buf
)];
39 chset
=getenv("MM_CHARSET");
42 chset
=getenv("CHARSET");
48 old_locale
=setlocale(LC_ALL
, "");
49 locale_cpy
=old_locale
? strdup(old_locale
):NULL
;
51 chset
= locale_charset();
52 #elif HAVE_LANGINFO_CODESET
53 chset
=nl_langinfo(CODESET
);
59 memset(buf
, 0, sizeof(buf
));
63 /* Map GNU libc iconv oddity to us-ascii */
65 (strcmp(chset
, "ANSI_X3.4") == 0 ||
66 strncmp(chset
, "ANSI_X3.4-", 10) == 0))
71 strncat(buf
, chset
, sizeof(buf
)-1);
75 const char *p
=getenv("LANG");
77 /* LANG is xx_yy.CHARSET@modifier */
79 if (p
&& *p
&& (p
=strchr(p
, '.')) != NULL
)
81 const char *q
=strchr(++p
, '@');
86 if (q
-p
>= sizeof(buf
)-1)
93 strcpy(buf
, "US-ASCII");
96 memcpy(default_chset_buf
, buf
, sizeof(buf
));
102 setlocale(LC_ALL
, locale_cpy
);
110 const char *unicode_default_chset()
112 if (default_chset_buf
[0] == 0)
113 init_default_chset();
115 return default_chset_buf
;
119 /*****************************************************************************/
121 const char libmail_u_ucs4_native
[]=
129 const char libmail_u_ucs2_native
[]=
137 /* A stack of conversion modules */
139 struct libmail_u_convert_hdr
{
141 int (*convert_handler
)(void *ptr
,
142 const char *text
, size_t cnt
);
143 int (*deinit_handler
)(void *ptr
, int *errptr
);
146 struct libmail_u_convert_hdr
*next
;
149 /* Decoding table for modified UTF7-encoding as used in imap */
151 static const char mbase64_lookup
[]={
152 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
153 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
154 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,63,-1,-1,-1,
155 52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1,
156 -1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
157 15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1,
158 -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
159 41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1,
160 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
161 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
162 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
163 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
164 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
165 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
166 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
167 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1};
169 static const char mbase64
[]=
170 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
173 ** Conversion wrapper for converting to modified-utf7 IMAP encoding.
175 ** This is done by converting to UCS2, then stacking on a module that
176 ** takes that and converts UCS2 to modified-UTF7.
178 ** init_nottoimaputf7() returns an opaque stack for converting to ucs2.
181 static libmail_u_convert_handle_t
182 init_nottoimaputf7(const char *src_chset
,
183 const char *dst_chset
,
184 int (*output_func
)(const char *, size_t, void *),
188 ** The to modified UTF7 module
191 struct libmail_u_convert_toimaputf7
{
193 struct libmail_u_convert_hdr hdr
;
195 /* Accumulated output buffer */
197 char utf7encodebuf
[1024];
198 size_t utf7encodebuf_cnt
;
200 /* Accumulated bits for base64 encoding */
203 /* How many bits in utf7bits */
204 uint16_t utf7bitcount
;
206 /* Flag: in base64mode */
211 /* Any extra characters that should be munged */
215 /* Remembered output function */
217 int (*output_func
)(const char *, size_t, void *);
219 /* Remembered arg to the output function */
223 /* Macro - flush the output buffer */
224 #define toimaputf7_encode_flush(p) do { \
227 rc=(*(p)->output_func)((p)->utf7encodebuf, \
228 (p)->utf7encodebuf_cnt, \
231 return ((p)->errflag=(rc)); \
233 (p)->utf7encodebuf_cnt=0; \
236 static int toimaputf7_encode_flushfinal(struct libmail_u_convert_toimaputf7
*p
)
238 if (p
->utf7encodebuf_cnt
> 0)
239 toimaputf7_encode_flush(p
);
243 /* Macro - add one char to the output buffer */
245 #define toimaputf7_encode_add(p,c) do { \
246 if ((p)->utf7encodebuf_cnt >= sizeof((p)->utf7encodebuf)) \
247 toimaputf7_encode_flush((p)); \
249 (p)->utf7encodebuf[(p)->utf7encodebuf_cnt++]=(c); \
252 static int deinit_toimaputf7(void *ptr
, int *errptr
);
254 static int do_convert_toutf7(const char *text
, size_t cnt
, void *arg
);
255 static int convert_utf7_handler(void *ptr
, const char *text
, size_t cnt
);
258 ** Create a conversion module stack
261 libmail_u_convert_handle_t
262 libmail_u_convert_init(const char *src_chset
,
263 const char *dst_chset
,
264 int (*output_func
)(const char *, size_t, void *),
267 struct libmail_u_convert_toimaputf7
*toutf7
;
268 libmail_u_convert_handle_t h
;
269 const char *smapmunge
;
270 size_t l
=strlen(unicode_x_imap_modutf7
);
272 if (strncmp(dst_chset
, unicode_x_imap_modutf7
, l
) == 0 &&
273 (dst_chset
[l
] == 0 || dst_chset
[l
] == ' '))
275 smapmunge
=dst_chset
+ l
;
281 return init_nottoimaputf7(src_chset
, dst_chset
,
285 toutf7
=malloc(sizeof(struct libmail_u_convert_toimaputf7
));
290 memset(toutf7
, 0, sizeof(*toutf7
));
292 h
=init_nottoimaputf7(src_chset
, libmail_u_ucs2_native
,
293 do_convert_toutf7
, toutf7
);
300 toutf7
->output_func
=output_func
;
301 toutf7
->convert_arg
=convert_arg
;
303 strncat(toutf7
->smapmunge
, smapmunge
, sizeof(toutf7
->smapmunge
)-1);
305 toutf7
->hdr
.convert_handler
=convert_utf7_handler
;
306 toutf7
->hdr
.deinit_handler
=deinit_toimaputf7
;
307 toutf7
->hdr
.ptr
=toutf7
;
312 /* Passthrough to the wrapped stack */
314 static int convert_utf7_handler(void *ptr
, const char *text
, size_t cnt
)
316 struct libmail_u_convert_toimaputf7
*toutf7
=
317 (struct libmail_u_convert_toimaputf7
*)ptr
;
319 return (*toutf7
->hdr
.next
->convert_handler
)(toutf7
->hdr
.next
->ptr
,
323 static int utf7off(struct libmail_u_convert_toimaputf7
*toutf7
)
325 if (!toutf7
->utfmode
)
329 if (toutf7
->utf7bitcount
> 0)
330 toimaputf7_encode_add(toutf7
,
331 mbase64
[(toutf7
->utf7bits
332 << (6-toutf7
->utf7bitcount
))
334 toimaputf7_encode_add(toutf7
, '-');
339 static int do_convert_toutf7(const char *text
, size_t cnt
, void *arg
)
341 struct libmail_u_convert_toimaputf7
*toutf7
=
342 (struct libmail_u_convert_toimaputf7
*)arg
;
344 /* We better be getting UCS-2 here! */
346 const uint16_t *utext
=(const uint16_t *)text
;
352 return toutf7
->errflag
;
354 if (*utext
>= 0x20 && *utext
<= 0x7F
355 && strchr( toutf7
->smapmunge
, (char)*utext
) == NULL
)
358 && (!toutf7->smapmunge || (*utext != '.' && *utext != '/' &&
359 *utext != '~' && *utext != ':')))
363 return toutf7
->errflag
;
365 toimaputf7_encode_add(toutf7
, *utext
);
368 toimaputf7_encode_add(toutf7
, '-');
375 if (!toutf7
->utfmode
)
378 toutf7
->utf7bitcount
=0;
379 toimaputf7_encode_add(toutf7
, '&');
383 toutf7
->utf7bits
= (toutf7
->utf7bits
<< 16) |
384 (((uint32_t)*utext
) & 0xFFFF);
385 toutf7
->utf7bitcount
+= 16;
390 /* If there's at least 6 bits, output base64-encoded char */
392 while (toutf7
->utf7bitcount
>= 6)
398 return toutf7
->errflag
;
401 n
=toutf7
->utf7bitcount
-6;
402 toutf7
->utf7bitcount
-= 6;
407 toimaputf7_encode_add(toutf7
, mbase64
[v
& 63]);
414 static int deinit_toimaputf7(void *ptr
, int *errptr
)
418 struct libmail_u_convert_toimaputf7
*toutf7
=
419 (struct libmail_u_convert_toimaputf7
*)ptr
;
421 /* Flush out the downstream stack */
422 rc
=(*toutf7
->hdr
.next
->deinit_handler
)(toutf7
->hdr
.next
->ptr
, errptr
);
424 /* Make sure we're out of modified base64 */
429 if (rc
== 0 && toutf7
->utf7encodebuf_cnt
> 0)
430 rc
=toimaputf7_encode_flushfinal(toutf7
);
439 ** Convert from modified-utf7 IMAP encoding.
441 ** This module converts it to UCS-2, then this is attached to a stack that
442 ** converts UCS-2 to the requested charset.
445 static libmail_u_convert_handle_t
446 init_notfromimaputf7(const char *src_chset
,
447 const char *dst_chset
,
448 int (*output_func
)(const char *, size_t, void *),
451 struct libmail_u_convert_fromimaputf7
{
453 struct libmail_u_convert_hdr hdr
;
455 /* Accumulated UCS-2 stream */
456 uint16_t convbuf
[512];
459 /* Accumulated base64 bits */
462 /* How many bits extracted from a base64 stream */
466 /* Flag: seen the & */
469 /* Flag: seen the &, and the next char wasn't - */
476 /* Flush the accumulated UCS-2 stream */
478 #define convert_fromutf7_flush(p) do { \
479 (p)->errflag=(*(p)->hdr.next->convert_handler) \
480 ((p)->hdr.next->ptr, \
481 (const char *)(p)->convbuf, \
483 sizeof((p)->convbuf[0])); \
484 (p)->convbuf_cnt=0; \
487 /* Accumulated a UCS-2 char */
489 #define convert_fromutf7_add(p,c) do { \
490 if ((p)->convbuf_cnt >= \
491 sizeof((p)->convbuf)/sizeof((p)->convbuf[0])) \
492 convert_fromutf7_flush((p)); \
493 (p)->convbuf[(p)->convbuf_cnt++]=(c); \
497 static int convert_fromutf7(void *ptr
,
498 const char *text
, size_t cnt
);
499 static int deinit_fromutf7(void *ptr
, int *errptr
);
501 static libmail_u_convert_handle_t
502 init_nottoimaputf7(const char *src_chset
,
503 const char *dst_chset
,
504 int (*output_func
)(const char *, size_t, void *),
507 struct libmail_u_convert_fromimaputf7
*fromutf7
;
508 libmail_u_convert_handle_t h
;
509 size_t l
=strlen(unicode_x_imap_modutf7
);
511 if (strncmp(src_chset
, unicode_x_imap_modutf7
, l
) == 0 &&
512 (src_chset
[l
] == 0 || src_chset
[l
] == ' '))
515 return init_notfromimaputf7(src_chset
, dst_chset
,
519 fromutf7
=(struct libmail_u_convert_fromimaputf7
*)
520 malloc(sizeof(struct libmail_u_convert_fromimaputf7
));
525 memset(fromutf7
, 0, sizeof(*fromutf7
));
527 /* Create a stack for converting UCS-2 to the dest charset */
529 h
=init_notfromimaputf7(libmail_u_ucs2_native
, dst_chset
,
530 output_func
, convert_arg
);
538 fromutf7
->hdr
.next
=h
;
539 fromutf7
->hdr
.convert_handler
=convert_fromutf7
;
540 fromutf7
->hdr
.deinit_handler
=deinit_fromutf7
;
541 fromutf7
->hdr
.ptr
=fromutf7
;
542 return &fromutf7
->hdr
;
545 static int convert_fromutf7(void *ptr
,
546 const char *text
, size_t cnt
)
548 struct libmail_u_convert_fromimaputf7
*fromutf7
=
549 (struct libmail_u_convert_fromimaputf7
*)ptr
;
554 if (fromutf7
->errflag
)
555 return fromutf7
->errflag
;
557 if (!fromutf7
->seenamp
&& *text
== '&')
567 if (fromutf7
->seenamp
)
571 convert_fromutf7_add(fromutf7
, '&');
581 if (!fromutf7
->inmod
)
583 /* Not in the base64 encoded stream */
585 convert_fromutf7_add(fromutf7
,
586 ((uint16_t)*text
) & 0xFFFF);
594 /* End of the base64 encoded stream */
601 /* Got 6 more bits */
603 bits
=mbase64_lookup
[(unsigned char)*text
];
611 return fromutf7
->errflag
=-1;
614 fromutf7
->modbits
= (fromutf7
->modbits
<< 6) | bits
;
615 fromutf7
->modcnt
+= 6;
617 if (fromutf7
->modcnt
>= 16)
619 /* Got a UCS-2 char */
621 int shiftcnt
=fromutf7
->modcnt
- 16;
622 uint32_t v
=fromutf7
->modbits
;
627 fromutf7
->modcnt
-= 16;
629 convert_fromutf7_add(fromutf7
, v
);
635 static int deinit_fromutf7(void *ptr
, int *errptr
)
637 struct libmail_u_convert_fromimaputf7
*fromutf7
=
638 (struct libmail_u_convert_fromimaputf7
*)ptr
;
641 if (fromutf7
->seenamp
|| fromutf7
->inmod
)
643 if (fromutf7
->errflag
== 0)
645 fromutf7
->errflag
= -1;
650 if (fromutf7
->convbuf_cnt
)
651 convert_fromutf7_flush(fromutf7
);
653 rc
=fromutf7
->hdr
.next
->deinit_handler(fromutf7
->hdr
.next
->ptr
, errptr
);
655 if (fromutf7
->errflag
&& rc
== 0)
656 rc
=fromutf7
->errflag
;
658 if (errptr
&& fromutf7
->converr
)
667 /* A real conversion module, via iconv */
669 struct libmail_u_convert_iconv
{
671 struct libmail_u_convert_hdr hdr
;
674 int errflag
; /* Accumulated errors */
676 int (*output_func
)(const char *, size_t, void *);
679 char buffer
[1024]; /* Input buffer */
680 size_t bufcnt
; /* Accumulated input in buffer */
681 char skipcnt
; /* Skip this many bytes upon encountering EILSEQ */
682 char skipleft
; /* How many bytes are currently left to skip */
683 char converr
; /* Flag - an EILSEQ was encountered */
686 static int init_iconv(struct libmail_u_convert_iconv
*h
,
687 const char *src_chset
,
688 const char *dst_chset
,
689 int (*output_func
)(const char *, size_t, void *),
692 static libmail_u_convert_handle_t
693 init_notfromimaputf7(const char *src_chset
,
694 const char *dst_chset
,
695 int (*output_func
)(const char *, size_t, void *),
700 struct libmail_u_convert_iconv
*h
=
701 malloc(sizeof(struct libmail_u_convert_iconv
));
706 memset(h
, 0, sizeof(*h
));
708 if (init_iconv(h
, src_chset
, dst_chset
, output_func
, convert_arg
))
718 int libmail_u_convert(libmail_u_convert_handle_t h
,
719 const char *text
, size_t cnt
)
721 return (*h
->convert_handler
)(h
->ptr
, text
, cnt
);
724 /* Destroy the stack */
726 int libmail_u_convert_deinit(libmail_u_convert_handle_t h
, int *errptr
)
728 return (*h
->deinit_handler
)(h
, errptr
);
731 static int deinit_iconv(void *ptr
, int *errptr
);
732 static int convert_iconv(void *ptr
,
733 const char *text
, size_t cnt
);
735 /* Initialize a single conversion module, in the stack */
737 static int init_iconv(struct libmail_u_convert_iconv
*h
,
738 const char *src_chset
,
739 const char *dst_chset
,
740 int (*output_func
)(const char *, size_t, void *),
743 if ((h
->h
=iconv_open(dst_chset
, src_chset
)) == (iconv_t
)-1)
746 h
->hdr
.convert_handler
=convert_iconv
;
747 h
->hdr
.deinit_handler
=deinit_iconv
;
750 h
->output_func
=output_func
;
751 h
->convert_arg
=convert_arg
;
753 /* Heuristically determine how many octets to skip upon an EILSEQ */
756 switch (src_chset
[0]) {
759 switch (src_chset
[1]) {
762 switch (src_chset
[2]) {
765 if (src_chset
[3] == '-')
766 switch (src_chset
[4]) {
780 switch (src_chset
[2]) {
783 if (src_chset
[3] == '-')
784 switch (src_chset
[4]) {
801 static void convert_flush(struct libmail_u_convert_iconv
*);
802 static void convert_flush_iconv(struct libmail_u_convert_iconv
*, const char **,
806 ** iconv conversion module. Accumulate input in an input buffer. When the
807 ** input buffer is full, invoke convert_flush().
810 static int convert_iconv(void *ptr
,
811 const char *text
, size_t cnt
)
813 struct libmail_u_convert_iconv
*h
=(struct libmail_u_convert_iconv
*)ptr
;
815 while (cnt
&& h
->errflag
== 0)
817 if (h
->bufcnt
>= sizeof(h
->buffer
)-1)
825 h
->buffer
[h
->bufcnt
++]= *text
++;
833 ** Finish an iconv conversion module. Invoke convert_flush() to flush any
834 ** buffered input. Invoke convert_flush_iconv() to return state to the initial
838 static int deinit_iconv(void *ptr
, int *errptr
)
842 struct libmail_u_convert_iconv
*h
=(struct libmail_u_convert_iconv
*)ptr
;
843 libmail_u_convert_handle_t next
;
848 if (h
->bufcnt
&& h
->errflag
== 0)
852 convert_flush_iconv(h
, NULL
, NULL
);
855 converr
=h
->converr
!= 0;
862 /* If there's another module in the stack, clean that up */
867 int rcnext
=libmail_u_convert_deinit(next
, &converrnext
);
869 if (converrnext
&& errptr
&& *errptr
== 0)
872 if (rcnext
&& rc
== 0)
879 ** Invoke convert_flush_iconv() to flush the input buffer. If there's
880 ** unconverted text remaining, reposition it at the beginning of the input
884 static void convert_flush(struct libmail_u_convert_iconv
*h
)
889 if (h
->bufcnt
== 0 || h
->errflag
)
895 convert_flush_iconv(h
, &p
, &n
);
901 n
=0; /* Unexpected error, dunno what to do, punt */
907 h
->buffer
[h
->bufcnt
]= *p
;
916 ** Convert text via iconv.
919 static void convert_flush_iconv(struct libmail_u_convert_iconv
*h
,
920 const char **inbuf
, size_t *inbytesleft
)
934 if ((origin
=*inbytesleft
) == 0)
937 if (inbuf
&& h
->skipleft
&& origin
)
939 /* Skipping after an EILSEQ */
951 /* Quietly eat everything after a previous error */
960 outleft
=sizeof(outbuf
);
962 n
=iconv(h
->h
, (char **)inbuf
, inbytesleft
, &outp
, &outleft
);
966 /* Anything produced by iconv() gets pushed down the stack */
970 int rc
=(*h
->output_func
)(outbuf
, outp
-outbuf
,
981 /* iconv(3) reason #2 */
986 if (inbytesleft
== 0)
989 ** An error when generating the shift sequence to
990 ** return to the initial state. We don't know what to
1000 ** convert_flush() gets invoked when the 1024 char input buffer
1001 ** fills or to convert input that has been buffered when
1002 ** convert_chset_end() gets invoked.
1004 ** A return code of EINVAL from iconv() is iconv() encountering
1005 ** an incomplete multibyte sequence.
1007 ** If iconv() failed without consuming any input:
1009 ** - iconv(3) reason #1, EILSEQ, invalid multibyte sequence
1010 ** that starts at the beginning of the string we wish to
1011 ** convert. Discard one character, and try again.
1013 ** - iconv(3) reason #3, EINVAL, incomplete multibyte sequence.
1014 ** If it's possible to have an incomplete 1024 character long
1015 ** multibyte sequence, we're in trouble. Or we've encountered
1016 ** an EINVAL when flushing out the remaining buffered input,
1017 ** in convert_chset_end(). In either case, it's ok to discard
1018 ** one character at a time, until we either reach the end,
1019 ** or get some other result.
1021 ** - iconv(3) reason #4, E2BIG. If the 1024 character output
1022 ** buffer, above, is insufficient to produce the output from a
1023 ** single converted character, we're in trouble.
1026 if (*inbytesleft
== origin
)
1028 h
->skipleft
=h
->skipcnt
;
1033 ** Stopped at an incomplete multibyte sequence, try again on
1036 else if (save_errno
== EINVAL
)
1039 if (save_errno
== EILSEQ
)
1040 h
->converr
=1; /* Another possibility this can happen */
1043 ** If we get here because of iconv(3) reason #4, filled out
1044 ** the output buffer, we should continue with the conversion.
1045 ** Otherwise, upon encountering any other error condition,
1046 ** reset the conversion state.
1048 if (save_errno
!= E2BIG
)
1049 iconv(h
->h
, NULL
, NULL
, NULL
, NULL
);
1053 /*****************************************************************************/
1056 ** A wrapper for libmail_u_convert() that collects the converted character
1057 ** text into a buffer. This is done by passing an output function to
1058 ** libmail_u_convert() that saves converted text in a linked-list
1061 ** Then, in the deinitialization function, the buffers get concatenated into
1062 ** the final character buffer.
1065 struct libmail_u_convert_cbuf
{
1066 struct libmail_u_convert_cbuf
*next
;
1068 size_t fragment_size
;
1071 struct libmail_u_convert_tocbuf
{
1072 struct libmail_u_convert_hdr hdr
;
1075 size_t *cbufsize_ret
;
1080 struct libmail_u_convert_cbuf
*first
, **last
;
1083 static int save_tocbuf(const char *, size_t, void *);
1084 static int convert_tocbuf(void *ptr
,
1085 const char *text
, size_t cnt
);
1086 static int deinit_tocbuf(void *ptr
, int *errptr
);
1088 libmail_u_convert_handle_t
1089 libmail_u_convert_tocbuf_init(const char *src_chset
,
1090 const char *dst_chset
,
1092 size_t *cbufsize_ret
,
1096 struct libmail_u_convert_tocbuf
*p
=
1097 malloc(sizeof(struct libmail_u_convert_tocbuf
));
1098 libmail_u_convert_handle_t h
;
1103 memset(p
, 0, sizeof(*p
));
1105 h
=libmail_u_convert_init(src_chset
, dst_chset
, save_tocbuf
, p
);
1113 p
->cbufptr_ret
=cbufptr_ret
;
1114 p
->cbufsize_ret
=cbufsize_ret
;
1116 p
->nullterminate
=nullterminate
;
1118 p
->hdr
.convert_handler
=convert_tocbuf
;
1119 p
->hdr
.deinit_handler
=deinit_tocbuf
;
1124 /* Capture the output of the conversion stack */
1126 static int save_tocbuf(const char *text
, size_t cnt
, void *ptr
)
1128 struct libmail_u_convert_tocbuf
*p
=
1129 (struct libmail_u_convert_tocbuf
*)ptr
;
1130 struct libmail_u_convert_cbuf
*fragment
=
1131 malloc(sizeof(struct libmail_u_convert_cbuf
)+cnt
);
1140 fragment
->next
=NULL
;
1141 fragment
->fragment
=(char *)(fragment
+1);
1142 if ((fragment
->fragment_size
=cnt
) > 0)
1143 memcpy(fragment
->fragment
, text
, cnt
);
1145 *(p
->last
)=fragment
;
1146 p
->last
=&fragment
->next
;
1148 tot_size
=p
->tot_size
+ cnt
; /* Keep track of the total size saved */
1150 if (tot_size
< p
->tot_size
) /* Overflow? */
1155 p
->tot_size
=tot_size
;
1159 /* Punt converted text down the stack */
1161 static int convert_tocbuf(void *ptr
, const char *text
, size_t cnt
)
1163 struct libmail_u_convert_tocbuf
*p
=
1164 (struct libmail_u_convert_tocbuf
*)ptr
;
1166 return libmail_u_convert(p
->hdr
.next
, text
, cnt
);
1170 ** Destroy the conversion stack. Destroy the downstream, then assemble the
1174 static int deinit_tocbuf(void *ptr
, int *errptr
)
1176 struct libmail_u_convert_tocbuf
*p
=
1177 (struct libmail_u_convert_tocbuf
*)ptr
;
1178 int rc
=libmail_u_convert_deinit(p
->hdr
.next
, errptr
);
1179 struct libmail_u_convert_cbuf
*bufptr
;
1181 if (rc
== 0 && p
->nullterminate
)
1185 rc
=save_tocbuf( &zero
, sizeof(zero
), p
->hdr
.ptr
);
1190 if (((*p
->cbufptr_ret
)=malloc(p
->tot_size
? p
->tot_size
:1)) !=
1195 for (bufptr
=p
->first
; bufptr
; bufptr
=bufptr
->next
)
1197 if (bufptr
->fragment_size
)
1198 memcpy(&(*p
->cbufptr_ret
)[i
],
1200 bufptr
->fragment_size
);
1201 i
+= bufptr
->fragment_size
;
1203 (*p
->cbufsize_ret
)=i
;
1211 for (bufptr
=p
->first
; bufptr
; )
1213 struct libmail_u_convert_cbuf
*b
=bufptr
;
1215 bufptr
=bufptr
->next
;
1224 libmail_u_convert_handle_t
1225 libmail_u_convert_tocbuf_toutf8_init(const char *src_chset
,
1227 size_t *cbufsize_ret
,
1231 return libmail_u_convert_tocbuf_init(src_chset
, "utf-8",
1232 cbufptr_ret
, cbufsize_ret
,
1236 libmail_u_convert_handle_t
1237 libmail_u_convert_tocbuf_fromutf8_init(const char *dst_chset
,
1239 size_t *cbufsize_ret
,
1243 return libmail_u_convert_tocbuf_init("utf-8", dst_chset
,
1244 cbufptr_ret
, cbufsize_ret
,
1248 char *libmail_u_convert_toutf8(const char *text
,
1249 const char *charset
,
1254 libmail_u_convert_handle_t h
=
1255 libmail_u_convert_tocbuf_toutf8_init(charset
,
1262 libmail_u_convert(h
, text
, strlen(text
));
1264 if (libmail_u_convert_deinit(h
, error
) == 0)
1270 char *libmail_u_convert_fromutf8(const char *text
,
1271 const char *charset
,
1276 libmail_u_convert_handle_t h
=
1277 libmail_u_convert_tocbuf_fromutf8_init(charset
,
1284 libmail_u_convert(h
, text
, strlen(text
));
1286 if (libmail_u_convert_deinit(h
, error
) == 0)
1292 char *libmail_u_convert_tobuf(const char *text
,
1293 const char *charset
,
1294 const char *dstcharset
,
1299 libmail_u_convert_handle_t h
=
1300 libmail_u_convert_tocbuf_init(charset
,
1308 libmail_u_convert(h
, text
, strlen(text
));
1310 if (libmail_u_convert_deinit(h
, error
) == 0)
1316 /*****************************************************************************/
1319 ** Convert text to unicode_chars. Same basic approach as
1320 ** libmail_u_convert_tocbuf_init(). The output character set gets specified
1321 ** as UCS-4, the final output size is divided by 4, and the output buffer gets
1322 ** typed as a unicode_char array.
1325 struct libmail_u_convert_buf
{
1326 struct libmail_u_convert_buf
*next
;
1327 unicode_char
*fragment
;
1328 size_t fragment_size
;
1329 size_t max_fragment_size
;
1332 struct libmail_u_convert_tou
{
1333 struct libmail_u_convert_hdr hdr
;
1335 unicode_char
**ucptr_ret
;
1341 struct libmail_u_convert_buf
*first
, *tail
, **last
;
1344 static int save_unicode(const char *, size_t, void *);
1345 static int convert_tounicode(void *ptr
,
1346 const char *text
, size_t cnt
);
1347 static int deinit_tounicode(void *ptr
, int *errptr
);
1349 libmail_u_convert_handle_t
1350 libmail_u_convert_tou_init(const char *src_chset
,
1351 unicode_char
**ucptr_ret
,
1356 struct libmail_u_convert_tou
*p
=
1357 malloc(sizeof(struct libmail_u_convert_tou
));
1358 libmail_u_convert_handle_t h
;
1363 memset(p
, 0, sizeof(*p
));
1365 h
=libmail_u_convert_init(src_chset
, libmail_u_ucs4_native
,
1374 p
->ucptr_ret
=ucptr_ret
;
1375 p
->ucsize_ret
=ucsize_ret
;
1377 p
->nullterminate
=nullterminate
;
1379 p
->hdr
.convert_handler
=convert_tounicode
;
1380 p
->hdr
.deinit_handler
=deinit_tounicode
;
1385 libmail_u_convert_handle_t
1386 libmail_u_convert_fromu_init(const char *dst_chset
,
1392 return libmail_u_convert_tocbuf_init(libmail_u_ucs4_native
,
1399 int libmail_u_convert_uc(libmail_u_convert_handle_t handle
,
1400 const unicode_char
*text
,
1403 return libmail_u_convert(handle
, (const char *)text
,
1404 cnt
* sizeof(*text
));
1407 /* Capture the output of the conversion stack */
1409 static int save_unicode(const char *text
, size_t cnt
, void *ptr
)
1411 struct libmail_u_convert_tou
*p
=
1412 (struct libmail_u_convert_tou
*)ptr
;
1413 struct libmail_u_convert_buf
*fragment
;
1416 cnt
/= sizeof(unicode_char
);
1418 tot_size
=p
->tot_size
+ cnt
*sizeof(unicode_char
);
1419 /* Keep track of the total size saved */
1423 size_t n
=p
->tail
->max_fragment_size
-p
->tail
->fragment_size
;
1430 memcpy(p
->tail
->fragment
+p
->tail
->fragment_size
,
1431 text
, n
*sizeof(unicode_char
));
1434 text
+= n
*sizeof(unicode_char
);
1435 p
->tail
->fragment_size
+= n
;
1441 size_t cnt_alloc
=cnt
;
1446 if ((fragment
=malloc(sizeof(struct libmail_u_convert_buf
)
1447 +cnt_alloc
*sizeof(unicode_char
)))
1454 fragment
->next
=NULL
;
1455 fragment
->fragment
=(unicode_char
*)(fragment
+1);
1456 fragment
->max_fragment_size
=cnt_alloc
;
1457 fragment
->fragment_size
=cnt
;
1458 memcpy(fragment
->fragment
, text
, cnt
*sizeof(unicode_char
));
1460 *(p
->last
)=fragment
;
1461 p
->last
=&fragment
->next
;
1465 if (tot_size
< p
->tot_size
) /* Overflow? */
1470 p
->tot_size
=tot_size
;
1474 /* Punt converted text down the stack */
1476 static int convert_tounicode(void *ptr
,
1477 const char *text
, size_t cnt
)
1479 struct libmail_u_convert_tou
*p
=
1480 (struct libmail_u_convert_tou
*)ptr
;
1482 return libmail_u_convert(p
->hdr
.next
, text
, cnt
);
1486 ** Destroy the conversion stack. Destroy the downstream, then assemble the
1490 static int deinit_tounicode(void *ptr
, int *errptr
)
1492 struct libmail_u_convert_tou
*p
=
1493 (struct libmail_u_convert_tou
*)ptr
;
1494 int rc
=libmail_u_convert_deinit(p
->hdr
.next
, errptr
);
1495 struct libmail_u_convert_buf
*bufptr
;
1497 if (rc
== 0 && p
->nullterminate
)
1499 unicode_char zero
=0;
1501 rc
=save_unicode( (const char *)&zero
, sizeof(zero
),
1507 if (((*p
->ucptr_ret
)=malloc(p
->tot_size
? p
->tot_size
:1)) !=
1512 for (bufptr
=p
->first
; bufptr
; bufptr
=bufptr
->next
)
1514 if (bufptr
->fragment_size
)
1515 memcpy(&(*p
->ucptr_ret
)[i
],
1517 bufptr
->fragment_size
1518 *sizeof(*bufptr
->fragment
));
1519 i
+= bufptr
->fragment_size
;
1529 for (bufptr
=p
->first
; bufptr
; )
1531 struct libmail_u_convert_buf
*b
=bufptr
;
1533 bufptr
=bufptr
->next
;
1542 int libmail_u_convert_tou_tobuf(const char *text
,
1544 const char *charset
,
1549 libmail_u_convert_handle_t h
;
1551 if ((h
=libmail_u_convert_tou_init(charset
, uc
, ucsize
, 0)) == NULL
)
1554 if (libmail_u_convert(h
, text
, text_l
) < 0)
1556 libmail_u_convert_deinit(h
, NULL
);
1560 if (libmail_u_convert_deinit(h
, err
))
1566 int libmail_u_convert_fromu_tobuf(const unicode_char
*utext
,
1568 const char *charset
,
1573 libmail_u_convert_handle_t h
;
1575 if (utext_l
== (size_t)-1)
1577 for (utext_l
=0; utext
[utext_l
]; ++utext_l
)
1581 if ((h
=libmail_u_convert_fromu_init(charset
, c
, csize
, 1)) == NULL
)
1584 if (libmail_u_convert_uc(h
, utext
, utext_l
) < 0)
1586 libmail_u_convert_deinit(h
, NULL
);
1590 if (libmail_u_convert_deinit(h
, err
))
1596 char *libmail_u_convert_tocase(const char *str
,
1597 const char *charset
,
1598 unicode_char (*first_char_func
)(unicode_char
),
1599 unicode_char (*char_func
)(unicode_char
))
1608 if (libmail_u_convert_tou_tobuf(str
, strlen(str
),
1609 charset
, &uc
, &ucsize
, &err
))
1618 for (i
=0; i
<ucsize
; ++i
)
1620 uc
[i
]=(*first_char_func
)(uc
[i
]);
1623 first_char_func
=char_func
;
1626 if (libmail_u_convert_fromu_tobuf(uc
, ucsize
,