X-Git-Url: https://git.hcoop.net/hcoop/debian/courier-authlib.git/blobdiff_plain/781cfcb8fd50934c470e0dabf79d32ab333dec68..f77892e671e1261ec26e5133f86b8a52635f3dd2:/libs/unicode/unicode.c diff --git a/libs/unicode/unicode.c b/libs/unicode/unicode.c new file mode 100644 index 0000000..4ca098b --- /dev/null +++ b/libs/unicode/unicode.c @@ -0,0 +1,1643 @@ +/* +** Copyright 2000-2011 Double Precision, Inc. +** See COPYING for distribution information. +** +*/ + +#include "unicode_config.h" +#include "unicode.h" +#include "../rfc822/rfc822hdr.h" +#include +#include +#include +#include +#include +#if HAVE_LOCALE_H +#if HAVE_SETLOCALE +#include +#if USE_LIBCHARSET +#if HAVE_LOCALCHARSET_H +#include +#elif HAVE_LIBCHARSET_H +#include +#endif /* HAVE_LOCALCHARSET_H */ +#elif HAVE_LANGINFO_CODESET +#include +#endif /* USE_LIBCHARSET */ +#endif /* HAVE_SETLOCALE */ +#endif /* HAVE_LOCALE_H */ + +static char default_chset_buf[32]; + +static void init_default_chset() +{ + const char *old_locale=NULL; + const char *chset=NULL; + char *locale_cpy=NULL; + char buf[sizeof(default_chset_buf)]; + + chset=getenv("MM_CHARSET"); + + if (chset == NULL) + chset=getenv("CHARSET"); + + if (chset == NULL) + { +#if HAVE_LOCALE_H +#if HAVE_SETLOCALE + old_locale=setlocale(LC_ALL, ""); + locale_cpy=old_locale ? strdup(old_locale):NULL; +#if USE_LIBCHARSET + chset = locale_charset(); +#elif HAVE_LANGINFO_CODESET + chset=nl_langinfo(CODESET); +#endif +#endif +#endif + } + + memset(buf, 0, sizeof(buf)); + + if (chset && + + /* Map GNU libc iconv oddity to us-ascii */ + + (strcmp(chset, "ANSI_X3.4") == 0 || + strncmp(chset, "ANSI_X3.4-", 10) == 0)) + chset="US-ASCII"; + + if (chset) + { + strncat(buf, chset, sizeof(buf)-1); + } + else + { + const char *p=getenv("LANG"); + + /* LANG is xx_yy.CHARSET@modifier */ + + if (p && *p && (p=strchr(p, '.')) != NULL) + { + const char *q=strchr(++p, '@'); + + if (!q) + q=p+strlen(p); + + if (q-p >= sizeof(buf)-1) + q=p+sizeof(buf)-1; + + memcpy(buf, p, q-p); + buf[q-p]=0; + } + else + strcpy(buf, "US-ASCII"); + } + + memcpy(default_chset_buf, buf, sizeof(buf)); + +#if HAVE_LOCALE_H +#if HAVE_SETLOCALE + if (locale_cpy) + { + setlocale(LC_ALL, locale_cpy); + free(locale_cpy); + } +#endif +#endif + +} + +const char *unicode_default_chset() +{ + if (default_chset_buf[0] == 0) + init_default_chset(); + + return default_chset_buf; +} + + +/*****************************************************************************/ + +const char libmail_u_ucs4_native[]= +#if WORDS_BIGENDIAN + "UCS-4BE" +#else + "UCS-4LE" +#endif + ; + +const char libmail_u_ucs2_native[]= +#if WORDS_BIGENDIAN + "UCS-2BE" +#else + "UCS-2LE" +#endif + ; + +/* A stack of conversion modules */ + +struct libmail_u_convert_hdr { + + int (*convert_handler)(void *ptr, + const char *text, size_t cnt); + int (*deinit_handler)(void *ptr, int *errptr); + void *ptr; + + struct libmail_u_convert_hdr *next; +}; + +/* Decoding table for modified UTF7-encoding as used in imap */ + +static const char mbase64_lookup[]={ + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,63,-1,-1,-1, + 52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1, + -1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14, + 15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1, + -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, + 41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}; + +static const char mbase64[]= + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,"; + +/* +** Conversion wrapper for converting to modified-utf7 IMAP encoding. +** +** This is done by converting to UCS2, then stacking on a module that +** takes that and converts UCS2 to modified-UTF7. +** +** init_nottoimaputf7() returns an opaque stack for converting to ucs2. +*/ + +static libmail_u_convert_handle_t +init_nottoimaputf7(const char *src_chset, + const char *dst_chset, + int (*output_func)(const char *, size_t, void *), + void *convert_arg); + +/* +** The to modified UTF7 module +*/ + +struct libmail_u_convert_toimaputf7 { + + struct libmail_u_convert_hdr hdr; + + /* Accumulated output buffer */ + + char utf7encodebuf[1024]; + size_t utf7encodebuf_cnt; + + /* Accumulated bits for base64 encoding */ + uint32_t utf7bits; + + /* How many bits in utf7bits */ + uint16_t utf7bitcount; + + /* Flag: in base64mode */ + uint16_t utfmode; + + int errflag; + + /* Any extra characters that should be munged */ + + char smapmunge[16]; + + /* Remembered output function */ + + int (*output_func)(const char *, size_t, void *); + + /* Remembered arg to the output function */ + void *convert_arg; +}; + +/* Macro - flush the output buffer */ +#define toimaputf7_encode_flush(p) do { \ + int rc; \ + \ + rc=(*(p)->output_func)((p)->utf7encodebuf, \ + (p)->utf7encodebuf_cnt, \ + (p)->convert_arg); \ + if (rc) \ + return ((p)->errflag=(rc)); \ + \ + (p)->utf7encodebuf_cnt=0; \ + } while (0) + +static int toimaputf7_encode_flushfinal(struct libmail_u_convert_toimaputf7 *p) +{ + if (p->utf7encodebuf_cnt > 0) + toimaputf7_encode_flush(p); + return 0; +} + +/* Macro - add one char to the output buffer */ + +#define toimaputf7_encode_add(p,c) do { \ + if ((p)->utf7encodebuf_cnt >= sizeof((p)->utf7encodebuf)) \ + toimaputf7_encode_flush((p)); \ + \ + (p)->utf7encodebuf[(p)->utf7encodebuf_cnt++]=(c); \ + } while (0); + +static int deinit_toimaputf7(void *ptr, int *errptr); + +static int do_convert_toutf7(const char *text, size_t cnt, void *arg); +static int convert_utf7_handler(void *ptr, const char *text, size_t cnt); + +/* +** Create a conversion module stack +*/ + +libmail_u_convert_handle_t +libmail_u_convert_init(const char *src_chset, + const char *dst_chset, + int (*output_func)(const char *, size_t, void *), + void *convert_arg) +{ + struct libmail_u_convert_toimaputf7 *toutf7; + libmail_u_convert_handle_t h; + const char *smapmunge; + size_t l=strlen(unicode_x_imap_modutf7); + + if (strncmp(dst_chset, unicode_x_imap_modutf7, l) == 0 && + (dst_chset[l] == 0 || dst_chset[l] == ' ')) + { + smapmunge=dst_chset + l; + + if (*smapmunge) + ++smapmunge; + } + else + return init_nottoimaputf7(src_chset, dst_chset, + output_func, + convert_arg); + + toutf7=malloc(sizeof(struct libmail_u_convert_toimaputf7)); + + if (!toutf7) + return NULL; + + memset(toutf7, 0, sizeof(*toutf7)); + + h=init_nottoimaputf7(src_chset, libmail_u_ucs2_native, + do_convert_toutf7, toutf7); + if (!h) + { + free(toutf7); + return (NULL); + } + + toutf7->output_func=output_func; + toutf7->convert_arg=convert_arg; + + strncat(toutf7->smapmunge, smapmunge, sizeof(toutf7->smapmunge)-1); + + toutf7->hdr.convert_handler=convert_utf7_handler; + toutf7->hdr.deinit_handler=deinit_toimaputf7; + toutf7->hdr.ptr=toutf7; + toutf7->hdr.next=h; + return &toutf7->hdr; +} + +/* Passthrough to the wrapped stack */ + +static int convert_utf7_handler(void *ptr, const char *text, size_t cnt) +{ + struct libmail_u_convert_toimaputf7 *toutf7= + (struct libmail_u_convert_toimaputf7 *)ptr; + + return (*toutf7->hdr.next->convert_handler)(toutf7->hdr.next->ptr, + text, cnt); +} + +static int utf7off(struct libmail_u_convert_toimaputf7 *toutf7) +{ + if (!toutf7->utfmode) + return 0; + toutf7->utfmode=0; + + if (toutf7->utf7bitcount > 0) + toimaputf7_encode_add(toutf7, + mbase64[(toutf7->utf7bits + << (6-toutf7->utf7bitcount)) + & 63]); + toimaputf7_encode_add(toutf7, '-'); + return 0; +} + + +static int do_convert_toutf7(const char *text, size_t cnt, void *arg) +{ + struct libmail_u_convert_toimaputf7 *toutf7= + (struct libmail_u_convert_toimaputf7 *)arg; + + /* We better be getting UCS-2 here! */ + + const uint16_t *utext=(const uint16_t *)text; + cnt /= 2; + + while (cnt) + { + if (toutf7->errflag) + return toutf7->errflag; + + if (*utext >= 0x20 && *utext <= 0x7F + && strchr( toutf7->smapmunge, (char)*utext) == NULL) + + /* + && (!toutf7->smapmunge || (*utext != '.' && *utext != '/' && + *utext != '~' && *utext != ':'))) + */ + { + if (utf7off(toutf7)) + return toutf7->errflag; + + toimaputf7_encode_add(toutf7, *utext); + + if (*utext == '&') + toimaputf7_encode_add(toutf7, '-'); + + ++utext; + --cnt; + continue; + } + + if (!toutf7->utfmode) + { + toutf7->utfmode=1; + toutf7->utf7bitcount=0; + toimaputf7_encode_add(toutf7, '&'); + continue; + } + + toutf7->utf7bits = (toutf7->utf7bits << 16) | + (((uint32_t)*utext) & 0xFFFF); + toutf7->utf7bitcount += 16; + + ++utext; + --cnt; + + /* If there's at least 6 bits, output base64-encoded char */ + + while (toutf7->utf7bitcount >= 6) + { + uint32_t v; + int n; + + if (toutf7->errflag) + return toutf7->errflag; + + v=toutf7->utf7bits; + n=toutf7->utf7bitcount-6; + toutf7->utf7bitcount -= 6; + + if (n > 0) + v >>= n; + + toimaputf7_encode_add(toutf7, mbase64[v & 63]); + } + } + + return 0; +} + +static int deinit_toimaputf7(void *ptr, int *errptr) +{ + int rc; + + struct libmail_u_convert_toimaputf7 *toutf7= + (struct libmail_u_convert_toimaputf7 *)ptr; + + /* Flush out the downstream stack */ + rc=(*toutf7->hdr.next->deinit_handler)(toutf7->hdr.next->ptr, errptr); + + /* Make sure we're out of modified base64 */ + + if (rc == 0) + rc=utf7off(toutf7); + + if (rc == 0 && toutf7->utf7encodebuf_cnt > 0) + rc=toimaputf7_encode_flushfinal(toutf7); + + free(toutf7); + return rc; +} + +/************/ + +/* +** Convert from modified-utf7 IMAP encoding. +** +** This module converts it to UCS-2, then this is attached to a stack that +** converts UCS-2 to the requested charset. +*/ + +static libmail_u_convert_handle_t +init_notfromimaputf7(const char *src_chset, + const char *dst_chset, + int (*output_func)(const char *, size_t, void *), + void *convert_arg); + +struct libmail_u_convert_fromimaputf7 { + + struct libmail_u_convert_hdr hdr; + + /* Accumulated UCS-2 stream */ + uint16_t convbuf[512]; + size_t convbuf_cnt; + + /* Accumulated base64 bits */ + uint32_t modbits; + + /* How many bits extracted from a base64 stream */ + + short modcnt; + + /* Flag: seen the & */ + char seenamp; + + /* Flag: seen the &, and the next char wasn't - */ + + char inmod; + int errflag; + int converr; +}; + +/* Flush the accumulated UCS-2 stream */ + +#define convert_fromutf7_flush(p) do { \ + (p)->errflag=(*(p)->hdr.next->convert_handler) \ + ((p)->hdr.next->ptr, \ + (const char *)(p)->convbuf, \ + (p)->convbuf_cnt * \ + sizeof((p)->convbuf[0])); \ + (p)->convbuf_cnt=0; \ + } while (0) + +/* Accumulated a UCS-2 char */ + +#define convert_fromutf7_add(p,c) do { \ + if ((p)->convbuf_cnt >= \ + sizeof((p)->convbuf)/sizeof((p)->convbuf[0])) \ + convert_fromutf7_flush((p)); \ + (p)->convbuf[(p)->convbuf_cnt++]=(c); \ + } while (0) + + +static int convert_fromutf7(void *ptr, + const char *text, size_t cnt); +static int deinit_fromutf7(void *ptr, int *errptr); + +static libmail_u_convert_handle_t +init_nottoimaputf7(const char *src_chset, + const char *dst_chset, + int (*output_func)(const char *, size_t, void *), + void *convert_arg) +{ + struct libmail_u_convert_fromimaputf7 *fromutf7; + libmail_u_convert_handle_t h; + size_t l=strlen(unicode_x_imap_modutf7); + + if (strncmp(src_chset, unicode_x_imap_modutf7, l) == 0 && + (src_chset[l] == 0 || src_chset[l] == ' ')) + ; + else + return init_notfromimaputf7(src_chset, dst_chset, + output_func, + convert_arg); + + fromutf7=(struct libmail_u_convert_fromimaputf7 *) + malloc(sizeof(struct libmail_u_convert_fromimaputf7)); + + if (!fromutf7) + return NULL; + + memset(fromutf7, 0, sizeof(*fromutf7)); + + /* Create a stack for converting UCS-2 to the dest charset */ + + h=init_notfromimaputf7(libmail_u_ucs2_native, dst_chset, + output_func, convert_arg); + + if (!h) + { + free(fromutf7); + return (NULL); + } + + fromutf7->hdr.next=h; + fromutf7->hdr.convert_handler=convert_fromutf7; + fromutf7->hdr.deinit_handler=deinit_fromutf7; + fromutf7->hdr.ptr=fromutf7; + return &fromutf7->hdr; +} + +static int convert_fromutf7(void *ptr, + const char *text, size_t cnt) +{ + struct libmail_u_convert_fromimaputf7 *fromutf7= + (struct libmail_u_convert_fromimaputf7 *)ptr; + int bits; + + while (cnt) + { + if (fromutf7->errflag) + return fromutf7->errflag; + + if (!fromutf7->seenamp && *text == '&') + { + fromutf7->seenamp=1; + fromutf7->inmod=0; + fromutf7->modcnt=0; + ++text; + --cnt; + continue; + } + + if (fromutf7->seenamp) + { + if (*text == '-') + { + convert_fromutf7_add(fromutf7, '&'); + ++text; + --cnt; + fromutf7->seenamp=0; + continue; + } + fromutf7->seenamp=0; + fromutf7->inmod=1; + } + + if (!fromutf7->inmod) + { + /* Not in the base64 encoded stream */ + + convert_fromutf7_add(fromutf7, + ((uint16_t)*text) & 0xFFFF); + ++text; + --cnt; + continue; + } + + if (*text == '-') + { + /* End of the base64 encoded stream */ + fromutf7->inmod=0; + ++text; + --cnt; + continue; + } + + /* Got 6 more bits */ + + bits=mbase64_lookup[(unsigned char)*text]; + + ++text; + --cnt; + + if (bits < 0) + { + errno=EILSEQ; + return fromutf7->errflag=-1; + } + + fromutf7->modbits = (fromutf7->modbits << 6) | bits; + fromutf7->modcnt += 6; + + if (fromutf7->modcnt >= 16) + { + /* Got a UCS-2 char */ + + int shiftcnt=fromutf7->modcnt - 16; + uint32_t v=fromutf7->modbits; + + if (shiftcnt) + v >>= shiftcnt; + + fromutf7->modcnt -= 16; + + convert_fromutf7_add(fromutf7, v); + } + } + return 0; +} + +static int deinit_fromutf7(void *ptr, int *errptr) +{ + struct libmail_u_convert_fromimaputf7 *fromutf7= + (struct libmail_u_convert_fromimaputf7 *)ptr; + int rc; + + if (fromutf7->seenamp || fromutf7->inmod) + { + if (fromutf7->errflag == 0) + { + fromutf7->errflag= -1; + errno=EILSEQ; + } + } + + if (fromutf7->convbuf_cnt) + convert_fromutf7_flush(fromutf7); + + rc=fromutf7->hdr.next->deinit_handler(fromutf7->hdr.next->ptr, errptr); + + if (fromutf7->errflag && rc == 0) + rc=fromutf7->errflag; + + if (errptr && fromutf7->converr) + *errptr=1; + + free(fromutf7); + return rc; +} + +/************/ + +/* A real conversion module, via iconv */ + +struct libmail_u_convert_iconv { + + struct libmail_u_convert_hdr hdr; + + iconv_t h; + int errflag; /* Accumulated errors */ + + int (*output_func)(const char *, size_t, void *); + void *convert_arg; + + char buffer[1024]; /* Input buffer */ + size_t bufcnt; /* Accumulated input in buffer */ + char skipcnt; /* Skip this many bytes upon encountering EILSEQ */ + char skipleft; /* How many bytes are currently left to skip */ + char converr; /* Flag - an EILSEQ was encountered */ +} ; + +static int init_iconv(struct libmail_u_convert_iconv *h, + const char *src_chset, + const char *dst_chset, + int (*output_func)(const char *, size_t, void *), + void *convert_arg); + +static libmail_u_convert_handle_t +init_notfromimaputf7(const char *src_chset, + const char *dst_chset, + int (*output_func)(const char *, size_t, void *), + void *convert_arg) +{ + + + struct libmail_u_convert_iconv *h= + malloc(sizeof(struct libmail_u_convert_iconv)); + + if (!h) + return NULL; + + memset(h, 0, sizeof(*h)); + + if (init_iconv(h, src_chset, dst_chset, output_func, convert_arg)) + { + free(h); + return NULL; + } + return &h->hdr; +} + +/* Run the stack */ + +int libmail_u_convert(libmail_u_convert_handle_t h, + const char *text, size_t cnt) +{ + return (*h->convert_handler)(h->ptr, text, cnt); +} + +/* Destroy the stack */ + +int libmail_u_convert_deinit(libmail_u_convert_handle_t h, int *errptr) +{ + return (*h->deinit_handler)(h, errptr); +} + +static int deinit_iconv(void *ptr, int *errptr); +static int convert_iconv(void *ptr, + const char *text, size_t cnt); + +/* Initialize a single conversion module, in the stack */ + +static int init_iconv(struct libmail_u_convert_iconv *h, + const char *src_chset, + const char *dst_chset, + int (*output_func)(const char *, size_t, void *), + void *convert_arg) +{ + if ((h->h=iconv_open(dst_chset, src_chset)) == (iconv_t)-1) + return -1; + + h->hdr.convert_handler=convert_iconv; + h->hdr.deinit_handler=deinit_iconv; + h->hdr.ptr=h; + + h->output_func=output_func; + h->convert_arg=convert_arg; + + /* Heuristically determine how many octets to skip upon an EILSEQ */ + + h->skipcnt=1; + switch (src_chset[0]) { + case 'u': + case 'U': + switch (src_chset[1]) { + case 'c': + case 'C': + switch (src_chset[2]) { + case 's': + case 'S': + if (src_chset[3] == '-') + switch (src_chset[4]) { + case '4': + /* UCS-4 */ + h->skipcnt=4; + break; + case '2': + /* UCS-2 */ + h->skipcnt=2; + break; + } + } + break; + case 't': + case 'T': + switch (src_chset[2]) { + case 'f': + case 'F': + if (src_chset[3] == '-') + switch (src_chset[4]) { + case '3': + /* UTF-32 */ + h->skipcnt=4; + break; + case '1': + /* UTF-16 */ + h->skipcnt=2; + break; + } + } + } + } + + return 0; +} + +static void convert_flush(struct libmail_u_convert_iconv *); +static void convert_flush_iconv(struct libmail_u_convert_iconv *, const char **, + size_t *); + +/* +** iconv conversion module. Accumulate input in an input buffer. When the +** input buffer is full, invoke convert_flush(). +*/ + +static int convert_iconv(void *ptr, + const char *text, size_t cnt) +{ + struct libmail_u_convert_iconv *h=(struct libmail_u_convert_iconv *)ptr; + + while (cnt && h->errflag == 0) + { + if (h->bufcnt >= sizeof(h->buffer)-1) + { + convert_flush(h); + + if (h->errflag) + break; + } + + h->buffer[h->bufcnt++]= *text++; + --cnt; + } + + return h->errflag; +} + +/* +** Finish an iconv conversion module. Invoke convert_flush() to flush any +** buffered input. Invoke convert_flush_iconv() to return state to the initial +** conversion state. +*/ + +static int deinit_iconv(void *ptr, int *errptr) +{ + int rc; + int converr; + struct libmail_u_convert_iconv *h=(struct libmail_u_convert_iconv *)ptr; + libmail_u_convert_handle_t next; + + if (h->errflag == 0) + convert_flush(h); + + if (h->bufcnt && h->errflag == 0) + h->converr=1; + + if (h->errflag == 0) + convert_flush_iconv(h, NULL, NULL); + + rc=h->errflag; + converr=h->converr != 0; + iconv_close(h->h); + next=h->hdr.next; + free(h); + if (errptr) + *errptr=converr; + + /* If there's another module in the stack, clean that up */ + + if (next) + { + int converrnext; + int rcnext=libmail_u_convert_deinit(next, &converrnext); + + if (converrnext && errptr && *errptr == 0) + *errptr=converr; + + if (rcnext && rc == 0) + rc=rcnext; + } + return rc; +} + +/* +** Invoke convert_flush_iconv() to flush the input buffer. If there's +** unconverted text remaining, reposition it at the beginning of the input +** buffer. +*/ + +static void convert_flush(struct libmail_u_convert_iconv *h) +{ + const char *p; + size_t n; + + if (h->bufcnt == 0 || h->errflag) + return; + + p=h->buffer; + n=h->bufcnt; + + convert_flush_iconv(h, &p, &n); + + if (h->errflag) + return; + + if (h->bufcnt == n) + n=0; /* Unexpected error, dunno what to do, punt */ + + h->bufcnt=0; + + while (n) + { + h->buffer[h->bufcnt]= *p; + + ++h->bufcnt; + ++p; + --n; + } +} + +/* +** Convert text via iconv. +*/ + +static void convert_flush_iconv(struct libmail_u_convert_iconv *h, + const char **inbuf, size_t *inbytesleft) +{ + int save_errno; + + while (1) + { + char outbuf[1024]; + char *outp; + size_t outleft; + size_t n; + size_t origin=0; + + if (inbytesleft) + { + if ((origin=*inbytesleft) == 0) + return; + + if (inbuf && h->skipleft && origin) + { + /* Skipping after an EILSEQ */ + + --h->skipleft; + --*inbytesleft; + ++*inbuf; + continue; + } + + } + + if (h->errflag) + { + /* Quietly eat everything after a previous error */ + + if (inbytesleft) + *inbytesleft=0; + + return; + } + + outp=outbuf; + outleft=sizeof(outbuf); + + n=iconv(h->h, (char **)inbuf, inbytesleft, &outp, &outleft); + + save_errno=errno; + + /* Anything produced by iconv() gets pushed down the stack */ + + if (outp > outbuf) + { + int rc=(*h->output_func)(outbuf, outp-outbuf, + h->convert_arg); + if (rc) + { + h->errflag=rc; + return; + } + } + + if (n != (size_t)-1) + { + /* iconv(3) reason #2 */ + + break; + } + + if (inbytesleft == 0) + { + /* + ** An error when generating the shift sequence to + ** return to the initial state. We don't know what to + ** do, now. + */ + + errno=EINVAL; + h->errflag= -1; + return; + } + + /* + ** convert_flush() gets invoked when the 1024 char input buffer + ** fills or to convert input that has been buffered when + ** convert_chset_end() gets invoked. + ** + ** A return code of EINVAL from iconv() is iconv() encountering + ** an incomplete multibyte sequence. + ** + ** If iconv() failed without consuming any input: + ** + ** - iconv(3) reason #1, EILSEQ, invalid multibyte sequence + ** that starts at the beginning of the string we wish to + ** convert. Discard one character, and try again. + ** + ** - iconv(3) reason #3, EINVAL, incomplete multibyte sequence. + ** If it's possible to have an incomplete 1024 character long + ** multibyte sequence, we're in trouble. Or we've encountered + ** an EINVAL when flushing out the remaining buffered input, + ** in convert_chset_end(). In either case, it's ok to sicard + ** one character at a time, until we either reach the end, + ** or get some other result. + ** + ** - iconv(3) reason #4, E2BIG. If the 1024 character output + ** buffer, above, is insufficient to produce the output from a + ** single converted character, we're in trouble. + */ + + if (*inbytesleft == origin) + { + h->skipleft=h->skipcnt; + h->converr=1; + } + + /* + ** Stopped at an incomplete multibyte sequence, try again on + ** the next round. + */ + else if (save_errno == EINVAL) + break; + + if (save_errno == EILSEQ) + h->converr=1; /* Another possibility this can happen */ + + /* + ** If we get here because of iconv(3) reason #4, filled out + ** the output buffer, we should continue with the conversion. + ** Otherwise, upon encountering any other error condition, + ** reset the conversion state. + */ + if (save_errno != E2BIG) + iconv(h->h, NULL, NULL, NULL, NULL); + } +} + +/*****************************************************************************/ + +/* +** A wrapper for libmail_u_convert() that collects the converted character +** text into a buffer. This is done by passing an output function to +** libmail_u_convert() that saves converted text in a linked-list +** of buffers. +** +** Then, in the deinitialization function, the buffers get concatenated into +** the final character buffer. +*/ + +struct libmail_u_convert_cbuf { + struct libmail_u_convert_cbuf *next; + char *fragment; + size_t fragment_size; +}; + +struct libmail_u_convert_tocbuf { + struct libmail_u_convert_hdr hdr; + + char **cbufptr_ret; + size_t *cbufsize_ret; + int errflag; + size_t tot_size; + int nullterminate; + + struct libmail_u_convert_cbuf *first, **last; +}; + +static int save_tocbuf(const char *, size_t, void *); +static int convert_tocbuf(void *ptr, + const char *text, size_t cnt); +static int deinit_tocbuf(void *ptr, int *errptr); + +libmail_u_convert_handle_t +libmail_u_convert_tocbuf_init(const char *src_chset, + const char *dst_chset, + char **cbufptr_ret, + size_t *cbufsize_ret, + int nullterminate + ) +{ + struct libmail_u_convert_tocbuf *p= + malloc(sizeof(struct libmail_u_convert_tocbuf)); + libmail_u_convert_handle_t h; + + if (!p) + return NULL; + + memset(p, 0, sizeof(*p)); + + h=libmail_u_convert_init(src_chset, dst_chset, save_tocbuf, p); + + if (!h) + { + free(p); + return NULL; + } + + p->cbufptr_ret=cbufptr_ret; + p->cbufsize_ret=cbufsize_ret; + p->last= &p->first; + p->nullterminate=nullterminate; + p->hdr.next=h; + p->hdr.convert_handler=convert_tocbuf; + p->hdr.deinit_handler=deinit_tocbuf; + p->hdr.ptr=p; + return &p->hdr; +} + +/* Capture the output of the conversion stack */ + +static int save_tocbuf(const char *text, size_t cnt, void *ptr) +{ + struct libmail_u_convert_tocbuf *p= + (struct libmail_u_convert_tocbuf *)ptr; + struct libmail_u_convert_cbuf *fragment= + malloc(sizeof(struct libmail_u_convert_cbuf)+cnt); + size_t tot_size; + + if (!fragment) + { + p->errflag=1; + return 1; + } + + fragment->next=NULL; + fragment->fragment=(char *)(fragment+1); + if ((fragment->fragment_size=cnt) > 0) + memcpy(fragment->fragment, text, cnt); + + *(p->last)=fragment; + p->last=&fragment->next; + + tot_size=p->tot_size + cnt; /* Keep track of the total size saved */ + + if (tot_size < p->tot_size) /* Overflow? */ + { + errno=E2BIG; + return 1; + } + p->tot_size=tot_size; + return 0; +} + +/* Punt converted text down the stack */ + +static int convert_tocbuf(void *ptr, const char *text, size_t cnt) +{ + struct libmail_u_convert_tocbuf *p= + (struct libmail_u_convert_tocbuf *)ptr; + + return libmail_u_convert(p->hdr.next, text, cnt); +} + +/* +** Destroy the conversion stack. Destroy the downstream, then assemble the +** final array. +*/ + +static int deinit_tocbuf(void *ptr, int *errptr) +{ + struct libmail_u_convert_tocbuf *p= + (struct libmail_u_convert_tocbuf *)ptr; + int rc=libmail_u_convert_deinit(p->hdr.next, errptr); + struct libmail_u_convert_cbuf *bufptr; + + if (rc == 0 && p->nullterminate) + { + char zero=0; + + rc=save_tocbuf( &zero, sizeof(zero), p->hdr.ptr); + } + + if (rc == 0) + { + if (((*p->cbufptr_ret)=malloc(p->tot_size ? p->tot_size:1)) != + NULL) + { + size_t i=0; + + for (bufptr=p->first; bufptr; bufptr=bufptr->next) + { + if (bufptr->fragment_size) + memcpy(&(*p->cbufptr_ret)[i], + bufptr->fragment, + bufptr->fragment_size); + i += bufptr->fragment_size; + } + (*p->cbufsize_ret)=i; + } + else + { + rc= -1; + } + } + + for (bufptr=p->first; bufptr; ) + { + struct libmail_u_convert_cbuf *b=bufptr; + + bufptr=bufptr->next; + + free(b); + } + free(p); + + return rc; +} + +libmail_u_convert_handle_t +libmail_u_convert_tocbuf_toutf8_init(const char *src_chset, + char **cbufptr_ret, + size_t *cbufsize_ret, + int nullterminate + ) +{ + return libmail_u_convert_tocbuf_init(src_chset, "utf-8", + cbufptr_ret, cbufsize_ret, + nullterminate); +} + +libmail_u_convert_handle_t +libmail_u_convert_tocbuf_fromutf8_init(const char *dst_chset, + char **cbufptr_ret, + size_t *cbufsize_ret, + int nullterminate + ) +{ + return libmail_u_convert_tocbuf_init("utf-8", dst_chset, + cbufptr_ret, cbufsize_ret, + nullterminate); +} + +char *libmail_u_convert_toutf8(const char *text, + const char *charset, + int *error) +{ + char *cbufptr; + size_t cbufsize; + libmail_u_convert_handle_t h= + libmail_u_convert_tocbuf_toutf8_init(charset, + &cbufptr, + &cbufsize, 1); + + if (!h) + return NULL; + + libmail_u_convert(h, text, strlen(text)); + + if (libmail_u_convert_deinit(h, error) == 0) + return cbufptr; + + return NULL; +} + +char *libmail_u_convert_fromutf8(const char *text, + const char *charset, + int *error) +{ + char *cbufptr; + size_t cbufsize; + libmail_u_convert_handle_t h= + libmail_u_convert_tocbuf_fromutf8_init(charset, + &cbufptr, + &cbufsize, 1); + + if (!h) + return NULL; + + libmail_u_convert(h, text, strlen(text)); + + if (libmail_u_convert_deinit(h, error) == 0) + return cbufptr; + + return NULL; +} + +char *libmail_u_convert_tobuf(const char *text, + const char *charset, + const char *dstcharset, + int *error) +{ + char *cbufptr; + size_t cbufsize; + libmail_u_convert_handle_t h= + libmail_u_convert_tocbuf_init(charset, + dstcharset, + &cbufptr, + &cbufsize, 1); + + if (!h) + return NULL; + + libmail_u_convert(h, text, strlen(text)); + + if (libmail_u_convert_deinit(h, error) == 0) + return cbufptr; + + return NULL; +} + +/*****************************************************************************/ + +/* +** Convert text to unicode_chars. Same basic approach as +** libmail_u_convert_tocbuf_init(). The output character set gets specified +** as UCS-4, the final output size is divided by 4, and the output buffer gets +** typed as a unicode_char array. +*/ + +struct libmail_u_convert_buf { + struct libmail_u_convert_buf *next; + unicode_char *fragment; + size_t fragment_size; + size_t max_fragment_size; +}; + +struct libmail_u_convert_tou { + struct libmail_u_convert_hdr hdr; + + unicode_char **ucptr_ret; + size_t *ucsize_ret; + int errflag; + size_t tot_size; + int nullterminate; + + struct libmail_u_convert_buf *first, *tail, **last; +}; + +static int save_unicode(const char *, size_t, void *); +static int convert_tounicode(void *ptr, + const char *text, size_t cnt); +static int deinit_tounicode(void *ptr, int *errptr); + +libmail_u_convert_handle_t +libmail_u_convert_tou_init(const char *src_chset, + unicode_char **ucptr_ret, + size_t *ucsize_ret, + int nullterminate + ) +{ + struct libmail_u_convert_tou *p= + malloc(sizeof(struct libmail_u_convert_tou)); + libmail_u_convert_handle_t h; + + if (!p) + return NULL; + + memset(p, 0, sizeof(*p)); + + h=libmail_u_convert_init(src_chset, libmail_u_ucs4_native, + save_unicode, p); + + if (!h) + { + free(p); + return NULL; + } + + p->ucptr_ret=ucptr_ret; + p->ucsize_ret=ucsize_ret; + p->last= &p->first; + p->nullterminate=nullterminate; + p->hdr.next=h; + p->hdr.convert_handler=convert_tounicode; + p->hdr.deinit_handler=deinit_tounicode; + p->hdr.ptr=p; + return &p->hdr; +} + +libmail_u_convert_handle_t +libmail_u_convert_fromu_init(const char *dst_chset, + char **cbufptr_ret, + size_t *csize_ret, + int nullterminate + ) +{ + return libmail_u_convert_tocbuf_init(libmail_u_ucs4_native, + dst_chset, + cbufptr_ret, + csize_ret, + nullterminate); +} + +int libmail_u_convert_uc(libmail_u_convert_handle_t handle, + const unicode_char *text, + size_t cnt) +{ + return libmail_u_convert(handle, (const char *)text, + cnt * sizeof(*text)); +} + +/* Capture the output of the conversion stack */ + +static int save_unicode(const char *text, size_t cnt, void *ptr) +{ + struct libmail_u_convert_tou *p= + (struct libmail_u_convert_tou *)ptr; + struct libmail_u_convert_buf *fragment; + size_t tot_size; + + cnt /= sizeof(unicode_char); + + tot_size=p->tot_size + cnt*sizeof(unicode_char); + /* Keep track of the total size saved */ + + if (p->tail) + { + size_t n=p->tail->max_fragment_size-p->tail->fragment_size; + + if (n > cnt) + n=cnt; + + if (n) + { + memcpy(p->tail->fragment+p->tail->fragment_size, + text, n*sizeof(unicode_char)); + + cnt -= n; + text += n*sizeof(unicode_char); + p->tail->fragment_size += n; + } + } + + if (cnt > 0) + { + size_t cnt_alloc=cnt; + + if (cnt_alloc < 16) + cnt_alloc=16; + + if ((fragment=malloc(sizeof(struct libmail_u_convert_buf) + +cnt_alloc*sizeof(unicode_char))) + == NULL) + { + p->errflag=1; + return 1; + } + + fragment->next=NULL; + fragment->fragment=(unicode_char *)(fragment+1); + fragment->max_fragment_size=cnt_alloc; + fragment->fragment_size=cnt; + memcpy(fragment->fragment, text, cnt*sizeof(unicode_char)); + + *(p->last)=fragment; + p->last=&fragment->next; + p->tail=fragment; + } + + if (tot_size < p->tot_size) /* Overflow? */ + { + errno=E2BIG; + return 1; + } + p->tot_size=tot_size; + return 0; +} + +/* Punt converted text down the stack */ + +static int convert_tounicode(void *ptr, + const char *text, size_t cnt) +{ + struct libmail_u_convert_tou *p= + (struct libmail_u_convert_tou *)ptr; + + return libmail_u_convert(p->hdr.next, text, cnt); +} + +/* +** Destroy the conversion stack. Destroy the downstream, then assemble the +** final array. +*/ + +static int deinit_tounicode(void *ptr, int *errptr) +{ + struct libmail_u_convert_tou *p= + (struct libmail_u_convert_tou *)ptr; + int rc=libmail_u_convert_deinit(p->hdr.next, errptr); + struct libmail_u_convert_buf *bufptr; + + if (rc == 0 && p->nullterminate) + { + unicode_char zero=0; + + rc=save_unicode( (const char *)&zero, sizeof(zero), + p->hdr.ptr); + } + + if (rc == 0) + { + if (((*p->ucptr_ret)=malloc(p->tot_size ? p->tot_size:1)) != + NULL) + { + size_t i=0; + + for (bufptr=p->first; bufptr; bufptr=bufptr->next) + { + if (bufptr->fragment_size) + memcpy(&(*p->ucptr_ret)[i], + bufptr->fragment, + bufptr->fragment_size + *sizeof(*bufptr->fragment)); + i += bufptr->fragment_size; + } + (*p->ucsize_ret)=i; + } + else + { + rc= -1; + } + } + + for (bufptr=p->first; bufptr; ) + { + struct libmail_u_convert_buf *b=bufptr; + + bufptr=bufptr->next; + + free(b); + } + free(p); + + return rc; +} + +int libmail_u_convert_tou_tobuf(const char *text, + size_t text_l, + const char *charset, + unicode_char **uc, + size_t *ucsize, + int *err) +{ + libmail_u_convert_handle_t h; + + if ((h=libmail_u_convert_tou_init(charset, uc, ucsize, 0)) == NULL) + return -1; + + if (libmail_u_convert(h, text, text_l) < 0) + { + libmail_u_convert_deinit(h, NULL); + return -1; + } + + if (libmail_u_convert_deinit(h, err)) + return -1; + + return 0; +} + +int libmail_u_convert_fromu_tobuf(const unicode_char *utext, + size_t utext_l, + const char *charset, + char **c, + size_t *csize, + int *err) +{ + libmail_u_convert_handle_t h; + + if (utext_l == (size_t)-1) + { + for (utext_l=0; utext[utext_l]; ++utext_l) + ; + } + + if ((h=libmail_u_convert_fromu_init(charset, c, csize, 1)) == NULL) + return -1; + + if (libmail_u_convert_uc(h, utext, utext_l) < 0) + { + libmail_u_convert_deinit(h, NULL); + return -1; + } + + if (libmail_u_convert_deinit(h, err)) + return -1; + + return 0; +} + +char *libmail_u_convert_tocase(const char *str, + const char *charset, + unicode_char (*first_char_func)(unicode_char), + unicode_char (*char_func)(unicode_char)) +{ + unicode_char *uc; + size_t ucsize; + size_t i; + int err; + char *c; + size_t csize; + + if (libmail_u_convert_tou_tobuf(str, strlen(str), + charset, &uc, &ucsize, &err)) + return NULL; + + if (err) + { + free(uc); + return NULL; + } + + for (i=0; i