+++ /dev/null
-/*
-** Copyright 2000-2011 Double Precision, Inc.
-** See COPYING for distribution information.
-**
-*/
-
-#include "unicode_config.h"
-#include "unicode.h"
-#include "../rfc822/rfc822hdr.h"
-#include <string.h>
-#include <ctype.h>
-#include <stdlib.h>
-#include <iconv.h>
-#include <errno.h>
-#if HAVE_LOCALE_H
-#if HAVE_SETLOCALE
-#include <locale.h>
-#if USE_LIBCHARSET
-#if HAVE_LOCALCHARSET_H
-#include <localcharset.h>
-#elif HAVE_LIBCHARSET_H
-#include <libcharset.h>
-#endif /* HAVE_LOCALCHARSET_H */
-#elif HAVE_LANGINFO_CODESET
-#include <langinfo.h>
-#endif /* USE_LIBCHARSET */
-#endif /* HAVE_SETLOCALE */
-#endif /* HAVE_LOCALE_H */
-
-static char default_chset_buf[32];
-
-static void init_default_chset()
-{
- const char *old_locale=NULL;
- const char *chset=NULL;
- char *locale_cpy=NULL;
- char buf[sizeof(default_chset_buf)];
-
- chset=getenv("MM_CHARSET");
-
- if (chset == NULL)
- chset=getenv("CHARSET");
-
- if (chset == NULL)
- {
-#if HAVE_LOCALE_H
-#if HAVE_SETLOCALE
- old_locale=setlocale(LC_ALL, "");
- locale_cpy=old_locale ? strdup(old_locale):NULL;
-#if USE_LIBCHARSET
- chset = locale_charset();
-#elif HAVE_LANGINFO_CODESET
- chset=nl_langinfo(CODESET);
-#endif
-#endif
-#endif
- }
-
- memset(buf, 0, sizeof(buf));
-
- if (chset &&
-
- /* Map GNU libc iconv oddity to us-ascii */
-
- (strcmp(chset, "ANSI_X3.4") == 0 ||
- strncmp(chset, "ANSI_X3.4-", 10) == 0))
- chset="US-ASCII";
-
- if (chset)
- {
- strncat(buf, chset, sizeof(buf)-1);
- }
- else
- {
- const char *p=getenv("LANG");
-
- /* LANG is xx_yy.CHARSET@modifier */
-
- if (p && *p && (p=strchr(p, '.')) != NULL)
- {
- const char *q=strchr(++p, '@');
-
- if (!q)
- q=p+strlen(p);
-
- if (q-p >= sizeof(buf)-1)
- q=p+sizeof(buf)-1;
-
- memcpy(buf, p, q-p);
- buf[q-p]=0;
- }
- else
- strcpy(buf, "US-ASCII");
- }
-
- memcpy(default_chset_buf, buf, sizeof(buf));
-
-#if HAVE_LOCALE_H
-#if HAVE_SETLOCALE
- if (locale_cpy)
- {
- setlocale(LC_ALL, locale_cpy);
- free(locale_cpy);
- }
-#endif
-#endif
-
-}
-
-const char *unicode_default_chset()
-{
- if (default_chset_buf[0] == 0)
- init_default_chset();
-
- return default_chset_buf;
-}
-
-
-/*****************************************************************************/
-
-const char libmail_u_ucs4_native[]=
-#if WORDS_BIGENDIAN
- "UCS-4BE"
-#else
- "UCS-4LE"
-#endif
- ;
-
-const char libmail_u_ucs2_native[]=
-#if WORDS_BIGENDIAN
- "UCS-2BE"
-#else
- "UCS-2LE"
-#endif
- ;
-
-/* A stack of conversion modules */
-
-struct libmail_u_convert_hdr {
-
- int (*convert_handler)(void *ptr,
- const char *text, size_t cnt);
- int (*deinit_handler)(void *ptr, int *errptr);
- void *ptr;
-
- struct libmail_u_convert_hdr *next;
-};
-
-/* Decoding table for modified UTF7-encoding as used in imap */
-
-static const char mbase64_lookup[]={
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,63,-1,-1,-1,
- 52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1,
- -1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
- 15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1,
- -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
- 41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1,
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1};
-
-static const char mbase64[]=
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
-
-/*
-** Conversion wrapper for converting to modified-utf7 IMAP encoding.
-**
-** This is done by converting to UCS2, then stacking on a module that
-** takes that and converts UCS2 to modified-UTF7.
-**
-** init_nottoimaputf7() returns an opaque stack for converting to ucs2.
-*/
-
-static libmail_u_convert_handle_t
-init_nottoimaputf7(const char *src_chset,
- const char *dst_chset,
- int (*output_func)(const char *, size_t, void *),
- void *convert_arg);
-
-/*
-** The to modified UTF7 module
-*/
-
-struct libmail_u_convert_toimaputf7 {
-
- struct libmail_u_convert_hdr hdr;
-
- /* Accumulated output buffer */
-
- char utf7encodebuf[1024];
- size_t utf7encodebuf_cnt;
-
- /* Accumulated bits for base64 encoding */
- uint32_t utf7bits;
-
- /* How many bits in utf7bits */
- uint16_t utf7bitcount;
-
- /* Flag: in base64mode */
- uint16_t utfmode;
-
- int errflag;
-
- /* Any extra characters that should be munged */
-
- char smapmunge[16];
-
- /* Remembered output function */
-
- int (*output_func)(const char *, size_t, void *);
-
- /* Remembered arg to the output function */
- void *convert_arg;
-};
-
-/* Macro - flush the output buffer */
-#define toimaputf7_encode_flush(p) do { \
- int rc; \
- \
- rc=(*(p)->output_func)((p)->utf7encodebuf, \
- (p)->utf7encodebuf_cnt, \
- (p)->convert_arg); \
- if (rc) \
- return ((p)->errflag=(rc)); \
- \
- (p)->utf7encodebuf_cnt=0; \
- } while (0)
-
-static int toimaputf7_encode_flushfinal(struct libmail_u_convert_toimaputf7 *p)
-{
- if (p->utf7encodebuf_cnt > 0)
- toimaputf7_encode_flush(p);
- return 0;
-}
-
-/* Macro - add one char to the output buffer */
-
-#define toimaputf7_encode_add(p,c) do { \
- if ((p)->utf7encodebuf_cnt >= sizeof((p)->utf7encodebuf)) \
- toimaputf7_encode_flush((p)); \
- \
- (p)->utf7encodebuf[(p)->utf7encodebuf_cnt++]=(c); \
- } while (0);
-
-static int deinit_toimaputf7(void *ptr, int *errptr);
-
-static int do_convert_toutf7(const char *text, size_t cnt, void *arg);
-static int convert_utf7_handler(void *ptr, const char *text, size_t cnt);
-
-/*
-** Create a conversion module stack
-*/
-
-libmail_u_convert_handle_t
-libmail_u_convert_init(const char *src_chset,
- const char *dst_chset,
- int (*output_func)(const char *, size_t, void *),
- void *convert_arg)
-{
- struct libmail_u_convert_toimaputf7 *toutf7;
- libmail_u_convert_handle_t h;
- const char *smapmunge;
- size_t l=strlen(unicode_x_imap_modutf7);
-
- if (strncmp(dst_chset, unicode_x_imap_modutf7, l) == 0 &&
- (dst_chset[l] == 0 || dst_chset[l] == ' '))
- {
- smapmunge=dst_chset + l;
-
- if (*smapmunge)
- ++smapmunge;
- }
- else
- return init_nottoimaputf7(src_chset, dst_chset,
- output_func,
- convert_arg);
-
- toutf7=malloc(sizeof(struct libmail_u_convert_toimaputf7));
-
- if (!toutf7)
- return NULL;
-
- memset(toutf7, 0, sizeof(*toutf7));
-
- h=init_nottoimaputf7(src_chset, libmail_u_ucs2_native,
- do_convert_toutf7, toutf7);
- if (!h)
- {
- free(toutf7);
- return (NULL);
- }
-
- toutf7->output_func=output_func;
- toutf7->convert_arg=convert_arg;
-
- strncat(toutf7->smapmunge, smapmunge, sizeof(toutf7->smapmunge)-1);
-
- toutf7->hdr.convert_handler=convert_utf7_handler;
- toutf7->hdr.deinit_handler=deinit_toimaputf7;
- toutf7->hdr.ptr=toutf7;
- toutf7->hdr.next=h;
- return &toutf7->hdr;
-}
-
-/* Passthrough to the wrapped stack */
-
-static int convert_utf7_handler(void *ptr, const char *text, size_t cnt)
-{
- struct libmail_u_convert_toimaputf7 *toutf7=
- (struct libmail_u_convert_toimaputf7 *)ptr;
-
- return (*toutf7->hdr.next->convert_handler)(toutf7->hdr.next->ptr,
- text, cnt);
-}
-
-static int utf7off(struct libmail_u_convert_toimaputf7 *toutf7)
-{
- if (!toutf7->utfmode)
- return 0;
- toutf7->utfmode=0;
-
- if (toutf7->utf7bitcount > 0)
- toimaputf7_encode_add(toutf7,
- mbase64[(toutf7->utf7bits
- << (6-toutf7->utf7bitcount))
- & 63]);
- toimaputf7_encode_add(toutf7, '-');
- return 0;
-}
-
-
-static int do_convert_toutf7(const char *text, size_t cnt, void *arg)
-{
- struct libmail_u_convert_toimaputf7 *toutf7=
- (struct libmail_u_convert_toimaputf7 *)arg;
-
- /* We better be getting UCS-2 here! */
-
- const uint16_t *utext=(const uint16_t *)text;
- cnt /= 2;
-
- while (cnt)
- {
- if (toutf7->errflag)
- return toutf7->errflag;
-
- if (*utext >= 0x20 && *utext <= 0x7F
- && strchr( toutf7->smapmunge, (char)*utext) == NULL)
-
- /*
- && (!toutf7->smapmunge || (*utext != '.' && *utext != '/' &&
- *utext != '~' && *utext != ':')))
- */
- {
- if (utf7off(toutf7))
- return toutf7->errflag;
-
- toimaputf7_encode_add(toutf7, *utext);
-
- if (*utext == '&')
- toimaputf7_encode_add(toutf7, '-');
-
- ++utext;
- --cnt;
- continue;
- }
-
- if (!toutf7->utfmode)
- {
- toutf7->utfmode=1;
- toutf7->utf7bitcount=0;
- toimaputf7_encode_add(toutf7, '&');
- continue;
- }
-
- toutf7->utf7bits = (toutf7->utf7bits << 16) |
- (((uint32_t)*utext) & 0xFFFF);
- toutf7->utf7bitcount += 16;
-
- ++utext;
- --cnt;
-
- /* If there's at least 6 bits, output base64-encoded char */
-
- while (toutf7->utf7bitcount >= 6)
- {
- uint32_t v;
- int n;
-
- if (toutf7->errflag)
- return toutf7->errflag;
-
- v=toutf7->utf7bits;
- n=toutf7->utf7bitcount-6;
- toutf7->utf7bitcount -= 6;
-
- if (n > 0)
- v >>= n;
-
- toimaputf7_encode_add(toutf7, mbase64[v & 63]);
- }
- }
-
- return 0;
-}
-
-static int deinit_toimaputf7(void *ptr, int *errptr)
-{
- int rc;
-
- struct libmail_u_convert_toimaputf7 *toutf7=
- (struct libmail_u_convert_toimaputf7 *)ptr;
-
- /* Flush out the downstream stack */
- rc=(*toutf7->hdr.next->deinit_handler)(toutf7->hdr.next->ptr, errptr);
-
- /* Make sure we're out of modified base64 */
-
- if (rc == 0)
- rc=utf7off(toutf7);
-
- if (rc == 0 && toutf7->utf7encodebuf_cnt > 0)
- rc=toimaputf7_encode_flushfinal(toutf7);
-
- free(toutf7);
- return rc;
-}
-
-/************/
-
-/*
-** Convert from modified-utf7 IMAP encoding.
-**
-** This module converts it to UCS-2, then this is attached to a stack that
-** converts UCS-2 to the requested charset.
-*/
-
-static libmail_u_convert_handle_t
-init_notfromimaputf7(const char *src_chset,
- const char *dst_chset,
- int (*output_func)(const char *, size_t, void *),
- void *convert_arg);
-
-struct libmail_u_convert_fromimaputf7 {
-
- struct libmail_u_convert_hdr hdr;
-
- /* Accumulated UCS-2 stream */
- uint16_t convbuf[512];
- size_t convbuf_cnt;
-
- /* Accumulated base64 bits */
- uint32_t modbits;
-
- /* How many bits extracted from a base64 stream */
-
- short modcnt;
-
- /* Flag: seen the & */
- char seenamp;
-
- /* Flag: seen the &, and the next char wasn't - */
-
- char inmod;
- int errflag;
- int converr;
-};
-
-/* Flush the accumulated UCS-2 stream */
-
-#define convert_fromutf7_flush(p) do { \
- (p)->errflag=(*(p)->hdr.next->convert_handler) \
- ((p)->hdr.next->ptr, \
- (const char *)(p)->convbuf, \
- (p)->convbuf_cnt * \
- sizeof((p)->convbuf[0])); \
- (p)->convbuf_cnt=0; \
- } while (0)
-
-/* Accumulated a UCS-2 char */
-
-#define convert_fromutf7_add(p,c) do { \
- if ((p)->convbuf_cnt >= \
- sizeof((p)->convbuf)/sizeof((p)->convbuf[0])) \
- convert_fromutf7_flush((p)); \
- (p)->convbuf[(p)->convbuf_cnt++]=(c); \
- } while (0)
-
-
-static int convert_fromutf7(void *ptr,
- const char *text, size_t cnt);
-static int deinit_fromutf7(void *ptr, int *errptr);
-
-static libmail_u_convert_handle_t
-init_nottoimaputf7(const char *src_chset,
- const char *dst_chset,
- int (*output_func)(const char *, size_t, void *),
- void *convert_arg)
-{
- struct libmail_u_convert_fromimaputf7 *fromutf7;
- libmail_u_convert_handle_t h;
- size_t l=strlen(unicode_x_imap_modutf7);
-
- if (strncmp(src_chset, unicode_x_imap_modutf7, l) == 0 &&
- (src_chset[l] == 0 || src_chset[l] == ' '))
- ;
- else
- return init_notfromimaputf7(src_chset, dst_chset,
- output_func,
- convert_arg);
-
- fromutf7=(struct libmail_u_convert_fromimaputf7 *)
- malloc(sizeof(struct libmail_u_convert_fromimaputf7));
-
- if (!fromutf7)
- return NULL;
-
- memset(fromutf7, 0, sizeof(*fromutf7));
-
- /* Create a stack for converting UCS-2 to the dest charset */
-
- h=init_notfromimaputf7(libmail_u_ucs2_native, dst_chset,
- output_func, convert_arg);
-
- if (!h)
- {
- free(fromutf7);
- return (NULL);
- }
-
- fromutf7->hdr.next=h;
- fromutf7->hdr.convert_handler=convert_fromutf7;
- fromutf7->hdr.deinit_handler=deinit_fromutf7;
- fromutf7->hdr.ptr=fromutf7;
- return &fromutf7->hdr;
-}
-
-static int convert_fromutf7(void *ptr,
- const char *text, size_t cnt)
-{
- struct libmail_u_convert_fromimaputf7 *fromutf7=
- (struct libmail_u_convert_fromimaputf7 *)ptr;
- int bits;
-
- while (cnt)
- {
- if (fromutf7->errflag)
- return fromutf7->errflag;
-
- if (!fromutf7->seenamp && *text == '&')
- {
- fromutf7->seenamp=1;
- fromutf7->inmod=0;
- fromutf7->modcnt=0;
- ++text;
- --cnt;
- continue;
- }
-
- if (fromutf7->seenamp)
- {
- if (*text == '-')
- {
- convert_fromutf7_add(fromutf7, '&');
- ++text;
- --cnt;
- fromutf7->seenamp=0;
- continue;
- }
- fromutf7->seenamp=0;
- fromutf7->inmod=1;
- }
-
- if (!fromutf7->inmod)
- {
- /* Not in the base64 encoded stream */
-
- convert_fromutf7_add(fromutf7,
- ((uint16_t)*text) & 0xFFFF);
- ++text;
- --cnt;
- continue;
- }
-
- if (*text == '-')
- {
- /* End of the base64 encoded stream */
- fromutf7->inmod=0;
- ++text;
- --cnt;
- continue;
- }
-
- /* Got 6 more bits */
-
- bits=mbase64_lookup[(unsigned char)*text];
-
- ++text;
- --cnt;
-
- if (bits < 0)
- {
- errno=EILSEQ;
- return fromutf7->errflag=-1;
- }
-
- fromutf7->modbits = (fromutf7->modbits << 6) | bits;
- fromutf7->modcnt += 6;
-
- if (fromutf7->modcnt >= 16)
- {
- /* Got a UCS-2 char */
-
- int shiftcnt=fromutf7->modcnt - 16;
- uint32_t v=fromutf7->modbits;
-
- if (shiftcnt)
- v >>= shiftcnt;
-
- fromutf7->modcnt -= 16;
-
- convert_fromutf7_add(fromutf7, v);
- }
- }
- return 0;
-}
-
-static int deinit_fromutf7(void *ptr, int *errptr)
-{
- struct libmail_u_convert_fromimaputf7 *fromutf7=
- (struct libmail_u_convert_fromimaputf7 *)ptr;
- int rc;
-
- if (fromutf7->seenamp || fromutf7->inmod)
- {
- if (fromutf7->errflag == 0)
- {
- fromutf7->errflag= -1;
- errno=EILSEQ;
- }
- }
-
- if (fromutf7->convbuf_cnt)
- convert_fromutf7_flush(fromutf7);
-
- rc=fromutf7->hdr.next->deinit_handler(fromutf7->hdr.next->ptr, errptr);
-
- if (fromutf7->errflag && rc == 0)
- rc=fromutf7->errflag;
-
- if (errptr && fromutf7->converr)
- *errptr=1;
-
- free(fromutf7);
- return rc;
-}
-
-/************/
-
-/* A real conversion module, via iconv */
-
-struct libmail_u_convert_iconv {
-
- struct libmail_u_convert_hdr hdr;
-
- iconv_t h;
- int errflag; /* Accumulated errors */
-
- int (*output_func)(const char *, size_t, void *);
- void *convert_arg;
-
- char buffer[1024]; /* Input buffer */
- size_t bufcnt; /* Accumulated input in buffer */
- char skipcnt; /* Skip this many bytes upon encountering EILSEQ */
- char skipleft; /* How many bytes are currently left to skip */
- char converr; /* Flag - an EILSEQ was encountered */
-} ;
-
-static int init_iconv(struct libmail_u_convert_iconv *h,
- const char *src_chset,
- const char *dst_chset,
- int (*output_func)(const char *, size_t, void *),
- void *convert_arg);
-
-static libmail_u_convert_handle_t
-init_notfromimaputf7(const char *src_chset,
- const char *dst_chset,
- int (*output_func)(const char *, size_t, void *),
- void *convert_arg)
-{
-
-
- struct libmail_u_convert_iconv *h=
- malloc(sizeof(struct libmail_u_convert_iconv));
-
- if (!h)
- return NULL;
-
- memset(h, 0, sizeof(*h));
-
- if (init_iconv(h, src_chset, dst_chset, output_func, convert_arg))
- {
- free(h);
- return NULL;
- }
- return &h->hdr;
-}
-
-/* Run the stack */
-
-int libmail_u_convert(libmail_u_convert_handle_t h,
- const char *text, size_t cnt)
-{
- return (*h->convert_handler)(h->ptr, text, cnt);
-}
-
-/* Destroy the stack */
-
-int libmail_u_convert_deinit(libmail_u_convert_handle_t h, int *errptr)
-{
- return (*h->deinit_handler)(h, errptr);
-}
-
-static int deinit_iconv(void *ptr, int *errptr);
-static int convert_iconv(void *ptr,
- const char *text, size_t cnt);
-
-/* Initialize a single conversion module, in the stack */
-
-static int init_iconv(struct libmail_u_convert_iconv *h,
- const char *src_chset,
- const char *dst_chset,
- int (*output_func)(const char *, size_t, void *),
- void *convert_arg)
-{
- if ((h->h=iconv_open(dst_chset, src_chset)) == (iconv_t)-1)
- return -1;
-
- h->hdr.convert_handler=convert_iconv;
- h->hdr.deinit_handler=deinit_iconv;
- h->hdr.ptr=h;
-
- h->output_func=output_func;
- h->convert_arg=convert_arg;
-
- /* Heuristically determine how many octets to skip upon an EILSEQ */
-
- h->skipcnt=1;
- switch (src_chset[0]) {
- case 'u':
- case 'U':
- switch (src_chset[1]) {
- case 'c':
- case 'C':
- switch (src_chset[2]) {
- case 's':
- case 'S':
- if (src_chset[3] == '-')
- switch (src_chset[4]) {
- case '4':
- /* UCS-4 */
- h->skipcnt=4;
- break;
- case '2':
- /* UCS-2 */
- h->skipcnt=2;
- break;
- }
- }
- break;
- case 't':
- case 'T':
- switch (src_chset[2]) {
- case 'f':
- case 'F':
- if (src_chset[3] == '-')
- switch (src_chset[4]) {
- case '3':
- /* UTF-32 */
- h->skipcnt=4;
- break;
- case '1':
- /* UTF-16 */
- h->skipcnt=2;
- break;
- }
- }
- }
- }
-
- return 0;
-}
-
-static void convert_flush(struct libmail_u_convert_iconv *);
-static void convert_flush_iconv(struct libmail_u_convert_iconv *, const char **,
- size_t *);
-
-/*
-** iconv conversion module. Accumulate input in an input buffer. When the
-** input buffer is full, invoke convert_flush().
-*/
-
-static int convert_iconv(void *ptr,
- const char *text, size_t cnt)
-{
- struct libmail_u_convert_iconv *h=(struct libmail_u_convert_iconv *)ptr;
-
- while (cnt && h->errflag == 0)
- {
- if (h->bufcnt >= sizeof(h->buffer)-1)
- {
- convert_flush(h);
-
- if (h->errflag)
- break;
- }
-
- h->buffer[h->bufcnt++]= *text++;
- --cnt;
- }
-
- return h->errflag;
-}
-
-/*
-** Finish an iconv conversion module. Invoke convert_flush() to flush any
-** buffered input. Invoke convert_flush_iconv() to return state to the initial
-** conversion state.
-*/
-
-static int deinit_iconv(void *ptr, int *errptr)
-{
- int rc;
- int converr;
- struct libmail_u_convert_iconv *h=(struct libmail_u_convert_iconv *)ptr;
- libmail_u_convert_handle_t next;
-
- if (h->errflag == 0)
- convert_flush(h);
-
- if (h->bufcnt && h->errflag == 0)
- h->converr=1;
-
- if (h->errflag == 0)
- convert_flush_iconv(h, NULL, NULL);
-
- rc=h->errflag;
- converr=h->converr != 0;
- iconv_close(h->h);
- next=h->hdr.next;
- free(h);
- if (errptr)
- *errptr=converr;
-
- /* If there's another module in the stack, clean that up */
-
- if (next)
- {
- int converrnext;
- int rcnext=libmail_u_convert_deinit(next, &converrnext);
-
- if (converrnext && errptr && *errptr == 0)
- *errptr=converr;
-
- if (rcnext && rc == 0)
- rc=rcnext;
- }
- return rc;
-}
-
-/*
-** Invoke convert_flush_iconv() to flush the input buffer. If there's
-** unconverted text remaining, reposition it at the beginning of the input
-** buffer.
-*/
-
-static void convert_flush(struct libmail_u_convert_iconv *h)
-{
- const char *p;
- size_t n;
-
- if (h->bufcnt == 0 || h->errflag)
- return;
-
- p=h->buffer;
- n=h->bufcnt;
-
- convert_flush_iconv(h, &p, &n);
-
- if (h->errflag)
- return;
-
- if (h->bufcnt == n)
- n=0; /* Unexpected error, dunno what to do, punt */
-
- h->bufcnt=0;
-
- while (n)
- {
- h->buffer[h->bufcnt]= *p;
-
- ++h->bufcnt;
- ++p;
- --n;
- }
-}
-
-/*
-** Convert text via iconv.
-*/
-
-static void convert_flush_iconv(struct libmail_u_convert_iconv *h,
- const char **inbuf, size_t *inbytesleft)
-{
- int save_errno;
-
- while (1)
- {
- char outbuf[1024];
- char *outp;
- size_t outleft;
- size_t n;
- size_t origin=0;
-
- if (inbytesleft)
- {
- if ((origin=*inbytesleft) == 0)
- return;
-
- if (inbuf && h->skipleft && origin)
- {
- /* Skipping after an EILSEQ */
-
- --h->skipleft;
- --*inbytesleft;
- ++*inbuf;
- continue;
- }
-
- }
-
- if (h->errflag)
- {
- /* Quietly eat everything after a previous error */
-
- if (inbytesleft)
- *inbytesleft=0;
-
- return;
- }
-
- outp=outbuf;
- outleft=sizeof(outbuf);
-
- n=iconv(h->h, (char **)inbuf, inbytesleft, &outp, &outleft);
-
- save_errno=errno;
-
- /* Anything produced by iconv() gets pushed down the stack */
-
- if (outp > outbuf)
- {
- int rc=(*h->output_func)(outbuf, outp-outbuf,
- h->convert_arg);
- if (rc)
- {
- h->errflag=rc;
- return;
- }
- }
-
- if (n != (size_t)-1)
- {
- /* iconv(3) reason #2 */
-
- break;
- }
-
- if (inbytesleft == 0)
- {
- /*
- ** An error when generating the shift sequence to
- ** return to the initial state. We don't know what to
- ** do, now.
- */
-
- errno=EINVAL;
- h->errflag= -1;
- return;
- }
-
- /*
- ** convert_flush() gets invoked when the 1024 char input buffer
- ** fills or to convert input that has been buffered when
- ** convert_chset_end() gets invoked.
- **
- ** A return code of EINVAL from iconv() is iconv() encountering
- ** an incomplete multibyte sequence.
- **
- ** If iconv() failed without consuming any input:
- **
- ** - iconv(3) reason #1, EILSEQ, invalid multibyte sequence
- ** that starts at the beginning of the string we wish to
- ** convert. Discard one character, and try again.
- **
- ** - iconv(3) reason #3, EINVAL, incomplete multibyte sequence.
- ** If it's possible to have an incomplete 1024 character long
- ** multibyte sequence, we're in trouble. Or we've encountered
- ** an EINVAL when flushing out the remaining buffered input,
- ** in convert_chset_end(). In either case, it's ok to sicard
- ** one character at a time, until we either reach the end,
- ** or get some other result.
- **
- ** - iconv(3) reason #4, E2BIG. If the 1024 character output
- ** buffer, above, is insufficient to produce the output from a
- ** single converted character, we're in trouble.
- */
-
- if (*inbytesleft == origin)
- {
- h->skipleft=h->skipcnt;
- h->converr=1;
- }
-
- /*
- ** Stopped at an incomplete multibyte sequence, try again on
- ** the next round.
- */
- else if (save_errno == EINVAL)
- break;
-
- if (save_errno == EILSEQ)
- h->converr=1; /* Another possibility this can happen */
-
- /*
- ** If we get here because of iconv(3) reason #4, filled out
- ** the output buffer, we should continue with the conversion.
- ** Otherwise, upon encountering any other error condition,
- ** reset the conversion state.
- */
- if (save_errno != E2BIG)
- iconv(h->h, NULL, NULL, NULL, NULL);
- }
-}
-
-/*****************************************************************************/
-
-/*
-** A wrapper for libmail_u_convert() that collects the converted character
-** text into a buffer. This is done by passing an output function to
-** libmail_u_convert() that saves converted text in a linked-list
-** of buffers.
-**
-** Then, in the deinitialization function, the buffers get concatenated into
-** the final character buffer.
-*/
-
-struct libmail_u_convert_cbuf {
- struct libmail_u_convert_cbuf *next;
- char *fragment;
- size_t fragment_size;
-};
-
-struct libmail_u_convert_tocbuf {
- struct libmail_u_convert_hdr hdr;
-
- char **cbufptr_ret;
- size_t *cbufsize_ret;
- int errflag;
- size_t tot_size;
- int nullterminate;
-
- struct libmail_u_convert_cbuf *first, **last;
-};
-
-static int save_tocbuf(const char *, size_t, void *);
-static int convert_tocbuf(void *ptr,
- const char *text, size_t cnt);
-static int deinit_tocbuf(void *ptr, int *errptr);
-
-libmail_u_convert_handle_t
-libmail_u_convert_tocbuf_init(const char *src_chset,
- const char *dst_chset,
- char **cbufptr_ret,
- size_t *cbufsize_ret,
- int nullterminate
- )
-{
- struct libmail_u_convert_tocbuf *p=
- malloc(sizeof(struct libmail_u_convert_tocbuf));
- libmail_u_convert_handle_t h;
-
- if (!p)
- return NULL;
-
- memset(p, 0, sizeof(*p));
-
- h=libmail_u_convert_init(src_chset, dst_chset, save_tocbuf, p);
-
- if (!h)
- {
- free(p);
- return NULL;
- }
-
- p->cbufptr_ret=cbufptr_ret;
- p->cbufsize_ret=cbufsize_ret;
- p->last= &p->first;
- p->nullterminate=nullterminate;
- p->hdr.next=h;
- p->hdr.convert_handler=convert_tocbuf;
- p->hdr.deinit_handler=deinit_tocbuf;
- p->hdr.ptr=p;
- return &p->hdr;
-}
-
-/* Capture the output of the conversion stack */
-
-static int save_tocbuf(const char *text, size_t cnt, void *ptr)
-{
- struct libmail_u_convert_tocbuf *p=
- (struct libmail_u_convert_tocbuf *)ptr;
- struct libmail_u_convert_cbuf *fragment=
- malloc(sizeof(struct libmail_u_convert_cbuf)+cnt);
- size_t tot_size;
-
- if (!fragment)
- {
- p->errflag=1;
- return 1;
- }
-
- fragment->next=NULL;
- fragment->fragment=(char *)(fragment+1);
- if ((fragment->fragment_size=cnt) > 0)
- memcpy(fragment->fragment, text, cnt);
-
- *(p->last)=fragment;
- p->last=&fragment->next;
-
- tot_size=p->tot_size + cnt; /* Keep track of the total size saved */
-
- if (tot_size < p->tot_size) /* Overflow? */
- {
- errno=E2BIG;
- return 1;
- }
- p->tot_size=tot_size;
- return 0;
-}
-
-/* Punt converted text down the stack */
-
-static int convert_tocbuf(void *ptr, const char *text, size_t cnt)
-{
- struct libmail_u_convert_tocbuf *p=
- (struct libmail_u_convert_tocbuf *)ptr;
-
- return libmail_u_convert(p->hdr.next, text, cnt);
-}
-
-/*
-** Destroy the conversion stack. Destroy the downstream, then assemble the
-** final array.
-*/
-
-static int deinit_tocbuf(void *ptr, int *errptr)
-{
- struct libmail_u_convert_tocbuf *p=
- (struct libmail_u_convert_tocbuf *)ptr;
- int rc=libmail_u_convert_deinit(p->hdr.next, errptr);
- struct libmail_u_convert_cbuf *bufptr;
-
- if (rc == 0 && p->nullterminate)
- {
- char zero=0;
-
- rc=save_tocbuf( &zero, sizeof(zero), p->hdr.ptr);
- }
-
- if (rc == 0)
- {
- if (((*p->cbufptr_ret)=malloc(p->tot_size ? p->tot_size:1)) !=
- NULL)
- {
- size_t i=0;
-
- for (bufptr=p->first; bufptr; bufptr=bufptr->next)
- {
- if (bufptr->fragment_size)
- memcpy(&(*p->cbufptr_ret)[i],
- bufptr->fragment,
- bufptr->fragment_size);
- i += bufptr->fragment_size;
- }
- (*p->cbufsize_ret)=i;
- }
- else
- {
- rc= -1;
- }
- }
-
- for (bufptr=p->first; bufptr; )
- {
- struct libmail_u_convert_cbuf *b=bufptr;
-
- bufptr=bufptr->next;
-
- free(b);
- }
- free(p);
-
- return rc;
-}
-
-libmail_u_convert_handle_t
-libmail_u_convert_tocbuf_toutf8_init(const char *src_chset,
- char **cbufptr_ret,
- size_t *cbufsize_ret,
- int nullterminate
- )
-{
- return libmail_u_convert_tocbuf_init(src_chset, "utf-8",
- cbufptr_ret, cbufsize_ret,
- nullterminate);
-}
-
-libmail_u_convert_handle_t
-libmail_u_convert_tocbuf_fromutf8_init(const char *dst_chset,
- char **cbufptr_ret,
- size_t *cbufsize_ret,
- int nullterminate
- )
-{
- return libmail_u_convert_tocbuf_init("utf-8", dst_chset,
- cbufptr_ret, cbufsize_ret,
- nullterminate);
-}
-
-char *libmail_u_convert_toutf8(const char *text,
- const char *charset,
- int *error)
-{
- char *cbufptr;
- size_t cbufsize;
- libmail_u_convert_handle_t h=
- libmail_u_convert_tocbuf_toutf8_init(charset,
- &cbufptr,
- &cbufsize, 1);
-
- if (!h)
- return NULL;
-
- libmail_u_convert(h, text, strlen(text));
-
- if (libmail_u_convert_deinit(h, error) == 0)
- return cbufptr;
-
- return NULL;
-}
-
-char *libmail_u_convert_fromutf8(const char *text,
- const char *charset,
- int *error)
-{
- char *cbufptr;
- size_t cbufsize;
- libmail_u_convert_handle_t h=
- libmail_u_convert_tocbuf_fromutf8_init(charset,
- &cbufptr,
- &cbufsize, 1);
-
- if (!h)
- return NULL;
-
- libmail_u_convert(h, text, strlen(text));
-
- if (libmail_u_convert_deinit(h, error) == 0)
- return cbufptr;
-
- return NULL;
-}
-
-char *libmail_u_convert_tobuf(const char *text,
- const char *charset,
- const char *dstcharset,
- int *error)
-{
- char *cbufptr;
- size_t cbufsize;
- libmail_u_convert_handle_t h=
- libmail_u_convert_tocbuf_init(charset,
- dstcharset,
- &cbufptr,
- &cbufsize, 1);
-
- if (!h)
- return NULL;
-
- libmail_u_convert(h, text, strlen(text));
-
- if (libmail_u_convert_deinit(h, error) == 0)
- return cbufptr;
-
- return NULL;
-}
-
-/*****************************************************************************/
-
-/*
-** Convert text to unicode_chars. Same basic approach as
-** libmail_u_convert_tocbuf_init(). The output character set gets specified
-** as UCS-4, the final output size is divided by 4, and the output buffer gets
-** typed as a unicode_char array.
-*/
-
-struct libmail_u_convert_buf {
- struct libmail_u_convert_buf *next;
- unicode_char *fragment;
- size_t fragment_size;
- size_t max_fragment_size;
-};
-
-struct libmail_u_convert_tou {
- struct libmail_u_convert_hdr hdr;
-
- unicode_char **ucptr_ret;
- size_t *ucsize_ret;
- int errflag;
- size_t tot_size;
- int nullterminate;
-
- struct libmail_u_convert_buf *first, *tail, **last;
-};
-
-static int save_unicode(const char *, size_t, void *);
-static int convert_tounicode(void *ptr,
- const char *text, size_t cnt);
-static int deinit_tounicode(void *ptr, int *errptr);
-
-libmail_u_convert_handle_t
-libmail_u_convert_tou_init(const char *src_chset,
- unicode_char **ucptr_ret,
- size_t *ucsize_ret,
- int nullterminate
- )
-{
- struct libmail_u_convert_tou *p=
- malloc(sizeof(struct libmail_u_convert_tou));
- libmail_u_convert_handle_t h;
-
- if (!p)
- return NULL;
-
- memset(p, 0, sizeof(*p));
-
- h=libmail_u_convert_init(src_chset, libmail_u_ucs4_native,
- save_unicode, p);
-
- if (!h)
- {
- free(p);
- return NULL;
- }
-
- p->ucptr_ret=ucptr_ret;
- p->ucsize_ret=ucsize_ret;
- p->last= &p->first;
- p->nullterminate=nullterminate;
- p->hdr.next=h;
- p->hdr.convert_handler=convert_tounicode;
- p->hdr.deinit_handler=deinit_tounicode;
- p->hdr.ptr=p;
- return &p->hdr;
-}
-
-libmail_u_convert_handle_t
-libmail_u_convert_fromu_init(const char *dst_chset,
- char **cbufptr_ret,
- size_t *csize_ret,
- int nullterminate
- )
-{
- return libmail_u_convert_tocbuf_init(libmail_u_ucs4_native,
- dst_chset,
- cbufptr_ret,
- csize_ret,
- nullterminate);
-}
-
-int libmail_u_convert_uc(libmail_u_convert_handle_t handle,
- const unicode_char *text,
- size_t cnt)
-{
- return libmail_u_convert(handle, (const char *)text,
- cnt * sizeof(*text));
-}
-
-/* Capture the output of the conversion stack */
-
-static int save_unicode(const char *text, size_t cnt, void *ptr)
-{
- struct libmail_u_convert_tou *p=
- (struct libmail_u_convert_tou *)ptr;
- struct libmail_u_convert_buf *fragment;
- size_t tot_size;
-
- cnt /= sizeof(unicode_char);
-
- tot_size=p->tot_size + cnt*sizeof(unicode_char);
- /* Keep track of the total size saved */
-
- if (p->tail)
- {
- size_t n=p->tail->max_fragment_size-p->tail->fragment_size;
-
- if (n > cnt)
- n=cnt;
-
- if (n)
- {
- memcpy(p->tail->fragment+p->tail->fragment_size,
- text, n*sizeof(unicode_char));
-
- cnt -= n;
- text += n*sizeof(unicode_char);
- p->tail->fragment_size += n;
- }
- }
-
- if (cnt > 0)
- {
- size_t cnt_alloc=cnt;
-
- if (cnt_alloc < 16)
- cnt_alloc=16;
-
- if ((fragment=malloc(sizeof(struct libmail_u_convert_buf)
- +cnt_alloc*sizeof(unicode_char)))
- == NULL)
- {
- p->errflag=1;
- return 1;
- }
-
- fragment->next=NULL;
- fragment->fragment=(unicode_char *)(fragment+1);
- fragment->max_fragment_size=cnt_alloc;
- fragment->fragment_size=cnt;
- memcpy(fragment->fragment, text, cnt*sizeof(unicode_char));
-
- *(p->last)=fragment;
- p->last=&fragment->next;
- p->tail=fragment;
- }
-
- if (tot_size < p->tot_size) /* Overflow? */
- {
- errno=E2BIG;
- return 1;
- }
- p->tot_size=tot_size;
- return 0;
-}
-
-/* Punt converted text down the stack */
-
-static int convert_tounicode(void *ptr,
- const char *text, size_t cnt)
-{
- struct libmail_u_convert_tou *p=
- (struct libmail_u_convert_tou *)ptr;
-
- return libmail_u_convert(p->hdr.next, text, cnt);
-}
-
-/*
-** Destroy the conversion stack. Destroy the downstream, then assemble the
-** final array.
-*/
-
-static int deinit_tounicode(void *ptr, int *errptr)
-{
- struct libmail_u_convert_tou *p=
- (struct libmail_u_convert_tou *)ptr;
- int rc=libmail_u_convert_deinit(p->hdr.next, errptr);
- struct libmail_u_convert_buf *bufptr;
-
- if (rc == 0 && p->nullterminate)
- {
- unicode_char zero=0;
-
- rc=save_unicode( (const char *)&zero, sizeof(zero),
- p->hdr.ptr);
- }
-
- if (rc == 0)
- {
- if (((*p->ucptr_ret)=malloc(p->tot_size ? p->tot_size:1)) !=
- NULL)
- {
- size_t i=0;
-
- for (bufptr=p->first; bufptr; bufptr=bufptr->next)
- {
- if (bufptr->fragment_size)
- memcpy(&(*p->ucptr_ret)[i],
- bufptr->fragment,
- bufptr->fragment_size
- *sizeof(*bufptr->fragment));
- i += bufptr->fragment_size;
- }
- (*p->ucsize_ret)=i;
- }
- else
- {
- rc= -1;
- }
- }
-
- for (bufptr=p->first; bufptr; )
- {
- struct libmail_u_convert_buf *b=bufptr;
-
- bufptr=bufptr->next;
-
- free(b);
- }
- free(p);
-
- return rc;
-}
-
-int libmail_u_convert_tou_tobuf(const char *text,
- size_t text_l,
- const char *charset,
- unicode_char **uc,
- size_t *ucsize,
- int *err)
-{
- libmail_u_convert_handle_t h;
-
- if ((h=libmail_u_convert_tou_init(charset, uc, ucsize, 0)) == NULL)
- return -1;
-
- if (libmail_u_convert(h, text, text_l) < 0)
- {
- libmail_u_convert_deinit(h, NULL);
- return -1;
- }
-
- if (libmail_u_convert_deinit(h, err))
- return -1;
-
- return 0;
-}
-
-int libmail_u_convert_fromu_tobuf(const unicode_char *utext,
- size_t utext_l,
- const char *charset,
- char **c,
- size_t *csize,
- int *err)
-{
- libmail_u_convert_handle_t h;
-
- if (utext_l == (size_t)-1)
- {
- for (utext_l=0; utext[utext_l]; ++utext_l)
- ;
- }
-
- if ((h=libmail_u_convert_fromu_init(charset, c, csize, 1)) == NULL)
- return -1;
-
- if (libmail_u_convert_uc(h, utext, utext_l) < 0)
- {
- libmail_u_convert_deinit(h, NULL);
- return -1;
- }
-
- if (libmail_u_convert_deinit(h, err))
- return -1;
-
- return 0;
-}
-
-char *libmail_u_convert_tocase(const char *str,
- const char *charset,
- unicode_char (*first_char_func)(unicode_char),
- unicode_char (*char_func)(unicode_char))
-{
- unicode_char *uc;
- size_t ucsize;
- size_t i;
- int err;
- char *c;
- size_t csize;
-
- if (libmail_u_convert_tou_tobuf(str, strlen(str),
- charset, &uc, &ucsize, &err))
- return NULL;
-
- if (err)
- {
- free(uc);
- return NULL;
- }
-
- for (i=0; i<ucsize; ++i)
- {
- uc[i]=(*first_char_func)(uc[i]);
-
- if (char_func)
- first_char_func=char_func;
- }
-
- if (libmail_u_convert_fromu_tobuf(uc, ucsize,
- charset,
- &c, &csize, &err))
- {
- free(uc);
- return NULL;
- }
-
- free(uc);
-
- if (err)
- {
- free(c);
- return NULL;
- }
-
- return c;
-}