--- /dev/null
+/*
+** Copyright 2003 Double Precision, Inc.
+** See COPYING for distribution information.
+**
+** $Id: utf7.c,v 1.3 2004/05/23 14:28:25 mrsam Exp $
+*/
+
+#include "unicode.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+/* The 64-character base64 alphabet; a character's index is its 6-bit value */
+static const char base64tab[]=
+	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+	"abcdefghijklmnopqrstuvwxyz"
+	"0123456789+/";
+
+/* Working state for decoding one base64 run inside a UTF-7 string */
+struct dec_base64_struct {
+	unsigned char base64buf[4];	/* 6-bit values waiting for a full group of four */
+	int cnt;	/* How many entries of base64buf are in use */
+
+	unicode_char uc1;	/* Octets collected so far, most significant first */
+	int ucnt;	/* How many octets have been shifted into uc1 */
+
+	int flushing;	/* Set by the caller once it starts feeding zero padding */
+	int flushed;	/* Set after a character is emitted while flushing */
+};
+
+/* Poor man's base64 decoder */
+
+/*
+** Feed one decoded octet into the UTF-16 accumulator.
+**
+** Octets are collected two at a time, most significant first, into 16-bit
+** units.  A unit outside 0xD800-0xDFFF is emitted as is.  A high surrogate
+** (0xD800-0xDBFF) is held back until the next unit arrives; that unit must
+** be a low surrogate (0xDC00-0xDFFF), and the pair is then emitted as the
+** single code point 0x10000 + (high bits << 10 | low bits).
+**
+** When *buffer is NULL nothing is stored, but *buflen is still advanced, so
+** the same sequence of calls can be used to size the output first.
+**
+** Once dc->flushing is set, the first character emitted also sets
+** dc->flushed, and every later octet is silently dropped.
+**
+** Returns 0 on success, -1 on a malformed surrogate sequence.  The state in
+** *dc is not reset when -1 is returned.
+*/
+
+static int dec_b64_uchar(struct dec_base64_struct *dc,
+			 unsigned char uuc,
+			 unicode_char **buffer,
+			 size_t *buflen)
+{
+	unicode_char out;
+
+	if (dc->flushed)
+		return 0; /* Flushing trailing junk */
+
+	dc->uc1 = (dc->uc1 << 8) | uuc;
+	++dc->ucnt;
+
+	if (dc->ucnt == 2)
+	{
+		if (dc->uc1 >= 0xD800 && dc->uc1 <= 0xDFFF)
+		{
+			/*
+			** Surrogate.  A high one waits for its partner,
+			** a low one may not come first.
+			*/
+			return dc->uc1 > 0xDBFF ? -1 : 0;
+		}
+		out = dc->uc1;
+	}
+	else if (dc->ucnt == 4)
+	{
+		unicode_char hi = (dc->uc1 >> 16) & 0xFFFF;
+		unicode_char lo = dc->uc1 & 0xFFFF;
+
+		dc->uc1 = lo;
+
+		if (lo < 0xDC00 || lo > 0xDFFF)
+			return -1; /* High surrogate without a low one */
+
+		/* Supplementary planes start at U+10000 */
+		out = 0x10000 + (((hi & 0x3FF) << 10) | (lo & 0x3FF));
+	}
+	else
+	{
+		return 0; /* Unit still incomplete */
+	}
+
+	if (*buffer)
+		(*buffer)[*buflen] = out;
+	++*buflen;
+
+	dc->uc1 = 0;
+	dc->ucnt = 0;
+	if (dc->flushing)
+		dc->flushed = 1;
+	return 0;
+}
+
+/*
+** Feed one 6-bit base64 value (an index into the base64 alphabet, not the
+** character itself) into the decoder.
+**
+** Values are queued in dc->base64buf.  Every fourth value completes a group,
+** which is unpacked into three octets that are handed to dec_b64_uchar() in
+** order.  dc->cnt is reset before the octets are processed, so it is already
+** zero when an error is reported.
+**
+** Returns 0 on success, -1 as soon as dec_b64_uchar() rejects an octet; the
+** remaining octets of the group are then not processed.
+*/
+
+static int dec_b64_char(struct dec_base64_struct *dc,
+			char c,
+			unicode_char **buffer,
+			size_t *buflen)
+{
+	unsigned char octet[3];
+	unsigned v0, v1, v2, v3;
+	int n;
+
+	dc->base64buf[dc->cnt] = c;
+	if (++dc->cnt < 4)
+		return 0; /* Group not complete yet */
+
+	v0 = dc->base64buf[0];
+	v1 = dc->base64buf[1];
+	v2 = dc->base64buf[2];
+	v3 = dc->base64buf[3];
+
+	/* 4 x 6 bits -> 3 x 8 bits; mask explicitly instead of narrowing */
+	octet[0] = (unsigned char)(((v0 << 2) | (v1 >> 4)) & 0xFF);
+	octet[1] = (unsigned char)(((v1 << 4) | (v2 >> 2)) & 0xFF);
+	octet[2] = (unsigned char)(((v2 << 6) | v3) & 0xFF);
+	dc->cnt = 0;
+
+	for (n = 0; n < 3; n++)
+	{
+		if (dec_b64_uchar(dc, octet[n], buffer, buflen))
+			return -1;
+	}
+	return 0;
+}
+
+/*
+** Decode the UTF-7 string p into a zero-terminated unicode_char array.
+**
+** The input is walked twice: the first pass only counts the output
+** characters, the second pass fills the allocated array.
+**
+** Outside a shifted sequence every byte is copied through unchanged.  '+'
+** starts a base64 run, "+-" stands for a literal '+', and a '-' directly
+** after a base64 run is swallowed.  A partial trailing base64 group is
+** padded with zero values so that a pending character can be completed.
+**
+** If err is not NULL, a decoding error stores the offset into p in *err,
+** sets errno to EINVAL and returns NULL.  If err is NULL the decoder state
+** is reset and decoding carries on.
+**
+** Returns a malloc()ed array that the caller must free(), or NULL on a
+** decoding error (see above) or when memory cannot be obtained.  The foo
+** argument is not used.
+*/
+
+static unicode_char *tou(const struct unicode_info *foo, const char *p,
+			 int *err)
+{
+	int pass;
+	size_t i;
+	unicode_char *buffer=NULL;
+	size_t buflen=0;
+
+	/* Two passes. Count the output, alloc buffer, do it */
+
+	for (pass=0; pass<2; pass++)
+	{
+		if (pass)
+		{
+			/* (buflen+1)*sizeof(unicode_char) must not wrap */
+			if (buflen >= ((size_t)-1) / sizeof(unicode_char))
+			{
+				errno=ENOMEM;
+				return NULL;
+			}
+
+			buffer=malloc((buflen+1)*sizeof(unicode_char));
+			if (buffer == NULL)
+				return NULL;
+		}
+		buflen=0;
+
+		for (i=0; p[i]; i++)
+		{
+			const char *q;
+			struct dec_base64_struct dc;
+
+			if (p[i] != '+')
+			{
+				/* Directly encoded character */
+				if (buffer)
+					buffer[buflen]=(unsigned char)p[i];
+				++buflen;
+				continue;
+			}
+
+			if (p[++i] == 0)
+				break; /* '+' was the last byte */
+
+			if (p[i] == '-')
+			{
+				/* "+-" is a literal plus sign */
+				if (buffer)
+					buffer[buflen]='+';
+				++buflen;
+				continue;
+			}
+
+			dc.cnt=0;
+			dc.ucnt=0;
+			dc.uc1=0;
+			dc.flushing=0;
+			dc.flushed=0;
+
+			/* Consume the run of base64 characters */
+
+			while ( p[i] && (q=strchr(base64tab, p[i])) != NULL)
+			{
+				if (dec_b64_char(&dc, (char)(q-base64tab),
+						 &buffer,
+						 &buflen))
+				{
+					if (err)
+					{
+						free(buffer);
+						*err=i;
+						errno=EINVAL;
+						return NULL;
+					}
+
+					/* Recover from decoding error */
+
+					dc.cnt=0;
+					dc.ucnt=0;
+					dc.uc1=0;
+				}
+				++i;
+			}
+
+			/* Pad a partial group with zeroes to flush it out */
+
+			dc.flushing=1;
+
+			while (dc.cnt > 0)
+			{
+				if (dec_b64_char(&dc, 0,
+						 &buffer,
+						 &buflen))
+				{
+					if (err)
+					{
+						free(buffer);
+						*err=i;
+						errno=EINVAL;
+						return NULL;
+					}
+					dc.cnt=0;
+					dc.ucnt=0;
+					dc.uc1=0;
+				}
+			}
+
+			if (p[i] == 0)
+				break;
+
+			/*
+			** A '-' terminator is swallowed.  Anything else is
+			** looked at again by the next iteration.
+			*/
+			if (p[i] != '-')
+				--i;
+		}
+
+		if (pass)
+			buffer[buflen]=0;
+	}
+
+	return buffer;
+}
+
+/* Poor man's base64 encoder */
+
+/* Working state for encoding one base64 run */
+struct enc_base64_struct {
+	char base64buf[3];	/* Octets waiting for a full group of three */
+	int cnt;	/* How many entries of base64buf are in use */
+	int clip;	/* Trailing output characters to leave off: 0, 1 or 2 */
+};
+
+/*
+** Queue one octet for base64 encoding.
+**
+** Nothing is produced until three octets have been collected.  The group is
+** then turned into four base64 characters, of which the last p->clip (at
+** most two) are left off.  Characters are stored only when *buffer is not
+** NULL; *buflen is advanced either way.
+*/
+
+static void encode_base64_char(struct enc_base64_struct *p,
+			       char c,
+			       char **buffer,
+			       size_t *buflen)
+{
+	int o0, o1, o2;
+	char quad[4];
+	int keep;
+	int n;
+
+	p->base64buf[p->cnt]=c;
+
+	if (++p->cnt < 3)
+		return; /* Still collecting */
+
+	p->cnt=0;
+
+	o0=(unsigned char)p->base64buf[0];
+	o1=(unsigned char)p->base64buf[1];
+	o2=(unsigned char)p->base64buf[2];
+
+	/* 3 x 8 bits -> 4 x 6 bits */
+	quad[0]=base64tab[ o0 >> 2 ];
+	quad[1]=base64tab[ ((o0 & 3 ) << 4) | (o1 >> 4)];
+	quad[2]=base64tab[ ((o1 & 15) << 2) | (o2 >> 6)];
+	quad[3]=base64tab[ o2 & 63 ];
+
+	/* The first two are always written, the rest depends on clip */
+	if (p->clip < 1)
+		keep=4;
+	else if (p->clip < 2)
+		keep=3;
+	else
+		keep=2;
+
+	for (n=0; n<keep; n++)
+	{
+		if (*buffer)
+			(*buffer)[*buflen]=quad[n];
+		++*buflen;
+	}
+}
+
+/* Queue the low 16 bits of uc for base64 encoding, high octet first */
+
+static void encode_base64_u16(struct enc_base64_struct *p,
+			      unicode_char uc,
+			      char **buffer,
+			      size_t *buflen)
+{
+	char hi=(char)((uc >> 8) & 255);
+	char lo=(char)(uc & 255);
+
+	encode_base64_char(p, hi, buffer, buflen);
+	encode_base64_char(p, lo, buffer, buflen);
+}
+
+/*
+** Queue one character for base64 encoding as UTF-16.
+**
+** Characters up to 0xFFFF are written as a single 16-bit unit.  Characters
+** above 0xFFFF are written as a surrogate pair: 0x10000 is subtracted, the
+** upper ten bits of the remainder go into the high surrogate (0xD800 base)
+** and the lower ten bits into the low surrogate (0xDC00 base).
+**
+** A value that is itself in the surrogate range 0xD800-0xDFFF is not a
+** valid character; it is still split into two surrogate-range units from
+** its raw bits rather than rejected.
+*/
+
+static void encode_base64_u32(struct enc_base64_struct *p,
+			      unicode_char uc,
+			      char **buffer,
+			      size_t *buflen)
+{
+	if (uc > 0xFFFFU)
+	{
+		unicode_char v=uc - 0x10000;
+
+		encode_base64_u16(p, ((v >> 10) & 0x3FF) | 0xD800,
+				  buffer, buflen);
+		encode_base64_u16(p, (v & 0x3FF) | 0xDC00,
+				  buffer, buflen);
+	}
+	else if (uc >= 0xD800 && uc <= 0xDFFF) /* Really illegal, but punt */
+	{
+		encode_base64_u16(p, ((uc >> 10) & 0x3FF) | 0xD800,
+				  buffer, buflen);
+		encode_base64_u16(p, (uc & 0x3FF) | 0xDC00,
+				  buffer, buflen);
+	}
+	else
+	{
+		encode_base64_u16(p, uc, buffer, buflen);
+	}
+}
+
+
+/*
+** True for characters that are written as themselves: newline, carriage
+** return, tab, space, and 33 through 125 except 92 (the backslash).  The
+** argument is evaluated more than once, so it must have no side effects.
+*/
+#define LITERAL(c) ( (c) == '\n' || (c) == '\r' || (c) == '\t' || (c) == ' ' \
+	|| ( (c) >= 33 && (c) <= 125 && (c) != 92))
+
+/*
+** Encode the zero-terminated unicode_char string p as UTF-7.
+**
+** The input is walked twice: the first pass only counts the output bytes,
+** the second pass fills the allocated buffer.
+**
+** '+' is written as "+-".  Characters accepted by LITERAL() are copied
+** through.  Any other run of characters is written as '+' followed by the
+** base64 form of their UTF-16 encoding.  The run is closed with '-' when
+** it ends the string, or when the character that follows is '-' or a
+** member of the base64 alphabet (which a decoder would otherwise take for
+** part of the run).
+**
+** A value above 0x10FFFF is not a valid character.  If err is not NULL its
+** index is stored in *err, errno is set to EINVAL and NULL is returned;
+** if err is NULL, 0xFFFD is encoded in its place.
+**
+** Returns a malloc()ed string that the caller must free(), or NULL on
+** failure.  The foo argument is not used.
+*/
+
+static char *fromu(const struct unicode_info *foo, const unicode_char *p,
+		   int *err)
+{
+	char *buffer=NULL;
+	size_t buflen=0;
+	int pass;
+	size_t i;
+
+	for (pass=0; pass<2; pass++)
+	{
+		if (pass)
+		{
+			if ((buffer=malloc(buflen+1)) == NULL)
+				return NULL;
+		}
+		buflen=0;
+
+		for (i=0; p[i]; i++)
+		{
+			struct enc_base64_struct eb;
+
+			if (p[i] == '+')
+			{
+				if (pass)
+				{
+					buffer[buflen]='+';
+					buffer[buflen+1]='-';
+				}
+				buflen += 2;
+				continue;
+			}
+
+			if (LITERAL(p[i]))
+			{
+				if (pass)
+				{
+					buffer[buflen]=(char)p[i];
+				}
+				++buflen;
+				continue;
+			}
+
+			/* Shift into base64 */
+
+			if (pass)
+				buffer[buflen]='+';
+			++buflen;
+
+			eb.cnt=0;
+			eb.clip=0;
+
+			do
+			{
+				if (p[i] > 0x10FFFF)
+				{
+					if (err)
+					{
+						free(buffer);
+						*err=i;
+						errno=EINVAL;
+						return NULL;
+					}
+					encode_base64_u32(&eb, 0xFFFD,
+							  &buffer, &buflen);
+				}
+				else
+				{
+					encode_base64_u32(&eb, p[i],
+							  &buffer, &buflen);
+				}
+				++i;
+			} while ( p[i] && !LITERAL(p[i]));
+
+			/*
+			** eb.cnt octets are still pending.  One octet needs
+			** two base64 characters, two octets need three, so
+			** clip whatever would only encode the zero padding.
+			*/
+
+			switch (eb.cnt) {
+			case 1:
+				eb.clip=2;
+				break;
+			case 2:
+				eb.clip=1;
+				break;
+			}
+
+			while (eb.cnt)
+			{
+				encode_base64_char(&eb, 0, &buffer, &buflen);
+			}
+
+			if (!p[i])
+			{
+				if (pass)
+				{
+					buffer[buflen]='-';
+				}
+				++buflen;
+				break;
+			}
+
+			/*
+			** p[i] passed LITERAL() and so fits in a char.  Close
+			** the run explicitly if a decoder could mistake what
+			** follows for more base64, or for the terminator.
+			*/
+
+			if (p[i] == '-' ||
+			    strchr(base64tab, (int)p[i]) != NULL)
+			{
+				if (pass)
+				{
+					buffer[buflen]='-';
+				}
+				++buflen;
+			}
+			--i;
+		}
+
+		if (pass)
+			buffer[buflen]=0;
+	}
+
+	return buffer;
+}
+
+
+/*
+** UTF7.toupper/tolower/totitle is implemented by converting UTF7 to
+** UCS-4, applying the unicode table lookup, then converting it back to
+** UTF7
+*/
+
+/*
+** Return a newly allocated copy of the UTF-7 string cp with unicode_uc()
+** applied to every decoded character.  Returns NULL if decoding fails, in
+** which case ip is whatever tou() left there, or if re-encoding fails, in
+** which case *ip is set to 0 when ip is not NULL.
+*/
+
+static char *toupper_func(const struct unicode_info *u,
+			  const char *cp, int *ip)
+{
+	unicode_char *wide=tou(u, cp, ip);
+	char *result;
+	size_t n;
+
+	if (wide == NULL)
+		return NULL;
+
+	for (n=0; wide[n]; n++)
+		wide[n]=unicode_uc(wide[n]);
+
+	result=fromu(u, wide, NULL);
+	if (result == NULL && ip)
+		*ip=0;
+	free(wide);
+	return result;
+}
+
+/*
+** Return a newly allocated copy of the UTF-7 string cp with unicode_lc()
+** applied to every decoded character.  Returns NULL if decoding fails, in
+** which case ip is whatever tou() left there, or if re-encoding fails, in
+** which case *ip is set to 0 when ip is not NULL.
+*/
+
+static char *tolower_func(const struct unicode_info *u,
+			  const char *cp, int *ip)
+{
+	unicode_char *wide=tou(u, cp, ip);
+	char *result;
+	size_t n;
+
+	if (wide == NULL)
+		return NULL;
+
+	for (n=0; wide[n]; n++)
+		wide[n]=unicode_lc(wide[n]);
+
+	result=fromu(u, wide, NULL);
+	free(wide);
+	if (result == NULL && ip)
+		*ip=0;
+	return result;
+}
+
+/*
+** Return a newly allocated copy of the UTF-7 string cp with unicode_tc()
+** applied to every decoded character.  Returns NULL if decoding fails, in
+** which case ip is whatever tou() left there, or if re-encoding fails, in
+** which case *ip is set to 0 when ip is not NULL.
+*/
+
+static char *totitle_func(const struct unicode_info *u,
+			  const char *cp, int *ip)
+{
+	unicode_char *wide=tou(u, cp, ip);
+	char *result;
+	size_t n;
+
+	if (wide == NULL)
+		return NULL;
+
+	for (n=0; wide[n]; n++)
+		wide[n]=unicode_tc(wide[n]);
+
+	result=fromu(u, wide, NULL);
+	if (result == NULL && ip)
+		*ip=0;
+	free(wide);
+	return result;
+}
+
+/*
+** Character set table entry for UTF-7.  The member order follows
+** struct unicode_info, which is declared outside this file.
+*/
+const struct unicode_info unicode_UTF7 = {
+	"UTF-7",
+	UNICODE_UTF | UNICODE_MB |
+	UNICODE_HEADER_QUOPRI | UNICODE_BODY_QUOPRI,
+	tou,
+	fromu,
+	toupper_func,
+	tolower_func,
+	totitle_func
+};
+
+#if 0
+extern const struct unicode_info unicode_UTF8;
+
+/*
+** Disabled manual check: converts three UTF-7 samples to UTF-8, prints
+** them, then converts each result back to UTF-7 and prints that too.
+*/
+int main(int argc, char **argv)
+{
+	char *utf8[3];
+	int n;
+
+	utf8[0]=unicode_xconvert("A+ImIDkQ.", &unicode_UTF7,
+				 &unicode_UTF8);
+	utf8[1]=unicode_xconvert("Hi Mom -+Jjo--!", &unicode_UTF7,
+				 &unicode_UTF8);
+	utf8[2]=unicode_xconvert("+ZeVnLIqe-", &unicode_UTF7,
+				 &unicode_UTF8);
+
+	for (n=0; n<3; n++)
+		printf("%s\n", utf8[n]);
+
+	for (n=0; n<3; n++)
+		printf("%s\n", unicode_xconvert(utf8[n], &unicode_UTF8,
+						&unicode_UTF7));
+
+	return 0;
+}
+#endif