Imported Debian patch 0.63.0-6
[hcoop/debian/courier-authlib.git] / unicode / utf7.c
diff --git a/unicode/utf7.c b/unicode/utf7.c
new file mode 100644 (file)
index 0000000..9532532
--- /dev/null
@@ -0,0 +1,529 @@
+/*
+** Copyright 2003 Double Precision, Inc.
+** See COPYING for distribution information.
+**
+** $Id: utf7.c,v 1.3 2004/05/23 14:28:25 mrsam Exp $
+*/
+
+#include "unicode.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+static const char base64tab[]=
+"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+struct dec_base64_struct {
+
+       unsigned char base64buf[4];
+       int cnt;
+
+       unicode_char uc1;
+       int ucnt;
+
+       int flushing;
+       int flushed;
+};
+
+/* Poor man's base64 decoder */
+
+static int dec_b64_uchar(struct dec_base64_struct *dc,
+                        unsigned char uuc,
+                        unicode_char **buffer,
+                        size_t *buflen)
+{
+       if (dc->flushed)
+               return 0; /* Flushing trailing junk */
+
+       dc->uc1 <<= 8;
+       dc->uc1 |= uuc;
+
+       ++dc->ucnt;
+
+       if (dc->ucnt == 2)
+       {
+               if (dc->uc1 < 0xD800 || dc->uc1 > 0xDFFF)
+                       /* Not surrogate pair */
+               {
+                       if ( *buffer )
+                               (*buffer)[*buflen]=dc->uc1;
+                       ++*buflen;
+                       dc->uc1=0;
+                       dc->ucnt=0;
+                       if (dc->flushing)
+                               dc->flushed=1;
+                       return (0);
+               }
+
+               if (dc->uc1 > 0xDBFF)
+                       return -1; /* Bad surrogate pair */
+               return 0;
+       }
+
+       if (dc->ucnt == 4)
+       {
+               unicode_char uc2= (dc->uc1 >> 16) & 0xFFFF;
+
+               dc->uc1 &= 0xFFFF;
+
+               if (dc->uc1 < 0xDC00 || dc->uc1 > 0xDFFF)
+                       return -1;
+
+
+               if (*buffer)
+               {
+                       (*buffer)[*buflen] = ((uc2 & 0x3FF) << 10) |
+                               (dc->uc1 & 0x3FF);
+               }
+               ++*buflen;
+
+               dc->uc1=0;
+               dc->ucnt=0;
+               if (dc->flushing)
+                       dc->flushed=1;
+               return (0);
+       }
+       return 0;
+}
+
+static int dec_b64_char(struct dec_base64_struct *dc,
+                       char c,
+                       unicode_char **buffer,
+                       size_t *buflen)
+{
+       dc->base64buf[dc->cnt]=c;
+       if (++dc->cnt >= 4) /* Four characters to base64 decode */
+       {
+               char    a,b,c;
+
+               int     w=dc->base64buf[0];
+               int     x=dc->base64buf[1];
+               int     y=dc->base64buf[2];
+               int     z=dc->base64buf[3];
+
+               a= (w << 2) | (x >> 4);
+               b= (x << 4) | (y >> 2);
+               c= (y << 6) | z;
+               dc->cnt=0;
+
+               if (dec_b64_uchar(dc, a, buffer, buflen))
+                       return -1;
+
+               if (dec_b64_uchar(dc, b, buffer, buflen))
+                       return -1;
+
+               if (dec_b64_uchar(dc, c, buffer, buflen))
+                       return -1;
+       }
+       return 0;
+}
+
+static unicode_char *tou(const struct unicode_info *foo, const char *p,
+                        int *err)
+{
+       int pass;
+       size_t i;
+       unicode_char *buffer=NULL;
+       size_t buflen=0;
+
+       /* Two passes.  Count the output, alloc buffer, do it */
+
+       for (pass=0; pass<2; pass++)
+       {
+               if (pass)
+               {
+                       if ((buffer=malloc((buflen+1)*sizeof(unicode_char)))
+                           == NULL)
+                               return NULL;
+               }
+               buflen=0;
+
+               for (i=0; p[i]; i++)
+               {
+                       char *q;
+                       struct dec_base64_struct dc;
+
+                       if (p[i] != '+')
+                       {
+                               if (buffer)
+                               {
+                                       buffer[buflen]=(unsigned char)p[i];
+                               }
+                               ++buflen;
+                               continue;
+                       }
+
+                       if (p[++i] == 0)
+                               break;
+
+                       if (p[i] == '-')
+                       {
+                               if (buffer)
+                                       buffer[buflen]='+';
+                               ++buflen;
+                               continue;
+                       }
+
+                       dc.cnt=0;
+                       dc.ucnt=0;
+                       dc.uc1=0;
+                       dc.flushing=0;
+                       dc.flushed=0;
+
+                       while ( p[i] && (q=strchr(base64tab, p[i])) != NULL)
+                       {
+                               if (dec_b64_char(&dc, (q-base64tab),
+                                                &buffer,
+                                                &buflen))
+                               {
+                                       if (err)
+                                       {
+                                               *err=i;
+                                               errno=EINVAL;
+                                               return NULL;
+                                       }
+
+                                       /* Recover from decoding error */
+
+                                       dc.cnt=0;
+                                       dc.ucnt=0;
+                                       dc.uc1=0;
+                               }
+                               ++i;
+                       }
+
+                       dc.flushing=1;
+
+                       while (dc.cnt > 0)
+                       {
+                               if (dec_b64_char(&dc, 0,
+                                                &buffer,
+                                                &buflen))
+                               {
+                                       if (err)
+                                       {
+                                               *err=i;
+                                               errno=EINVAL;
+                                               return NULL;
+                                       }
+                                       dc.cnt=0;
+                                       dc.ucnt=0;
+                                       dc.uc1=0;
+                               }
+                       }
+
+                       if (p[i] == 0)
+                               break;
+
+                       if (p[i] != '-')
+                               --i;
+               }
+
+
+               if (pass)
+                       buffer[buflen]=0;
+       }
+
+       return buffer;
+}
+
+/* Poor man's base64 encoder */
+
+struct enc_base64_struct {
+
+       char base64buf[3];
+       int cnt;
+       int clip;
+};
+
+static void encode_base64_char(struct enc_base64_struct *p,
+                              char c,
+                              char **buffer,
+                              size_t *buflen)
+{
+       p->base64buf[p->cnt]=c;
+
+       if (++p->cnt >= 3) /* Encode three octets in base64 */
+       {
+               int a, b, c;
+               int d, e, f, g;
+
+               a=(unsigned char)p->base64buf[0];
+               b=(unsigned char)p->base64buf[1];
+               c=(unsigned char)p->base64buf[2];
+
+               d=base64tab[ a >> 2 ];
+               e=base64tab[ ((a & 3 ) << 4) | (b >> 4)];
+               f=base64tab[ ((b & 15) << 2) | (c >> 6)];
+               g=base64tab[ c & 63 ];
+
+               p->cnt=0;
+
+               if (*buffer)
+               {
+                       (*buffer)[*buflen]=d;
+                       (*buffer)[*buflen+1]=e;
+               }
+               *buflen += 2;
+
+               if (p->clip < 2) /* Clip trailing junk, don't need it */
+               {
+                       if (*buffer)
+                       {
+                               (*buffer)[*buflen]=f;
+                       }
+                       ++*buflen;
+               }
+
+               if (p->clip < 1)
+               {
+                       if (*buffer)
+                       {
+                               (*buffer)[*buflen]=g;
+                       }
+                       ++*buflen;
+               }
+       }
+}
+
+static void encode_base64_u16(struct enc_base64_struct *p,
+                             unicode_char uc,
+                             char **buffer,
+                             size_t *buflen)
+{
+       encode_base64_char(p, (uc >> 8) & 255, buffer, buflen);
+       encode_base64_char(p, uc & 255, buffer, buflen);
+}
+
+static void encode_base64_u32(struct enc_base64_struct *p,
+                             unicode_char uc,
+                             char **buffer,
+                             size_t *buflen)
+{
+       if ((uc >= 0xD800 && uc <= 0xDFFF) /* Really illegal, but punt */
+           || uc > 0xFFFFU)
+       {
+               encode_base64_u16(p, ((uc >> 10) & 0x3FF) | 0xD800,
+                                 buffer, buflen);
+               encode_base64_u16(p, (uc & 0x3FF) | 0xDC00,
+                                 buffer, buflen);
+       }
+       else
+       {
+               encode_base64_u16(p, uc, buffer, buflen);
+       }
+}
+
+
+#define LITERAL(c) ( (c) == '\n' || (c) == '\r' || (c) == '\t' || (c) == ' ' \
+       || ( (c) >= 33 && c <= 125 && c != 92))
+
+static char *fromu(const struct unicode_info *foo, const unicode_char *p,
+                  int *err)
+{
+       char *buffer=0;
+       size_t buflen=0;
+       int pass;
+       size_t i;
+
+       for (pass=0; pass<2; pass++)
+       {
+               if (pass)
+               {
+                       if ((buffer=malloc(buflen+1)) == NULL)
+                               return NULL;
+               }
+               buflen=0;
+
+               for (i=0; p[i]; i++)
+               {
+                       struct enc_base64_struct eb;
+
+                       if (p[i] == '+')
+                       {
+                               if (pass)
+                               {
+                                       buffer[buflen]='+';
+                                       buffer[buflen+1]='-';
+                               }
+                               buflen += 2;
+                               continue;
+                       }
+
+                       if (LITERAL(p[i]))
+                       {
+                               if (pass)
+                               {
+                                       buffer[buflen]=(char)p[i];
+                               }
+                               ++buflen;
+                               continue;
+                       }
+
+                       if (pass)
+                               buffer[buflen]='+';
+                       ++buflen;
+
+                       eb.cnt=0;
+                       eb.clip=0;
+
+                       do
+                       {
+                               if (p[i] >= 0x10FFFF)
+                               {
+                                       if (err)
+                                       {
+                                               *err=i;
+                                               errno=EINVAL;
+                                               return NULL;
+                                       }
+                                       encode_base64_u32(&eb, 0xFFFD,
+                                                         &buffer, &buflen);
+                               }
+                               else
+                               {
+                                       encode_base64_u32(&eb, p[i],
+                                                         &buffer, &buflen);
+                               }
+                               ++i;
+                       } while ( p[i] && !LITERAL(p[i]));
+
+                       switch (eb.cnt) {
+                       case 2:
+                               eb.clip=2;
+                               break;
+                       case 3:
+                               eb.clip=1;
+                               break;
+                       }
+                                       
+                       while (eb.cnt)
+                       {
+                               encode_base64_char(&eb, 0, &buffer, &buflen);
+                       }
+
+                       if (!p[i])
+                       {
+                               if (pass)
+                               {
+                                       buffer[buflen]='-';
+                               }
+                               ++buflen;
+                               break;
+                       }
+
+                       if (p[i] == '-')
+                       {
+                               if (pass)
+                               {
+                                       buffer[buflen]='-';
+                               }
+                               ++buflen;
+                       }
+                       --i;
+               }
+
+               if (pass)
+                       buffer[buflen]=0;
+       }
+
+       return buffer;
+}
+
+
+/*
+** UTF7.toupper/tolower/totitle is implemented by converting UTF8 to
+** UCS-4, applying the unicode table lookup, then converting it back to
+** UTF7
+*/
+
+static char *toupper_func(const struct unicode_info *u,
+                         const char *cp, int *ip)
+{
+       unicode_char *uc=tou(u, cp, ip), *p;
+       char *s;
+
+       if (!uc) return (0);
+
+       for (p=uc; *p; p++)
+               *p=unicode_uc(*p);
+
+       s=fromu(u, uc, NULL);
+       if (!s && ip)
+               *ip=0;
+       free(uc);
+       return (s);
+}
+
+static char *tolower_func(const struct unicode_info *u,
+                         const char *cp, int *ip)
+{
+       unicode_char *uc=tou(u, cp, ip), *p;
+       char *s;
+
+       if (!uc) return (0);
+
+       for (p=uc; *p; p++)
+               *p=unicode_lc(*p);
+
+       s=fromu(u, uc, NULL);
+       free(uc);
+       if (!s && ip)
+               *ip=0;
+       return (s);
+}
+
+static char *totitle_func(const struct unicode_info *u,
+                         const char *cp, int *ip)
+{
+       unicode_char *uc=tou(u, cp, ip), *p;
+       char *s;
+
+       if (!uc) return (0);
+
+       for (p=uc; *p; p++)
+               *p=unicode_tc(*p);
+
+       s=fromu(u, uc, NULL);
+       if (!s && ip)
+               *ip=0;
+       free(uc);
+       return (s);
+}
+
+const struct unicode_info unicode_UTF7 = {
+       "UTF-7",
+       UNICODE_UTF | UNICODE_MB |
+       UNICODE_HEADER_QUOPRI | UNICODE_BODY_QUOPRI,
+       tou,
+       fromu,
+       toupper_func,
+       tolower_func,
+       totitle_func};
+
+#if 0
+extern const struct unicode_info unicode_UTF8;
+
+int main(int argc, char **argv)
+{
+       char *a, *b, *c;
+
+       a=unicode_xconvert("A+ImIDkQ.", &unicode_UTF7,
+                          &unicode_UTF8);
+       b=unicode_xconvert("Hi Mom -+Jjo--!", &unicode_UTF7,
+                          &unicode_UTF8);
+       c=unicode_xconvert("+ZeVnLIqe-", &unicode_UTF7,
+                          &unicode_UTF8);
+
+       printf("%s\n", a);
+       printf("%s\n", b);
+       printf("%s\n", c);
+
+       printf("%s\n", unicode_xconvert(a, &unicode_UTF8, &unicode_UTF7));
+       printf("%s\n", unicode_xconvert(b, &unicode_UTF8, &unicode_UTF7));
+       printf("%s\n", unicode_xconvert(c, &unicode_UTF8, &unicode_UTF7));
+
+       return 0;
+}
+#endif