2 ** Copyright 2003 Double Precision, Inc.
3 ** See COPYING for distribution information.
5 ** $Id: utf7.c,v 1.3 2004/05/23 14:28:25 mrsam Exp $
14 static const char base64tab
[]=
15 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
17 struct dec_base64_struct
{
19 unsigned char base64buf
[4];
29 /* Poor man's base64 decoder */
31 static int dec_b64_uchar(struct dec_base64_struct
*dc
,
33 unicode_char
**buffer
,
37 return 0; /* Flushing trailing junk */
46 if (dc
->uc1
< 0xD800 || dc
->uc1
> 0xDFFF)
47 /* Not surrogate pair */
50 (*buffer
)[*buflen
]=dc
->uc1
;
60 return -1; /* Bad surrogate pair */
66 unicode_char uc2
= (dc
->uc1
>> 16) & 0xFFFF;
70 if (dc
->uc1
< 0xDC00 || dc
->uc1
> 0xDFFF)
76 (*buffer
)[*buflen
] = ((uc2
& 0x3FF) << 10) |
90 static int dec_b64_char(struct dec_base64_struct
*dc
,
92 unicode_char
**buffer
,
95 dc
->base64buf
[dc
->cnt
]=c
;
96 if (++dc
->cnt
>= 4) /* Four characters to base64 decode */
100 int w
=dc
->base64buf
[0];
101 int x
=dc
->base64buf
[1];
102 int y
=dc
->base64buf
[2];
103 int z
=dc
->base64buf
[3];
105 a
= (w
<< 2) | (x
>> 4);
106 b
= (x
<< 4) | (y
>> 2);
110 if (dec_b64_uchar(dc
, a
, buffer
, buflen
))
113 if (dec_b64_uchar(dc
, b
, buffer
, buflen
))
116 if (dec_b64_uchar(dc
, c
, buffer
, buflen
))
122 static unicode_char
*tou(const struct unicode_info
*foo
, const char *p
,
127 unicode_char
*buffer
=NULL
;
130 /* Two passes. Count the output, alloc buffer, do it */
132 for (pass
=0; pass
<2; pass
++)
136 if ((buffer
=malloc((buflen
+1)*sizeof(unicode_char
)))
145 struct dec_base64_struct dc
;
151 buffer
[buflen
]=(unsigned char)p
[i
];
174 while ( p
[i
] && (q
=strchr(base64tab
, p
[i
])) != NULL
)
176 if (dec_b64_char(&dc
, (q
-base64tab
),
187 /* Recover from decoding error */
200 if (dec_b64_char(&dc
, 0,
231 /* Poor man's base64 encoder */
233 struct enc_base64_struct
{
240 static void encode_base64_char(struct enc_base64_struct
*p
,
245 p
->base64buf
[p
->cnt
]=c
;
247 if (++p
->cnt
>= 3) /* Encode three octets in base64 */
252 a
=(unsigned char)p
->base64buf
[0];
253 b
=(unsigned char)p
->base64buf
[1];
254 c
=(unsigned char)p
->base64buf
[2];
256 d
=base64tab
[ a
>> 2 ];
257 e
=base64tab
[ ((a
& 3 ) << 4) | (b
>> 4)];
258 f
=base64tab
[ ((b
& 15) << 2) | (c
>> 6)];
259 g
=base64tab
[ c
& 63 ];
265 (*buffer
)[*buflen
]=d
;
266 (*buffer
)[*buflen
+1]=e
;
270 if (p
->clip
< 2) /* Clip trailing junk, don't need it */
274 (*buffer
)[*buflen
]=f
;
283 (*buffer
)[*buflen
]=g
;
290 static void encode_base64_u16(struct enc_base64_struct
*p
,
295 encode_base64_char(p
, (uc
>> 8) & 255, buffer
, buflen
);
296 encode_base64_char(p
, uc
& 255, buffer
, buflen
);
299 static void encode_base64_u32(struct enc_base64_struct
*p
,
304 if ((uc
>= 0xD800 && uc
<= 0xDFFF) /* Really illegal, but punt */
307 encode_base64_u16(p
, ((uc
>> 10) & 0x3FF) | 0xD800,
309 encode_base64_u16(p
, (uc
& 0x3FF) | 0xDC00,
314 encode_base64_u16(p
, uc
, buffer
, buflen
);
319 #define LITERAL(c) ( (c) == '\n' || (c) == '\r' || (c) == '\t' || (c) == ' ' \
320 || ( (c) >= 33 && c <= 125 && c != 92))
322 static char *fromu(const struct unicode_info
*foo
, const unicode_char
*p
,
330 for (pass
=0; pass
<2; pass
++)
334 if ((buffer
=malloc(buflen
+1)) == NULL
)
341 struct enc_base64_struct eb
;
348 buffer
[buflen
+1]='-';
358 buffer
[buflen
]=(char)p
[i
];
373 if (p
[i
] >= 0x10FFFF)
381 encode_base64_u32(&eb
, 0xFFFD,
386 encode_base64_u32(&eb
, p
[i
],
390 } while ( p
[i
] && !LITERAL(p
[i
]));
403 encode_base64_char(&eb
, 0, &buffer
, &buflen
);
436 ** UTF7.toupper/tolower/totitle is implemented by converting UTF8 to
437 ** UCS-4, applying the unicode table lookup, then converting it back to
441 static char *toupper_func(const struct unicode_info
*u
,
442 const char *cp
, int *ip
)
444 unicode_char
*uc
=tou(u
, cp
, ip
), *p
;
452 s
=fromu(u
, uc
, NULL
);
459 static char *tolower_func(const struct unicode_info
*u
,
460 const char *cp
, int *ip
)
462 unicode_char
*uc
=tou(u
, cp
, ip
), *p
;
470 s
=fromu(u
, uc
, NULL
);
477 static char *totitle_func(const struct unicode_info
*u
,
478 const char *cp
, int *ip
)
480 unicode_char
*uc
=tou(u
, cp
, ip
), *p
;
488 s
=fromu(u
, uc
, NULL
);
495 const struct unicode_info unicode_UTF7
= {
497 UNICODE_UTF
| UNICODE_MB
|
498 UNICODE_HEADER_QUOPRI
| UNICODE_BODY_QUOPRI
,
506 extern const struct unicode_info unicode_UTF8
;
508 int main(int argc
, char **argv
)
512 a
=unicode_xconvert("A+ImIDkQ.", &unicode_UTF7
,
514 b
=unicode_xconvert("Hi Mom -+Jjo--!", &unicode_UTF7
,
516 c
=unicode_xconvert("+ZeVnLIqe-", &unicode_UTF7
,
523 printf("%s\n", unicode_xconvert(a
, &unicode_UTF8
, &unicode_UTF7
));
524 printf("%s\n", unicode_xconvert(b
, &unicode_UTF8
, &unicode_UTF7
));
525 printf("%s\n", unicode_xconvert(c
, &unicode_UTF8
, &unicode_UTF7
));