Imported Upstream version 0.63.0
[hcoop/debian/courier-authlib.git] / unicode / utf7.c
1 /*
2 ** Copyright 2003 Double Precision, Inc.
3 ** See COPYING for distribution information.
4 **
5 ** $Id: utf7.c,v 1.3 2004/05/23 14:28:25 mrsam Exp $
6 */
7
8 #include "unicode.h"
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <errno.h>
13
/*
** The base64 alphabet.  A digit's 6-bit value is its offset in this
** string, so the table serves both encoding (index it) and decoding
** (strchr() into it).
*/
static const char base64tab[]=
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789"
	"+/";
16
17 struct dec_base64_struct {
18
19 unsigned char base64buf[4];
20 int cnt;
21
22 unicode_char uc1;
23 int ucnt;
24
25 int flushing;
26 int flushed;
27 };
28
29 /* Poor man's base64 decoder */
30
31 static int dec_b64_uchar(struct dec_base64_struct *dc,
32 unsigned char uuc,
33 unicode_char **buffer,
34 size_t *buflen)
35 {
36 if (dc->flushed)
37 return 0; /* Flushing trailing junk */
38
39 dc->uc1 <<= 8;
40 dc->uc1 |= uuc;
41
42 ++dc->ucnt;
43
44 if (dc->ucnt == 2)
45 {
46 if (dc->uc1 < 0xD800 || dc->uc1 > 0xDFFF)
47 /* Not surrogate pair */
48 {
49 if ( *buffer )
50 (*buffer)[*buflen]=dc->uc1;
51 ++*buflen;
52 dc->uc1=0;
53 dc->ucnt=0;
54 if (dc->flushing)
55 dc->flushed=1;
56 return (0);
57 }
58
59 if (dc->uc1 > 0xDBFF)
60 return -1; /* Bad surrogate pair */
61 return 0;
62 }
63
64 if (dc->ucnt == 4)
65 {
66 unicode_char uc2= (dc->uc1 >> 16) & 0xFFFF;
67
68 dc->uc1 &= 0xFFFF;
69
70 if (dc->uc1 < 0xDC00 || dc->uc1 > 0xDFFF)
71 return -1;
72
73
74 if (*buffer)
75 {
76 (*buffer)[*buflen] = ((uc2 & 0x3FF) << 10) |
77 (dc->uc1 & 0x3FF);
78 }
79 ++*buflen;
80
81 dc->uc1=0;
82 dc->ucnt=0;
83 if (dc->flushing)
84 dc->flushed=1;
85 return (0);
86 }
87 return 0;
88 }
89
90 static int dec_b64_char(struct dec_base64_struct *dc,
91 char c,
92 unicode_char **buffer,
93 size_t *buflen)
94 {
95 dc->base64buf[dc->cnt]=c;
96 if (++dc->cnt >= 4) /* Four characters to base64 decode */
97 {
98 char a,b,c;
99
100 int w=dc->base64buf[0];
101 int x=dc->base64buf[1];
102 int y=dc->base64buf[2];
103 int z=dc->base64buf[3];
104
105 a= (w << 2) | (x >> 4);
106 b= (x << 4) | (y >> 2);
107 c= (y << 6) | z;
108 dc->cnt=0;
109
110 if (dec_b64_uchar(dc, a, buffer, buflen))
111 return -1;
112
113 if (dec_b64_uchar(dc, b, buffer, buflen))
114 return -1;
115
116 if (dec_b64_uchar(dc, c, buffer, buflen))
117 return -1;
118 }
119 return 0;
120 }
121
122 static unicode_char *tou(const struct unicode_info *foo, const char *p,
123 int *err)
124 {
125 int pass;
126 size_t i;
127 unicode_char *buffer=NULL;
128 size_t buflen=0;
129
130 /* Two passes. Count the output, alloc buffer, do it */
131
132 for (pass=0; pass<2; pass++)
133 {
134 if (pass)
135 {
136 if ((buffer=malloc((buflen+1)*sizeof(unicode_char)))
137 == NULL)
138 return NULL;
139 }
140 buflen=0;
141
142 for (i=0; p[i]; i++)
143 {
144 char *q;
145 struct dec_base64_struct dc;
146
147 if (p[i] != '+')
148 {
149 if (buffer)
150 {
151 buffer[buflen]=(unsigned char)p[i];
152 }
153 ++buflen;
154 continue;
155 }
156
157 if (p[++i] == 0)
158 break;
159
160 if (p[i] == '-')
161 {
162 if (buffer)
163 buffer[buflen]='+';
164 ++buflen;
165 continue;
166 }
167
168 dc.cnt=0;
169 dc.ucnt=0;
170 dc.uc1=0;
171 dc.flushing=0;
172 dc.flushed=0;
173
174 while ( p[i] && (q=strchr(base64tab, p[i])) != NULL)
175 {
176 if (dec_b64_char(&dc, (q-base64tab),
177 &buffer,
178 &buflen))
179 {
180 if (err)
181 {
182 *err=i;
183 errno=EINVAL;
184 return NULL;
185 }
186
187 /* Recover from decoding error */
188
189 dc.cnt=0;
190 dc.ucnt=0;
191 dc.uc1=0;
192 }
193 ++i;
194 }
195
196 dc.flushing=1;
197
198 while (dc.cnt > 0)
199 {
200 if (dec_b64_char(&dc, 0,
201 &buffer,
202 &buflen))
203 {
204 if (err)
205 {
206 *err=i;
207 errno=EINVAL;
208 return NULL;
209 }
210 dc.cnt=0;
211 dc.ucnt=0;
212 dc.uc1=0;
213 }
214 }
215
216 if (p[i] == 0)
217 break;
218
219 if (p[i] != '-')
220 --i;
221 }
222
223
224 if (pass)
225 buffer[buflen]=0;
226 }
227
228 return buffer;
229 }
230
231 /* Poor man's base64 encoder */
232
/*
** State carried while producing a single base64 run of UTF-7 output.
*/
struct enc_base64_struct {

	/* Octets collected so far; a group is emitted once three are held */
	char base64buf[3];

	/*
	** cnt:  number of octets currently held in base64buf[].
	** clip: number of trailing base64 digits to leave out of the next
	**       group that gets emitted (0, 1 or 2).
	*/
	int cnt, clip;
};
239
240 static void encode_base64_char(struct enc_base64_struct *p,
241 char c,
242 char **buffer,
243 size_t *buflen)
244 {
245 p->base64buf[p->cnt]=c;
246
247 if (++p->cnt >= 3) /* Encode three octets in base64 */
248 {
249 int a, b, c;
250 int d, e, f, g;
251
252 a=(unsigned char)p->base64buf[0];
253 b=(unsigned char)p->base64buf[1];
254 c=(unsigned char)p->base64buf[2];
255
256 d=base64tab[ a >> 2 ];
257 e=base64tab[ ((a & 3 ) << 4) | (b >> 4)];
258 f=base64tab[ ((b & 15) << 2) | (c >> 6)];
259 g=base64tab[ c & 63 ];
260
261 p->cnt=0;
262
263 if (*buffer)
264 {
265 (*buffer)[*buflen]=d;
266 (*buffer)[*buflen+1]=e;
267 }
268 *buflen += 2;
269
270 if (p->clip < 2) /* Clip trailing junk, don't need it */
271 {
272 if (*buffer)
273 {
274 (*buffer)[*buflen]=f;
275 }
276 ++*buflen;
277 }
278
279 if (p->clip < 1)
280 {
281 if (*buffer)
282 {
283 (*buffer)[*buflen]=g;
284 }
285 ++*buflen;
286 }
287 }
288 }
289
290 static void encode_base64_u16(struct enc_base64_struct *p,
291 unicode_char uc,
292 char **buffer,
293 size_t *buflen)
294 {
295 encode_base64_char(p, (uc >> 8) & 255, buffer, buflen);
296 encode_base64_char(p, uc & 255, buffer, buflen);
297 }
298
299 static void encode_base64_u32(struct enc_base64_struct *p,
300 unicode_char uc,
301 char **buffer,
302 size_t *buflen)
303 {
304 if ((uc >= 0xD800 && uc <= 0xDFFF) /* Really illegal, but punt */
305 || uc > 0xFFFFU)
306 {
307 encode_base64_u16(p, ((uc >> 10) & 0x3FF) | 0xD800,
308 buffer, buflen);
309 encode_base64_u16(p, (uc & 0x3FF) | 0xDC00,
310 buffer, buflen);
311 }
312 else
313 {
314 encode_base64_u16(p, uc, buffer, buflen);
315 }
316 }
317
318
319 #define LITERAL(c) ( (c) == '\n' || (c) == '\r' || (c) == '\t' || (c) == ' ' \
320 || ( (c) >= 33 && c <= 125 && c != 92))
321
322 static char *fromu(const struct unicode_info *foo, const unicode_char *p,
323 int *err)
324 {
325 char *buffer=0;
326 size_t buflen=0;
327 int pass;
328 size_t i;
329
330 for (pass=0; pass<2; pass++)
331 {
332 if (pass)
333 {
334 if ((buffer=malloc(buflen+1)) == NULL)
335 return NULL;
336 }
337 buflen=0;
338
339 for (i=0; p[i]; i++)
340 {
341 struct enc_base64_struct eb;
342
343 if (p[i] == '+')
344 {
345 if (pass)
346 {
347 buffer[buflen]='+';
348 buffer[buflen+1]='-';
349 }
350 buflen += 2;
351 continue;
352 }
353
354 if (LITERAL(p[i]))
355 {
356 if (pass)
357 {
358 buffer[buflen]=(char)p[i];
359 }
360 ++buflen;
361 continue;
362 }
363
364 if (pass)
365 buffer[buflen]='+';
366 ++buflen;
367
368 eb.cnt=0;
369 eb.clip=0;
370
371 do
372 {
373 if (p[i] >= 0x10FFFF)
374 {
375 if (err)
376 {
377 *err=i;
378 errno=EINVAL;
379 return NULL;
380 }
381 encode_base64_u32(&eb, 0xFFFD,
382 &buffer, &buflen);
383 }
384 else
385 {
386 encode_base64_u32(&eb, p[i],
387 &buffer, &buflen);
388 }
389 ++i;
390 } while ( p[i] && !LITERAL(p[i]));
391
392 switch (eb.cnt) {
393 case 2:
394 eb.clip=2;
395 break;
396 case 3:
397 eb.clip=1;
398 break;
399 }
400
401 while (eb.cnt)
402 {
403 encode_base64_char(&eb, 0, &buffer, &buflen);
404 }
405
406 if (!p[i])
407 {
408 if (pass)
409 {
410 buffer[buflen]='-';
411 }
412 ++buflen;
413 break;
414 }
415
416 if (p[i] == '-')
417 {
418 if (pass)
419 {
420 buffer[buflen]='-';
421 }
422 ++buflen;
423 }
424 --i;
425 }
426
427 if (pass)
428 buffer[buflen]=0;
429 }
430
431 return buffer;
432 }
433
434
/*
** UTF7.toupper/tolower/totitle is implemented by converting UTF7 to
** UCS-4, applying the unicode table lookup, then converting it back to
** UTF7
*/
440
441 static char *toupper_func(const struct unicode_info *u,
442 const char *cp, int *ip)
443 {
444 unicode_char *uc=tou(u, cp, ip), *p;
445 char *s;
446
447 if (!uc) return (0);
448
449 for (p=uc; *p; p++)
450 *p=unicode_uc(*p);
451
452 s=fromu(u, uc, NULL);
453 if (!s && ip)
454 *ip=0;
455 free(uc);
456 return (s);
457 }
458
459 static char *tolower_func(const struct unicode_info *u,
460 const char *cp, int *ip)
461 {
462 unicode_char *uc=tou(u, cp, ip), *p;
463 char *s;
464
465 if (!uc) return (0);
466
467 for (p=uc; *p; p++)
468 *p=unicode_lc(*p);
469
470 s=fromu(u, uc, NULL);
471 free(uc);
472 if (!s && ip)
473 *ip=0;
474 return (s);
475 }
476
477 static char *totitle_func(const struct unicode_info *u,
478 const char *cp, int *ip)
479 {
480 unicode_char *uc=tou(u, cp, ip), *p;
481 char *s;
482
483 if (!uc) return (0);
484
485 for (p=uc; *p; p++)
486 *p=unicode_tc(*p);
487
488 s=fromu(u, uc, NULL);
489 if (!s && ip)
490 *ip=0;
491 free(uc);
492 return (s);
493 }
494
/*
** Descriptor for the UTF-7 character set: its name, a set of flag bits,
** and the five functions defined in this file.
**
** NOTE(review): the initializer is positional; the meaning of each slot
** and of the UNICODE_* flags comes from struct unicode_info in unicode.h,
** which is not visible here -- confirm against that header.
*/
const struct unicode_info unicode_UTF7 = {
	"UTF-7",
	UNICODE_UTF | UNICODE_MB |
	UNICODE_HEADER_QUOPRI | UNICODE_BODY_QUOPRI,
	tou,		/* UTF-7 string -> unicode_char array */
	fromu,		/* unicode_char array -> UTF-7 string */
	toupper_func,	/* UTF-7 -> UTF-7, via unicode_uc() */
	tolower_func,	/* UTF-7 -> UTF-7, via unicode_lc() */
	totitle_func};	/* UTF-7 -> UTF-7, via unicode_tc() */
504
505 #if 0
506 extern const struct unicode_info unicode_UTF8;
507
508 int main(int argc, char **argv)
509 {
510 char *a, *b, *c;
511
512 a=unicode_xconvert("A+ImIDkQ.", &unicode_UTF7,
513 &unicode_UTF8);
514 b=unicode_xconvert("Hi Mom -+Jjo--!", &unicode_UTF7,
515 &unicode_UTF8);
516 c=unicode_xconvert("+ZeVnLIqe-", &unicode_UTF7,
517 &unicode_UTF8);
518
519 printf("%s\n", a);
520 printf("%s\n", b);
521 printf("%s\n", c);
522
523 printf("%s\n", unicode_xconvert(a, &unicode_UTF8, &unicode_UTF7));
524 printf("%s\n", unicode_xconvert(b, &unicode_UTF8, &unicode_UTF7));
525 printf("%s\n", unicode_xconvert(c, &unicode_UTF8, &unicode_UTF7));
526
527 return 0;
528 }
529 #endif