Imported Upstream version 0.63.0
[hcoop/debian/courier-authlib.git] / rfc822 / rfc2047.c
CommitLineData
d9898ee8 1/*
8d138742 2** Copyright 1998 - 2009 Double Precision, Inc. See COPYING for
d9898ee8 3** distribution information.
4*/
5
6
7#include <stdio.h>
8#include <ctype.h>
9#include <string.h>
10#include <stdlib.h>
8d138742 11#include <errno.h>
d9898ee8 12
13#include "rfc822.h"
8d138742 14#include "rfc822hdr.h"
d9898ee8 15#include "rfc2047.h"
8d138742
CE
16#include "../unicode/unicode.h"
17#if LIBIDN
18#include <idna.h>
19#include <stringprep.h>
20#endif
d9898ee8 21
8d138742 22static const char rcsid[]="$Id: rfc2047.c,v 1.23 2009/11/18 03:38:50 mrsam Exp $";
d9898ee8 23
24#define RFC2047_ENCODE_FOLDLENGTH 76
25
26static const char xdigit[]="0123456789ABCDEF";
27static const char base64tab[]=
28"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
29
8d138742
CE
30static char *a_rfc2047_encode_str(const char *str, const char *charset,
31 int isaddress);
d9898ee8 32
8d138742
CE
33static void rfc2047_encode_header_do(const struct rfc822a *a,
34 const char *charset,
35 void (*print_func)(char, void *),
36 void (*print_separator)(const char *,
37 void *), void *ptr)
d9898ee8 38{
8d138742
CE
39 rfc822_print_common(a, &a_rfc2047_encode_str, charset,
40 print_func, print_separator, ptr);
d9898ee8 41}
42
8d138742
CE
/*
** Build a newly allocated string made of the first pfix_len bytes of pfix
** followed by the encoded form of domain.
**
** When built with LIBIDN the domain is run through idna_to_ascii_8z();
** otherwise it is appended as is (no intermediate copy is made).
**
** Returns NULL on failure. errno is set to EINVAL when the IDNA conversion
** fails. The caller owns the result and must free() it.
*/
static char *rfc822_encode_domain_int(const char *pfix,
				      size_t pfix_len,
				      const char *domain)
{
	const char *encoded=domain;
	size_t encoded_len;
	char *result;
#if LIBIDN
	char *idn=NULL;

	if (idna_to_ascii_8z(domain, &idn, 0) != IDNA_SUCCESS)
	{
		errno=EINVAL;
		return NULL;
	}
	encoded=idn;
#endif

	encoded_len=strlen(encoded);
	result=malloc(pfix_len + encoded_len + 1);

	if (result)
	{
		if (pfix_len)
			memcpy(result, pfix, pfix_len);

		/* The extra byte copies the terminating NUL as well. */
		memcpy(result + pfix_len, encoded, encoded_len + 1);
	}

#if LIBIDN
	free(idn);
#endif
	return result;
}
84
8d138742
CE
85char *rfc822_encode_domain(const char *address,
86 const char *charset)
d9898ee8 87{
8d138742
CE
88 const struct unicode_info *ui=unicode_find(charset);
89 const char *cp;
90 char *p, *q;
d9898ee8 91
8d138742 92 if (!ui)
d9898ee8 93 {
8d138742
CE
94 errno=EINVAL;
95 return NULL;
d9898ee8 96 }
d9898ee8 97
8d138742 98 p=unicode_convert(address, ui, &unicode_UTF8);
d9898ee8 99
8d138742
CE
100 if (!p)
101 return NULL;
d9898ee8 102
8d138742 103 cp=strchr(p, '@');
d9898ee8 104
8d138742 105 if (!cp)
d9898ee8 106 {
8d138742
CE
107 q=rfc822_encode_domain_int("", 0, p);
108 free(p);
109 return q;
d9898ee8 110 }
111
8d138742
CE
112 ++cp;
113 q=rfc822_encode_domain_int(p, cp-p, cp);
114 free(p);
115 return q;
d9898ee8 116}
117
8d138742
CE
118static char *a_rfc2047_encode_str(const char *str, const char *charset,
119 int isaddress)
d9898ee8 120{
8d138742
CE
121 size_t l;
122 char *p;
d9898ee8 123
8d138742
CE
124 if (isaddress)
125 return rfc822_encode_domain(str, charset);
d9898ee8 126
8d138742
CE
127 for (l=0; str[l]; l++)
128 if (str[l] & 0x80)
129 break;
d9898ee8 130
8d138742 131 if (str[l] == 0)
d9898ee8 132 {
8d138742 133 size_t n;
d9898ee8 134
8d138742
CE
135 for (l=0; str[l]; l++)
136 if (strchr(RFC822_SPECIALS, str[l]))
137 break;
d9898ee8 138
8d138742
CE
139 if (str[l] == 0)
140 return (strdup(str));
d9898ee8 141
8d138742
CE
142 for (n=3, l=0; str[l]; l++)
143 {
144 switch (str[l]) {
145 case '"':
146 case '\\':
147 ++n;
d9898ee8 148 break;
8d138742 149 }
d9898ee8 150
8d138742
CE
151 ++n;
152 }
d9898ee8 153
8d138742 154 p=malloc(n);
d9898ee8 155
8d138742
CE
156 if (!p)
157 return NULL;
d9898ee8 158
8d138742 159 p[0]='"';
d9898ee8 160
8d138742 161 for (n=1, l=0; str[l]; l++)
d9898ee8 162 {
8d138742
CE
163 switch (str[l]) {
164 case '"':
165 case '\\':
166 p[n++]='\\';
167 break;
168 }
d9898ee8 169
8d138742
CE
170 p[n++]=str[l];
171 }
172 p[n++]='"';
173 p[n]=0;
d9898ee8 174
8d138742
CE
175 return (p);
176 }
d9898ee8 177
8d138742
CE
178 return rfc2047_encode_str(str, charset, rfc2047_qp_allow_word);
179}
d9898ee8 180
181static void count(char c, void *p);
182static void counts2(const char *c, void *p);
183static void save(char c, void *p);
184static void saves2(const char *c, void *p);
185
8d138742
CE
186char *rfc2047_encode_header_addr(const struct rfc822a *a,
187 const char *charset)
d9898ee8 188{
189size_t l;
190char *s, *p;
191
192 l=1;
193 rfc2047_encode_header_do(a, charset, &count, &counts2, &l);
194 if ((s=malloc(l)) == 0) return (0);
195 p=s;
196 rfc2047_encode_header_do(a, charset, &save, &saves2, &p);
197 *p=0;
198 return (s);
199}
200
8d138742
CE
201
202char *rfc2047_encode_header_tobuf(const char *name, /* Header name */
203 const char *header, /* Header's contents */
204 const char *charset)
205{
206 if (rfc822hdr_is_addr(name))
207 {
208 char *s=0;
209
210 struct rfc822t *t;
211 struct rfc822a *a;
212
213 if ((t=rfc822t_alloc_new(header, NULL, NULL)) != 0)
214 {
215 if ((a=rfc822a_alloc(t)) != 0)
216 {
217 s=rfc2047_encode_header_addr(a, charset);
218 rfc822a_free(a);
219 }
220 rfc822t_free(t);
221 }
222 return s;
223 }
224
225 return rfc2047_encode_str(header, charset, rfc2047_qp_allow_word);
226}
227
/*
** Character callback for the sizing pass: adds one to the size_t
** counter that p points to. The character itself is ignored.
*/
static void count(char c, void *p)
{
	size_t *total=p;

	(void)c;
	*total += 1;
}
232
/*
** Separator callback for the sizing pass. Adds to the size_t counter at
** p the number of bytes that saves2() writes for the same separator:
** an optional leading ',', then "\n ", then the rest of the separator.
*/
static void counts2(const char *c, void *p)
{
	size_t *total=p;

	if (*c == ',')
	{
		++*total;
		++c;
	}

	/* Two bytes for the "\n " fold, plus whatever is left. */
	*total += 2 + strlen(c);
}
243
/*
** Character callback for the output pass: p points to a char * write
** cursor; store c there and advance the cursor by one.
*/
static void save(char c, void *p)
{
	char **cursor=p;

	*(*cursor)++ = c;
}
249
/*
** Separator callback for the output pass. p points to a char * write
** cursor. A leading ',' in the separator is written first, followed by
** "\n " and then the remainder of the separator; the cursor is left just
** past the last byte written.
*/
static void saves2(const char *c, void *p)
{
	char **cursor=p;
	char *out=*cursor;

	if (*c == ',')
		*out++ = *c++;

	*out++ = '\n';
	*out++ = ' ';

	while (*c)
		*out++ = *c++;

	*cursor=out;
}
260
261static int encodebase64(const char *ptr, size_t len, const char *charset,
262 int (*func)(const char *, size_t, void *), void *arg,
263 size_t foldlen, size_t offset)
264{
265 unsigned char ibuf[3];
266 char obuf[4];
267 int i, rc;
268
269 while (len)
270 {
271 if ((rc=(*func)("=?", 2, arg)) ||
272 (rc=(*func)(charset, strlen(charset), arg))||
273 (rc=(*func)("?B?", 3, arg)))
274 return rc;
275 i = offset + 2 + strlen(charset) + 3;
276 offset = 0;
277
278 while (len)
279 {
280 size_t n=len > 3 ? 3:len;
281
282 ibuf[0]= ptr[0];
283 if (n>1)
284 ibuf[1]=ptr[1];
285 else
286 ibuf[1]=0;
287 if (n>2)
288 ibuf[2]=ptr[2];
289 else
290 ibuf[2]=0;
291 ptr += n;
292 len -= n;
293
294 obuf[0] = base64tab[ ibuf[0] >>2 ];
295 obuf[1] = base64tab[(ibuf[0] & 0x03)<<4|ibuf[1]>>4];
296 obuf[2] = base64tab[(ibuf[1] & 0x0F)<<2|ibuf[2]>>6];
297 obuf[3] = base64tab[ ibuf[2] & 0x3F ];
298 if (n < 2)
299 obuf[2] = '=';
300 if (n < 3)
301 obuf[3] = '=';
302
303 if ((rc=(*func)(obuf, 4, arg)))
304 return rc;
305
306 i += 4;
307 if (foldlen && i + 2 > foldlen - 1 + 4)
308 break;
309 }
310
311 if ((rc=(*func)("?=", 2, arg)))
312 return rc;
313 if (len)
314 /*
315 * Encoded-words must be sepalated by
316 * linear-white-space.
317 */
318 if ((rc=(*func)(" ", 1, arg)))
319 return rc;
320 }
321 return 0;
322}
323
324#define ISSPACE(i) ((i)=='\t' || (i)=='\r' || (i)=='\n' || (i)==' ')
325#define DOENCODE(i) (((i) & 0x80) || (i)=='"' || (i)=='=' || \
326 ((unsigned char)(i) < 0x20 && !ISSPACE(i)) || \
327 !(*qp_allow)(i))
328
d9898ee8 329int rfc2047_encode_callback_base64(const char *str, const char *charset,
330 int (*qp_allow)(char),
331 int (*func)(const char *, size_t, void *),
332 void *arg)
333{
334int rc;
335int dummy=-1;
336size_t i;
337size_t offset=27; /* FIXME: initial offset for line length */
338const struct unicode_info *uiptr = unicode_find(charset);
339unicode_char *ustr, *uptr;
340
341 if (!str || !*str)
342 return 0;
343
344 for (i=0; str[i]; i++)
345 if (DOENCODE(str[i]))
346 break;
347 if (str[i] == 0)
348 return i? (*func)(str, strlen(str), arg): 0;
349
350 /*
351 * Multibyte or stateful charsets must be encoded with care of
352 * character boundaries. Charsets with replaceable capability can be
353 * encoded replacing errorneous characters. Otherwise, output without
354 * care of character boundaries or errors.
355 */
356 if (!uiptr ||
357 !(uiptr->flags & (UNICODE_MB | UNICODE_SISO)) ||
358 (!(uiptr->flags & UNICODE_REPLACEABLE) &&
359 !(ustr = (uiptr->c2u)(uiptr, str, &dummy))) ||
360 !(ustr = (uiptr->c2u)(uiptr, str, NULL)))
361 return encodebase64(str, strlen(str), charset, func, arg,
362 RFC2047_ENCODE_FOLDLENGTH, offset);
363
364 uptr = ustr;
365 while (*uptr)
366 {
367 unicode_char save_uc;
368 char *wstr=NULL;
369 size_t i, end, j;
370
371 if ((i = offset + 2 + strlen(charset) + 3) >
372 RFC2047_ENCODE_FOLDLENGTH - 2)
373 /* Keep room for at least one character. */
374 i = RFC2047_ENCODE_FOLDLENGTH - 2;
375 offset = 0;
376
377 /*
378 * Figure out where to break encoded-word.
379 * Take a small chunk of Unicode string and convert it back to
380 * the original charset. If the result exseeds line length,
381 * try again with a shorter chunk.
382 */
383 end = 0;
384 while (uptr[end] && end < (RFC2047_ENCODE_FOLDLENGTH - i) / 2)
385 end++;
386 /*
387 * FIXME: Unicode character with `combining'
388 * property etc. should not be treated as
389 * separate character.
390 */
391 j = end;
392 while (j)
393 {
394 save_uc = uptr[j];
395 uptr[j] = (unicode_char)0;
396 wstr = (uiptr->u2c)(uiptr, uptr, &dummy);
397 uptr[j] = save_uc;
398
399 if (!wstr)
400 /* Possiblly a part of one character extracted to
401 * multiple Unicode characters (e.g. base unicode
402 * character of one combined character). Try on
403 * shorter chunk.
404 */
405 {
406 if (j == 0)
407 break;
408
409 j--; /* FIXME */
410 continue;
411 }
412
413 if (i + ((strlen(wstr) + 3-1) / 3) * 4 + 2 >
414 RFC2047_ENCODE_FOLDLENGTH - 1)
415 /*
416 * Encoded string exceeded line length.
417 * Try on shorter chunk.
418 */
419 {
420 size_t k=j;
421
422 j--; /* FIXME */
423 if (j == 0)
424 /* Only one character exeeds line length.
425 * Anyway, encode it. */
426 {
427 j = k;
428 break;
429 }
430 free(wstr);
431 continue;
432 }
433
434 break;
435 }
436
437 if (!wstr)
438 {
439 end = 1;
440 rc = encodebase64("?", 1, charset, func, arg, 0, 0);
441 }
442 else
443 {
444 end = j;
445 rc = encodebase64(wstr, strlen(wstr),
446 charset, func, arg, 0, 0);
447 free(wstr);
448 }
449 if (rc)
450 {
451 free(ustr);
452 return rc;
453 }
454 uptr += end;
455
456 if (*uptr)
457 /*
458 * Encoded-words must be sepalated by
459 * linear-white-space.
460 */
461 if ((rc=(*func)(" ", 1, arg)))
462 {
463 free(ustr);
464 return rc;
465 }
466 }
467 free(ustr);
468 return 0;
469}
d9898ee8 470
471#define DOENCODEWORD(c) \
472 (((c) & 0x80) || (c) == '"' || (unsigned char)(c) <= 0x20 || \
473 (c) == '_' || (c) == '=' || (c) == '?' || !(*qp_allow)(c))
474
475int rfc2047_encode_callback(const char *str, const char *charset,
476 int (*qp_allow)(char),
477 int (*func)(const char *, size_t, void *),
478 void *arg)
479{
480int rc;
481int maxlen;
d9898ee8 482const struct unicode_info *ci = unicode_find(charset);
d9898ee8 483
484 if (!str || !*str)
485 return 0;
486
d9898ee8 487 if (ci && ci->flags & UNICODE_SISO)
488 return rfc2047_encode_callback_base64(str, charset, qp_allow,
489 func, arg);
d9898ee8 490
491 /* otherwise, output quoted-printable-encoded. */
492
493 while (*str)
494 {
495 size_t i, j, n, c;
496
497 for (i=0; str[i]; i++)
498 if (!ISSPACE((int)(unsigned char)str[i])
499 && DOENCODE(str[i]))
500 break;
501 if (str[i] == 0)
502 return ( i ? (*func)(str, i, arg):0);
503
504 /* Find start of word */
505
506 while (i)
507 {
508 --i;
509 if (ISSPACE((int)(unsigned char)str[i]))
510 {
511 ++i;
512 break;
513 }
514 }
515 if (i)
516 {
517 rc= (*func)(str, i, arg);
518 if (rc) return (rc);
519 str += i;
520 }
521
522 /*
523 ** Figure out when to stop MIME decoding. Consecutive
524 ** MIME-encoded words are MIME-encoded together.
525 */
526
527 i=0;
528
529 for (;;)
530 {
531 for ( ; str[i]; i++)
532 if (ISSPACE((int)(unsigned char)str[i]))
533 break;
534 if (str[i] == 0)
535 break;
536
537 for (c=i; str[c] && ISSPACE((int)(unsigned char)str[c]);
538 ++c)
539 ;
540
541 for (; str[c]; c++)
542 if (ISSPACE((int)(unsigned char)str[c]) ||
543 DOENCODE(str[c]))
544 break;
545
546 if (str[c] == 0 || ISSPACE((int)(unsigned char)str[c]))
547 break;
548 i=c;
549 }
550
551 /*
552 ** Figure out whether base64 is a better choice.
553 */
554
555 n=0;
556
557 for (j=0; j<i; j++)
558 if (DOENCODEWORD(str[j]))
559 ++n;
560
561 if (n > i/10)
562 {
563 encodebase64(str, i, charset, func, arg,
564 70, 0);
565 str += i;
566 continue;
567 }
568
569
570
571 /* Output mimeified text, insert spaces at 70+ character
572 ** boundaries for line wrapping.
573 */
574
575 maxlen=strlen(charset)+10;
576
577 if (maxlen < 65)
578 maxlen=74-maxlen;
579 else
580 maxlen=10;
581
582 c=0;
583 while (i)
584 {
585 if (c == 0)
586 {
587 if ( (rc=(*func)("=?", 2, arg)) != 0 ||
588 (rc=(*func)(charset, strlen(charset),
589 arg)) != 0 ||
590 (rc=(*func)("?Q?", 3, arg)) != 0)
591 return (rc);
592 c += strlen(charset)+5;
593 }
594
595 if (DOENCODEWORD(*str))
596 {
597 char buf[3];
598
599 buf[0]='=';
600 buf[1]=xdigit[ ( *str >> 4) & 0x0F ];
601 buf[2]=xdigit[ *str & 0x0F ];
602
603 if ( (rc=*str == ' ' ? (*func)("_", 1, arg)
604 : (*func)(buf, 3, arg)) != 0)
605 return (rc);
606 c += *str == ' ' ? 1:3;
607 ++str;
608 --i;
609 }
610 else
611 {
612 for (j=0; j < i && !DOENCODEWORD(str[j]); j++)
613 if (j + c >= maxlen)
614 break;
615 if ( (rc=(*func)(str, j, arg)) != 0)
616 return (rc);
617 c += j;
618 str += j;
619 i -= j;
620 }
621
622 if (i == 0 || c >= maxlen)
623 {
624 if ( (rc=(*func)("?= ", i ? 3:2, arg)) != 0)
625 return (rc);
626
627 c=0;
628 }
629 }
630 }
631 return (0);
632}
633
/*
** Output callback for the sizing pass: adds the chunk length l to the
** size_t counter that p points to. The chunk contents are ignored.
** Always returns 0.
*/
static int count_char(const char *c, size_t l, void *p)
{
	size_t *total=p;

	(void)c;
	*total += l;
	return 0;
}
641
/*
** Output callback for the writing pass: p points to a char * write
** cursor. Copies l bytes from c to the cursor and advances the cursor
** past them. Always returns 0.
*/
static int save_char(const char *c, size_t l, void *p)
{
	char **cursor=p;
	char *dst=*cursor;

	memcpy(dst, c, l);
	*cursor=dst + l;
	return 0;
}
650
651char *rfc2047_encode_str(const char *str, const char *charset,
652 int (*qp_allow)(char c))
653{
654size_t i=1;
655char *s, *p;
656
657 (void)rfc2047_encode_callback(str, charset,
658 qp_allow,
659 &count_char, &i);
660 if ((s=malloc(i)) == 0) return (0);
661 p=s;
662 (void)rfc2047_encode_callback(str, charset,
663 qp_allow,
664 &save_char, &p);
665 *p=0;
666 return (s);
667}
668
/*
** qp_allow predicate that accepts every character: always returns 1.
*/
int rfc2047_qp_allow_any(char c)
{
	(void)c;

	return 1;
}
673
/*
** qp_allow predicate for comment text: returns 0 for '(', ')' and '"',
** and 1 for every other character.
*/
int rfc2047_qp_allow_comment(char c)
{
	switch (c) {
	case '(':
	case ')':
	case '"':
		return 0;
	default:
		return 1;
	}
}
680
681int rfc2047_qp_allow_word(char c)
682{
683 return strchr(base64tab, c) != NULL ||
684 strchr("*-=_", c) != NULL;
685}