Imported Upstream version 0.66.1
[hcoop/debian/courier-authlib.git] / libs / rfc822 / rfc2047.c
1 /*
2 ** Copyright 1998 - 2011 Double Precision, Inc. See COPYING for
3 ** distribution information.
4 */
5
6 #include "rfc822.h"
7 #include <stdio.h>
8 #include <ctype.h>
9 #include <string.h>
10 #include <stdlib.h>
11 #include <errno.h>
12
13 #include "rfc822hdr.h"
14 #include "rfc2047.h"
15 #include "../unicode/unicode.h"
16 #if LIBIDN
17 #include <idna.h>
18 #include <stringprep.h>
19 #endif
20
21
/*
** Line length budget used when deciding how many characters go into
** one encoded word.
*/
#define RFC2047_ENCODE_FOLDLENGTH 76

/* Uppercase hexadecimal digits, indexed by nibble value. */
static const char xdigit[] = "0123456789ABCDEF";

/* The base64 alphabet, indexed by 6-bit value. */
static const char base64tab[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";

/* Defined further down; declared here because it is used as a callback. */
static char *a_rfc2047_encode_str(const char *str, const char *charset,
				  int isaddress);
30
31 static void rfc2047_encode_header_do(const struct rfc822a *a,
32 const char *charset,
33 void (*print_func)(char, void *),
34 void (*print_separator)(const char *,
35 void *), void *ptr)
36 {
37 rfc822_print_common(a, &a_rfc2047_encode_str, charset,
38 print_func, print_separator, ptr);
39 }
40
/*
** Build a newly allocated string made of the first pfix_len bytes of
** pfix followed by the encoded form of domain.
**
** When built with LIBIDN, domain is passed through idna_to_ascii_8z();
** otherwise it is appended unchanged.
**
** pfix is not read when pfix_len is 0.
**
** Returns the malloc()ed result, which the caller must free(), or NULL
** if an allocation fails or (LIBIDN only) the IDNA conversion fails, in
** which case errno is set to EINVAL.
*/
static char *rfc822_encode_domain_int(const char *pfix,
				      size_t pfix_len,
				      const char *domain)
{
	const char *encoded;
	size_t encoded_len;
	char *result;

#if LIBIDN
	char *ascii = NULL;
	size_t padded_size = strlen(domain) + 16;
	char *padded = malloc(padded_size);
	int err;

	if (!padded)
		return NULL;

	/*
	** Invalid UTF-8 can make libidn go off the deep end. Add
	** padding as a workaround.
	*/
	memset(padded, 0, padded_size);
	strcpy(padded, domain);

	err = idna_to_ascii_8z(padded, &ascii, 0);
	free(padded);

	if (err != IDNA_SUCCESS)
	{
		errno = EINVAL;
		return NULL;
	}
	encoded = ascii;
#else
	/* Nothing to convert: read straight from the caller's string. */
	encoded = domain;
#endif

	encoded_len = strlen(encoded);
	result = malloc(pfix_len + encoded_len + 1);

	if (result)
	{
		if (pfix_len)
			memcpy(result, pfix, pfix_len);

		/* The +1 copies the terminating NUL as well. */
		memcpy(result + pfix_len, encoded, encoded_len + 1);
	}

#if LIBIDN
	free(ascii);
#endif
	return result;
}
96
/*
** Encode the domain portion of an address.
**
** The address is first converted from charset to utf-8. If it contains
** an '@', everything up to and including the first '@' is kept as-is and
** only the remainder is handed to rfc822_encode_domain_int() as the
** domain; without an '@' the whole string is treated as the domain.
**
** Returns a malloc()ed string, or NULL on failure.
*/
char *rfc822_encode_domain(const char *address,
			   const char *charset)
{
	char *utf8 = libmail_u_convert_tobuf(address, charset, "utf-8", NULL);
	char *at;
	char *encoded;

	if (utf8 == NULL)
		return NULL;

	at = strchr(utf8, '@');

	if (at != NULL)
	{
		const char *domain = at + 1;

		encoded = rfc822_encode_domain_int(utf8, domain - utf8,
						   domain);
	}
	else
	{
		encoded = rfc822_encode_domain_int("", 0, utf8);
	}

	free(utf8);
	return encoded;
}
120
121 static char *a_rfc2047_encode_str(const char *str, const char *charset,
122 int isaddress)
123 {
124 size_t l;
125 char *p;
126
127 if (isaddress)
128 return rfc822_encode_domain(str, charset);
129
130 for (l=0; str[l]; l++)
131 if (str[l] & 0x80)
132 break;
133
134 if (str[l] == 0)
135 {
136 size_t n;
137
138 for (l=0; str[l]; l++)
139 if (strchr(RFC822_SPECIALS, str[l]))
140 break;
141
142 if (str[l] == 0)
143 return (strdup(str));
144
145 for (n=3, l=0; str[l]; l++)
146 {
147 switch (str[l]) {
148 case '"':
149 case '\\':
150 ++n;
151 break;
152 }
153
154 ++n;
155 }
156
157 p=malloc(n);
158
159 if (!p)
160 return NULL;
161
162 p[0]='"';
163
164 for (n=1, l=0; str[l]; l++)
165 {
166 switch (str[l]) {
167 case '"':
168 case '\\':
169 p[n++]='\\';
170 break;
171 }
172
173 p[n++]=str[l];
174 }
175 p[n++]='"';
176 p[n]=0;
177
178 return (p);
179 }
180
181 return rfc2047_encode_str(str, charset, rfc2047_qp_allow_word);
182 }
183
/*
** Output callbacks handed to rfc2047_encode_header_do(); defined
** further down.
*/
static void count(char c, void *p);
static void counts2(const char *c, void *p);
static void save(char c, void *p);
static void saves2(const char *c, void *p);

/*
** Encode a parsed address list into a newly allocated string.
**
** The list is walked twice: once with the counting callbacks to find
** out how large the buffer must be, then again with the saving
** callbacks to fill it in.
**
** Returns a malloc()ed, NUL-terminated string, or NULL if the buffer
** cannot be allocated.
*/
char *rfc2047_encode_header_addr(const struct rfc822a *a,
				 const char *charset)
{
	size_t needed = 1;	/* Room for the terminating NUL */
	char *buf;
	char *wr;

	rfc2047_encode_header_do(a, charset, &count, &counts2, &needed);

	buf = malloc(needed);

	if (buf == NULL)
		return NULL;

	wr = buf;
	rfc2047_encode_header_do(a, charset, &save, &saves2, &wr);
	*wr = 0;

	return buf;
}
203
204
205 char *rfc2047_encode_header_tobuf(const char *name, /* Header name */
206 const char *header, /* Header's contents */
207 const char *charset)
208 {
209 if (rfc822hdr_is_addr(name))
210 {
211 char *s=0;
212
213 struct rfc822t *t;
214 struct rfc822a *a;
215
216 if ((t=rfc822t_alloc_new(header, NULL, NULL)) != 0)
217 {
218 if ((a=rfc822a_alloc(t)) != 0)
219 {
220 s=rfc2047_encode_header_addr(a, charset);
221 rfc822a_free(a);
222 }
223 rfc822t_free(t);
224 }
225 return s;
226 }
227
228 return rfc2047_encode_str(header, charset, rfc2047_qp_allow_word);
229 }
230
/*
** Character callback for the sizing pass: p points to a size_t which
** is incremented by one. The character itself is not examined.
*/
static void count(char c, void *p)
{
	size_t *total = p;

	(void)c;
	*total += 1;
}

/*
** Separator callback for the sizing pass. Counts the same characters
** that saves2() writes: a leading comma (if any), a newline, a space,
** and then the remainder of the separator string.
*/
static void counts2(const char *c, void *p)
{
	const char *s = c;

	if (*s == ',')
	{
		count(*s, p);
		++s;
	}

	count('\n', p);
	count(' ', p);

	for (; *s; ++s)
		count(*s, p);
}
246
/*
** Character callback for the output pass: p points to a char * write
** cursor. The character is stored there and the cursor advances.
*/
static void save(char c, void *p)
{
	char **cursor = p;

	*(*cursor)++ = c;
}

/*
** Separator callback for the output pass. Writes a leading comma (if
** the separator starts with one), then a newline and a space, then the
** remainder of the separator string.
*/
static void saves2(const char *c, void *p)
{
	const char *s = c;

	if (*s == ',')
	{
		save(*s, p);
		++s;
	}

	save('\n', p);
	save(' ', p);

	for (; *s; ++s)
		save(*s, p);
}
263
264 static int encodebase64(const char *ptr, size_t len, const char *charset,
265 int (*qp_allow)(char),
266 int (*func)(const char *, size_t, void *), void *arg)
267 {
268 unsigned char ibuf[3];
269 char obuf[4];
270 int rc;
271
272 if ((rc=(*func)("=?", 2, arg)) ||
273 (rc=(*func)(charset, strlen(charset), arg))||
274 (rc=(*func)("?B?", 3, arg)))
275 return rc;
276
277 while (len)
278 {
279 size_t n=len > 3 ? 3:len;
280
281 ibuf[0]= ptr[0];
282 if (n>1)
283 ibuf[1]=ptr[1];
284 else
285 ibuf[1]=0;
286 if (n>2)
287 ibuf[2]=ptr[2];
288 else
289 ibuf[2]=0;
290 ptr += n;
291 len -= n;
292
293 obuf[0] = base64tab[ ibuf[0] >>2 ];
294 obuf[1] = base64tab[(ibuf[0] & 0x03)<<4|ibuf[1]>>4];
295 obuf[2] = base64tab[(ibuf[1] & 0x0F)<<2|ibuf[2]>>6];
296 obuf[3] = base64tab[ ibuf[2] & 0x3F ];
297 if (n < 2)
298 obuf[2] = '=';
299 if (n < 3)
300 obuf[3] = '=';
301
302 if ((rc=(*func)(obuf, 4, arg)))
303 return rc;
304 }
305
306 if ((rc=(*func)("?=", 2, arg)))
307 return rc;
308 return 0;
309 }
310
/* True for tab, carriage return, newline and space. */
#define ISSPACE(i) \
	((i)=='\t' || (i)=='\r' || (i)=='\n' || (i)==' ')

/*
** True if character c must be encoded: control characters, anything
** above 0x7F, the characters '"', '_', '=' and '?', and anything that
** the qp_allow function rejects. Expects a function pointer named
** qp_allow to be in scope where the macro is used.
*/
#define DOENCODEWORD(c) \
	((c) < 0x20 || (c) > 0x7F || (c) == '"' || \
	 (c) == '_' || (c) == '=' || (c) == '?' || !(*qp_allow)((char)c))
315
316 /*
317 ** Encode a character stream using quoted-printable encoding.
318 */
319 static int encodeqp(const char *ptr, size_t len,
320 const char *charset,
321 int (*qp_allow)(char),
322 int (*func)(const char *, size_t, void *), void *arg)
323 {
324 size_t i;
325 int rc;
326 char buf[3];
327
328 if ((rc=(*func)("=?", 2, arg)) ||
329 (rc=(*func)(charset, strlen(charset), arg))||
330 (rc=(*func)("?Q?", 3, arg)))
331 return rc;
332
333 for (i=0; i<len; ++i)
334 {
335 size_t j;
336
337 for (j=i; j<len; ++j)
338 {
339 if (ptr[j] == ' ' || DOENCODEWORD(ptr[j]))
340 break;
341 }
342
343 if (j > i)
344 {
345 rc=(*func)(ptr+i, j-i, arg);
346
347 if (rc)
348 return rc;
349 if (j >= len)
350 break;
351 }
352 i=j;
353
354 if (ptr[i] == ' ')
355 rc=(*func)("_", 1, arg);
356 else
357 {
358 buf[0]='=';
359 buf[1]=xdigit[ ( ptr[i] >> 4) & 0x0F ];
360 buf[2]=xdigit[ ptr[i] & 0x0F ];
361
362 rc=(*func)(buf, 3, arg);
363 }
364
365 if (rc)
366 return rc;
367 }
368
369 return (*func)("?=", 2, arg);
370 }
371
372 /*
373 ** Calculate whether the next word should be RFC2047-encoded.
374 **
375 ** Returns 0 if not, 1 if any character in the next word is flagged by
376 ** DOENCODEWORD().
377 */
378
379 static int encode_word(const unicode_char *uc,
380 size_t ucsize,
381 int (*qp_allow)(char),
382
383 /*
384 ** Points to the starting offset of word in uc.
385 ** At exit, points to the end of the word in uc.
386 */
387 size_t *word_ptr)
388 {
389 size_t i;
390 int encode=0;
391
392 for (i=*word_ptr; i<ucsize; ++i)
393 {
394 if (ISSPACE(uc[i]))
395 break;
396
397 if (DOENCODEWORD(uc[i]))
398 encode=1;
399 }
400
401 *word_ptr=i;
402 return encode;
403 }
404
405 /*
406 ** Calculate whether the next sequence of words should be RFC2047-encoded.
407 **
408 ** Whatever encode_word() returns for the first word, look at the next word
409 ** and keep going as long as encode_word() keeps returning the same value.
410 */
411
412 static int encode_words(const unicode_char *uc,
413 size_t ucsize,
414 int (*qp_allow)(char),
415
416 /*
417 ** Points to the starting offset of words in uc.
418 ** At exit, points to the end of the words in uc.
419 */
420
421 size_t *word_ptr)
422 {
423 size_t i= *word_ptr, j, k;
424
425 int flag=encode_word(uc, ucsize, qp_allow, &i);
426
427 if (!flag)
428 {
429 *word_ptr=i;
430 return flag;
431 }
432
433 j=i;
434
435 while (j < ucsize)
436 {
437 if (ISSPACE(uc[j]))
438 {
439 ++j;
440 continue;
441 }
442
443 k=j;
444
445 if (!encode_word(uc, ucsize, qp_allow, &k))
446 break;
447 i=j=k;
448 }
449
450 *word_ptr=i;
451 return flag;
452 }
453
454 /*
455 ** Encode a sequence of words.
456 */
457 static int do_encode_words_method(const unicode_char *uc,
458 size_t ucsize,
459 const char *charset,
460 int (*qp_allow)(char),
461 size_t offset,
462 int (*encoder)(const char *ptr, size_t len,
463 const char *charset,
464 int (*qp_allow)(char),
465 int (*func)(const char *,
466 size_t, void *),
467 void *arg),
468 int (*func)(const char *, size_t, void *),
469 void *arg)
470 {
471 char *p;
472 size_t psize;
473 int rc;
474 int first=1;
475
476 while (ucsize)
477 {
478 size_t j;
479 size_t i;
480
481 if (!first)
482 {
483 rc=(*func)(" ", 1, arg);
484
485 if (rc)
486 return rc;
487 }
488 first=0;
489
490 j=(RFC2047_ENCODE_FOLDLENGTH-offset)/2;
491
492 if (j >= ucsize)
493 j=ucsize;
494 else
495 {
496 /*
497 ** Do not split rfc2047-encoded works across a
498 ** grapheme break.
499 */
500
501 for (i=j; i > 0; --i)
502 if (unicode_grapheme_break(uc[i-1], uc[i]))
503 {
504 j=i;
505 break;
506 }
507 }
508
509 if ((rc=libmail_u_convert_fromu_tobuf(uc, j, charset,
510 &p, &psize,
511 NULL)) != 0)
512 return rc;
513
514
515 if (psize && p[psize-1] == 0)
516 --psize;
517
518 rc=(*encoder)(p, psize, charset, qp_allow,
519 func, arg);
520 free(p);
521 if (rc)
522 return rc;
523 offset=0;
524 ucsize -= j;
525 uc += j;
526 }
527 return 0;
528 }
529
/*
** Output callback that only measures: adds n to the size_t that arg
** points to and ignores the data. Always returns 0.
*/
static int cnt_conv(const char *dummy, size_t n, void *arg)
{
	size_t *total = arg;

	(void)dummy;
	*total += n;
	return 0;
}
535
536 /*
537 ** Encode, or not encode, words.
538 */
539
540 static int do_encode_words(const unicode_char *uc,
541 size_t ucsize,
542 const char *charset,
543 int flag,
544 int (*qp_allow)(char),
545 size_t offset,
546 int (*func)(const char *, size_t, void *),
547 void *arg)
548 {
549 char *p;
550 size_t psize;
551 int rc;
552 size_t b64len, qlen;
553
554 /*
555 ** Convert from unicode
556 */
557
558 if ((rc=libmail_u_convert_fromu_tobuf(uc, ucsize, charset,
559 &p, &psize,
560 NULL)) != 0)
561 return rc;
562
563 if (psize && p[psize-1] == 0)
564 --psize;
565
566 if (!flag) /* If not converting, then the job is done */
567 {
568 rc=(*func)(p, psize, arg);
569 free(p);
570 return rc;
571 }
572 free(p);
573
574 /*
575 ** Try first quoted-printable, then base64, then pick whichever
576 ** one gives the shortest results.
577 */
578 qlen=0;
579 b64len=0;
580
581 rc=do_encode_words_method(uc, ucsize, charset, qp_allow, offset,
582 &encodeqp, cnt_conv, &qlen);
583 if (rc)
584 return rc;
585
586 rc=do_encode_words_method(uc, ucsize, charset, qp_allow, offset,
587 &encodebase64, cnt_conv, &b64len);
588 if (rc)
589 return rc;
590
591 return do_encode_words_method(uc, ucsize, charset, qp_allow, offset,
592 qlen < b64len ? encodeqp:encodebase64,
593 func, arg);
594 }
595
596 /*
597 ** RFC2047-encoding pass.
598 */
599 static int rfc2047_encode_callback(const unicode_char *uc,
600 size_t ucsize,
601 const char *charset,
602 int (*qp_allow)(char),
603 int (*func)(const char *, size_t, void *),
604 void *arg)
605 {
606 int rc;
607 size_t i;
608 int flag;
609
610 size_t offset=27; /* FIXME: initial offset for line length */
611
612 while (ucsize)
613 {
614 /* Pass along all the whitespace */
615
616 if (ISSPACE(*uc))
617 {
618 char c= *uc++;
619 --ucsize;
620
621 if ((rc=(*func)(&c, 1, arg)) != 0)
622 return rc;
623 continue;
624 }
625
626 i=0;
627
628 /* Check if the next word needs to be encoded, or not. */
629
630 flag=encode_words(uc, ucsize, qp_allow, &i);
631
632 /*
633 ** Then proceed to encode, or not encode, the following words.
634 */
635
636 if ((rc=do_encode_words(uc, i, charset, flag,
637 qp_allow, offset,
638 func, arg)) != 0)
639 return rc;
640
641 offset=0;
642 uc += i;
643 ucsize -= i;
644 }
645
646 return 0;
647 }
648
649
/*
** Output callback for the sizing pass: adds l to the size_t that p
** points to. The data itself is ignored. Always returns 0.
*/
static int count_char(const char *c, size_t l, void *p)
{
	size_t *total = p;

	(void)c;
	*total += l;
	return 0;
}
657
/*
** Output callback for the saving pass: p points to a char * write
** cursor. Copies l bytes from c to the cursor and advances the cursor
** past them. Always returns 0.
*/
static int save_char(const char *c, size_t l, void *p)
{
	char **cursor = p;
	char *dest = *cursor;

	memcpy(dest, c, l);
	*cursor = dest + l;
	return 0;
}
666
667 char *rfc2047_encode_str(const char *str, const char *charset,
668 int (*qp_allow)(char c))
669 {
670 size_t i=1;
671 char *s, *p;
672 unicode_char *uc;
673 size_t ucsize;
674 int err;
675
676 /* Convert string to unicode */
677
678 if (libmail_u_convert_tou_tobuf(str, strlen(str), charset,
679 &uc, &ucsize, &err))
680 return NULL;
681
682 /*
683 ** Perform two passes: calculate size of the buffer where the
684 ** encoded string gets saved into, then allocate the buffer and
685 ** do a second pass to actually do it.
686 */
687
688 if (rfc2047_encode_callback(uc, ucsize,
689 charset,
690 qp_allow,
691 &count_char, &i))
692 {
693 free(uc);
694 return NULL;
695 }
696
697 if ((s=malloc(i)) == 0)
698 {
699 free(uc);
700 return NULL;
701 }
702
703 p=s;
704 (void)rfc2047_encode_callback(uc, ucsize,
705 charset,
706 qp_allow,
707 &save_char, &p);
708 *p=0;
709 free(uc);
710 return (s);
711 }
712
/*
** Character predicate with the signature taken by the qp_allow
** argument of rfc2047_encode_str(): accepts every character.
*/
int rfc2047_qp_allow_any(char c)
{
	(void)c;
	return 1;
}
717
/*
** Character predicate with the signature taken by the qp_allow
** argument of rfc2047_encode_str(): rejects '(', ')' and '"', and
** accepts everything else.
*/
int rfc2047_qp_allow_comment(char c)
{
	switch (c) {
	case '(':
	case ')':
	case '"':
		return 0;
	default:
		return 1;
	}
}
724
/*
** Character predicate with the signature taken by the qp_allow
** argument of rfc2047_encode_str(): accepts ASCII letters and digits
** and the characters "+/*-=_", and rejects everything else.
**
** Returns 1 if c is accepted, 0 if not.
*/
int rfc2047_qp_allow_word(char c)
{
	static const char allowed[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"abcdefghijklmnopqrstuvwxyz"
		"0123456789+/"
		"*-=_";

	/*
	** strchr() treats the terminating NUL as part of the string, so
	** a NUL character has to be turned away explicitly.
	*/
	return c != 0 && strchr(allowed, c) != NULL;
}