Import Debian changes 0.66.4-9
[hcoop/debian/courier-authlib.git] / libs / rfc822 / rfc2047.c
CommitLineData
b0322a85
CE
1/*
2** Copyright 1998 - 2011 Double Precision, Inc. See COPYING for
3** distribution information.
4*/
5
6#include "rfc822.h"
7#include <stdio.h>
8#include <ctype.h>
9#include <string.h>
10#include <stdlib.h>
11#include <errno.h>
d50284c4 12#include <courier-unicode.h>
b0322a85
CE
13
14#include "rfc822hdr.h"
15#include "rfc2047.h"
b0322a85
CE
16#if LIBIDN
17#include <idna.h>
18#include <stringprep.h>
19#endif
20
21
22#define RFC2047_ENCODE_FOLDLENGTH 76
23
24static const char xdigit[]="0123456789ABCDEF";
25static const char base64tab[]=
26"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
27
28static char *a_rfc2047_encode_str(const char *str, const char *charset,
29 int isaddress);
30
31static void rfc2047_encode_header_do(const struct rfc822a *a,
32 const char *charset,
33 void (*print_func)(char, void *),
34 void (*print_separator)(const char *,
35 void *), void *ptr)
36{
37 rfc822_print_common(a, &a_rfc2047_encode_str, charset,
38 print_func, print_separator, ptr);
39}
40
/*
** Build a newly allocated string made of the first pfix_len bytes of
** pfix followed by the encoded form of domain.
**
** When compiled with LIBIDN the domain is passed through
** idna_to_ascii_8z(); otherwise it is copied unchanged.
**
** Returns NULL on failure; a failed IDNA conversion sets errno to EINVAL.
** The returned buffer comes from malloc().
*/
static char *rfc822_encode_domain_int(const char *pfix,
				      size_t pfix_len,
				      const char *domain)
{
	char *encoded;
	char *result;
	size_t encoded_len;

#if LIBIDN
	size_t padded_size=strlen(domain)+16;
	char *padded=malloc(padded_size);
	int idn_rc;

	if (padded == NULL)
		return NULL;

	/*
	** Invalid UTF-8 can make libidn go off the deep end. Hand it a
	** zero-filled copy with spare bytes after the string as a
	** workaround.
	*/

	memset(padded, 0, padded_size);
	strcpy(padded, domain);

	idn_rc=idna_to_ascii_8z(padded, &encoded, 0);
	free(padded);

	if (idn_rc != IDNA_SUCCESS)
	{
		errno=EINVAL;
		return NULL;
	}
#else
	encoded=strdup(domain);

	if (encoded == NULL)
		return NULL;
#endif

	encoded_len=strlen(encoded);
	result=malloc(pfix_len+encoded_len+1);

	if (result != NULL)
	{
		if (pfix_len)
			memcpy(result, pfix, pfix_len);

		/* The +1 carries the terminating NUL across. */
		memcpy(result+pfix_len, encoded, encoded_len+1);
	}

	free(encoded);
	return result;
}
96
/*
** Encode the domain portion of an E-mail address.
**
** The address is first run through unicode_convert_tobuf() with "utf-8"
** as the target. If the result contains a '@', everything up to and
** including the first '@' is kept verbatim and only the remainder is given
** to rfc822_encode_domain_int() as the domain; without a '@' the whole
** string is treated as the domain.
**
** Returns a malloc()ed string, or NULL on failure.
*/
char *rfc822_encode_domain(const char *address,
			   const char *charset)
{
	char *utf8=unicode_convert_tobuf(address, charset, "utf-8", NULL);
	char *at;
	char *result;

	if (utf8 == NULL)
		return NULL;

	at=strchr(utf8, '@');

	if (at != NULL)
	{
		const char *domain=at+1;

		result=rfc822_encode_domain_int(utf8, domain-utf8, domain);
	}
	else
	{
		result=rfc822_encode_domain_int("", 0, utf8);
	}

	free(utf8);
	return result;
}
120
121static char *a_rfc2047_encode_str(const char *str, const char *charset,
122 int isaddress)
123{
124 size_t l;
125 char *p;
126
127 if (isaddress)
128 return rfc822_encode_domain(str, charset);
129
130 for (l=0; str[l]; l++)
131 if (str[l] & 0x80)
132 break;
133
134 if (str[l] == 0)
135 {
136 size_t n;
137
138 for (l=0; str[l]; l++)
139 if (strchr(RFC822_SPECIALS, str[l]))
140 break;
141
142 if (str[l] == 0)
143 return (strdup(str));
144
145 for (n=3, l=0; str[l]; l++)
146 {
147 switch (str[l]) {
148 case '"':
149 case '\\':
150 ++n;
151 break;
152 }
153
154 ++n;
155 }
156
157 p=malloc(n);
158
159 if (!p)
160 return NULL;
161
162 p[0]='"';
163
164 for (n=1, l=0; str[l]; l++)
165 {
166 switch (str[l]) {
167 case '"':
168 case '\\':
169 p[n++]='\\';
170 break;
171 }
172
173 p[n++]=str[l];
174 }
175 p[n++]='"';
176 p[n]=0;
177
178 return (p);
179 }
180
181 return rfc2047_encode_str(str, charset, rfc2047_qp_allow_word);
182}
183
184static void count(char c, void *p);
185static void counts2(const char *c, void *p);
186static void save(char c, void *p);
187static void saves2(const char *c, void *p);
188
189char *rfc2047_encode_header_addr(const struct rfc822a *a,
190 const char *charset)
191{
192size_t l;
193char *s, *p;
194
195 l=1;
196 rfc2047_encode_header_do(a, charset, &count, &counts2, &l);
197 if ((s=malloc(l)) == 0) return (0);
198 p=s;
199 rfc2047_encode_header_do(a, charset, &save, &saves2, &p);
200 *p=0;
201 return (s);
202}
203
204
205char *rfc2047_encode_header_tobuf(const char *name, /* Header name */
206 const char *header, /* Header's contents */
207 const char *charset)
208{
209 if (rfc822hdr_is_addr(name))
210 {
211 char *s=0;
212
213 struct rfc822t *t;
214 struct rfc822a *a;
215
216 if ((t=rfc822t_alloc_new(header, NULL, NULL)) != 0)
217 {
218 if ((a=rfc822a_alloc(t)) != 0)
219 {
220 s=rfc2047_encode_header_addr(a, charset);
221 rfc822a_free(a);
222 }
223 rfc822t_free(t);
224 }
225 return s;
226 }
227
228 return rfc2047_encode_str(header, charset, rfc2047_qp_allow_word);
229}
230
/*
** Character callback for the sizing pass: adds one to the size_t
** counter that p points to. The character itself is ignored.
*/
static void count(char c, void *p)
{
	size_t *total=p;

	(void)c;
	*total += 1;
}
235
/*
** Separator callback for the sizing pass.
**
** A leading comma is counted first, then a newline and a space, then
** whatever remains of the separator string. Mirrors saves2().
*/
static void counts2(const char *c, void *p)
{
	const char *rest=c;

	if (*rest == ',')
	{
		count(',', p);
		++rest;
	}

	count('\n', p);
	count(' ', p);

	for (; *rest; ++rest)
		count(*rest, p);
}
246
/*
** Character callback for the output pass: p points to a char * write
** cursor; the character is stored there and the cursor advances by one.
*/
static void save(char c, void *p)
{
	char **cursor=p;

	*(*cursor)++=c;
}
252
/*
** Separator callback for the output pass.
**
** A leading comma is written first, then a newline and a space, then
** whatever remains of the separator string. Mirrors counts2().
*/
static void saves2(const char *c, void *p)
{
	const char *rest=c;

	if (*rest == ',')
	{
		save(',', p);
		++rest;
	}

	save('\n', p);
	save(' ', p);

	for (; *rest; ++rest)
		save(*rest, p);
}
263
264static int encodebase64(const char *ptr, size_t len, const char *charset,
265 int (*qp_allow)(char),
266 int (*func)(const char *, size_t, void *), void *arg)
267{
268 unsigned char ibuf[3];
269 char obuf[4];
270 int rc;
271
272 if ((rc=(*func)("=?", 2, arg)) ||
273 (rc=(*func)(charset, strlen(charset), arg))||
274 (rc=(*func)("?B?", 3, arg)))
275 return rc;
276
277 while (len)
278 {
279 size_t n=len > 3 ? 3:len;
280
281 ibuf[0]= ptr[0];
282 if (n>1)
283 ibuf[1]=ptr[1];
284 else
285 ibuf[1]=0;
286 if (n>2)
287 ibuf[2]=ptr[2];
288 else
289 ibuf[2]=0;
290 ptr += n;
291 len -= n;
292
293 obuf[0] = base64tab[ ibuf[0] >>2 ];
294 obuf[1] = base64tab[(ibuf[0] & 0x03)<<4|ibuf[1]>>4];
295 obuf[2] = base64tab[(ibuf[1] & 0x0F)<<2|ibuf[2]>>6];
296 obuf[3] = base64tab[ ibuf[2] & 0x3F ];
297 if (n < 2)
298 obuf[2] = '=';
299 if (n < 3)
300 obuf[3] = '=';
301
302 if ((rc=(*func)(obuf, 4, arg)))
303 return rc;
304 }
305
306 if ((rc=(*func)("?=", 2, arg)))
307 return rc;
308 return 0;
309}
310
/* True for the four whitespace characters that separate words. */
#define ISSPACE(i) ((i)=='\t' || (i)=='\r' || (i)=='\n' || (i)==' ')

/*
** True if character c may not appear literally in RFC 2047 output:
** control characters, DEL, anything above 7-bit ASCII, the characters
** '"', '_', '=' and '?', and anything rejected by the qp_allow callback.
**
** The macro expects a function pointer named qp_allow to be in scope at
** the point of use. DEL (0x7F) is included because only printable ASCII
** may be represented literally in the "Q" encoding.
*/
#define DOENCODEWORD(c) \
	((c) < 0x20 || (c) >= 0x7F || (c) == '"' || \
	 (c) == '_' || (c) == '=' || (c) == '?' || !(*qp_allow)((char)(c)))
315
316/*
317** Encode a character stream using quoted-printable encoding.
318*/
319static int encodeqp(const char *ptr, size_t len,
320 const char *charset,
321 int (*qp_allow)(char),
322 int (*func)(const char *, size_t, void *), void *arg)
323{
324 size_t i;
325 int rc;
326 char buf[3];
327
328 if ((rc=(*func)("=?", 2, arg)) ||
329 (rc=(*func)(charset, strlen(charset), arg))||
330 (rc=(*func)("?Q?", 3, arg)))
331 return rc;
332
333 for (i=0; i<len; ++i)
334 {
335 size_t j;
336
337 for (j=i; j<len; ++j)
338 {
339 if (ptr[j] == ' ' || DOENCODEWORD(ptr[j]))
340 break;
341 }
342
343 if (j > i)
344 {
345 rc=(*func)(ptr+i, j-i, arg);
346
347 if (rc)
348 return rc;
349 if (j >= len)
350 break;
351 }
352 i=j;
353
354 if (ptr[i] == ' ')
355 rc=(*func)("_", 1, arg);
356 else
357 {
358 buf[0]='=';
359 buf[1]=xdigit[ ( ptr[i] >> 4) & 0x0F ];
360 buf[2]=xdigit[ ptr[i] & 0x0F ];
361
362 rc=(*func)(buf, 3, arg);
363 }
364
365 if (rc)
366 return rc;
367 }
368
369 return (*func)("?=", 2, arg);
370}
371
372/*
373** Calculate whether the next word should be RFC2047-encoded.
374**
375** Returns 0 if not, 1 if any character in the next word is flagged by
376** DOENCODEWORD().
377*/
378
379static int encode_word(const unicode_char *uc,
380 size_t ucsize,
381 int (*qp_allow)(char),
382
383 /*
384 ** Points to the starting offset of word in uc.
385 ** At exit, points to the end of the word in uc.
386 */
387 size_t *word_ptr)
388{
389 size_t i;
390 int encode=0;
391
392 for (i=*word_ptr; i<ucsize; ++i)
393 {
394 if (ISSPACE(uc[i]))
395 break;
396
397 if (DOENCODEWORD(uc[i]))
398 encode=1;
399 }
400
401 *word_ptr=i;
402 return encode;
403}
404
405/*
406** Calculate whether the next sequence of words should be RFC2047-encoded.
407**
408** Whatever encode_word() returns for the first word, look at the next word
409** and keep going as long as encode_word() keeps returning the same value.
410*/
411
412static int encode_words(const unicode_char *uc,
413 size_t ucsize,
414 int (*qp_allow)(char),
415
416 /*
417 ** Points to the starting offset of words in uc.
418 ** At exit, points to the end of the words in uc.
419 */
420
421 size_t *word_ptr)
422{
423 size_t i= *word_ptr, j, k;
424
425 int flag=encode_word(uc, ucsize, qp_allow, &i);
426
427 if (!flag)
428 {
429 *word_ptr=i;
430 return flag;
431 }
432
433 j=i;
434
435 while (j < ucsize)
436 {
437 if (ISSPACE(uc[j]))
438 {
439 ++j;
440 continue;
441 }
442
443 k=j;
444
445 if (!encode_word(uc, ucsize, qp_allow, &k))
446 break;
447 i=j=k;
448 }
449
450 *word_ptr=i;
451 return flag;
452}
453
454/*
455** Encode a sequence of words.
456*/
457static int do_encode_words_method(const unicode_char *uc,
458 size_t ucsize,
459 const char *charset,
460 int (*qp_allow)(char),
461 size_t offset,
462 int (*encoder)(const char *ptr, size_t len,
463 const char *charset,
464 int (*qp_allow)(char),
465 int (*func)(const char *,
466 size_t, void *),
467 void *arg),
468 int (*func)(const char *, size_t, void *),
469 void *arg)
470{
471 char *p;
472 size_t psize;
473 int rc;
474 int first=1;
475
476 while (ucsize)
477 {
478 size_t j;
479 size_t i;
480
481 if (!first)
482 {
483 rc=(*func)(" ", 1, arg);
484
485 if (rc)
486 return rc;
487 }
488 first=0;
489
490 j=(RFC2047_ENCODE_FOLDLENGTH-offset)/2;
491
492 if (j >= ucsize)
493 j=ucsize;
494 else
495 {
496 /*
497 ** Do not split rfc2047-encoded works across a
498 ** grapheme break.
499 */
500
501 for (i=j; i > 0; --i)
502 if (unicode_grapheme_break(uc[i-1], uc[i]))
503 {
504 j=i;
505 break;
506 }
507 }
508
d50284c4 509 if ((rc=unicode_convert_fromu_tobuf(uc, j, charset,
b0322a85
CE
510 &p, &psize,
511 NULL)) != 0)
512 return rc;
513
514
515 if (psize && p[psize-1] == 0)
516 --psize;
517
518 rc=(*encoder)(p, psize, charset, qp_allow,
519 func, arg);
520 free(p);
521 if (rc)
522 return rc;
523 offset=0;
524 ucsize -= j;
525 uc += j;
526 }
527 return 0;
528}
529
/*
** Output callback that only measures: adds n to the size_t that arg
** points to and ignores the data. Always returns 0.
*/
static int cnt_conv(const char *dummy, size_t n, void *arg)
{
	size_t *total=arg;

	(void)dummy;
	*total += n;
	return 0;
}
535
536/*
537** Encode, or not encode, words.
538*/
539
540static int do_encode_words(const unicode_char *uc,
541 size_t ucsize,
542 const char *charset,
543 int flag,
544 int (*qp_allow)(char),
545 size_t offset,
546 int (*func)(const char *, size_t, void *),
547 void *arg)
548{
549 char *p;
550 size_t psize;
551 int rc;
552 size_t b64len, qlen;
553
554 /*
555 ** Convert from unicode
556 */
557
d50284c4 558 if ((rc=unicode_convert_fromu_tobuf(uc, ucsize, charset,
b0322a85
CE
559 &p, &psize,
560 NULL)) != 0)
561 return rc;
562
563 if (psize && p[psize-1] == 0)
564 --psize;
565
566 if (!flag) /* If not converting, then the job is done */
567 {
568 rc=(*func)(p, psize, arg);
569 free(p);
570 return rc;
571 }
572 free(p);
573
574 /*
575 ** Try first quoted-printable, then base64, then pick whichever
576 ** one gives the shortest results.
577 */
578 qlen=0;
579 b64len=0;
580
581 rc=do_encode_words_method(uc, ucsize, charset, qp_allow, offset,
582 &encodeqp, cnt_conv, &qlen);
583 if (rc)
584 return rc;
585
586 rc=do_encode_words_method(uc, ucsize, charset, qp_allow, offset,
587 &encodebase64, cnt_conv, &b64len);
588 if (rc)
589 return rc;
590
591 return do_encode_words_method(uc, ucsize, charset, qp_allow, offset,
592 qlen < b64len ? encodeqp:encodebase64,
593 func, arg);
594}
595
596/*
597** RFC2047-encoding pass.
598*/
599static int rfc2047_encode_callback(const unicode_char *uc,
600 size_t ucsize,
601 const char *charset,
602 int (*qp_allow)(char),
603 int (*func)(const char *, size_t, void *),
604 void *arg)
605{
606 int rc;
607 size_t i;
608 int flag;
609
610 size_t offset=27; /* FIXME: initial offset for line length */
611
612 while (ucsize)
613 {
614 /* Pass along all the whitespace */
615
616 if (ISSPACE(*uc))
617 {
618 char c= *uc++;
619 --ucsize;
620
621 if ((rc=(*func)(&c, 1, arg)) != 0)
622 return rc;
623 continue;
624 }
625
626 i=0;
627
628 /* Check if the next word needs to be encoded, or not. */
629
630 flag=encode_words(uc, ucsize, qp_allow, &i);
631
632 /*
633 ** Then proceed to encode, or not encode, the following words.
634 */
635
636 if ((rc=do_encode_words(uc, i, charset, flag,
637 qp_allow, offset,
638 func, arg)) != 0)
639 return rc;
640
641 offset=0;
642 uc += i;
643 ucsize -= i;
644 }
645
646 return 0;
647}
648
649
/*
** Output callback for the sizing pass of rfc2047_encode_str(): adds l to
** the size_t that p points to and ignores the data. Always returns 0.
*/
static int count_char(const char *c, size_t l, void *p)
{
	size_t *total=p;

	(void)c;
	*total += l;
	return 0;
}
657
/*
** Output callback for the output pass of rfc2047_encode_str(): p points
** to a char * write cursor. Copies l bytes from c to the cursor and
** advances the cursor by l. Always returns 0.
*/
static int save_char(const char *c, size_t l, void *p)
{
	char **cursor=p;
	char *dest= *cursor;

	memcpy(dest, c, l);
	*cursor=dest+l;
	return 0;
}
666
667char *rfc2047_encode_str(const char *str, const char *charset,
668 int (*qp_allow)(char c))
669{
670 size_t i=1;
671 char *s, *p;
672 unicode_char *uc;
673 size_t ucsize;
674 int err;
675
676 /* Convert string to unicode */
677
d50284c4 678 if (unicode_convert_tou_tobuf(str, strlen(str), charset,
b0322a85
CE
679 &uc, &ucsize, &err))
680 return NULL;
681
682 /*
683 ** Perform two passes: calculate size of the buffer where the
684 ** encoded string gets saved into, then allocate the buffer and
685 ** do a second pass to actually do it.
686 */
687
688 if (rfc2047_encode_callback(uc, ucsize,
689 charset,
690 qp_allow,
691 &count_char, &i))
692 {
693 free(uc);
694 return NULL;
695 }
696
697 if ((s=malloc(i)) == 0)
698 {
699 free(uc);
700 return NULL;
701 }
702
703 p=s;
704 (void)rfc2047_encode_callback(uc, ucsize,
705 charset,
706 qp_allow,
707 &save_char, &p);
708 *p=0;
709 free(uc);
710 return (s);
711}
712
/*
** qp_allow callback that accepts every character. Always returns 1.
*/
int rfc2047_qp_allow_any(char c)
{
	(void)c;
	return 1;
}
717
/*
** qp_allow callback that rejects parentheses and the double quote.
** Returns 0 for '(', ')' and '"', and 1 for every other character.
*/
int rfc2047_qp_allow_comment(char c)
{
	switch (c) {
	case '(':
	case ')':
	case '"':
		return 0;
	default:
		return 1;
	}
}
724
725int rfc2047_qp_allow_word(char c)
726{
727 return strchr(base64tab, c) != NULL ||
728 strchr("*-=_", c) != NULL;
729}