2 ** Copyright 1998 - 2011 Double Precision, Inc. See COPYING for
3 ** distribution information.
13 #include "rfc822hdr.h"
15 #include "../unicode/unicode.h"
18 #include <stringprep.h>
/*
** Line-length figure used when splitting encoded output: the encoding
** code computes a chunk length as (RFC2047_ENCODE_FOLDLENGTH-offset)/2.
*/
22 #define RFC2047_ENCODE_FOLDLENGTH 76
/*
** Uppercase hexadecimal digits, indexed by a 4-bit value. encodeqp()
** looks up the high and low nibble of a byte here.
*/
24 static const char xdigit
[]="0123456789ABCDEF";
/*
** The 64-character base64 alphabet, indexed by a 6-bit value when
** building base64 output. rfc2047_qp_allow_word() also searches this
** string with strchr().
*/
25 static const char base64tab
[]=
26 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
28 static char *a_rfc2047_encode_str(const char *str
, const char *charset
,
31 static void rfc2047_encode_header_do(const struct rfc822a
*a
,
33 void (*print_func
)(char, void *),
34 void (*print_separator
)(const char *,
37 rfc822_print_common(a
, &a_rfc2047_encode_str
, charset
,
38 print_func
, print_separator
, ptr
);
41 static char *rfc822_encode_domain_int(const char *pfix
,
50 size_t s
=strlen(domain
)+16;
57 ** Invalid UTF-8 can make libidn go off the deep end. Add
58 ** padding as a workaround.
64 err
=idna_to_ascii_8z(cpy
, &p
, 0);
67 if (err
!= IDNA_SUCCESS
)
81 q
=malloc(strlen(p
)+pfix_len
+1);
90 memcpy(q
, pfix
, pfix_len
);
92 strcpy(q
+ pfix_len
, p
);
97 char *rfc822_encode_domain(const char *address
,
100 char *p
=libmail_u_convert_tobuf(address
, charset
, "utf-8", NULL
);
110 q
=rfc822_encode_domain_int("", 0, p
);
116 q
=rfc822_encode_domain_int(p
, cp
-p
, cp
);
121 static char *a_rfc2047_encode_str(const char *str
, const char *charset
,
128 return rfc822_encode_domain(str
, charset
);
130 for (l
=0; str
[l
]; l
++)
138 for (l
=0; str
[l
]; l
++)
139 if (strchr(RFC822_SPECIALS
, str
[l
]))
143 return (strdup(str
));
145 for (n
=3, l
=0; str
[l
]; l
++)
164 for (n
=1, l
=0; str
[l
]; l
++)
181 return rfc2047_encode_str(str
, charset
, rfc2047_qp_allow_word
);
/*
** Forward declarations of the output callbacks that
** rfc2047_encode_header_addr() hands to rfc2047_encode_header_do():
** count/counts2 on its first call, save/saves2 on its second call.
** counts2() and saves2() take a string and pass it, one character at a
** time, to count() and save() respectively.
*/
184 static void count(char c
, void *p
);
185 static void counts2(const char *c
, void *p
);
186 static void save(char c
, void *p
);
187 static void saves2(const char *c
, void *p
);
189 char *rfc2047_encode_header_addr(const struct rfc822a
*a
,
196 rfc2047_encode_header_do(a
, charset
, &count
, &counts2
, &l
);
197 if ((s
=malloc(l
)) == 0) return (0);
199 rfc2047_encode_header_do(a
, charset
, &save
, &saves2
, &p
);
205 char *rfc2047_encode_header_tobuf(const char *name
, /* Header name */
206 const char *header
, /* Header's contents */
209 if (rfc822hdr_is_addr(name
))
216 if ((t
=rfc822t_alloc_new(header
, NULL
, NULL
)) != 0)
218 if ((a
=rfc822a_alloc(t
)) != 0)
220 s
=rfc2047_encode_header_addr(a
, charset
);
228 return rfc2047_encode_str(header
, charset
, rfc2047_qp_allow_word
);
231 static void count(char c
, void *p
)
236 static void counts2(const char *c
, void *p
)
244 while (*c
) count(*c
++, p
);
247 static void save(char c
, void *p
)
253 static void saves2(const char *c
, void *p
)
261 while (*c
) save(*c
++, p
);
264 static int encodebase64(const char *ptr
, size_t len
, const char *charset
,
265 int (*qp_allow
)(char),
266 int (*func
)(const char *, size_t, void *), void *arg
)
268 unsigned char ibuf
[3];
272 if ((rc
=(*func
)("=?", 2, arg
)) ||
273 (rc
=(*func
)(charset
, strlen(charset
), arg
))||
274 (rc
=(*func
)("?B?", 3, arg
)))
279 size_t n
=len
> 3 ? 3:len
;
293 obuf
[0] = base64tab
[ ibuf
[0] >>2 ];
294 obuf
[1] = base64tab
[(ibuf
[0] & 0x03)<<4|ibuf
[1]>>4];
295 obuf
[2] = base64tab
[(ibuf
[1] & 0x0F)<<2|ibuf
[2]>>6];
296 obuf
[3] = base64tab
[ ibuf
[2] & 0x3F ];
302 if ((rc
=(*func
)(obuf
, 4, arg
)))
306 if ((rc
=(*func
)("?=", 2, arg
)))
/* Nonzero when i is a tab, carriage return, newline or space. */
311 #define ISSPACE(i) ((i)=='\t' || (i)=='\r' || (i)=='\n' || (i)==' ')
/*
** Nonzero when c is below 0x20, above 0x7F, is a double quote, '_',
** '=' or '?', or when (*qp_allow)() returns zero for it.
**
** The expansion refers to a function pointer named qp_allow, so one
** must be in scope wherever this macro is used.
**
** NOTE(review): c is evaluated several times and is not parenthesized
** inside the (char) cast; only pass simple, side-effect-free
** expressions.
*/
312 #define DOENCODEWORD(c) \
313 ((c) < 0x20 || (c) > 0x7F || (c) == '"' || \
314 (c) == '_' || (c) == '=' || (c) == '?' || !(*qp_allow)((char)c))
317 ** Encode a character stream using quoted-printable encoding.
319 static int encodeqp(const char *ptr
, size_t len
,
321 int (*qp_allow
)(char),
322 int (*func
)(const char *, size_t, void *), void *arg
)
328 if ((rc
=(*func
)("=?", 2, arg
)) ||
329 (rc
=(*func
)(charset
, strlen(charset
), arg
))||
330 (rc
=(*func
)("?Q?", 3, arg
)))
333 for (i
=0; i
<len
; ++i
)
337 for (j
=i
; j
<len
; ++j
)
339 if (ptr
[j
] == ' ' || DOENCODEWORD(ptr
[j
]))
345 rc
=(*func
)(ptr
+i
, j
-i
, arg
);
355 rc
=(*func
)("_", 1, arg
);
359 buf
[1]=xdigit
[ ( ptr
[i
] >> 4) & 0x0F ];
360 buf
[2]=xdigit
[ ptr
[i
] & 0x0F ];
362 rc
=(*func
)(buf
, 3, arg
);
369 return (*func
)("?=", 2, arg
);
373 ** Calculate whether the next word should be RFC2047-encoded.
375 ** Returns 0 if not, 1 if any character in the next word is flagged by
379 static int encode_word(const unicode_char
*uc
,
381 int (*qp_allow
)(char),
384 ** Points to the starting offset of word in uc.
385 ** At exit, points to the end of the word in uc.
392 for (i
=*word_ptr
; i
<ucsize
; ++i
)
397 if (DOENCODEWORD(uc
[i
]))
406 ** Calculate whether the next sequence of words should be RFC2047-encoded.
408 ** Whatever encode_word() returns for the first word, look at the next word
409 ** and keep going as long as encode_word() keeps returning the same value.
412 static int encode_words(const unicode_char
*uc
,
414 int (*qp_allow
)(char),
417 ** Points to the starting offset of words in uc.
418 ** At exit, points to the end of the words in uc.
423 size_t i
= *word_ptr
, j
, k
;
425 int flag
=encode_word(uc
, ucsize
, qp_allow
, &i
);
445 if (!encode_word(uc
, ucsize
, qp_allow
, &k
))
455 ** Encode a sequence of words.
457 static int do_encode_words_method(const unicode_char
*uc
,
460 int (*qp_allow
)(char),
462 int (*encoder
)(const char *ptr
, size_t len
,
464 int (*qp_allow
)(char),
465 int (*func
)(const char *,
468 int (*func
)(const char *, size_t, void *),
483 rc
=(*func
)(" ", 1, arg
);
490 j
=(RFC2047_ENCODE_FOLDLENGTH
-offset
)/2;
497 ** Do not split rfc2047-encoded words across a
501 for (i
=j
; i
> 0; --i
)
502 if (unicode_grapheme_break(uc
[i
-1], uc
[i
]))
509 if ((rc
=libmail_u_convert_fromu_tobuf(uc
, j
, charset
,
515 if (psize
&& p
[psize
-1] == 0)
518 rc
=(*encoder
)(p
, psize
, charset
, qp_allow
,
530 static int cnt_conv(const char *dummy
, size_t n
, void *arg
)
537 ** Encode, or not encode, words.
540 static int do_encode_words(const unicode_char
*uc
,
544 int (*qp_allow
)(char),
546 int (*func
)(const char *, size_t, void *),
555 ** Convert from unicode
558 if ((rc
=libmail_u_convert_fromu_tobuf(uc
, ucsize
, charset
,
563 if (psize
&& p
[psize
-1] == 0)
566 if (!flag
) /* If not converting, then the job is done */
568 rc
=(*func
)(p
, psize
, arg
);
575 ** Try first quoted-printable, then base64, then pick whichever
576 ** one gives the shortest results.
581 rc
=do_encode_words_method(uc
, ucsize
, charset
, qp_allow
, offset
,
582 &encodeqp
, cnt_conv
, &qlen
);
586 rc
=do_encode_words_method(uc
, ucsize
, charset
, qp_allow
, offset
,
587 &encodebase64
, cnt_conv
, &b64len
);
591 return do_encode_words_method(uc
, ucsize
, charset
, qp_allow
, offset
,
592 qlen
< b64len
? encodeqp
:encodebase64
,
597 ** RFC2047-encoding pass.
599 static int rfc2047_encode_callback(const unicode_char
*uc
,
602 int (*qp_allow
)(char),
603 int (*func
)(const char *, size_t, void *),
610 size_t offset
=27; /* FIXME: initial offset for line length */
614 /* Pass along all the whitespace */
621 if ((rc
=(*func
)(&c
, 1, arg
)) != 0)
628 /* Check if the next word needs to be encoded, or not. */
630 flag
=encode_words(uc
, ucsize
, qp_allow
, &i
);
633 ** Then proceed to encode, or not encode, the following words.
636 if ((rc
=do_encode_words(uc
, i
, charset
, flag
,
650 static int count_char(const char *c
, size_t l
, void *p
)
652 size_t *i
=(size_t *)p
;
658 static int save_char(const char *c
, size_t l
, void *p
)
667 char *rfc2047_encode_str(const char *str
, const char *charset
,
668 int (*qp_allow
)(char c
))
676 /* Convert string to unicode */
678 if (libmail_u_convert_tou_tobuf(str
, strlen(str
), charset
,
683 ** Perform two passes: calculate size of the buffer where the
684 ** encoded string gets saved into, then allocate the buffer and
685 ** do a second pass to actually do it.
688 if (rfc2047_encode_callback(uc
, ucsize
,
697 if ((s
=malloc(i
)) == 0)
704 (void)rfc2047_encode_callback(uc
, ucsize
,
713 int rfc2047_qp_allow_any(char c
)
718 int rfc2047_qp_allow_comment(char c
)
720 if (c
== '(' || c
== ')' || c
== '"')
725 int rfc2047_qp_allow_word(char c
)
727 return strchr(base64tab
, c
) != NULL
||
728 strchr("*-=_", c
) != NULL
;