2 ** Copyright 1998 - 2009 Double Precision, Inc. See COPYING for
3 ** distribution information.
14 #include "rfc822hdr.h"
16 #include "../unicode/unicode.h"
19 #include <stringprep.h>
22 static const char rcsid
[]="$Id: rfc2047.c,v 1.23 2009/11/18 03:38:50 mrsam Exp $";
24 #define RFC2047_ENCODE_FOLDLENGTH 76
26 static const char xdigit
[]="0123456789ABCDEF";
27 static const char base64tab
[]=
28 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
30 static char *a_rfc2047_encode_str(const char *str
, const char *charset
,
33 static void rfc2047_encode_header_do(const struct rfc822a
*a
,
35 void (*print_func
)(char, void *),
36 void (*print_separator
)(const char *,
39 rfc822_print_common(a
, &a_rfc2047_encode_str
, charset
,
40 print_func
, print_separator
, ptr
);
43 static char *rfc822_encode_domain_int(const char *pfix
,
53 err
=idna_to_ascii_8z(domain
, &p
, 0);
55 if (err
!= IDNA_SUCCESS
)
69 q
=malloc(strlen(p
)+pfix_len
+1);
78 memcpy(q
, pfix
, pfix_len
);
80 strcpy(q
+ pfix_len
, p
);
85 char *rfc822_encode_domain(const char *address
,
88 const struct unicode_info
*ui
=unicode_find(charset
);
98 p
=unicode_convert(address
, ui
, &unicode_UTF8
);
107 q
=rfc822_encode_domain_int("", 0, p
);
113 q
=rfc822_encode_domain_int(p
, cp
-p
, cp
);
118 static char *a_rfc2047_encode_str(const char *str
, const char *charset
,
125 return rfc822_encode_domain(str
, charset
);
127 for (l
=0; str
[l
]; l
++)
135 for (l
=0; str
[l
]; l
++)
136 if (strchr(RFC822_SPECIALS
, str
[l
]))
140 return (strdup(str
));
142 for (n
=3, l
=0; str
[l
]; l
++)
161 for (n
=1, l
=0; str
[l
]; l
++)
178 return rfc2047_encode_str(str
, charset
, rfc2047_qp_allow_word
);
181 static void count(char c
, void *p
);
182 static void counts2(const char *c
, void *p
);
183 static void save(char c
, void *p
);
184 static void saves2(const char *c
, void *p
);
186 char *rfc2047_encode_header_addr(const struct rfc822a
*a
,
193 rfc2047_encode_header_do(a
, charset
, &count
, &counts2
, &l
);
194 if ((s
=malloc(l
)) == 0) return (0);
196 rfc2047_encode_header_do(a
, charset
, &save
, &saves2
, &p
);
202 char *rfc2047_encode_header_tobuf(const char *name
, /* Header name */
203 const char *header
, /* Header's contents */
206 if (rfc822hdr_is_addr(name
))
213 if ((t
=rfc822t_alloc_new(header
, NULL
, NULL
)) != 0)
215 if ((a
=rfc822a_alloc(t
)) != 0)
217 s
=rfc2047_encode_header_addr(a
, charset
);
225 return rfc2047_encode_str(header
, charset
, rfc2047_qp_allow_word
);
228 static void count(char c
, void *p
)
233 static void counts2(const char *c
, void *p
)
241 while (*c
) count(*c
++, p
);
244 static void save(char c
, void *p
)
250 static void saves2(const char *c
, void *p
)
258 while (*c
) save(*c
++, p
);
261 static int encodebase64(const char *ptr
, size_t len
, const char *charset
,
262 int (*func
)(const char *, size_t, void *), void *arg
,
263 size_t foldlen
, size_t offset
)
265 unsigned char ibuf
[3];
271 if ((rc
=(*func
)("=?", 2, arg
)) ||
272 (rc
=(*func
)(charset
, strlen(charset
), arg
))||
273 (rc
=(*func
)("?B?", 3, arg
)))
275 i
= offset
+ 2 + strlen(charset
) + 3;
280 size_t n
=len
> 3 ? 3:len
;
294 obuf
[0] = base64tab
[ ibuf
[0] >>2 ];
295 obuf
[1] = base64tab
[(ibuf
[0] & 0x03)<<4|ibuf
[1]>>4];
296 obuf
[2] = base64tab
[(ibuf
[1] & 0x0F)<<2|ibuf
[2]>>6];
297 obuf
[3] = base64tab
[ ibuf
[2] & 0x3F ];
303 if ((rc
=(*func
)(obuf
, 4, arg
)))
307 if (foldlen
&& i
+ 2 > foldlen
- 1 + 4)
311 if ((rc
=(*func
)("?=", 2, arg
)))
315 * Encoded-words must be separated by
316 * linear-white-space.
318 if ((rc
=(*func
)(" ", 1, arg
)))
324 #define ISSPACE(i) ((i)=='\t' || (i)=='\r' || (i)=='\n' || (i)==' ')
325 #define DOENCODE(i) (((i) & 0x80) || (i)=='"' || (i)=='=' || \
326 ((unsigned char)(i) < 0x20 && !ISSPACE(i)) || \
329 int rfc2047_encode_callback_base64(const char *str
, const char *charset
,
330 int (*qp_allow
)(char),
331 int (*func
)(const char *, size_t, void *),
337 size_t offset
=27; /* FIXME: initial offset for line length */
338 const struct unicode_info
*uiptr
= unicode_find(charset
);
339 unicode_char
*ustr
, *uptr
;
344 for (i
=0; str
[i
]; i
++)
345 if (DOENCODE(str
[i
]))
348 return i
? (*func
)(str
, strlen(str
), arg
): 0;
351 * Multibyte or stateful charsets must be encoded with care of
352 * character boundaries. Charsets with replaceable capability can be
353 * encoded replacing erroneous characters. Otherwise, output without
354 * care of character boundaries or errors.
357 !(uiptr
->flags
& (UNICODE_MB
| UNICODE_SISO
)) ||
358 (!(uiptr
->flags
& UNICODE_REPLACEABLE
) &&
359 !(ustr
= (uiptr
->c2u
)(uiptr
, str
, &dummy
))) ||
360 !(ustr
= (uiptr
->c2u
)(uiptr
, str
, NULL
)))
361 return encodebase64(str
, strlen(str
), charset
, func
, arg
,
362 RFC2047_ENCODE_FOLDLENGTH
, offset
);
367 unicode_char save_uc
;
371 if ((i
= offset
+ 2 + strlen(charset
) + 3) >
372 RFC2047_ENCODE_FOLDLENGTH
- 2)
373 /* Keep room for at least one character. */
374 i
= RFC2047_ENCODE_FOLDLENGTH
- 2;
378 * Figure out where to break encoded-word.
379 * Take a small chunk of Unicode string and convert it back to
380 * the original charset. If the result exceeds line length,
381 * try again with a shorter chunk.
384 while (uptr
[end
] && end
< (RFC2047_ENCODE_FOLDLENGTH
- i
) / 2)
387 * FIXME: Unicode character with `combining'
388 * property etc. should not be treated as
389 * separate character.
395 uptr
[j
] = (unicode_char
)0;
396 wstr
= (uiptr
->u2c
)(uiptr
, uptr
, &dummy
);
400 /* Possibly a part of one character extracted to
401 * multiple Unicode characters (e.g. base unicode
402 * character of one combined character). Try on
413 if (i
+ ((strlen(wstr
) + 3-1) / 3) * 4 + 2 >
414 RFC2047_ENCODE_FOLDLENGTH
- 1)
416 * Encoded string exceeded line length.
417 * Try on shorter chunk.
424 /* Only one character exceeds line length.
425 * Anyway, encode it. */
440 rc
= encodebase64("?", 1, charset
, func
, arg
, 0, 0);
445 rc
= encodebase64(wstr
, strlen(wstr
),
446 charset
, func
, arg
, 0, 0);
458 * Encoded-words must be separated by
459 * linear-white-space.
461 if ((rc
=(*func
)(" ", 1, arg
)))
471 #define DOENCODEWORD(c) \
472 (((c) & 0x80) || (c) == '"' || (unsigned char)(c) <= 0x20 || \
473 (c) == '_' || (c) == '=' || (c) == '?' || !(*qp_allow)(c))
475 int rfc2047_encode_callback(const char *str
, const char *charset
,
476 int (*qp_allow
)(char),
477 int (*func
)(const char *, size_t, void *),
482 const struct unicode_info
*ci
= unicode_find(charset
);
487 if (ci
&& ci
->flags
& UNICODE_SISO
)
488 return rfc2047_encode_callback_base64(str
, charset
, qp_allow
,
491 /* otherwise, output quoted-printable-encoded. */
497 for (i
=0; str
[i
]; i
++)
498 if (!ISSPACE((int)(unsigned char)str
[i
])
502 return ( i
? (*func
)(str
, i
, arg
):0);
504 /* Find start of word */
509 if (ISSPACE((int)(unsigned char)str
[i
]))
517 rc
= (*func
)(str
, i
, arg
);
523 ** Figure out when to stop MIME decoding. Consecutive
524 ** MIME-encoded words are MIME-encoded together.
532 if (ISSPACE((int)(unsigned char)str
[i
]))
537 for (c
=i
; str
[c
] && ISSPACE((int)(unsigned char)str
[c
]);
542 if (ISSPACE((int)(unsigned char)str
[c
]) ||
546 if (str
[c
] == 0 || ISSPACE((int)(unsigned char)str
[c
]))
552 ** Figure out whether base64 is a better choice.
558 if (DOENCODEWORD(str
[j
]))
563 encodebase64(str
, i
, charset
, func
, arg
,
571 /* Output mimeified text, insert spaces at 70+ character
572 ** boundaries for line wrapping.
575 maxlen
=strlen(charset
)+10;
587 if ( (rc
=(*func
)("=?", 2, arg
)) != 0 ||
588 (rc
=(*func
)(charset
, strlen(charset
),
590 (rc
=(*func
)("?Q?", 3, arg
)) != 0)
592 c
+= strlen(charset
)+5;
595 if (DOENCODEWORD(*str
))
600 buf
[1]=xdigit
[ ( *str
>> 4) & 0x0F ];
601 buf
[2]=xdigit
[ *str
& 0x0F ];
603 if ( (rc
=*str
== ' ' ? (*func
)("_", 1, arg
)
604 : (*func
)(buf
, 3, arg
)) != 0)
606 c
+= *str
== ' ' ? 1:3;
612 for (j
=0; j
< i
&& !DOENCODEWORD(str
[j
]); j
++)
615 if ( (rc
=(*func
)(str
, j
, arg
)) != 0)
622 if (i
== 0 || c
>= maxlen
)
624 if ( (rc
=(*func
)("?= ", i
? 3:2, arg
)) != 0)
634 static int count_char(const char *c
, size_t l
, void *p
)
636 size_t *i
=(size_t *)p
;
642 static int save_char(const char *c
, size_t l
, void *p
)
651 char *rfc2047_encode_str(const char *str
, const char *charset
,
652 int (*qp_allow
)(char c
))
657 (void)rfc2047_encode_callback(str
, charset
,
660 if ((s
=malloc(i
)) == 0) return (0);
662 (void)rfc2047_encode_callback(str
, charset
,
669 int rfc2047_qp_allow_any(char c
)
674 int rfc2047_qp_allow_comment(char c
)
676 if (c
== '(' || c
== ')' || c
== '"')
681 int rfc2047_qp_allow_word(char c
)
683 return strchr(base64tab
, c
) != NULL
||
684 strchr("*-=_", c
) != NULL
;