2 ** Copyright 1998 - 2004 Double Precision, Inc. See COPYING for
3 ** distribution information.
15 static const char rcsid
[]="$Id: rfc2047.c,v 1.20 2006/01/22 03:33:24 mrsam Exp $";
17 #define RFC2047_ENCODE_FOLDLENGTH 76
19 static const char xdigit
[]="0123456789ABCDEF";
20 static const char base64tab
[]=
21 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
24 static char *rfc2047_search_quote(const char **ptr
)
29 while (**ptr
&& **ptr
!= '?')
31 if ((s
=malloc( *ptr
- p
+ 1)) == 0)
42 c
=toupper( (int)(unsigned char)c
);
44 return (p
? p
-xdigit
:0);
47 static unsigned char decode64tab
[256];
48 static int decode64tab_init
=0;
50 static size_t decodebase64(char *ptr
, size_t cnt
)
56 if (!decode64tab_init
)
58 for (i
=0; i
<256; i
++) decode64tab
[i
]=0;
60 decode64tab
[(int)(base64tab
[i
])]=i
;
61 decode64tab
[ (int)'=' ] = 99;
67 for (j
=0; j
<i
; j
+= 4)
69 int w
=decode64tab
[(int)(unsigned char)ptr
[j
]];
70 int x
=decode64tab
[(int)(unsigned char)ptr
[j
+1]];
71 int y
=decode64tab
[(int)(unsigned char)ptr
[j
+2]];
72 int z
=decode64tab
[(int)(unsigned char)ptr
[j
+3]];
74 a
= (w
<< 2) | (x
>> 4);
75 b
= (x
<< 4) | (y
>> 2);
87 ** This is the main rfc2047 decoding function. It receives rfc2047-encoded
88 ** text, and a callback function. The callback function is repeatedly
89 ** called, each time receiving a piece of decoded text. The decoded
90 ** info includes a text fragment - string, string length arg - followed
91 ** by the character set, followed by a context pointer that is received
92 ** from the caller. If the callback function returns non-zero, rfc2047
93 ** decoding terminates, returning the result code. Otherwise,
94 ** rfc2047_decode returns 0 after a successful decoding (-1 if malloc
98 int rfc2047_decode(const char *text
, int (*func
)(const char *, int,
100 const char *, void *),
109 char *enctext_s
=NULL
, *chset_s
=NULL
, *lang_s
=NULL
;
111 #define FREE_SAVED { \
112 if (enctext_s) free(enctext_s); \
114 if (chset_s) free(chset_s); \
118 while (text
&& *text
)
120 if (text
[0] != '=' || text
[1] != '?')
125 if (text
[0] == '=' && text
[1] == '?')
127 if (!isspace((int)(unsigned char)*text
))
131 if (text
> p
&& !had_last_word
)
136 rc
=(*func
)(enctext_s
,
137 strlen(enctext_s
), chset_s
,
142 rc
=(*func
)(p
, text
-p
, 0, 0, arg
);
149 if ((chset
=rfc2047_search_quote( &text
)) == 0)
155 if ((encoding
=rfc2047_search_quote( &text
)) == 0)
162 if ((enctext
=rfc2047_search_quote( &text
)) == 0)
169 if (*text
== '?' && text
[1] == '=')
171 if (strcmp(encoding
, "Q") == 0 || strcmp(encoding
, "q") == 0)
175 for (q
=r
=enctext
; *q
; )
179 if (*q
== '=' && q
[1] && q
[2])
182 nyb(q
[1])*16+nyb(q
[2]));
194 else if (strcmp(encoding
, "B") == 0 || strcmp(encoding
, "b")==0)
196 enctext
[decodebase64(enctext
, strlen(enctext
))]=0;
199 lang
=strrchr(chset
, '*'); /* RFC 2231 language */
207 * If charset or language is changed, flush buffer.
208 * Otherwise, add decoded string to buffer.
210 if ((lang_s
&& lang
&& strcasecmp(lang_s
, lang
) != 0) ||
211 (!lang_s
&& lang
) || (lang_s
&& !lang
) ||
212 strcasecmp(chset_s
, chset
) != 0)
214 rc
=(*func
)(enctext_s
, strlen(enctext_s
),
215 chset_s
, lang_s
, arg
);
231 if (!(p
=malloc(strlen(enctext_s
) +
232 strlen(enctext
) + 1)))
240 strcat(strcpy(p
, enctext_s
), enctext
);
256 had_last_word
=1; /* Ignore blanks between enc words */
262 rc
=(*func
)(enctext_s
, strlen(enctext_s
), chset_s
, lang_s
, arg
);
271 ** rfc2047_decode_simple just strips out the rfc2047 decoding, throwing away
272 ** the character set. This is done by calling rfc2047_decode twice, once
273 ** to count the number of characters in the decoded text, the second time to
283 static int count_simple(const char *txt
, int len
, const char *chset
,
284 const char *lang
, void *arg
)
286 struct simple_info
*iarg
= (struct simple_info
*)arg
;
293 static int save_simple(const char *txt
, int len
, const char *chset
,
297 struct simple_info
*iarg
= (struct simple_info
*)arg
;
299 memcpy(iarg
->string
+iarg
->index
, txt
, len
);
304 char *rfc2047_decode_simple(const char *text
)
306 struct simple_info info
;
309 if (rfc2047_decode(text
, &count_simple
, &info
))
312 if ((info
.string
=malloc(info
.index
)) == 0) return (0);
314 if (rfc2047_decode(text
, &save_simple
, &info
))
319 info
.string
[info
.index
]=0;
320 return (info
.string
);
324 ** rfc2047_decode_enhanced is like rfc2047_decode_simple, but prefixes the character set
325 ** name before the text, in brackets.
328 static int do_enhanced(const char *txt
, int len
, const char *chset
,
331 int (*func
)(const char *, int, const char *,
332 const char *, void *)
336 struct simple_info
*info
=(struct simple_info
*)arg
;
338 if (chset
&& info
->mychset
&& strcasecmp(chset
, info
->mychset
) == 0)
343 rc
= (*func
)(" [", 2, 0, 0, arg
);
345 rc
= (*func
)(chset
, strlen(chset
), 0, 0, arg
);
347 rc
= (*func
)("*", 1, 0, 0, arg
);
349 rc
= (*func
)(lang
, strlen(lang
), 0, 0, arg
);
351 rc
= (*func
)("] ", 2, 0, 0, arg
);
355 rc
= (*func
)(txt
, len
, 0, 0, arg
);
359 static int count_enhanced(const char *txt
, int len
, const char *chset
,
363 return (do_enhanced(txt
, len
, chset
, lang
, arg
, &count_simple
));
366 static int save_enhanced(const char *txt
, int len
, const char *chset
,
370 return (do_enhanced(txt
, len
, chset
, lang
, arg
, &save_simple
));
373 char *rfc2047_decode_enhanced(const char *text
, const char *mychset
)
375 struct simple_info info
;
377 info
.mychset
=mychset
;
379 if (rfc2047_decode(text
, &count_enhanced
, &info
))
382 if ((info
.string
=malloc(info
.index
)) == 0) return (0);
384 if (rfc2047_decode(text
, &save_enhanced
, &info
))
389 info
.string
[info
.index
]=0;
390 return (info
.string
);
393 void rfc2047_print(const struct rfc822a
*a
,
395 void (*print_func
)(char, void *),
396 void (*print_separator
)(const char *, void *), void *ptr
)
398 rfc822_print_common(a
, &rfc2047_decode_enhanced
, charset
,
399 print_func
, print_separator
, ptr
);
402 static char *a_rfc2047_encode_str(const char *str
, const char *charset
);
404 static void rfc2047_encode_header_do(const struct rfc822a
*a
,
406 void (*print_func
)(char, void *),
407 void (*print_separator
)(const char *, void *), void *ptr
)
409 rfc822_print_common(a
, &a_rfc2047_encode_str
, charset
,
410 print_func
, print_separator
, ptr
);
414 ** When MIMEifying names from an RFC822 list of addresses, strip quotes
415 ** before MIMEifying them, and add them afterwards.
418 static char *a_rfc2047_encode_str(const char *str
, const char *charset
)
422 int (*qp_func
)(char);
425 for (l
=0; str
[l
]; l
++)
429 return (strdup(str
));
433 if (*str
== '"' && str
[l
-1] == '"')
434 qp_func
=rfc2047_qp_allow_word
;
435 else if (*str
== '(' && str
[l
-1] == ')')
436 qp_func
=rfc2047_qp_allow_comment
;
438 return (rfc2047_encode_str(str
, charset
,
439 rfc2047_qp_allow_comment
));
445 memcpy(p
, str
+1, l
-2);
449 if (*r
== '\\' && r
[1])
456 s
=rfc2047_encode_str(p
, charset
, qp_func
);
459 if (save_char
== '(')
461 p
=malloc(strlen(s
)+3);
477 static void count(char c
, void *p
);
478 static void counts2(const char *c
, void *p
);
479 static void save(char c
, void *p
);
480 static void saves2(const char *c
, void *p
);
482 char *rfc2047_encode_header(const struct rfc822a
*a
,
489 rfc2047_encode_header_do(a
, charset
, &count
, &counts2
, &l
);
490 if ((s
=malloc(l
)) == 0) return (0);
492 rfc2047_encode_header_do(a
, charset
, &save
, &saves2
, &p
);
497 static void count(char c
, void *p
)
502 static void counts2(const char *c
, void *p
)
504 if (strcmp(c
, ", ") == 0)
507 while (*c
) count(*c
++, p
);
510 static void save(char c
, void *p
)
516 static void saves2(const char *c
, void *p
)
518 if (strcmp(c
, ", ") == 0)
521 while (*c
) save(*c
++, p
);
524 static int encodebase64(const char *ptr
, size_t len
, const char *charset
,
525 int (*func
)(const char *, size_t, void *), void *arg
,
526 size_t foldlen
, size_t offset
)
528 unsigned char ibuf
[3];
534 if ((rc
=(*func
)("=?", 2, arg
)) ||
535 (rc
=(*func
)(charset
, strlen(charset
), arg
))||
536 (rc
=(*func
)("?B?", 3, arg
)))
538 i
= offset
+ 2 + strlen(charset
) + 3;
543 size_t n
=len
> 3 ? 3:len
;
557 obuf
[0] = base64tab
[ ibuf
[0] >>2 ];
558 obuf
[1] = base64tab
[(ibuf
[0] & 0x03)<<4|ibuf
[1]>>4];
559 obuf
[2] = base64tab
[(ibuf
[1] & 0x0F)<<2|ibuf
[2]>>6];
560 obuf
[3] = base64tab
[ ibuf
[2] & 0x3F ];
566 if ((rc
=(*func
)(obuf
, 4, arg
)))
570 if (foldlen
&& i
+ 2 > foldlen
- 1 + 4)
574 if ((rc
=(*func
)("?=", 2, arg
)))
578 * Encoded-words must be separated by
579 * linear-white-space.
581 if ((rc
=(*func
)(" ", 1, arg
)))
587 #define ISSPACE(i) ((i)=='\t' || (i)=='\r' || (i)=='\n' || (i)==' ')
588 #define DOENCODE(i) (((i) & 0x80) || (i)=='"' || (i)=='=' || \
589 ((unsigned char)(i) < 0x20 && !ISSPACE(i)) || \
594 #include "../unicode/unicode.h"
596 int rfc2047_encode_callback_base64(const char *str
, const char *charset
,
597 int (*qp_allow
)(char),
598 int (*func
)(const char *, size_t, void *),
604 size_t offset
=27; /* FIXME: initial offset for line length */
605 const struct unicode_info
*uiptr
= unicode_find(charset
);
606 unicode_char
*ustr
, *uptr
;
611 for (i
=0; str
[i
]; i
++)
612 if (DOENCODE(str
[i
]))
615 return i
? (*func
)(str
, strlen(str
), arg
): 0;
618 * Multibyte or stateful charsets must be encoded with care of
619 * character boundaries. Charsets with replaceable capability can be
620 * encoded replacing erroneous characters. Otherwise, output without
621 * care of character boundaries or errors.
624 !(uiptr
->flags
& (UNICODE_MB
| UNICODE_SISO
)) ||
625 (!(uiptr
->flags
& UNICODE_REPLACEABLE
) &&
626 !(ustr
= (uiptr
->c2u
)(uiptr
, str
, &dummy
))) ||
627 !(ustr
= (uiptr
->c2u
)(uiptr
, str
, NULL
)))
628 return encodebase64(str
, strlen(str
), charset
, func
, arg
,
629 RFC2047_ENCODE_FOLDLENGTH
, offset
);
634 unicode_char save_uc
;
638 if ((i
= offset
+ 2 + strlen(charset
) + 3) >
639 RFC2047_ENCODE_FOLDLENGTH
- 2)
640 /* Keep room for at least one character. */
641 i
= RFC2047_ENCODE_FOLDLENGTH
- 2;
645 * Figure out where to break encoded-word.
646 * Take a small chunk of Unicode string and convert it back to
647 * the original charset. If the result exceeds line length,
648 * try again with a shorter chunk.
651 while (uptr
[end
] && end
< (RFC2047_ENCODE_FOLDLENGTH
- i
) / 2)
654 * FIXME: Unicode character with `combining'
655 * property etc. should not be treated as
656 * separate character.
662 uptr
[j
] = (unicode_char
)0;
663 wstr
= (uiptr
->u2c
)(uiptr
, uptr
, &dummy
);
667 /* Possibly a part of one character extracted to
668 * multiple Unicode characters (e.g. base unicode
669 * character of one combined character). Try on
680 if (i
+ ((strlen(wstr
) + 3-1) / 3) * 4 + 2 >
681 RFC2047_ENCODE_FOLDLENGTH
- 1)
683 * Encoded string exceeded line length.
684 * Try on shorter chunk.
691 /* Only one character exceeds line length.
692 * Anyway, encode it. */
707 rc
= encodebase64("?", 1, charset
, func
, arg
, 0, 0);
712 rc
= encodebase64(wstr
, strlen(wstr
),
713 charset
, func
, arg
, 0, 0);
725 * Encoded-words must be separated by
726 * linear-white-space.
728 if ((rc
=(*func
)(" ", 1, arg
)))
739 #define DOENCODEWORD(c) \
740 (((c) & 0x80) || (c) == '"' || (unsigned char)(c) <= 0x20 || \
741 (c) == '_' || (c) == '=' || (c) == '?' || !(*qp_allow)(c))
743 int rfc2047_encode_callback(const char *str
, const char *charset
,
744 int (*qp_allow
)(char),
745 int (*func
)(const char *, size_t, void *),
751 const struct unicode_info
*ci
= unicode_find(charset
);
759 if (ci
&& ci
->flags
& UNICODE_SISO
)
760 return rfc2047_encode_callback_base64(str
, charset
, qp_allow
,
764 /* otherwise, output quoted-printable-encoded. */
770 for (i
=0; str
[i
]; i
++)
771 if (!ISSPACE((int)(unsigned char)str
[i
])
775 return ( i
? (*func
)(str
, i
, arg
):0);
777 /* Find start of word */
782 if (ISSPACE((int)(unsigned char)str
[i
]))
790 rc
= (*func
)(str
, i
, arg
);
796 ** Figure out when to stop MIME encoding. Consecutive
797 ** MIME-encoded words are MIME-encoded together.
805 if (ISSPACE((int)(unsigned char)str
[i
]))
810 for (c
=i
; str
[c
] && ISSPACE((int)(unsigned char)str
[c
]);
815 if (ISSPACE((int)(unsigned char)str
[c
]) ||
819 if (str
[c
] == 0 || ISSPACE((int)(unsigned char)str
[c
]))
825 ** Figure out whether base64 is a better choice.
831 if (DOENCODEWORD(str
[j
]))
836 encodebase64(str
, i
, charset
, func
, arg
,
844 /* Output mimeified text, insert spaces at 70+ character
845 ** boundaries for line wrapping.
848 maxlen
=strlen(charset
)+10;
860 if ( (rc
=(*func
)("=?", 2, arg
)) != 0 ||
861 (rc
=(*func
)(charset
, strlen(charset
),
863 (rc
=(*func
)("?Q?", 3, arg
)) != 0)
865 c
+= strlen(charset
)+5;
868 if (DOENCODEWORD(*str
))
873 buf
[1]=xdigit
[ ( *str
>> 4) & 0x0F ];
874 buf
[2]=xdigit
[ *str
& 0x0F ];
876 if ( (rc
=*str
== ' ' ? (*func
)("_", 1, arg
)
877 : (*func
)(buf
, 3, arg
)) != 0)
879 c
+= *str
== ' ' ? 1:3;
885 for (j
=0; j
< i
&& !DOENCODEWORD(str
[j
]); j
++)
888 if ( (rc
=(*func
)(str
, j
, arg
)) != 0)
895 if (i
== 0 || c
>= maxlen
)
897 if ( (rc
=(*func
)("?= ", i
? 3:2, arg
)) != 0)
907 static int count_char(const char *c
, size_t l
, void *p
)
909 size_t *i
=(size_t *)p
;
915 static int save_char(const char *c
, size_t l
, void *p
)
924 char *rfc2047_encode_str(const char *str
, const char *charset
,
925 int (*qp_allow
)(char c
))
930 (void)rfc2047_encode_callback(str
, charset
,
933 if ((s
=malloc(i
)) == 0) return (0);
935 (void)rfc2047_encode_callback(str
, charset
,
942 int rfc2047_qp_allow_any(char c
)
947 int rfc2047_qp_allow_comment(char c
)
949 if (c
== '(' || c
== ')' || c
== '"')
954 int rfc2047_qp_allow_word(char c
)
956 return strchr(base64tab
, c
) != NULL
||
957 strchr("*-=_", c
) != NULL
;