[hcoop/debian/courier-authlib.git] / libs / rfc822 / rfc2047.c

/*
** Copyright 1998 - 2011 Double Precision, Inc.  See COPYING for
** distribution information.
*/

#include	"rfc822.h"
#include	<stdio.h>
#include	<ctype.h>
#include	<string.h>
#include	<stdlib.h>
#include	<errno.h>
#include	<courier-unicode.h>

#include	"rfc822hdr.h"
#include	"rfc2047.h"
#if LIBIDN
#include <idna.h>
#include <stringprep.h>
#endif


#define	RFC2047_ENCODE_FOLDLENGTH	76

static const char xdigit[]="0123456789ABCDEF";
static const char base64tab[]=
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

static char *a_rfc2047_encode_str(const char *str, const char *charset,
				  int isaddress);

/*
** Walk the address list "a" with rfc822_print_common(), using
** a_rfc2047_encode_str() as the encoding function for each piece.
**
** print_func, print_separator and ptr are forwarded unchanged to
** rfc822_print_common(); charset is forwarded as well, and is the
** character set argument that a_rfc2047_encode_str() receives.
*/
static void rfc2047_encode_header_do(const struct rfc822a *a,
				     const char *charset,
				     void (*print_func)(char, void *),
				     void (*print_separator)(const char *,
							     void *), void *ptr)
{
	rfc822_print_common(a, &a_rfc2047_encode_str, charset,
			    print_func, print_separator, ptr);
}

/*
** Build a newly allocated string made of the first pfix_len bytes of pfix
** followed by the encoded form of domain.
**
** When compiled with LIBIDN the domain is run through idna_to_ascii_8z();
** otherwise it is appended as is.
**
** Returns a malloc()ed, NUL-terminated string that the caller must free(),
** or NULL on failure.  errno is set to EINVAL when idna_to_ascii_8z()
** does not report IDNA_SUCCESS.
*/
static char *rfc822_encode_domain_int(const char *pfix,
				      size_t pfix_len,
				      const char *domain)
{
	const char *encoded=domain;
	size_t encoded_len;
	char *result;

#if LIBIDN
	int err;
	char *ace=NULL;
	size_t padded_size=strlen(domain)+16;
	char *padded=calloc(1, padded_size);

	if (!padded)
		return NULL;

	/*
	** Invalid UTF-8 can make libidn go off the deep end. The copy
	** handed to it is followed by zero padding as a workaround.
	*/

	strcpy(padded, domain);

	err=idna_to_ascii_8z(padded, &ace, 0);
	free(padded);

	if (err != IDNA_SUCCESS)
	{
		errno=EINVAL;
		return NULL;
	}
	encoded=ace;
#endif

	/*
	** Without LIBIDN the domain is copied straight into the result;
	** no intermediate duplicate is needed.
	*/

	encoded_len=strlen(encoded);
	result=malloc(pfix_len+encoded_len+1);

	if (result)
	{
		if (pfix_len)
			memcpy(result, pfix, pfix_len);

		/* The +1 brings the terminating NUL along. */
		memcpy(result+pfix_len, encoded, encoded_len+1);
	}

#if LIBIDN
	free(ace);
#endif
	return result;
}

/*
** Convert "address" from "charset" to UTF-8, then encode its domain with
** rfc822_encode_domain_int().
**
** Everything up to and including the first '@' is passed through as the
** prefix.  If there is no '@', the whole string is treated as the domain.
**
** Returns the string produced by rfc822_encode_domain_int(), or NULL if
** the character set conversion or the encoding fails.
*/
char *rfc822_encode_domain(const char *address,
			   const char *charset)
{
	char *utf8=unicode_convert_tobuf(address, charset, "utf-8", NULL);
	const char *prefix="";
	const char *domain;
	size_t prefix_len=0;
	char *at, *encoded;

	if (utf8 == NULL)
		return NULL;

	domain=utf8;
	at=strchr(utf8, '@');

	if (at != NULL)
	{
		/* The prefix includes the '@' itself. */
		domain=at+1;
		prefix=utf8;
		prefix_len=domain-utf8;
	}

	encoded=rfc822_encode_domain_int(prefix, prefix_len, domain);
	free(utf8);
	return encoded;
}

/*
** Encoding function handed to rfc822_print_common().
**
** - If isaddress is set, the string goes to rfc822_encode_domain().
** - If the string contains a byte with the high bit set, it is
**   encoded by rfc2047_encode_str().
** - If it contains none of the RFC822_SPECIALS characters, a plain copy
**   is returned.
** - Otherwise it is wrapped in double quotes, with each '"' and '\'
**   preceded by a backslash.
**
** Returns a malloc()ed string, or NULL on failure.
*/
static char *a_rfc2047_encode_str(const char *str, const char *charset,
				  int isaddress)
{
	const char *s;
	size_t escapes;
	char *quoted, *out;

	if (isaddress)
		return rfc822_encode_domain(str, charset);

	for (s=str; *s; ++s)
		if (*s & 0x80)
			return rfc2047_encode_str(str, charset,
						  rfc2047_qp_allow_word);

	/* Seven-bit text without specials needs no quoting at all. */

	if (str[strcspn(str, RFC822_SPECIALS)] == 0)
		return strdup(str);

	escapes=0;

	for (s=str; *s; ++s)
		if (*s == '"' || *s == '\\')
			++escapes;

	/* Two quotes, the trailing NUL, and one backslash per escape. */

	quoted=malloc(strlen(str)+escapes+3);

	if (quoted == NULL)
		return NULL;

	out=quoted;
	*out++='"';

	for (s=str; *s; ++s)
	{
		if (*s == '"' || *s == '\\')
			*out++='\\';

		*out++= *s;
	}

	*out++='"';
	*out=0;

	return quoted;
}

static void count(char c, void *p);
static void counts2(const char *c, void *p);
static void save(char c, void *p);
static void saves2(const char *c, void *p);

/*
** Encode a parsed address list into a single malloc()ed string.
**
** rfc2047_encode_header_do() is run twice: first with the counting
** callbacks to size the buffer, then with the saving callbacks to fill it.
**
** Returns the NUL-terminated result, or NULL if malloc() fails.
*/
char *rfc2047_encode_header_addr(const struct rfc822a *a,
			    const char *charset)
{
	size_t needed=1;	/* Starts at one for the trailing NUL */
	char *buf, *wp;

	rfc2047_encode_header_do(a, charset, &count, &counts2, &needed);

	buf=malloc(needed);

	if (buf == NULL)
		return NULL;

	wp=buf;
	rfc2047_encode_header_do(a, charset, &save, &saves2, &wp);
	*wp=0;

	return buf;
}


/*
** Encode the contents of a header into a malloc()ed string.
**
** name    - header name, checked with rfc822hdr_is_addr()
** header  - header's contents
** charset - character set of the header's contents
**
** Headers for which rfc822hdr_is_addr() is true are tokenized, parsed
** into an address list, and encoded by rfc2047_encode_header_addr().
** All other headers go straight to rfc2047_encode_str().
**
** Returns NULL if tokenizing, parsing or encoding fails.
*/
char *rfc2047_encode_header_tobuf(const char *name, /* Header name */
				  const char *header, /* Header's contents */
				  const char *charset)
{
	struct rfc822t *tokens;
	struct rfc822a *addrs;
	char *encoded=NULL;

	if (!rfc822hdr_is_addr(name))
		return rfc2047_encode_str(header, charset,
					  rfc2047_qp_allow_word);

	tokens=rfc822t_alloc_new(header, NULL, NULL);

	if (tokens == NULL)
		return NULL;

	addrs=rfc822a_alloc(tokens);

	if (addrs != NULL)
	{
		encoded=rfc2047_encode_header_addr(addrs, charset);
		rfc822a_free(addrs);
	}

	rfc822t_free(tokens);
	return encoded;
}

/*
** Character callback for the sizing pass: p points to a size_t that is
** incremented by one.  The character itself is ignored.
*/
static void count(char c, void *p)
{
	size_t *total=p;

	(void)c;
	*total += 1;
}

/*
** Separator callback for the sizing pass.  p points to a size_t.
**
** Counts what saves2() writes: an optional leading comma, a newline, a
** space, and then the rest of the separator string.
*/
static void counts2(const char *c, void *p)
{
	size_t *total=p;

	if (*c == ',')
	{
		++*total;
		++c;
	}

	/* Newline and space that fold the line */
	*total += 2;

	*total += strlen(c);
}

/*
** Character callback for the saving pass: p points to a char * write
** cursor.  The character is stored at the cursor, which then advances.
*/
static void save(char c, void *p)
{
	char **cursor=p;
	char *dest= *cursor;

	*dest=c;
	*cursor=dest+1;
}

/*
** Separator callback for the saving pass.  p points to a char * write
** cursor.
**
** A leading comma, if present, is written first, so that it stays on the
** current line.  A newline and a space follow, then the rest of the
** separator string.  The cursor is left just past the last byte written.
*/
static void saves2(const char *c, void *p)
{
	char **cursor=p;
	char *out= *cursor;
	size_t rest;

	if (*c == ',')
		*out++ = *c++;

	*out++='\n';
	*out++=' ';

	rest=strlen(c);
	memcpy(out, c, rest);

	*cursor=out+rest;
}

/*
** Emit len bytes at ptr as one "=?charset?B?...?=" encoded word, using
** base64.
**
** Output is delivered through func(data, length, arg).  The first nonzero
** value returned by func stops the encoding and is returned to the caller;
** otherwise 0 is returned.  qp_allow is unused; it is part of the signature
** shared with encodeqp().
*/
static int encodebase64(const char *ptr, size_t len, const char *charset,
			int (*qp_allow)(char),
			int (*func)(const char *, size_t, void *), void *arg)
{
	const unsigned char *in=(const unsigned char *)ptr;
	int rc;

	(void)qp_allow;

	rc=(*func)("=?", 2, arg);
	if (rc == 0)
		rc=(*func)(charset, strlen(charset), arg);
	if (rc == 0)
		rc=(*func)("?B?", 3, arg);
	if (rc)
		return rc;

	while (len)
	{
		size_t take= len < 3 ? len:3;
		unsigned b0=in[0];
		unsigned b1= take > 1 ? in[1]:0;	/* Missing bytes read as 0 */
		unsigned b2= take > 2 ? in[2]:0;
		char quad[4];

		quad[0]=base64tab[b0 >> 2];
		quad[1]=base64tab[(b0 & 0x03) << 4 | b1 >> 4];

		/* A short final group is padded with '=' */
		quad[2]= take > 1 ? base64tab[(b1 & 0x0F) << 2 | b2 >> 6]:'=';
		quad[3]= take > 2 ? base64tab[b2 & 0x3F]:'=';

		in += take;
		len -= take;

		rc=(*func)(quad, 4, arg);
		if (rc)
			return rc;
	}

	return (*func)("?=", 2, arg);
}

/*
** ISSPACE(i): nonzero if i is a tab, carriage return, newline or space.
** The argument is evaluated more than once.
*/
#define ISSPACE(i) ((i)=='\t' || (i)=='\r' || (i)=='\n' || (i)==' ')
/*
** DOENCODEWORD(c): nonzero if c cannot appear literally in an encoded
** word: anything below 0x20 or above 0x7F, the characters '"', '_', '='
** and '?', and anything for which the qp_allow callback returns zero.
**
** The expansion refers to a function pointer named qp_allow, so the macro
** can only be used where one is in scope.  The argument is evaluated more
** than once, and qp_allow is consulted only if the earlier tests fail.
*/
#define DOENCODEWORD(c) \
	((c) < 0x20 || (c) > 0x7F || (c) == '"' || \
	 (c) == '_' || (c) == '=' || (c) == '?' || !(*qp_allow)((char)c))

/*
** Emit len bytes at ptr as one "=?charset?Q?...?=" encoded word.
**
** Runs of bytes that need no encoding are passed through in one call.
** A space becomes "_", and any other byte flagged by DOENCODEWORD()
** becomes "=XX" with two uppercase hexadecimal digits.
**
** Output is delivered through func(data, length, arg).  The first nonzero
** value returned by func stops the encoding and is returned to the caller.
*/
static int encodeqp(const char *ptr, size_t len,
		    const char *charset,
		    int (*qp_allow)(char),
		    int (*func)(const char *, size_t, void *), void *arg)
{
	size_t pos=0;
	int rc;

	rc=(*func)("=?", 2, arg);
	if (rc == 0)
		rc=(*func)(charset, strlen(charset), arg);
	if (rc == 0)
		rc=(*func)("?Q?", 3, arg);
	if (rc)
		return rc;

	while (pos < len)
	{
		size_t stop=pos;

		/* Find the end of the run that can be copied literally. */

		while (stop < len &&
		       !(ptr[stop] == ' ' || DOENCODEWORD(ptr[stop])))
			++stop;

		if (stop != pos)
		{
			rc=(*func)(ptr+pos, stop-pos, arg);

			if (rc)
				return rc;
			if (stop >= len)
				break;
			pos=stop;
		}

		/* ptr[pos] is either a space or a byte to be hex-encoded. */

		if (ptr[pos] == ' ')
			rc=(*func)("_", 1, arg);
		else
		{
			unsigned char byte=(unsigned char)ptr[pos];
			char hex[3];

			hex[0]='=';
			hex[1]=xdigit[byte >> 4];
			hex[2]=xdigit[byte & 0x0F];

			rc=(*func)(hex, 3, arg);
		}

		if (rc)
			return rc;

		++pos;
	}

	return (*func)("?=", 2, arg);
}

/*
** Scan one word and report whether it has to be RFC2047-encoded.
**
** The word starts at uc[*word_ptr] and runs up to the next ISSPACE()
** character or the end of the buffer.  On return *word_ptr is the offset
** just past the word.
**
** Returns 1 if any character of the word is flagged by DOENCODEWORD(),
** 0 otherwise.
*/

static int encode_word(const unicode_char *uc,
		       size_t ucsize,
		       int (*qp_allow)(char),
		       size_t *word_ptr)
{
	size_t pos= *word_ptr;
	int needs_encoding=0;

	while (pos < ucsize && !ISSPACE(uc[pos]))
	{
		/* Keep scanning after a hit: the end of the word is needed. */

		if (DOENCODEWORD(uc[pos]))
			needs_encoding=1;

		++pos;
	}

	*word_ptr=pos;
	return needs_encoding;
}

/*
** Work out how far the next run of words extends, and whether it should
** be RFC2047-encoded.
**
** The first word, starting at uc[*word_ptr], is examined with
** encode_word().  If it needs no encoding, the run is just that word.
** Otherwise the run is extended over each following word, and the
** whitespace before it, for as long as encode_word() keeps flagging
** the words.  Whitespace after the last flagged word is not included.
**
** On return *word_ptr is the offset of the end of the run.  The return
** value is encode_word()'s verdict for the first word.
*/

static int encode_words(const unicode_char *uc,
			size_t ucsize,
			int (*qp_allow)(char),
			size_t *word_ptr)
{
	size_t run_end= *word_ptr;
	size_t scan;
	int flag=encode_word(uc, ucsize, qp_allow, &run_end);

	if (flag)
	{
		for (scan=run_end; scan < ucsize; )
		{
			size_t word_end;

			if (ISSPACE(uc[scan]))
			{
				++scan;
				continue;
			}

			word_end=scan;

			if (!encode_word(uc, ucsize, qp_allow, &word_end))
				break;

			/* Another flagged word: it joins the run. */
			run_end=scan=word_end;
		}
	}

	*word_ptr=run_end;
	return flag;
}

/*
** Encode a sequence of words with a given encoder (encodeqp or
** encodebase64), splitting it into several encoded words separated by
** a single space.
**
** Each piece holds at most (RFC2047_ENCODE_FOLDLENGTH-offset)/2 unicode
** characters; offset applies to the first piece only and is zero for the
** rest.  The callers in this file pass an offset of 27 or 0.
**
** Each piece is converted to charset and handed to encoder, which writes
** through func(data, length, arg).  The first nonzero status from the
** conversion, from func or from encoder is returned; otherwise 0.
*/
static int do_encode_words_method(const unicode_char *uc,
				  size_t ucsize,
				  const char *charset,
				  int (*qp_allow)(char),
				  size_t offset,
				  int (*encoder)(const char *ptr, size_t len,
						 const char *charset,
						 int (*qp_allow)(char),
						 int (*func)(const char *,
							     size_t, void *),
						 void *arg),
				  int (*func)(const char *, size_t, void *),
				  void *arg)
{
	int need_separator=0;

	while (ucsize)
	{
		char *chunk;
		size_t chunk_bytes;
		size_t nchars;
		int rc;

		if (need_separator)
		{
			rc=(*func)(" ", 1, arg);

			if (rc)
				return rc;
		}
		need_separator=1;

		nchars=(RFC2047_ENCODE_FOLDLENGTH-offset)/2;

		if (nchars >= ucsize)
			nchars=ucsize;
		else
		{
			/*
			** Do not split rfc2047-encoded words in the middle
			** of a grapheme: back up to the nearest position
			** where unicode_grapheme_break() reports a break.
			** If there is none, the original length is kept.
			*/

			size_t brk=nchars;

			while (brk > 0 &&
			       !unicode_grapheme_break(uc[brk-1], uc[brk]))
				--brk;

			if (brk > 0)
				nchars=brk;
		}

		rc=unicode_convert_fromu_tobuf(uc, nchars, charset,
						 &chunk, &chunk_bytes,
						 NULL);
		if (rc != 0)
			return rc;

		/* A trailing NUL in the converted buffer is not payload. */

		if (chunk_bytes && chunk[chunk_bytes-1] == 0)
			--chunk_bytes;

		rc=(*encoder)(chunk, chunk_bytes, charset, qp_allow,
			      func, arg);
		free(chunk);

		if (rc)
			return rc;

		offset=0;
		uc += nchars;
		ucsize -= nchars;
	}

	return 0;
}

/*
** Output callback that only measures: arg points to a size_t, which is
** increased by n.  The data itself is ignored.  Always returns 0.
*/
static int cnt_conv(const char *dummy, size_t n, void *arg)
{
	size_t *total=arg;

	(void)dummy;
	*total= *total+n;
	return 0;
}

/*
** Encode, or not encode, words.
**
** The unicode characters are first converted to charset; a conversion
** failure is returned as is.  If flag is zero, the converted text, less
** any trailing NUL, is passed to func unchanged.
**
** If flag is nonzero, the words are measured in both quoted-printable and
** base64 form.  The shorter form is then emitted, with base64 winning
** a tie.
**
** Returns 0, or the first nonzero status encountered.
*/

static int do_encode_words(const unicode_char *uc,
			   size_t ucsize,
			   const char *charset,
			   int flag,
			   int (*qp_allow)(char),
			   size_t offset,
			   int (*func)(const char *, size_t, void *),
			   void *arg)
{
	char *plain;
	size_t plain_len;
	size_t qp_total=0, b64_total=0;
	int rc;

	rc=unicode_convert_fromu_tobuf(uc, ucsize, charset,
					 &plain, &plain_len,
					 NULL);
	if (rc != 0)
		return rc;

	if (!flag) /* Nothing to encode: pass the converted text along */
	{
		if (plain_len && plain[plain_len-1] == 0)
			--plain_len;

		rc=(*func)(plain, plain_len, arg);
		free(plain);
		return rc;
	}
	free(plain);

	/* Dry runs that only add up the size of each encoding. */

	rc=do_encode_words_method(uc, ucsize, charset, qp_allow, offset,
				  &encodeqp, cnt_conv, &qp_total);
	if (rc == 0)
		rc=do_encode_words_method(uc, ucsize, charset, qp_allow,
					  offset,
					  &encodebase64, cnt_conv,
					  &b64_total);
	if (rc)
		return rc;

	return do_encode_words_method(uc, ucsize, charset, qp_allow, offset,
				      b64_total > qp_total
				      ? encodeqp:encodebase64,
				      func, arg);
}

/*
** RFC2047-encoding pass.
**
** Walks the unicode buffer.  Whitespace characters are passed to func one
** at a time.  Everything else is split into runs by encode_words() and
** handed to do_encode_words(), which either copies or encodes the run.
**
** Only the first run is given a nonzero line offset.
**
** Returns 0, or the first nonzero status from func or do_encode_words().
*/
static int rfc2047_encode_callback(const unicode_char *uc,
				   size_t ucsize,
				   const char *charset,
				   int (*qp_allow)(char),
				   int (*func)(const char *, size_t, void *),
				   void *arg)
{
	size_t	offset=27; /* FIXME: initial offset for line length */

	while (ucsize)
	{
		size_t span;
		int flag;
		int rc;

		if (ISSPACE(*uc))
		{
			/* Pass along all the whitespace */

			char ws= *uc;

			++uc;
			--ucsize;

			rc=(*func)(&ws, 1, arg);

			if (rc != 0)
				return rc;
			continue;
		}

		/* Find the next run and whether it needs to be encoded. */

		span=0;
		flag=encode_words(uc, ucsize, qp_allow, &span);

		rc=do_encode_words(uc, span, charset, flag,
				   qp_allow, offset,
				   func, arg);
		if (rc != 0)
			return rc;

		offset=0;
		uc += span;
		ucsize -= span;
	}

	return 0;
}


/*
** Output callback for the sizing pass of rfc2047_encode_str(): p points
** to a size_t, which grows by l.  The data is ignored.  Always returns 0.
*/
static int count_char(const char *c, size_t l, void *p)
{
	(void)c;
	*(size_t *)p += l;
	return 0;
}

/*
** Output callback for the saving pass of rfc2047_encode_str(): p points
** to a char * write cursor.  l bytes from c are copied to the cursor,
** which then advances by l.  Always returns 0.
*/
static int save_char(const char *c, size_t l, void *p)
{
	char **cursor=p;
	char *dest= *cursor;

	memcpy(dest, c, l);
	*cursor=dest+l;
	return 0;
}

/*
** RFC2047-encode a string.
**
** str      - NUL-terminated text in the given charset
** charset  - character set of str, also named in the encoded words
** qp_allow - returns nonzero for characters that may appear unencoded
**
** The string is converted to unicode, then rfc2047_encode_callback() is
** run twice: once to measure the output and once to write it into a
** buffer of that size.
**
** Returns a malloc()ed, NUL-terminated string, or NULL if the conversion,
** the measuring pass or malloc() fails.
*/
char *rfc2047_encode_str(const char *str, const char *charset,
			 int (*qp_allow)(char c))
{
	unicode_char *uc;
	size_t ucsize;
	int err;
	size_t needed=1;	/* Starts at one for the trailing NUL */
	char *result=NULL;
	char *wp;

	/* Convert string to unicode */

	if (unicode_convert_tou_tobuf(str, strlen(str), charset,
					&uc, &ucsize, &err))
		return NULL;

	if (rfc2047_encode_callback(uc, ucsize,
				    charset,
				    qp_allow,
				    &count_char, &needed) == 0 &&
	    (result=malloc(needed)) != NULL)
	{
		/*
		** Same walk again, this time writing.  Its status is
		** deliberately not checked.
		*/
		wp=result;
		(void)rfc2047_encode_callback(uc, ucsize,
					      charset,
					      qp_allow,
					      &save_char, &wp);
		*wp=0;
	}

	free(uc);
	return result;
}

/*
** qp_allow callback that imposes no restriction of its own: returns 1 for
** every character.
*/
int rfc2047_qp_allow_any(char c)
{
	(void)c;
	return 1;
}

/*
** qp_allow callback that rejects parentheses and the double quote:
** returns 0 for '(', ')' and '"', and 1 for everything else.
*/
int rfc2047_qp_allow_comment(char c)
{
	switch (c) {
	case '(':
	case ')':
	case '"':
		return 0;
	default:
		return 1;
	}
}

/*
** qp_allow callback for words.
**
** Returns 1 for ASCII letters, ASCII digits and the characters
** '+', '/', '*', '-', '=' and '_'; returns 0 for anything else.
**
** NUL is rejected explicitly: strchr() treats the terminator as part of
** the string it searches, so a bare strchr() lookup would report NUL as
** an allowed character.
*/
int rfc2047_qp_allow_word(char c)
{
	/*
	** The set is spelled out here instead of being borrowed from the
	** base64 alphabet, which only happens to contain the same letters,
	** digits, '+' and '/'.
	*/
	static const char allowed[]=
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"abcdefghijklmnopqrstuvwxyz"
		"0123456789+/"
		"*-=_";

	if (c == '\0')
		return 0;

	return strchr(allowed, c) != NULL;
}
Commit	Line	Data
b0322a85 CE	1	/*
	2	** Copyright 1998 - 2011 Double Precision, Inc. See COPYING for
	3	** distribution information.
	4	*/
	5
	6	#include "rfc822.h"
	7	#include <stdio.h>
	8	#include <ctype.h>
	9	#include <string.h>
	10	#include <stdlib.h>
	11	#include <errno.h>
d50284c4	12	#include <courier-unicode.h>
b0322a85 CE	13
	14	#include "rfc822hdr.h"
	15	#include "rfc2047.h"
b0322a85 CE	16	#if LIBIDN
	17	#include <idna.h>
	18	#include <stringprep.h>
	19	#endif
	20
	21
	22	#define RFC2047_ENCODE_FOLDLENGTH 76
	23
	24	static const char xdigit[]="0123456789ABCDEF";
	25	static const char base64tab[]=
	26	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
	27
	28	static char a_rfc2047_encode_str(const char str, const char *charset,
	29	int isaddress);
	30
	31	static void rfc2047_encode_header_do(const struct rfc822a *a,
	32	const char *charset,
	33	void (print_func)(char, void ),
	34	void (print_separator)(const char ,
	35	void ), void ptr)
	36	{
	37	rfc822_print_common(a, &a_rfc2047_encode_str, charset,
	38	print_func, print_separator, ptr);
	39	}
	40
	41	static char rfc822_encode_domain_int(const char pfix,
	42	size_t pfix_len,
	43	const char *domain)
	44	{
	45	char *q;
	46
	47	#if LIBIDN
	48	int err;
	49	char *p;
	50	size_t s=strlen(domain)+16;
	51	char *cpy=malloc(s);
	52
	53	if (!cpy)
	54	return NULL;
	55
	56	/*
	57	** Invalid UTF-8 can make libidn go off the deep end. Add
	58	** padding as a workaround.
	59	*/
	60
	61	memset(cpy, 0, s);
	62	strcpy(cpy, domain);
	63
	64	err=idna_to_ascii_8z(cpy, &p, 0);
	65	free(cpy);
	66
	67	if (err != IDNA_SUCCESS)
	68	{
	69	errno=EINVAL;
	70	return NULL;
	71	}
	72	#else
	73	char *p;
	74
	75	p=strdup(domain);
	76
	77	if (!p)
	78	return NULL;
	79	#endif
80
81	q=malloc(strlen(p)+pfix_len+1);
82
83	if (!q)
84	{
85	free(p);
86	return NULL;
87	}
88
89	if (pfix_len)
90	memcpy(q, pfix, pfix_len);
91
92	strcpy(q + pfix_len, p);
93	free(p);
94	return q;
95	}
96
97	char rfc822_encode_domain(const char address,
98	const char *charset)
99	{
d50284c4	100	char *p=unicode_convert_tobuf(address, charset, "utf-8", NULL);
b0322a85 CE	101	char cp, q;
	102
	103	if (!p)
	104	return NULL;
	105
	106	cp=strchr(p, '@');
	107
	108	if (!cp)
	109	{
	110	q=rfc822_encode_domain_int("", 0, p);
	111	free(p);
	112	return q;
	113	}
	114
	115	++cp;
	116	q=rfc822_encode_domain_int(p, cp-p, cp);
	117	free(p);
	118	return q;
	119	}
	120
	121	static char a_rfc2047_encode_str(const char str, const char *charset,
	122	int isaddress)
	123	{
	124	size_t l;
	125	char *p;
	126
	127	if (isaddress)
	128	return rfc822_encode_domain(str, charset);
	129
	130	for (l=0; str[l]; l++)
	131	if (str[l] & 0x80)
	132	break;
	133
	134	if (str[l] == 0)
	135	{
	136	size_t n;
	137
	138	for (l=0; str[l]; l++)
	139	if (strchr(RFC822_SPECIALS, str[l]))
	140	break;
	141
	142	if (str[l] == 0)
	143	return (strdup(str));
	144
	145	for (n=3, l=0; str[l]; l++)
	146	{
	147	switch (str[l]) {
	148	case '"':
	149	case '\\':
	150	++n;
	151	break;
	152	}
	153
	154	++n;
	155	}
	156
	157	p=malloc(n);
	158
	159	if (!p)
	160	return NULL;
	161
	162	p[0]='"';
	163
	164	for (n=1, l=0; str[l]; l++)
165	{
166	switch (str[l]) {
167	case '"':
168	case '\\':
169	p[n++]='\\';
170	break;
171	}
172
173	p[n++]=str[l];
174	}
175	p[n++]='"';
176	p[n]=0;
177
178	return (p);
179	}
180
181	return rfc2047_encode_str(str, charset, rfc2047_qp_allow_word);
182	}
183
184	static void count(char c, void *p);
185	static void counts2(const char c, void p);
186	static void save(char c, void *p);
187	static void saves2(const char c, void p);
188
189	char rfc2047_encode_header_addr(const struct rfc822a a,
190	const char *charset)
191	{
192	size_t l;
193	char s, p;
194
195	l=1;
196	rfc2047_encode_header_do(a, charset, &count, &counts2, &l);
197	if ((s=malloc(l)) == 0) return (0);
198	p=s;
199	rfc2047_encode_header_do(a, charset, &save, &saves2, &p);
200	*p=0;
201	return (s);
202	}
203
204
205	char rfc2047_encode_header_tobuf(const char name, /* Header name */
206	const char header, / Header's contents */
207	const char *charset)
208	{
209	if (rfc822hdr_is_addr(name))
210	{
211	char *s=0;
212
213	struct rfc822t *t;
214	struct rfc822a *a;
215
216	if ((t=rfc822t_alloc_new(header, NULL, NULL)) != 0)
217	{
218	if ((a=rfc822a_alloc(t)) != 0)
219	{
220	s=rfc2047_encode_header_addr(a, charset);
221	rfc822a_free(a);
222	}
223	rfc822t_free(t);
224	}
225	return s;
226	}
227
228	return rfc2047_encode_str(header, charset, rfc2047_qp_allow_word);
229	}
230
231	static void count(char c, void *p)
232	{
233	++(size_t )p;
234	}
235
236	static void counts2(const char c, void p)
237	{
238	if (*c == ',')
239	count(*c++, p);
240
241	count('\n', p);
242	count(' ', p);
243
244	while (c) count(c++, p);
245	}
246
247	static void save(char c, void *p)
248	{
249	(char )p=c;
250	++(char *)p;
251	}
252
253	static void saves2(const char c, void p)
254	{
255	if (*c == ',')
256	save(*c++, p);
257
258	save('\n', p);
259	save(' ', p);
260
261	while (c) save(c++, p);
262	}
263
264	static int encodebase64(const char ptr, size_t len, const char charset,
265	int (*qp_allow)(char),
266	int (func)(const char , size_t, void ), void arg)
267	{
268	unsigned char ibuf[3];
269	char obuf[4];
270	int rc;
271
272	if ((rc=(*func)("=?", 2, arg)) \|\|
273	(rc=(*func)(charset, strlen(charset), arg))\|\|
274	(rc=(*func)("?B?", 3, arg)))
275	return rc;
276
277	while (len)
278	{
279	size_t n=len > 3 ? 3:len;
280
281	ibuf[0]= ptr[0];
282	if (n>1)
283	ibuf[1]=ptr[1];
284	else
285	ibuf[1]=0;
286	if (n>2)
287	ibuf[2]=ptr[2];
288	else
289	ibuf[2]=0;
290	ptr += n;
291	len -= n;
292
293	obuf[0] = base64tab[ ibuf[0] >>2 ];
294	obuf[1] = base64tab[(ibuf[0] & 0x03)<<4\|ibuf[1]>>4];
295	obuf[2] = base64tab[(ibuf[1] & 0x0F)<<2\|ibuf[2]>>6];
296	obuf[3] = base64tab[ ibuf[2] & 0x3F ];
297	if (n < 2)
298	obuf[2] = '=';
299	if (n < 3)
300	obuf[3] = '=';
301
302	if ((rc=(*func)(obuf, 4, arg)))
303	return rc;
304	}
305
306	if ((rc=(*func)("?=", 2, arg)))
307	return rc;
308	return 0;
309	}
310
311	#define ISSPACE(i) ((i)=='\t' \|\| (i)=='\r' \|\| (i)=='\n' \|\| (i)==' ')
312	#define DOENCODEWORD(c) \
313	((c) < 0x20 \|\| (c) > 0x7F \|\| (c) == '"' \|\| \
314	(c) == '_' \|\| (c) == '=' \|\| (c) == '?' \|\| !(*qp_allow)((char)c))
315
316	/*
317	** Encode a character stream using quoted-printable encoding.
318	*/
319	static int encodeqp(const char *ptr, size_t len,
320	const char *charset,
321	int (*qp_allow)(char),
322	int (func)(const char , size_t, void ), void arg)
323	{
324	size_t i;
325	int rc;
326	char buf[3];
327
328	if ((rc=(*func)("=?", 2, arg)) \|\|
329	(rc=(*func)(charset, strlen(charset), arg))\|\|
330	(rc=(*func)("?Q?", 3, arg)))
331	return rc;
332
333	for (i=0; i<len; ++i)
334	{
335	size_t j;
336
337	for (j=i; j<len; ++j)
338	{
339	if (ptr[j] == ' ' \|\| DOENCODEWORD(ptr[j]))
340	break;
341	}
342
343	if (j > i)
344	{
345	rc=(*func)(ptr+i, j-i, arg);
346
347	if (rc)
348	return rc;
349	if (j >= len)
350	break;
351	}
352	i=j;
353
354	if (ptr[i] == ' ')
355	rc=(*func)("_", 1, arg);
356	else
357	{
358	buf[0]='=';
359	buf[1]=xdigit[ ( ptr[i] >> 4) & 0x0F ];
360	buf[2]=xdigit[ ptr[i] & 0x0F ];
361
362	rc=(*func)(buf, 3, arg);
363	}
364
365	if (rc)
366	return rc;
367	}
368
369	return (*func)("?=", 2, arg);
370	}
371
372	/*
373	** Calculate whether the next word should be RFC2047-encoded.
374	**
375	** Returns 0 if not, 1 if any character in the next word is flagged by
376	** DOENCODEWORD().
377	*/
378
379	static int encode_word(const unicode_char *uc,
380	size_t ucsize,
381	int (*qp_allow)(char),
382
383	/*
384	** Points to the starting offset of word in uc.
385	** At exit, points to the end of the word in uc.
386	*/
387	size_t *word_ptr)
388	{
389	size_t i;
390	int encode=0;
391
392	for (i=*word_ptr; i<ucsize; ++i)
393	{
394	if (ISSPACE(uc[i]))
395	break;
396
397	if (DOENCODEWORD(uc[i]))
398	encode=1;
399	}
400
401	*word_ptr=i;
402	return encode;
403	}
404
405	/*
406	** Calculate whether the next sequence of words should be RFC2047-encoded.
407	**
408	** Whatever encode_word() returns for the first word, look at the next word
409	** and keep going as long as encode_word() keeps returning the same value.
410	*/
411
412	static int encode_words(const unicode_char *uc,
413	size_t ucsize,
414	int (*qp_allow)(char),
415
416	/*
417	** Points to the starting offset of words in uc.
418	** At exit, points to the end of the words in uc.
419	*/
420
421	size_t *word_ptr)
422	{
423	size_t i= *word_ptr, j, k;
424
425	int flag=encode_word(uc, ucsize, qp_allow, &i);
426
427	if (!flag)
428	{
429	*word_ptr=i;
430	return flag;
431	}
432
433	j=i;
434
435	while (j < ucsize)
436	{
437	if (ISSPACE(uc[j]))
438	{
439	++j;
440	continue;
441	}
442
443	k=j;
444
445	if (!encode_word(uc, ucsize, qp_allow, &k))
446	break;
447	i=j=k;
448	}
449
450	*word_ptr=i;
451	return flag;
452	}
453
454	/*
455	** Encode a sequence of words.
456	*/
457	static int do_encode_words_method(const unicode_char *uc,
458	size_t ucsize,
459	const char *charset,
460	int (*qp_allow)(char),
461	size_t offset,
462	int (encoder)(const char ptr, size_t len,
463	const char *charset,
464	int (*qp_allow)(char),
465	int (func)(const char ,
466	size_t, void *),
467	void *arg),
468	int (func)(const char , size_t, void *),
469	void *arg)
470	{
471	char *p;
472	size_t psize;
473	int rc;
474	int first=1;
475
476	while (ucsize)
477	{
478	size_t j;
479	size_t i;
480
481	if (!first)
482	{
483	rc=(*func)(" ", 1, arg);
484
485	if (rc)
486	return rc;
487	}
488	first=0;
489
490	j=(RFC2047_ENCODE_FOLDLENGTH-offset)/2;
491
492	if (j >= ucsize)
493	j=ucsize;
494	else
495	{
496	/*
497	** Do not split rfc2047-encoded works across a
498	** grapheme break.
499	*/
500
501	for (i=j; i > 0; --i)
502	if (unicode_grapheme_break(uc[i-1], uc[i]))
503	{
504	j=i;
505	break;
506	}
507	}
508
d50284c4	509	if ((rc=unicode_convert_fromu_tobuf(uc, j, charset,
b0322a85 CE	510	&p, &psize,
	511	NULL)) != 0)
	512	return rc;
	513
	514
	515	if (psize && p[psize-1] == 0)
	516	--psize;
	517
	518	rc=(*encoder)(p, psize, charset, qp_allow,
	519	func, arg);
	520	free(p);
	521	if (rc)
	522	return rc;
	523	offset=0;
	524	ucsize -= j;
	525	uc += j;
	526	}
	527	return 0;
	528	}
	529
	530	static int cnt_conv(const char dummy, size_t n, void arg)
	531	{
	532	(size_t )arg += n;
	533	return 0;
	534	}
	535
	536	/*
	537	** Encode, or not encode, words.
	538	*/
	539
	540	static int do_encode_words(const unicode_char *uc,
	541	size_t ucsize,
	542	const char *charset,
	543	int flag,
	544	int (*qp_allow)(char),
	545	size_t offset,
	546	int (func)(const char , size_t, void *),
	547	void *arg)
	548	{
	549	char *p;
	550	size_t psize;
	551	int rc;
	552	size_t b64len, qlen;
	553
	554	/*
	555	** Convert from unicode
	556	*/
	557
d50284c4	558	if ((rc=unicode_convert_fromu_tobuf(uc, ucsize, charset,
b0322a85 CE	559	&p, &psize,
	560	NULL)) != 0)
	561	return rc;
	562
	563	if (psize && p[psize-1] == 0)
	564	--psize;
	565
	566	if (!flag) /* If not converting, then the job is done */
	567	{
	568	rc=(*func)(p, psize, arg);
	569	free(p);
	570	return rc;
	571	}
	572	free(p);
	573
	574	/*
	575	** Try first quoted-printable, then base64, then pick whichever
	576	** one gives the shortest results.
	577	*/
	578	qlen=0;
	579	b64len=0;
	580
	581	rc=do_encode_words_method(uc, ucsize, charset, qp_allow, offset,
	582	&encodeqp, cnt_conv, &qlen);
	583	if (rc)
	584	return rc;
	585
	586	rc=do_encode_words_method(uc, ucsize, charset, qp_allow, offset,
	587	&encodebase64, cnt_conv, &b64len);
	588	if (rc)
	589	return rc;
	590
	591	return do_encode_words_method(uc, ucsize, charset, qp_allow, offset,
	592	qlen < b64len ? encodeqp:encodebase64,
	593	func, arg);
	594	}
	595
	596	/*
	597	** RFC2047-encoding pass.
	598	*/
	599	static int rfc2047_encode_callback(const unicode_char *uc,
	600	size_t ucsize,
	601	const char *charset,
	602	int (*qp_allow)(char),
	603	int (func)(const char , size_t, void *),
	604	void *arg)
	605	{
	606	int rc;
	607	size_t i;
	608	int flag;
	609
	610	size_t offset=27; /* FIXME: initial offset for line length */
	611
	612	while (ucsize)
	613	{
	614	/* Pass along all the whitespace */
	615
	616	if (ISSPACE(*uc))
	617	{
	618	char c= *uc++;
	619	--ucsize;
	620
	621	if ((rc=(*func)(&c, 1, arg)) != 0)
	622	return rc;
623	continue;
624	}
625
626	i=0;
627
628	/* Check if the next word needs to be encoded, or not. */
629
630	flag=encode_words(uc, ucsize, qp_allow, &i);
631
632	/*
633	** Then proceed to encode, or not encode, the following words.
634	*/
635
636	if ((rc=do_encode_words(uc, i, charset, flag,
637	qp_allow, offset,
638	func, arg)) != 0)
639	return rc;
640
641	offset=0;
642	uc += i;
643	ucsize -= i;
644	}
645
646	return 0;
647	}
648
649
650	static int count_char(const char c, size_t l, void p)
651	{
652	size_t i=(size_t )p;
653
654	*i += l;
655	return (0);
656	}
657
658	static int save_char(const char c, size_t l, void p)
659	{
660	char s=(char )p;
661
662	memcpy(*s, c, l);
663	*s += l;
664	return (0);
665	}
666
667	char rfc2047_encode_str(const char str, const char *charset,
668	int (*qp_allow)(char c))
669	{
670	size_t i=1;
671	char s, p;
672	unicode_char *uc;
673	size_t ucsize;
674	int err;
675
676	/* Convert string to unicode */
677
d50284c4	678	if (unicode_convert_tou_tobuf(str, strlen(str), charset,
b0322a85 CE	679	&uc, &ucsize, &err))
	680	return NULL;
	681
	682	/*
	683	** Perform two passes: calculate size of the buffer where the
	684	** encoded string gets saved into, then allocate the buffer and
	685	** do a second pass to actually do it.
	686	*/
	687
	688	if (rfc2047_encode_callback(uc, ucsize,
	689	charset,
	690	qp_allow,
	691	&count_char, &i))
	692	{
	693	free(uc);
	694	return NULL;
	695	}
	696
	697	if ((s=malloc(i)) == 0)
	698	{
	699	free(uc);
	700	return NULL;
	701	}
	702
	703	p=s;
	704	(void)rfc2047_encode_callback(uc, ucsize,
	705	charset,
	706	qp_allow,
	707	&save_char, &p);
	708	*p=0;
	709	free(uc);
	710	return (s);
	711	}
	712
	713	int rfc2047_qp_allow_any(char c)
	714	{
	715	return 1;
	716	}
	717
	718	int rfc2047_qp_allow_comment(char c)
	719	{
	720	if (c == '(' \|\| c == ')' \|\| c == '"')
	721	return 0;
	722	return 1;
	723	}
	724
	725	int rfc2047_qp_allow_word(char c)
	726	{
	727	return strchr(base64tab, c) != NULL \|\|
	728	strchr("*-=_", c) != NULL;
	729	}