HCoop Git - hcoop/debian/courier-authlib.git/blame_incremental

... / ...

Commit	Line	Data
	1	#ifndef unicode_h
	2	#define unicode_h
	3
	4	/*
	5	** Copyright 2000-2011 Double Precision, Inc.
	6	** See COPYING for distribution information.
	7	**
	8	*/
	9
	10	#ifdef __cplusplus
	11
	12	#include <string>
	13	#include <vector>
	14	#include <list>
	15
	16	extern "C" {
	17	#endif
	18
	19	#if 0
	20	}
	21	#endif
	22
	23	#include "unicode/unicode_config.h"
	24
	25	#include <stdlib.h>
	26
	27	#include <stdio.h>
	28	#if HAVE_WCHAR_H
	29	#include <wchar.h>
	30	#endif
	31
	32	#if HAVE_STDDEF_H
	33	#include <stddef.h>
	34	#endif
	35	#include <stdint.h>
	36
	37	#include <sys/types.h>
	38
	39	typedef uint32_t unicode_char;
	40
	41	/*
	42	** The system default character set, from the locale.
	43	*/
	44
	45	extern const char *unicode_default_chset();
	46
	47	/* Unicode upper/lower/title case conversion functions */
	48
	49	extern unicode_char unicode_uc(unicode_char);
	50	extern unicode_char unicode_lc(unicode_char);
	51	extern unicode_char unicode_tc(unicode_char);
	52
	53	/*
	54	** Look up HTML 4.0/XHTML entity.
	55	**
	56	** n="amp", etc...
	57	**
	58	** Returns the unicode entity value, or 0 if no such entity is defined.
	59	*/
	60
	61	unicode_char unicode_html40ent_lookup(const char *n);
	62
	63	/*
	64	**
	65	** Return "width" of unicode character.
	66	**
	67	** This is defined as follows: for characters having the F or W property in
	68	** tr11 (EastAsianWidth), unicode_wcwidth() returns 2.
	69	**
	70	** Otherwise, characters having the BK, CR, LF, CM, NL, WJ, and ZW line
	71	** breaking property as per tr14, unicode_wcwidth() returns 0. For all other
	72	** cases, 1.
	73	**
	74	** This provides a rough estimate of the "width" of the character if it's
	75	** shown on a text console.
	76	*/
	77
	78	extern int unicode_wcwidth(unicode_char c);
	79	extern size_t unicode_wcwidth_str(const unicode_char *c);
	80
	81	/*
	82	** The unicode-ish isspace()
	83	*/
	84	extern int unicode_isspace(unicode_char ch);
	85
	86	/* Internal unicode table lookup function */
	87
	88	extern uint8_t unicode_tab_lookup(unicode_char ch,
	89	const size_t *unicode_indextab,
	90	size_t unicode_indextab_sizeof,
	91	const uint8_t (*unicode_rangetab)[2],
	92	const uint8_t *unicode_classtab,
	93	uint8_t uclass);
	94
	95	/*
	96	** Implementation of grapheme cluster boundary rules, as per tr29,
	97	** including GB9a and GB9b.
	98	**
	99	** Returns non-zero if there's a grapheme break between the two referenced
	100	** characters.
	101	*/
	102
	103	int unicode_grapheme_break(unicode_char a, unicode_char b);
	104
	105	/*
	106	** Implementation of line break rules, as per tr14.
	107	**
	108	** Invoke unicode_lb_init() to initialize the linebreaking algorithm. The
	109	** first parameter is a callback function that gets invoked with two
	110	** arguments: UNICODE_LB_{MANDATORY|NONE|ALLOWED}, and a passthrough argument.
	111	** The second parameter to unicode_lb_init() is the opaque passthrough
	112	** pointer, that is passed as the second argument to the callback function
	113	** with no further interpretation.
	114	**
	115	** unicode_lb_init() returns an opaque handle. Invoke unicode_lb_next(),
	116	** passing the handle and one unicode character. Repeatedly invoke
	117	** unicode_lb_next() to specify the input string for the linebreaking
	118	** algorithm, then invoke unicode_lb_end() to finish calculating the
	119	** linebreaking algorithm, and deallocate the opaque linebreaking handle.
	120	**
	121	** The callback function gets invoked once for each invocation of
	122	** unicode_lb_next(). The contract is that before unicode_lb_end() returns,
	123	** the callback function will get invoked the exact number of times that
	124	** unicode_lb_next() was invoked, as long as each invocation of the callback function
	125	** returned 0; nothing more, nothing less. The first parameter to the callback
	126	** function will be one of the following values:
	127	**
	128	** UNICODE_LB_MANDATORY - a linebreak is MANDATORY before the corresponding
	129	** character.
	130	** UNICODE_LB_NONE - a linebreak is PROHIBITED before the corresponding
	131	** character.
	132	** UNICODE_LB_ALLOWED - a linebreak is OPTIONAL before the corresponding
	133	** character (the preceding character is a space, or an equivalent).
	134	**
	135	** The callback function should return 0. A non-zero value indicates an
	136	** error, which gets propagated up to the caller. The contract that the
	137	** callback function gets invoked the same number of times that
	138	** unicode_lb_next() gets invoked is now broken.
	139	*/
	140
	141	#define UNICODE_LB_MANDATORY -1
	142	#define UNICODE_LB_NONE 0
	143	#define UNICODE_LB_ALLOWED 1
	144
	145	struct unicode_lb_info;
	146
	147	typedef struct unicode_lb_info *unicode_lb_info_t;
	148
	149	/*
	150	** Allocate a linebreaking handle; cb_func receives a UNICODE_LB_* value and cb_arg.
	151	*/
	152	extern unicode_lb_info_t unicode_lb_init(int (*cb_func)(int, void *),
	153	void *cb_arg);
	154
	155	/*
	156	** Feed the next character through the linebreaking algorithm.
	157	** A non-zero return code indicates that the callback function was invoked
	158	** and it returned a non-zero return code (which is propagated as a return
	159	** value). unicode_lb_end() must still be invoked, in this case.
	160	**
	161	** A zero return code indicates that if the callback function was invoked,
	162	** it returned 0.
	163	*/
	164
	165	extern int unicode_lb_next(unicode_lb_info_t i, unicode_char ch);
	166
	167	/*
	168	** Convenience function that invokes unicode_lb_next() with a list of
	169	** unicode chars. Returns 0 if all invocations of unicode_lb_next() returned
	170	** 0, or the first non-zero return value from unicode_lb_next().
	171	*/
	172
	173	extern int unicode_lb_next_cnt(unicode_lb_info_t i,
	174	const unicode_char *chars,
	175	size_t cnt);
	176
	177	/*
	178	** Finish the linebreaking algorithm.
	179	**
	180	** A non-zero return code indicates that the callback function was invoked
	181	** and it returned a non-zero return code (which is propagated as a return
	182	** value).
	183	**
	184	** A zero return code indicates that if the callback function was invoked,
	185	** it returned 0, and that the callback function was invoked exactly the same
	186	** number of times that unicode_lb_next() was invoked.
	187	**
	188	** In all cases, the linebreak handle will no longer be valid when this
	189	** function returns.
	190	*/
	191
	192	extern int unicode_lb_end(unicode_lb_info_t i);
	193
	194	/*
	195	** An alternative linebreak API where the callback function receives the
	196	** original unicode character in addition to its linebreak value.
	197	**
	198	** Use unicode_lbc_init(), unicode_lbc_next(), and unicode_lbc_end(), whose
	199	** semantics are the same as their _lb_ counterparts.
	200	*/
	201
	202	struct unicode_lbc_info;
	203
	204	typedef struct unicode_lbc_info *unicode_lbc_info_t;
	205
	206	extern unicode_lbc_info_t unicode_lbc_init(int (*cb_func)(int, unicode_char,
	207	void *),
	208	void *cb_arg);
	209	extern int unicode_lbc_next(unicode_lbc_info_t i, unicode_char ch);
	210	extern int unicode_lbc_end(unicode_lbc_info_t i);
	211
	212	/*
	213	** Set linebreaking options.
	214	**
	215	** OPTIONS SUBJECT TO CHANGE.
	216	*/
	217
	218	extern void unicode_lb_set_opts(unicode_lb_info_t i, int opts);
	219
	220	extern void unicode_lbc_set_opts(unicode_lbc_info_t i, int opts);
	221
	222	/*
	223	** Tailorization of LB24: Prevent pluses, as in "C++", from breaking.
	224	**
	225	** Adds the following to LB24:
	226	**
	227	** PR x PR
	228	**
	229	** AL x PR
	230	**
	231	** ID x PR
	232	**/
	233	#define UNICODE_LB_OPT_PRBREAK 0x0001
	234
	235
	236	/*
	237	** Tailored / breaking rules.
	238	**
	239	** Adds the following rule to LB13:
	240	**
	241	** SY x EX
	242	**
	243	** SY x AL
	244	**
	245	** SY x ID
	246	**
	247	** SP ÷ SY, which takes precedence over "x SY".
	248	*/
	249	#define UNICODE_LB_OPT_SYBREAK 0x0002
	250
	251	/*
	252	** Tailored / breaking rules.
	253	**
	254	** This reclassifies U+2013 and U+2014 as class WJ, prohibiting breaks before
	255	** and after mdash and ndash.
	256	*/
	257	#define UNICODE_LB_OPT_DASHWJ 0x0004
	258
	259	/*
	260	** Implementation of word break rules, as per tr29.
	261	**
	262	** Invoke unicode_wb_init() to initialize the wordbreaking algorithm. The
	263	** first parameter is a callback function that gets invoked with two
	264	** arguments: an int flag, and a passthrough argument. The second parameter to
	265	** unicode_wb_init() is the opaque passthrough pointer, that is passed as the
	266	** second argument to the callback function with no further interpretation.
	267	**
	268	** unicode_wb_init() returns an opaque handle. Invoke unicode_wb_next(),
	269	** passing the handle and one unicode character. Repeatedly invoke
	270	** unicode_wb_next() to specify the input string for the wordbreaking
	271	** algorithm, then invoke unicode_wb_end() to finish calculating the
	272	** wordbreaking algorithm, and deallocate the opaque wordbreaking handle.
	273	**
	274	** The callback function gets invoked once for each invocation of
	275	** unicode_wb_next(). The contract is that before unicode_wb_end() returns,
	276	** the callback function will get invoked the exact number of times that
	277	** unicode_wb_next() was invoked, as long as each invocation of the callback function
	278	** returned 0; nothing more, nothing less. The first parameter to the callback
	279	** function will be an int. A non-zero value indicates that there is a word
	280	** break between this character and the preceding one.
	281	**
	282	** The callback function should return 0. A non-zero value indicates an
	283	** error, which gets propagated up to the caller. The contract that the
	284	** callback function gets invoked the same number of times that
	285	** unicode_wb_next() gets invoked is now broken.
	286	*/
	287
	288	struct unicode_wb_info;
	289
	290	typedef struct unicode_wb_info *unicode_wb_info_t;
	291
	292	/*
	293	** Allocate a wordbreaking handle; cb_func receives the word break flag and cb_arg.
	294	*/
	295	extern unicode_wb_info_t unicode_wb_init(int (*cb_func)(int, void *),
	296	void *cb_arg);
	297
	298	/*
	299	** Feed the next character through the wordbreaking algorithm.
	300	** A non-zero return code indicates that the callback function was invoked
	301	** and it returned a non-zero return code (which is propagated as a return
	302	** value). unicode_wb_end() must still be invoked, in this case.
	303	**
	304	** A zero return code indicates that if the callback function was invoked,
	305	** it returned 0.
	306	*/
	307
	308	extern int unicode_wb_next(unicode_wb_info_t i, unicode_char ch);
	309
	310	/*
	311	** Convenience function that invokes unicode_wb_next() with a list of
	312	** unicode chars. Returns 0 if all invocations of unicode_wb_next() returned
	313	** 0, or the first non-zero return value from unicode_wb_next().
	314	*/
	315
	316	extern int unicode_wb_next_cnt(unicode_wb_info_t i,
	317	const unicode_char *chars,
	318	size_t cnt);
	319
	320	/*
	321	** Finish the wordbreaking algorithm.
	322	**
	323	** A non-zero return code indicates that the callback function was invoked
	324	** and it returned a non-zero return code (which is propagated as a return
	325	** value).
	326	**
	327	** A zero return code indicates that if the callback function was invoked,
	328	** it returned 0, and that the callback function was invoked exactly the same
	329	** number of times that unicode_wb_next() was invoked.
	330	**
	331	** In all cases, the wordbreak handle will no longer be valid when this
	332	** function returns.
	333	*/
	334
	335	extern int unicode_wb_end(unicode_wb_info_t i);
	336
	337	/*
	338	** Search for a word boundary.
	339	**
	340	** Obtain a handle by calling unicode_wbscan_init(), then invoke
	341	** unicode_wbscan_next() to provide a unicode stream, then invoke
	342	** unicode_wbscan_end(). unicode_wbscan_end() returns the number of unicode
	343	** characters from the beginning of the stream until the first word boundary.
	344	**
	345	** You may prematurely stop calling unicode_wbscan_next() once it returns a
	346	** non-0 value, which means that there is sufficient context to compute the
	347	** first word boundary, and all further calls to unicode_wbscan_next() will
	348	** be internal no-ops.
	349	*/
	350
	351	struct unicode_wbscan_info;
	352
	353	typedef struct unicode_wbscan_info *unicode_wbscan_info_t;
	354
	355	unicode_wbscan_info_t unicode_wbscan_init();
	356
	357	int unicode_wbscan_next(unicode_wbscan_info_t i, unicode_char ch);
	358
	359	size_t unicode_wbscan_end(unicode_wbscan_info_t i);
	360
	361	/*
	362	** A buffer that holds unicode characters, and dynamically grows as needed.
	363	*/
	364
	365	struct unicode_buf {
	366	unicode_char *ptr; /* The unicode characters */
	367	size_t size, /* Buffer size */
	368	len, /* How many characters in ptr are initialized */
	369	max; /* Maximum size the buffer can grow to */
	370	};
	371
	372	/*
	373	** Initialize a buffer. Constructor.
	374	*/
	375
	376	void unicode_buf_init(/* Initialize this structure. ptr, size, len cleared */
	377	struct unicode_buf *p,
	378
	379	/*
	380	** Maximum size the buffer can grow to. (size_t)-1
	381	** means unlimited.
	382	*/
	383	size_t max);
	384	/*
	385	** Like unicode_buf_init, and initialize the new buffer with the contents of
	386	** another buffer. The maximum size of the initialized buffer is exactly the
	387	** number of characters in the existing buffer. This copies a buffer using
	388	** the minimum amount of heap space.
	389	*/
	390
	391	#define unicode_buf_init_copy(a,b) \
	392	do { \
	393	unicode_buf_init((a), unicode_buf_len(b)); \
	394	unicode_buf_append_buf((a),(b)); \
	395	} while (0)
	396
	397	/*
	398	** Deinitialize the buffer. Destructor. Frees memory.
	399	*/
	400
	401	void unicode_buf_deinit(struct unicode_buf *p);
	402
	403	/*
	404	** Official way to access the characters in the unicode buffer.
	405	*/
	406	#define unicode_buf_ptr(p) ((p)->ptr)
	407
	408	/*
	409	** Official way of obtaining the number of characters in the unicode buffer.
	410	*/
	411	#define unicode_buf_len(p) ((p)->len)
	412
	413	/*
	414	** Remove all existing characters from an initialized buffer. Sets len to 0.
	415	*/
	416
	417	#define unicode_buf_clear(p) ((p)->len=0)
	418
	419	/*
	420	** Append characters to the existing characters in the unicode buffer.
	421	** The buffer grows, if needed. If the buffer would exceed its maximum size,
	422	** the extra characters get truncated.
	423	**
	424	** Returns 0 if the characters were appended. -1 for a malloc failure.
	425	*/
	426
	427	int unicode_buf_append(struct unicode_buf *p, /* The buffer */
	428	const unicode_char *uc, /* Characters to append */
	429	size_t l); /* How many of them */
	430
	431	/*
	432	** Convert an iso-8859-1 char string and invoke unicode_buf_append().
	433	*/
	434
	435	void unicode_buf_append_char(struct unicode_buf *dst,
	436	const char *str,
	437	size_t cnt);
	438
	439	/*
	440	** Remove cnt characters, starting at offset pos, from the unicode buffer.
	441	*/
	442
	443	void unicode_buf_remove(struct unicode_buf *p, /* The buffer */
	444	size_t pos, /* Offset in buffer */
	445	size_t cnt); /* How many to remove */
	446
	447	/*
	448	** Append the contents of an existing buffer to another one.
	449	*/
	450
	451	#define unicode_buf_append_buf(a,b) \
	452	unicode_buf_append((a), unicode_buf_ptr(b), unicode_buf_len(b))
	453
	454
	455	/*
	456	** The equivalent of strcmp() for unicode buffers.
	457	*/
	458
	459	int unicode_buf_cmp(const struct unicode_buf *a,
	460	const struct unicode_buf *b);
	461
	462	/*
	463	** The equivalent of unicode_buf_cmp, except that the second buffer is an
	464	** iso-8859-1 string.
	465	*/
	466
	467	int unicode_buf_cmp_str(const struct unicode_buf *p,
	468	const char *c, /* iso-8859-1 string */
	469	size_t cl); /* Number of chars in c */
	470
	471	/*
	472	** A wrapper for iconv(3). This wrapper provides a different API for iconv(3).
	473	** A handle gets created by libmail_u_convert_init().
	474	** libmail_u_convert_init() receives a pointer to the output function
	475	** which receives converted character text.
	476	**
	477	** The output function receives a pointer to the converted character text, and
	478	** the number of characters in the converted text.
	479	**
	480	** The character text to convert gets passed, repeatedly, to
	481	** libmail_u_convert(). Each call to libmail_u_convert() results in
	482	** the output function being invoked, zero or more times, with the converted
	483	** text. Finally, libmail_u_convert_deinit() stops the conversion and
	484	** deallocates the conversion handle.
	485	**
	486	** Internal buffering takes place. libmail_u_convert_deinit() may result
	487	** in the output function being called one or more times, to receive the final
	488	** part of the converted character stream.
	489	**
	490	** The output function should return 0. A non-0 value causes
	491	** libmail_u_convert() and/or libmail_u_convert_deinit() returning
	492	** non-0.
	493	*/
	494
	495	struct libmail_u_convert_hdr;
	496
	497	typedef struct libmail_u_convert_hdr *libmail_u_convert_handle_t;
	498
	499	/*
	500	** libmail_u_convert_init() returns a non-NULL handle for the requested
	501	** conversion, or NULL if the requested conversion is not available.
	502	*/
	503
	504	libmail_u_convert_handle_t
	505	libmail_u_convert_init(/* Convert from this chset */
	506	const char *src_chset,
	507
	508	/* Convert to this chset */
	509	const char *dst_chset,
	510
	511	/* The output function: converted text, its size, passthrough arg */
	512
	513	int (*output_func)(const char *, size_t, void *),
	514
	515	/* Passthrough arg */
	516	void *convert_arg);
	517
	518	/*
	519	** Repeatedly pass the character text to convert to libmail_u_convert().
	520	**
	521	** Returns non-0 if the output function returned non-0, or 0 if all invocations
	522	** of the output function returned 0.
	523	*/
	524
	525	int libmail_u_convert(/* The conversion handle */
	526	libmail_u_convert_handle_t handle,
	527
	528	/* Text to convert */
	529	const char *text,
	530
	531	/* Number of bytes to convert */
	532	size_t cnt);
	533
	534	/*
	535	** Finish character set conversion. The handle gets deallocated.
	536	**
	537	** May still result in one or more invocations of the output function.
	538	** Returns non-zero if any previous invocation of the output function returned
	539	** non-zero (this includes any invocations of the output function resulting
	540	** from this call, or prior libmail_u_convert() calls), or 0 if all
	541	** invocations of the output function returned 0.
	542	**
	543	** If the errptr is not NULL, *errptr is set to non-zero if there were any
	544	** conversion errors -- if there was any text that could not be converted to
	545	** the destination character text.
	546	*/
	547
	548	int libmail_u_convert_deinit(libmail_u_convert_handle_t handle,
	549	int *errptr);
	550
	551
	552	/*
	553	** Specialization: save converted character text in a buffer.
	554	**
	555	** Implementation: call libmail_u_convert_tocbuf_init() instead of
	556	** libmail_u_convert_init(), then call libmail_u_convert() and
	557	** libmail_u_convert_deinit(), as usual.
	558	**
	559	** If libmail_u_convert_deinit() returns 0, *cbufptr_ret gets initialized to a
	560	** malloc()ed buffer, and the number of converted characters, the size of the
	561	** malloc()ed buffer, is placed into *cbufsize_ret; both arguments were passed
	562	** to libmail_u_convert_tocbuf_init().
	563	**
	564	** Note: if the converted string is an empty string, *cbufsize_ret is set to 0,
	565	** but *cbufptr_ret still gets initialized (to a dummy malloced buffer).
	566	**
	567	** The optional nullterminate places a trailing \0 character after the
	568	** converted string (this is included in *cbufsize_ret).
	569	*/
	570
	571	libmail_u_convert_handle_t
	572	libmail_u_convert_tocbuf_init(/* Convert from this chset */
	573	const char *src_chset,
	574
	575	/* Convert to this chset */
	576	const char *dst_chset,
	577
	578	/* malloced buffer */
	579	char **cbufptr_ret,
	580
	581	/* size of the malloced buffer */
	582	size_t *cbufsize_ret,
	583
	584	/* null terminate the resulting string */
	585	int nullterminate
	586	);
	587
	588
	589	/*
	590	** Specialization: convert some character text to a unicode_char array.
	591	**
	592	** This is like libmail_u_convert_tocbuf_init(), but converts to a unicode_char
	593	** array.
	594	**
	595	** The returned *ucsize_ret is initialized with the number of unicode_chars,
	596	** rather than the byte count.
	597	**
	598	** In all other ways, this function behaves identically to
	599	** libmail_u_convert_tocbuf_init().
	600	*/
	601
	602	libmail_u_convert_handle_t
	603	libmail_u_convert_tou_init(/* Convert from this chset */
	604	const char *src_chset,
	605
	606	/* malloc()ed buffer pointer, on exit. */
	607	unicode_char **ucptr_ret,
	608
	609	/* size of the malloc()ed buffer, upon exit */
	610	size_t *ucsize_ret,
	611
	612	/* If true, terminate with U+0x0000, for convenience */
	613	int nullterminate
	614	);
	615
	616	/*
	617	** Specialization: convert a unicode_char array to some character text.
	618	**
	619	** This is the opposite of libmail_u_convert_tou_init(). Call this to
	620	** initialize the conversion handle, then use libmail_u_convert_uc()
	621	** instead of libmail_u_convert.
	622	*/
	623
	624	libmail_u_convert_handle_t
	625	libmail_u_convert_fromu_init(/* Convert to this chset */
	626	const char *dst_chset,
	627
	628	/* malloc()ed buffer pointer, on exit. */
	629	char **cbufptr_ret,
	630
	631	/* size of the malloc()ed buffer, upon exit */
	632	size_t *cbufsize_ret,
	633
	634	/* If true, terminate with U+0x0000, for convenience */
	635	int nullterminate
	636	);
	637
	638	int libmail_u_convert_uc(/* The conversion handle */
	639	libmail_u_convert_handle_t handle,
	640
	641	/* Text to convert */
	642	const unicode_char *text,
	643
	644	/* Number of bytes to convert */
	645	size_t cnt);
	646
	647	/*
	648	** Initialize conversion to UTF-8.
	649	**
	650	** This is a wrapper for libmail_u_convert_tocbuf_init() that specifies the
	651	** destination charset as UTF-8.
	652	*/
	653
	654	libmail_u_convert_handle_t
	655	libmail_u_convert_tocbuf_toutf8_init(const char *src_chset,
	656	char **cbufptr_ret,
	657	size_t *cbufsize_ret,
	658	int nullterminate);
	659
	660	/*
	661	** Initialize conversion from UTF-8.
	662	**
	663	** This is a wrapper for libmail_u_convert_tocbuf_init() that specifies the
	664	** source charset as UTF-8.
	665	*/
	666
	667	libmail_u_convert_handle_t
	668	libmail_u_convert_tocbuf_fromutf8_init(const char *dst_chset,
	669	char **cbufptr_ret,
	670	size_t *cbufsize_ret,
	671	int nullterminate);
	672
	673	/*
	674	** Convert a character string to UTF-8.
	675	**
	676	** Returns a malloc-ed buffer holding the UTF-8 string, or NULL if an
	677	** error occurred.
	678	*/
	679	char *libmail_u_convert_toutf8(/* Text to convert to UTF-8 */
	680	const char *text,
	681
	682	/* Character set to convert to UTF-8 */
	683	const char *charset,
	684
	685	/*
	686	** If non-NULL, and a non-NULL pointer is
	687	** returned, *error is set to non-zero if
	688	** a character conversion error has occurred.
	689	*/
	690	int *error);
	691
	692	/*
	693	** Convert UTF-8 text to another character set.
	694	**
	695	** Returns a malloc-ed buffer holding the string converted to the specified
	696	** character set, or NULL if an error occurred.
	697	*/
	698
	699	char *libmail_u_convert_fromutf8(/* A UTF-8 string */
	700	const char *text,
	701
	702	/*
	703	** Convert the UTF-8 string to this character
	704	** set.
	705	*/
	706
	707	const char *charset,
	708
	709	/*
	710	** If non-NULL, and a non-NULL pointer is
	711	** returned, *error is set to non-zero if
	712	** a character conversion error has occurred.
	713	*/
	714	int *error);
	715
	716	/*
	717	** Convert one charset to another charset, placing the result in a malloc-ed
	718	** buffer.
	719	**
	720	** Returns a malloc-ed buffer holding the string converted to the specified
	721	** character set, or NULL if an error occurred.
	722	*/
	723
	724	char *libmail_u_convert_tobuf(/* A string to convert */
	725	const char *text,
	726
	727	/*
	728	** String's charset.
	729	*/
	730
	731	const char *charset,
	732
	733	/*
	734	** Destination charset
	735	*/
	736	const char *dstcharset,
	737
	738	/*
	739	** If non-NULL, and a non-NULL pointer is
	740	** returned, *error is set to non-zero if
	741	** a character conversion error has occurred.
	742	*/
	743	int *error);
	744
	745	/*
	746	** Convenience function: call libmail_u_convert_tou_init(), feed the
	747	** character string through libmail_u_convert(), then call
	748	** libmail_u_convert_deinit().
	749	**
	750	** If this function returns 0, *uc and *ucsize are set to a malloced buffer+size
	751	** holding the unicode char array.
	752	*/
	753
	754	int libmail_u_convert_tou_tobuf(/* Character text to convert */
	755	const char *text,
	756
	757	/* Number of characters */
	758	size_t text_l,
	759
	760	/* text's charset */
	761	const char *charset,
	762
	763	/*
	764	** If this function returns 0, this gets
	765	** initialized
	766	*/
	767	unicode_char **uc,
	768
	769	/*
	770	** Size of the allocated buffer
	771	*/
	772	size_t *ucsize,
	773
	774	/*
	775	** If not null and this function returns 0,
	776	** this is set to non-0 if there
	777	** was a conversion error (but the output
	778	** buffer gets still allocated and
	779	** initialized)
	780	*/
	781	int *err);
	782
	783	/*
	784	** Convenience function: call libmail_u_convert_fromu_init(), feed the
	785	** unicode_array through libmail_u_convert_uc(), then call
	786	** libmail_u_convert_deinit().
	787	**
	788	** If this function returns 0, *c and *csize are set to a malloced buffer+size
	789	** holding the converted character string
	790	*/
	791
	792	int libmail_u_convert_fromu_tobuf(/* Unicode array to convert to a char str */
	793	const unicode_char *utext,
	794
	795	/*
	796	** Size of the unicode array.
	797	** If this is (size_t)-1, utext is a
	798	** 0-terminated array.
	799	*/
	800	size_t utext_l,
	801
	802	/*
	803	** Convert the unicode array to this charset.
	804	*/
	805	const char *charset,
	806
	807	/*
	808	** If libmail_u_convert_fromu_tobuf()
	809	** returns 0, this is initialized to a
	810	** malloced buffer in which a 0-terminated
	811	** string is kept.
	812	*/
	813	char **c,
	814
	815	/*
	816	** Size of the initialized array, including
	817	** the 0-terminator.
	818	*/
	819	size_t *csize,
	820
	821	/*
	822	** If libmail_u_convert_fromu_tobuf()
	823	** returns 0 and this is not NULL,
	824	** *err is set to non-0 if there was a
	825	** conversion error to the requested
	826	** character set.
	827	*/
	828	int *err);
	829
	830	/*
	831	** Convenience function: convert a string in a given character set
	832	** to/from uppercase, lowercase, or something else.
	833	**
	834	** This is done by calling libmail_u_convert_tou_tobuf() first,
	835	** applying the first_char_func and char_func, then using
	836	** libmail_u_convert_fromu_tobuf().
	837	**
	838	** A NULL return indicates that the requested conversion cannot be performed.
	839	*/
	840
	841	char *libmail_u_convert_tocase( /* String to convert */
	842	const char *str,
	843
	844	/* String's character set */
	845
	846	const char *charset,
	847
	848	/*
	849	** Conversion of the first character in
	850	** str: unicode_uc, unicode_lc, or unicode_tc:
	851	*/
	852
	853	unicode_char (*first_char_func)(unicode_char),
	854
	855	/*
	856	** Conversion of the second and the remaining
	857	** character in str. If NULL, same as
	858	** first_char_func.
	859	*/
	860	unicode_char (*char_func)(unicode_char));
	861
	862
	863
	864	/* Either UCS-4BE or UCS-4LE, matching the native unicode_char endianness */
	865
	866	extern const char libmail_u_ucs4_native[];
	867
	868	/* Either UCS-2BE or UCS-2LE, matching the native unicode_char endianness */
	869
	870	extern const char libmail_u_ucs2_native[];
	871
	872	/*
	873	** Modified-UTF7 encoding used for IMAP folder names. Pass it for a charset
	874	** parameter.
	875	**
	876	** This can be followed by a " " and up to 15 characters to be escaped in
	877	** addition to unicode chars.
	878	*/
	879
	880	#define unicode_x_imap_modutf7 "x-imap-modutf7"
	881
	882	#if 0
	883	{
	884	#endif
	885
	886	#ifdef __cplusplus
	887	}
	888
	889	extern size_t unicode_wcwidth(const std::vector<unicode_char> &uc);
	890
	891	namespace mail {
	892
	893	/*
	894	** Interface to iconv.
	895	**
	896	** Subclass converted(). Invoke begin(), then operator(), repeatedly,
	897	** then end().
	898	**
	899	** converted() receives the converted text.
	900	*/
	901
	902	class iconvert {
	903
	904	libmail_u_convert_handle_t handle;
	905
	906	public:
	907	iconvert();
	908	virtual ~iconvert();
	909
	910	/* Start conversion.
	911	** Returns false if the requested conversion cannot be done.
	912	**/
	913
	914	bool begin(/* Convert from */
	915	const std::string &src_chset,
	916
	917	/* Convert to */
	918	const std::string &dst_chset);
	919
	920	/* Feed iconv(3). Returns false if the conversion was aborted.
	921	*/
	922
	923	bool operator()(const char *, size_t);
	924
	925	bool operator()(const unicode_char *, size_t);
	926
	927	/*
	928	** Get the results here. If the subclass returns a non-0
	929	** value, the conversion is aborted.
	930	*/
	931
	932	virtual int converted(const char *, size_t);
	933
	934	/*
	935	** End of conversion.
	936	**
	937	** Returns true if all calls to converted() returned 0,
	938	** false if the conversion was aborted.
	939	**
	940	** errflag is set to true if there was a character that could
	941	** not be converted, and passed to converted().
	942	*/
	943
	944	bool end(bool &errflag)
	945	{
	946	return end(&errflag);
	947	}
	948
	949	bool end()
	950	{
	951	return end(NULL);
	952	}
	953
	954	/* Convert between two different charsets */
	955
	956	static std::string convert(const std::string &text,
	957	const std::string &charset,
	958	const std::string &dstcharset,
	959	bool &errflag);
	960
	961	/* Convert between two different charsets */
	962
	963	static std::string convert(const std::string &text,
	964	const std::string &charset,
	965	const std::string &dstcharset)
	966	{
	967	bool dummy;
	968
	969	return convert(text, charset, dstcharset, dummy);
	970	}
	971
	972	/* Convert from unicode to a charset */
	973
	974	static std::string convert(const std::vector<unicode_char> &uc,
	975	const std::string &dstcharset,
	976	bool &errflag);
	977
	978	/* Convert from unicode to a charset */
	979
	980	static std::string convert(const std::vector<unicode_char> &uc,
	981	const std::string &dstcharset)
	982	{
	983	bool dummy;
	984
	985	return convert(uc, dstcharset, dummy);
	986	}
	987
	988	/* Convert charset to unicode */
	989
	990	static bool convert(const std::string &text,
	991	const std::string &charset,
	992	std::vector<unicode_char> &uc);
	993
	994
	995	/* Convert to upper/lower/title case */
	996
	997	static std::string
	998	convert_tocase(/* Text string */
	999	const std::string &text,
	1000
	1001	/* Its charset */
	1002	const std::string &charset,
	1003
	1004	/* First character: unicode_uc, unicode_lc, or unicode_tc */
	1005	unicode_char (*first_char_func)(unicode_char),
	1006
	1007	/* If not NULL, second and subsequent chars */
	1008	unicode_char (*char_func)(unicode_char)
	1009	=NULL)
	1010	{
	1011	bool dummy;
	1012
	1013	return convert_tocase(text, charset, dummy,
	1014	first_char_func,
	1015	char_func);
	1016	}
	1017
	1018	/* Convert to upper/lower/title case */
	1019
	1020	static std::string
	1021	convert_tocase(/* Text string */
	1022	const std::string &text,
	1023
	1024	/* Its charset */
	1025	const std::string &charset,
	1026
	1027	/* Set if there's a conversion error */
	1028	bool &err,
	1029
	1030	/* First character: unicode_uc, unicode_lc, or unicode_tc */
	1031	unicode_char (*first_char_func)(unicode_char),
	1032
	1033	/* If not NULL, second and subsequent chars */
	1034	unicode_char (*char_func)(unicode_char)
	1035	=NULL);
	1036	private:
	1037	bool end(bool *);
	1038
	1039	public:
	1040	class tou;
	1041	class fromu;
	1042	};
	1043
	1044	/* Convert output of iconvert to unicode_chars. */
	1045
	1046	class iconvert::tou : public iconvert {
	1047
	1048	public:
	1049	bool begin(const std::string &chset);
	1050
	1051	virtual int converted(const unicode_char *, size_t);
	1052
	1053	using iconvert::operator();
	1054	private:
	1055	int converted(const char *ptr, size_t cnt);
	1056
	1057	public:
	1058	template<typename iter_t> class to_iter_class;
	1059
	1060	template<typename input_iter_t,
	1061	typename output_iter_t>
	1062	static output_iter_t convert(input_iter_t from_iter,
	1063	input_iter_t to_iter,
	1064	const std::string &chset,
	1065	output_iter_t out_iter);
	1066
	1067	template<typename input_iter_t>
	1068	static void convert(input_iter_t from_iter,
	1069	input_iter_t to_iter,
	1070	const std::string &chset,
	1071	std::vector<unicode_char> &out_buf)
	1072	{
	1073	out_buf.clear();
	1074	std::back_insert_iterator<std::vector<unicode_char> >
	1075	insert_iter(out_buf);
	1076
	1077	convert(from_iter, to_iter, chset, insert_iter);
	1078	}
	1079
	1080	static void convert(const std::string &str,
	1081	const std::string &chset,
	1082	std::vector<unicode_char> &out_buf);
	1083	};
	1084
	1085	/* Helper class that saves unicode output into an output iterator */
	1086
	1087	template<typename iter_t>
	1088	class iconvert::tou::to_iter_class : public iconvert::tou {
	1089
	1090	iter_t iter;
	1091	public:
	1092
	1093	to_iter_class(iter_t iterValue)
	1094	: iter(iterValue) {}
	1095
	1096	using tou::operator();
	1097
	1098	operator iter_t() const { return iter; }
	1099
	1100	private:
	1101	int converted(const unicode_char *ptr, size_t cnt)
	1102	{
	1103	while (cnt)
	1104	{
	1105	iter=ptr;
	1106
	1107	++iter;
	1108	++ptr;
	1109	--cnt;
	1110	}
	1111	return 0;
	1112	}
	1113	};
	1114
	1115	template<typename input_iter_t,
	1116	typename output_iter_t>
	1117	output_iter_t iconvert::tou::convert(input_iter_t from_iter,
	1118	input_iter_t to_iter,
	1119	const std::string &chset,
	1120	output_iter_t out_iter)
	1121	{
	1122	class to_iter_class<output_iter_t> out(out_iter);
	1123
	1124	if (!out.begin(chset))
	1125	return out;
	1126
	1127	std::vector<char> string;
	1128
	1129	while (from_iter != to_iter)
	1130	{
	1131	string.push_back(*from_iter++);
	1132
	1133	if (string.size() > 31)
	1134	{
	1135	out(&string[0], string.size());
	1136	string.clear();
	1137	}
	1138	}
	1139
	1140	if (string.size() > 0)
	1141	out(&string[0], string.size());
	1142
	1143	out.end();
	1144	return out;
	1145	}
	1146
	1147	/* Convert output of iconvert from unicode_chars. */
	1148
	1149	class iconvert::fromu : public iconvert {
	1150
	1151	public:
	1152	bool begin(const std::string &chset);
	1153
	1154	using iconvert::operator();
	1155
	1156	template<typename iter_t> class to_iter_class;
	1157
	1158	template<typename input_iter_t,
	1159	typename output_iter_t>
	1160	static output_iter_t convert(input_iter_t from_iter,
	1161	input_iter_t to_iter,
	1162	const std::string &chset,
	1163	output_iter_t out_iter);
	1164
	1165	template<typename input_iter_t>
	1166	static void convert(input_iter_t from_iter,
	1167	input_iter_t to_iter,
	1168	const std::string &chset,
	1169	std::string &out_buf)
	1170	{
	1171	out_buf="";
	1172	std::back_insert_iterator<std::string>
	1173	insert_iter(out_buf);
	1174
	1175	convert(from_iter, to_iter, chset, insert_iter);
	1176	}
	1177
	1178	static void convert(const std::vector<unicode_char> &ubuf,
	1179	const std::string &chset,
	1180	std::string &out_buf);
	1181
	1182	static std::string convert(const std::vector<unicode_char>
	1183	&ubuf,
	1184	const std::string &chset);
	1185	};
	1186
	1187	/* Helper class that saves unicode output into an output iterator */
	1188
	1189	template<typename iter_t>
	1190	class iconvert::fromu::to_iter_class : public iconvert::fromu {
	1191
	1192	iter_t iter;
	1193	public:
	1194
	1195	to_iter_class(iter_t iterValue)
	1196	: iter(iterValue) {}
	1197
	1198	using fromu::operator();
	1199
	1200	operator iter_t() const { return iter; }
	1201
	1202	private:
	1203	int converted(const char *ptr, size_t cnt)
	1204	{
	1205	while (cnt)
	1206	{
	1207	iter=ptr;
	1208
	1209	++iter;
	1210	++ptr;
	1211	--cnt;
	1212	}
	1213	return 0;
	1214	}
	1215	};
	1216
	1217	template<typename input_iter_t,
	1218	typename output_iter_t>
	1219	output_iter_t iconvert::fromu::convert(input_iter_t from_iter,
	1220	input_iter_t to_iter,
	1221	const std::string &chset,
	1222	output_iter_t out_iter)
	1223	{
	1224	class to_iter_class<output_iter_t> out(out_iter);
	1225
	1226	if (!out.begin(chset))
	1227	return out;
	1228
	1229	std::vector<unicode_char> string;
	1230
	1231	while (from_iter != to_iter)
	1232	{
	1233	string.push_back(*from_iter++);
	1234
	1235	if (string.size() > 31)
	1236	{
	1237	out(&string[0], string.size());
	1238	string.clear();
	1239	}
	1240	}
	1241
	1242	if (string.size() > 0)
	1243	out(&string[0], string.size());
	1244
	1245	out.end();
	1246	return out;
	1247	}
	1248
	1249	/*
	1250	** Unicode linebreaking algorithm, tr14.
	1251	*/
	1252
	1253	extern "C" int linebreak_trampoline(int value, void *ptr);
	1254	extern "C" int linebreakc_trampoline(int value, unicode_char ch,
	1255	void *ptr);
	1256
	1257	/*
	1258	** Subclass linebreak_callback_base, implement operator()(int).
	1259	**
	1260	** Use operator<< or operator()(iterator, iterator) to feed
	1261	** unicode_chars into the linebreaking algorithm. The subclass receives
	1262	** UNICODE_LB values, as they become available.
	1263	*/
	1264
	1265	class linebreak_callback_base {
	1266
	1267	unicode_lb_info_t handle;
	1268
	1269	int opts;
	1270
	1271	linebreak_callback_base(const linebreak_callback_base &);
	1272	/* NOT IMPLEMENTED */
	1273
	1274	linebreak_callback_base &operator==(const
	1275	linebreak_callback_base &);
	1276	/* NOT IMPLEMENTED */
	1277
	1278	public:
	1279	linebreak_callback_base();
	1280	virtual ~linebreak_callback_base();
	1281
	1282	void finish();
	1283
	1284	void set_opts(int opts);
	1285
	1286	friend int linebreak_trampoline(int, void *);
	1287
	1288	linebreak_callback_base &operator<<(unicode_char uc);
	1289
	1290	template<typename iter_type>
	1291	linebreak_callback_base &operator()(iter_type beg_iter,
	1292	iter_type end_iter)
	1293	{
	1294	while (beg_iter != end_iter)
	1295	operator<<(*beg_iter++);
	1296	return *this;
	1297	}
	1298
	1299	linebreak_callback_base &operator<<(const
	1300	std::vector<unicode_char>
	1301	&vec)
	1302	{
	1303	return operator()(vec.begin(), vec.end());
	1304	}
	1305	private:
	1306	virtual int operator()(int);
	1307	};
	1308
	1309	class linebreak_callback_save_buf : public linebreak_callback_base {
	1310
	1311	public:
	1312	std::list<int> lb_buf;
	1313
	1314	linebreak_callback_save_buf();
	1315	~linebreak_callback_save_buf();
	1316
	1317	private:
	1318	int operator()(int value);
	1319	};
	1320
	1321	/*
	1322	** Convert an input iterator sequence over unicode_chars into
	1323	** an input iterator sequence over linebreak values.
	1324	*/
	1325
	1326	template<typename input_t> class linebreak_iter
	1327	: public std::iterator<std::input_iterator_tag, int, void>
	1328	{
	1329	mutable input_t iter_value, end_iter_value;
	1330
	1331	mutable linebreak_callback_save_buf *buf;
	1332
	1333	void fill() const
	1334	{
	1335	if (buf == NULL)
	1336	return;
	1337
	1338	while (buf->lb_buf.empty())
	1339	{
	1340	if (iter_value == end_iter_value)
	1341	{
	1342	buf->finish();
	1343	if (buf->lb_buf.empty())
	1344	{
	1345	delete buf;
	1346	buf=NULL;
	1347	}
	1348	break;
	1349	}
	1350
	1351	buf->operator<<(*iter_value++);
	1352	}
	1353	}
	1354
	1355	mutable value_type bufvalue;
	1356
	1357	public:
	1358	linebreak_iter(const input_t &iter_valueArg,
	1359	const input_t &iter_endvalueArg)
	1360	: iter_value(iter_valueArg),
	1361	end_iter_value(iter_endvalueArg),
	1362	buf(new linebreak_callback_save_buf)
	1363	{
	1364	}
	1365
	1366	linebreak_iter() : buf(NULL)
	1367	{
	1368	}
	1369
	1370	void set_opts(int opts)
	1371	{
	1372	if (buf)
	1373	buf->set_opts(opts);
	1374	}
	1375
	1376	~linebreak_iter()
	1377	{
	1378	if (buf)
	1379	delete buf;
	1380	}
	1381
	1382	linebreak_iter(const linebreak_iter<input_t> &v)
	1383	: buf(NULL)
	1384	{
	1385	operator=(v);
	1386	}
	1387
	1388	linebreak_iter<input_t> &operator=(const
	1389	linebreak_iter<input_t> &v)
	1390	{
	1391	if (buf)
	1392	delete buf;
	1393	buf=v.buf;
	1394	iter_value=v.iter_value;
	1395	end_iter_value=v.end_iter_value;
	1396	v.buf=NULL;
	1397	return *this;
	1398	}
	1399
	1400	bool operator==(const linebreak_iter<input_t> &v) const
	1401	{
	1402	fill();
	1403	v.fill();
	1404
	1405	return buf == NULL && v.buf == NULL;
	1406	}
	1407
	1408	bool operator!=(const linebreak_iter<input_t> &v) const
	1409	{
	1410	return !operator==(v);
	1411	}
	1412
	1413	value_type operator*() const
	1414	{
	1415	fill();
	1416	return buf == NULL ? UNICODE_LB_MANDATORY:
	1417	buf->lb_buf.front();
	1418	}
	1419
	1420	linebreak_iter<input_t> &operator++()
	1421	{
	1422	bufvalue=operator*();
	1423
	1424	if (buf)
	1425	buf->lb_buf.pop_front();
	1426	return *this;
	1427	}
	1428
	1429	const value_type *operator++(int)
	1430	{
	1431	operator++();
	1432	return &bufvalue;
	1433	}
	1434	};
	1435
	1436	/*
	1437	** Like linebreak_callback_base, except the subclass receives both
	1438	** the linebreaking value, and the unicode character.
	1439	*/
	1440
	1441	class linebreakc_callback_base {
	1442
	1443	unicode_lbc_info_t handle;
	1444
	1445	int opts;
	1446
	1447	linebreakc_callback_base(const linebreakc_callback_base &);
	1448	/* NOT IMPLEMENTED */
	1449
	1450	linebreakc_callback_base &operator==(const
	1451	linebreakc_callback_base
	1452	&);
	1453	/* NOT IMPLEMENTED */
	1454
	1455
	1456	public:
	1457	linebreakc_callback_base();
	1458	virtual ~linebreakc_callback_base();
	1459
	1460	void finish();
	1461
	1462	void set_opts(int opts);
	1463
	1464	friend int linebreakc_trampoline(int, unicode_char, void *);
	1465
	1466	linebreakc_callback_base &operator<<(unicode_char uc);
	1467
	1468	template<typename iter_type>
	1469	linebreakc_callback_base &operator()(iter_type beg_iter,
	1470	iter_type end_iter)
	1471	{
	1472	while (beg_iter != end_iter)
	1473	operator<<(*beg_iter++);
	1474	return *this;
	1475	}
	1476
	1477	linebreakc_callback_base &operator<<(const
	1478	std::vector<unicode_char>
	1479	&vec)
	1480	{
	1481	return operator()(vec.begin(), vec.end());
	1482	}
	1483	private:
	1484	virtual int operator()(int, unicode_char);
	1485	};
	1486
	1487	class linebreakc_callback_save_buf : public linebreakc_callback_base {
	1488
	1489	public:
	1490	std::list<std::pair<int, unicode_char> > lb_buf;
	1491
	1492	linebreakc_callback_save_buf();
	1493	~linebreakc_callback_save_buf();
	1494
	1495	private:
	1496	int operator()(int, unicode_char);
	1497	};
	1498
	1499
	1500	/*
	1501	** Convert an input iterator sequence over unicode_chars into
	1502	** an input iterator sequence over std::pair<int, unicode_char>,
	1503	** the original unicode character, and the linebreaking value before
	1504	** the character.
	1505	*/
	1506
	1507	template<typename input_t> class linebreakc_iter
	1508	: public std::iterator<std::input_iterator_tag,
	1509	std::pair<int, unicode_char>, void>
	1510	{
	1511	mutable input_t iter_value, end_iter_value;
	1512
	1513	mutable linebreakc_callback_save_buf *buf;
	1514
	1515	void fill() const
	1516	{
	1517	if (buf == NULL)
	1518	return;
	1519
	1520	while (buf->lb_buf.empty())
	1521	{
	1522	if (iter_value == end_iter_value)
	1523	{
	1524	buf->finish();
	1525	if (buf->lb_buf.empty())
	1526	{
	1527	delete buf;
	1528	buf=NULL;
	1529	}
	1530	break;
	1531	}
	1532
	1533	buf->operator<<(*iter_value);
	1534	++iter_value;
	1535	}
	1536	}
	1537
	1538	mutable value_type bufvalue;
	1539
	1540	public:
	1541	linebreakc_iter(const input_t &iter_valueArg,
	1542	const input_t &iter_endvalueArg)
	1543	: iter_value(iter_valueArg),
	1544	end_iter_value(iter_endvalueArg),
	1545	buf(new linebreakc_callback_save_buf)
	1546	{
	1547	}
	1548
	1549	linebreakc_iter() : buf(NULL)
	1550	{
	1551	}
	1552
	1553	~linebreakc_iter()
	1554	{
	1555	if (buf)
	1556	delete buf;
	1557	}
	1558
	1559	linebreakc_iter(const linebreakc_iter<input_t> &v)
	1560	: buf(NULL)
	1561	{
	1562	operator=(v);
	1563	}
	1564
	1565	linebreakc_iter<input_t> &operator=(const
	1566	linebreakc_iter<input_t> &v)
	1567	{
	1568	if (buf)
	1569	delete buf;
	1570	buf=v.buf;
	1571	iter_value=v.iter_value;
	1572	end_iter_value=v.end_iter_value;
	1573	v.buf=NULL;
	1574	return *this;
	1575	}
	1576
	1577	bool operator==(const linebreakc_iter<input_t> &v) const
	1578	{
	1579	fill();
	1580	v.fill();
	1581
	1582	return buf == NULL && v.buf == NULL;
	1583	}
	1584
	1585	bool operator!=(const linebreakc_iter<input_t> &v) const
	1586	{
	1587	return !operator==(v);
	1588	}
	1589
	1590	value_type operator*() const
	1591	{
	1592	fill();
	1593	return buf == NULL ?
	1594	std::make_pair(UNICODE_LB_MANDATORY,
	1595	(unicode_char)0):
	1596	buf->lb_buf.front();
	1597	}
	1598
	1599	linebreakc_iter<input_t> &operator++()
	1600	{
	1601	bufvalue=operator*();
	1602
	1603	if (buf)
	1604	buf->lb_buf.pop_front();
	1605	return *this;
	1606	}
	1607
	1608	const value_type *operator++(int)
	1609	{
	1610	operator++();
	1611	return &bufvalue;
	1612	}
	1613	};
	1614
	1615
	1616	/*
	1617	** Subclass wordbreak_callback_base, implement operator()(bool).
	1618	**
	1619	** Use operator<< or operator()(iterator, iterator) to feed
	1620	** unicode_chars into the wordbreaking algorithm. The subclass receives
	1621	** word flags, as they become available.
	1622	*/
	1623
	1624	extern "C" int wordbreak_trampoline(int value, void *ptr);
	1625
	1626	class wordbreak_callback_base {
	1627
	1628	unicode_wb_info_t handle;
	1629
	1630	wordbreak_callback_base(const wordbreak_callback_base &);
	1631	/* NOT IMPLEMENTED */
	1632
	1633	wordbreak_callback_base &operator==(const
	1634	wordbreak_callback_base &);
	1635	/* NOT IMPLEMENTED */
	1636
	1637	public:
	1638	wordbreak_callback_base();
	1639	virtual ~wordbreak_callback_base();
	1640
	1641	void finish();
	1642
	1643	friend int wordbreak_trampoline(int, void *);
	1644
	1645	wordbreak_callback_base &operator<<(unicode_char uc);
	1646
	1647	template<typename iter_type>
	1648	wordbreak_callback_base &operator()(iter_type beg_iter,
	1649	iter_type end_iter)
	1650	{
	1651	while (beg_iter != end_iter)
	1652	operator<<(*beg_iter++);
	1653	return *this;
	1654	}
	1655
	1656	wordbreak_callback_base &operator<<(const
	1657	std::vector<unicode_char>
	1658	&vec)
	1659	{
	1660	return operator()(vec.begin(), vec.end());
	1661	}
	1662	private:
	1663	virtual int operator()(bool);
	1664	};
	1665
	1666	/*
	1667	** A C++ wrapper for unicode_wbscan.
	1668	*/
	1669
	1670	class wordbreakscan {
	1671
	1672	unicode_wbscan_info_t handle;
	1673
	1674	wordbreakscan(const wordbreakscan &);
	1675	/* NOT IMPLEMENTED */
	1676
	1677	wordbreakscan &operator==(const wordbreakscan &);
	1678	/* NOT IMPLEMENTED */
	1679	public:
	1680
	1681	wordbreakscan();
	1682	~wordbreakscan();
	1683
	1684	bool operator<<(unicode_char uc);
	1685
	1686	size_t finish();
	1687	};
	1688
	1689	}
	1690	#endif
	1691
	1692	#endif