[hcoop/debian/courier-authlib.git] / unicode / unicode.h

#ifndef	unicode_h
#define	unicode_h

/*
** Copyright 2000-2001 Double Precision, Inc.
** See COPYING for distribution information.
**
** $Id: unicode.h,v 1.18 2008/07/20 16:24:52 mrsam Exp $
*/

#ifdef	__cplusplus
extern "C" {
#endif

#include	"../unicode/unicode_config.h" /* VPATH build */

#include	<stdlib.h>

#include	<stdio.h>
#if HAVE_WCHAR_H
#include	<wchar.h>
#endif

#if HAVE_STDDEF_H
#include	<stddef.h>
#endif

/*
** Character type used for Unicode text throughout this header.  It is an
** alias for the platform's wchar_t, so its width follows wchar_t.
*/
typedef wchar_t unicode_char;

/*
** Bit values for the flags member of struct unicode_info.
*/
#define UNICODE_UTF	1	/* Direct UTF mapping */
#define UNICODE_MB	2	/* Multibyte characters present */

/*
** Composite mapping, using shift in/out (verbatim text comparison may not
** work, must convert to UTF, or something).
** (replaces search_chset).
*/
#define UNICODE_SISO	4

#define UNICODE_USASCII	8	/* Character set is a US-ASCII superset */

/*
** Conversion errors can be replaced by adequate placeholders
** (replacement characters).
*/
#define UNICODE_REPLACEABLE	16

/* Encoding that is preferred for MIME message headers. */
#define UNICODE_HEADER_QUOPRI	32	/* Quoted-printable (Q) encoding */
#define UNICODE_HEADER_BASE64	64	/* Base64 (B) encoding */

/* Encoding that is preferred for the MIME message body. */
#define UNICODE_BODY_QUOPRI	128	/* Quoted-printable (Q) encoding */
#define UNICODE_BODY_BASE64	256	/* Base64 (B) encoding */

/*
** Descriptor for one character set: its name, its flag bits, and the
** functions that convert text held in that character set.  Every function
** receives the descriptor itself as its first argument.
*/
struct unicode_info
{
	const char *chset;	/* Official character set */
	int flags;		/* UNICODE_* bits defined above */

	/* Convert character string in this charset to unicode */
	unicode_char *(*c2u)(const struct unicode_info *,
			     const char *,
			     int *);

	/* Convert unicode to character string in this charset */
	char *(*u2c)(const struct unicode_info *,
		     const unicode_char *,
		     int *);

	/* Convert the string in this character set to uppercase */
	char *(*toupper_func)(const struct unicode_info *,
			      const char *,
			      int *);

	/* Convert the string in this character set to lowercase */
	char *(*tolower_func)(const struct unicode_info *,
			      const char *,
			      int *);

	/* Convert the string in this character set to titlecase */
	char *(*totitle_func)(const struct unicode_info *,
			      const char *,
			      int *);

	const struct unicode_info *search_chset;
};

/*
** Character set descriptors that are declared here and defined elsewhere.
*/
extern const struct unicode_info unicode_ISO8859_1;
extern const struct unicode_info unicode_UTF8;
extern const struct unicode_info unicode_IMAP_MODUTF7;

/*
** Unicode-to-charset (u2c) conversion for the ISO8859 family and for
** Windows-874.
** NOTE(review): the int * is presumably an error indicator and the trailing
** unicode_char * a per-charset mapping table -- confirm against the
** implementation.
*/
extern char *unicode_iso8859_u2c(const unicode_char *, int *,
	const unicode_char *);

extern char *unicode_windows874_u2c(const unicode_char *, int *,
	const unicode_char *);

/* ISO8859 charsets all share the same functions */

/*
** Charset-to-unicode (c2u) conversion.
** NOTE(review): the trailing unicode_char * is presumably the mapping table
** of the specific ISO8859 variant -- confirm against the implementation.
*/
extern unicode_char *unicode_iso8859_c2u(const char *, int *,
					const unicode_char *);

/*
** NOTE(review): presumably rewrites a string within the same charset using
** the byte table passed as the last argument (e.g. for case mapping) --
** confirm against the implementation.
*/
extern char *unicode_iso8859_convert(const char *, int *,
					const char *);

/*
** The IBM864 charset has some quirks, so it is given its own pair of
** conversion functions: charset-to-unicode and unicode-to-charset.
*/
extern unicode_char *unicode_ibm864_c2u(const char *,
					int *,
					const unicode_char *);

extern char *unicode_ibm864_u2c(const unicode_char *,
				int *,
				const unicode_char *);


/*
** One entry of the character set table: a character set name paired with
** a pointer to the descriptor for that character set.
*/
struct unicode_chsetlist
{
	const char *chsetname;			/* Character set name */
	const struct unicode_info *ptr;		/* Its descriptor */
};

/*
** Table of character sets, defined elsewhere.  The declaration carries no
** array length.
** NOTE(review): presumably terminated by a sentinel entry -- confirm.
*/
extern const struct unicode_chsetlist unicode_chsetlist[];
/*
** Returns a string identifying the default character set.
** NOTE(review): presumably the character set's name -- confirm.
**
** Declared with (void): in C an empty parameter list leaves the parameters
** unspecified, so calls that pass arguments would not be diagnosed.
*/
extern const char *unicode_default_chset(void);
/*
** Returns a character set descriptor for the given string.
** NOTE(review): presumably a lookup by character set name that yields NULL
** when the name is unknown -- confirm.
*/
extern const struct unicode_info *unicode_find(const char *);

/*
** UTF8 functions
*/

	/*
	** Convert Unicode to/from UTF-8.
	** NOTE(review): inputs are presumably 0-terminated and the results
	** presumably heap-allocated buffers owned by the caller -- confirm.
	*/

extern char *unicode_toutf8(const unicode_char *);
extern unicode_char *unicode_fromutf8(const char *);

	/*
	** Unicode upper/lower/title case conversion functions.  Each one
	** takes a single unicode_char and returns a unicode_char.
	*/

extern unicode_char unicode_uc(unicode_char);
extern unicode_char unicode_lc(unicode_char);
extern unicode_char unicode_tc(unicode_char);

	/*
	** Convert charsets to/from UTF-8.
	** NOTE(review): the unicode_info argument presumably names the
	** charset on the non-UTF-8 side, and the int * is presumably an
	** error indicator -- confirm.
	*/

extern char *unicode_ctoutf8(const struct unicode_info *, const char *,
			     int *);
extern char *unicode_cfromutf8(const struct unicode_info *, const char *,
			       int *);


	/*
	** Return width of unicode character.
	** NOTE(review): presumably the display width in columns, in the
	** manner of wcwidth() -- confirm.
	*/

extern int unicode_wcwidth(unicode_char c);

	/*
	** Internal functions:
	** NOTE(review): the int * arguments are presumably error indicators,
	** as with the c2u/u2c members of struct unicode_info -- confirm.
	*/

extern unicode_char *unicode_utf8_tou(const char *, int *);
extern char *unicode_utf8_fromu(const unicode_char *, int *);

	/*
	** NOTE(review): presumably one pass of the unicode-to-UTF-8
	** conversion that writes into the given buffer and returns a byte
	** count -- confirm.
	*/
size_t unicode_utf8_fromu_pass(const unicode_char *, char *);

	/*
	** NOTE(review): presumably the largest number of bytes a single
	** character can occupy in UTF-8 -- confirm.
	*/
#define UNICODE_UTF8_MAXLEN	6

extern char *unicode_convert(const char *txt,
			     const struct unicode_info *from,
			     const struct unicode_info *to);
	/*
	** Convert txt from the 'from' character set to the 'to' character
	** set.
	** errno=EINVAL if conversion could not be performed.
	*/

extern char *unicode_xconvert(const char *txt,
			      const struct unicode_info *from,
			      const struct unicode_info *to);
	/*
	** Like unicode_convert(), except unconvertible chars are replaced
	** by periods (or something similar), instead of aborting with EINVAL.
	*/


extern char *unicode_convert_fromchset(const char *txt,
				    const char *from,
				    const struct unicode_info *to);
	/*
	** Like unicode_convert(), except that 'from' is a string and we
	** search for a character set from a list of chsets we support.
	** errno=EINVAL if 'to' character set does not exist.
	** NOTE(review): 'from' is the argument that is a string here, so the
	** line above probably means the 'from' character set -- confirm.
	*/

	/*
	** Convert between unicode and modified-UTF7 encoding used for
	** IMAP folder names.
	*/

unicode_char *unicode_modutf7touc(const char *s, int *err);

	/* err < 0 if out of memory, else ptr to first illegal modutf7-char */
	/* This can be used to test if string is a valid mod-utf7 string */
	/*
	** NOTE(review): *err is an int, so "ptr" above presumably means an
	** offset into s -- confirm against the implementation.
	*/

	/* Convert unicode to modified-UTF7. */
char *unicode_uctomodutf7(const unicode_char *);

	/*
	** NOTE(review): the second argument is presumably a list of extra
	** characters that must also be encoded -- confirm.
	*/
char *unicode_uctomodutf7x(const unicode_char *, const unicode_char *);

#ifdef	__cplusplus
}
#endif

#endif
Commit	Line	Data
8d138742 CE	1	#ifndef unicode_h
	2	#define unicode_h
	3
	4	/*
	5	** Copyright 2000-2001 Double Precision, Inc.
	6	** See COPYING for distribution information.
	7	**
	8	** $Id: unicode.h,v 1.18 2008/07/20 16:24:52 mrsam Exp $
	9	*/
	10
	11	#ifdef __cplusplus
	12	extern "C" {
	13	#endif
	14
	15	#include "../unicode/unicode_config.h" /* VPATH build */
	16
	17	#include <stdlib.h>
	18
	19	#include <stdio.h>
	20	#if HAVE_WCHAR_H
	21	#include <wchar.h>
	22	#endif
	23
	24	#if HAVE_STDDEF_H
	25	#include <stddef.h>
	26	#endif
	27
	28	typedef wchar_t unicode_char;
	29
	30	struct unicode_info {
	31	const char *chset; /* Official character set */
	32	int flags; /* Flags */
	33
	34	#define UNICODE_UTF 1 /* Direct UTF mapping */
	35	#define UNICODE_MB 2 /* Multibyte characters present */
	36	#define UNICODE_SISO 4 /*
	37	** Composite mapping, using shift in/out
	38	** (verbatim text comparison may not work,
	39	** must convert to UTF, or something).
	40	** (replaces search_chset).
	41	*/
	42
	43	#define UNICODE_USASCII 8 /* Character set is a US-ASCII superset */
	44	#define UNICODE_REPLACEABLE 16 /*
	45	* Conversion errors can be replaced by
	46	* adequate placeholders (replacement
	47	* characters).
	48	*/
	49	#define UNICODE_HEADER_QUOPRI 32 /*
	50	** Quoted-printable (Q) encoding is preferred
	51	** for MIME message headers.
	52	*/
	53	#define UNICODE_HEADER_BASE64 64 /*
	54	** Base64 (B) encoding is preferred
	55	** for MIME message headers.
	56	*/
	57	#define UNICODE_BODY_QUOPRI 128 /*
	58	** Quoted-printable (Q) encoding is preferred
	59	** MIME message body.
	60	*/
	61	#define UNICODE_BODY_BASE64 256 /*
	62	** Base64 (B) encoding is preferred
	63	** for MIME message body.
	64	*/
65
66	unicode_char *(*c2u)(const struct unicode_info *, const char *, int *);
67	/* Convert character string in this charset to unicode */
68
69	char *(*u2c)(const struct unicode_info *, const unicode_char *, int *);
70	/* Convert unicode to character string in this charset */
71
72	/* Convert the string in this character set to upper/lower/titlecase */
73
74	char *(*toupper_func)(const struct unicode_info *,
75	const char *, int *);
76	char *(*tolower_func)(const struct unicode_info *,
77	const char *, int *);
78	char *(*totitle_func)(const struct unicode_info *,
79	const char *, int *);
80
81	const struct unicode_info *search_chset;
82	} ;
83
84	extern const struct unicode_info unicode_ISO8859_1;
85	extern const struct unicode_info unicode_UTF8;
86	extern const struct unicode_info unicode_IMAP_MODUTF7;
87
	88	extern char *unicode_iso8859_u2c(const unicode_char *, int *,
89	const unicode_char *);
90
	91	extern char *unicode_windows874_u2c(const unicode_char *, int *,
92	const unicode_char *);
93
94	/* ISO8859 charsets all share the same functions */
95
	96	extern unicode_char *unicode_iso8859_c2u(const char *, int *,
97	const unicode_char *);
98
	99	extern char *unicode_iso8859_convert(const char *, int *,
100	const char *);
101
102	/* IBM864 charset has some funkiness */
103
	104	unicode_char *unicode_ibm864_c2u(const char *, int *,
105	const unicode_char *);
106
	107	char *unicode_ibm864_u2c(const unicode_char *, int *,
108	const unicode_char *);
109
110
111	struct unicode_chsetlist {
112	const char *chsetname;
113	const struct unicode_info *ptr;
114	} ;
115
116	extern const struct unicode_chsetlist unicode_chsetlist[];
117	extern const char *unicode_default_chset();
	118	extern const struct unicode_info *unicode_find(const char *);
119
120	/*
121	** UTF8 functions
122	*/
123
124	/* Convert Unicode to/from UTF-8 */
125
	126	extern char *unicode_toutf8(const unicode_char *);
	127	extern unicode_char *unicode_fromutf8(const char *);
128
129	/* Unicode upper/lower/title case conversion functions */
130
131	extern unicode_char unicode_uc(unicode_char);
132	extern unicode_char unicode_lc(unicode_char);
133	extern unicode_char unicode_tc(unicode_char);
134
135	/* Convert charsets to/from UTF-8 */
136
	137	extern char *unicode_ctoutf8(const struct unicode_info *, const char *,
138	int *);
	139	extern char *unicode_cfromutf8(const struct unicode_info *, const char *,
140	int *);
141
142
143	/* Return width of unicode character */
144
145	extern int unicode_wcwidth(unicode_char c);
146
147	/* Internal functions: */
148
	149	extern unicode_char *unicode_utf8_tou(const char *, int *);
	150	extern char *unicode_utf8_fromu(const unicode_char *, int *);
151
	152	size_t unicode_utf8_fromu_pass(const unicode_char *, char *);
153
154	#define UNICODE_UTF8_MAXLEN 6
155
	156	extern char *unicode_convert(const char *txt,
157	const struct unicode_info *from,
158	const struct unicode_info *to);
159	/* errno=EINVAL if conversion could not be performed */
160
	161	extern char *unicode_xconvert(const char *txt,
162	const struct unicode_info *from,
163	const struct unicode_info *to);
164	/* Like unicode_convert(), except unconvertable chars are replaced
165	** by periods (or something similar), instead of aborting with EINVAL
166	*/
167
168
	169	extern char *unicode_convert_fromchset(const char *txt,
170	const char *from,
171	const struct unicode_info *to);
172	/* Like, unicode_convert, except that we search for a character set
173	** from a list of chsets we support.
174	** errno=EINVAL if 'to' character set does not exist.
175	*/
176
177	/*
178	** Convert between unicode and modified-UTF7 encoding used for
179	** IMAP folder names.
180	*/
181
	182	unicode_char *unicode_modutf7touc(const char *s, int *err);
183
184	/* err < 0 if out of memory, else ptr to first illegal modutf7-char */
185	/* This can be used to test if string is a valid mod-utf7 string */
186
	187	char *unicode_uctomodutf7(const unicode_char *);
188
	189	char *unicode_uctomodutf7x(const unicode_char *, const unicode_char *);
190
191	#ifdef __cplusplus
192	}
193	#endif
194
195	#endif