5 ** Copyright 2000-2001 Double Precision, Inc.
6 ** See COPYING for distribution information.
8 ** $Id: unicode.h,v 1.18 2008/07/20 16:24:52 mrsam Exp $
15 #include "../unicode/unicode_config.h" /* VPATH build */
/* Character type used for Unicode text in this header: an alias for wchar_t. */
typedef wchar_t unicode_char;
/* Official character set */
const char *chset;

/* Flags */
int flags;
/*
** Bit values, presumably for the `flags` field declared just above
** (they directly follow it; confirm against the users of that field).
** Every comment is self-contained and explicitly terminated so that it
** cannot swallow the definition that follows it.
*/

/* Direct UTF mapping */
#define UNICODE_UTF 1

/* Multibyte characters present */
#define UNICODE_MB 2

/*
** Composite mapping, using shift in/out
** (verbatim text comparison may not work,
** must convert to UTF, or something).
** (replaces search_chset).
*/
#define UNICODE_SISO 4

/* Character set is a US-ASCII superset */
#define UNICODE_USASCII 8

/*
** Conversion errors can be replaced by
** adequate placeholders (replacement characters).
*/
#define UNICODE_REPLACEABLE 16

/*
** Quoted-printable (Q) encoding is preferred
** for MIME message headers.
*/
#define UNICODE_HEADER_QUOPRI 32

/*
** Base64 (B) encoding is preferred
** for MIME message headers.
*/
#define UNICODE_HEADER_BASE64 64

/*
** Quoted-printable (Q) encoding is preferred
** for MIME message body.
*/
#define UNICODE_BODY_QUOPRI 128

/*
** Base64 (B) encoding is preferred
** for MIME message body.
*/
#define UNICODE_BODY_BASE64 256
66 unicode_char
*(*c2u
)(const struct unicode_info
*, const char *, int *);
67 /* Convert character string in this charset to unicode */
69 char *(*u2c
)(const struct unicode_info
*, const unicode_char
*, int *);
70 /* Convert unicode to character string in this charset */
72 /* Convert the string in this character set to upper/lower/titlecase */
74 char *(*toupper_func
)(const struct unicode_info
*,
76 char *(*tolower_func
)(const struct unicode_info
*,
78 char *(*totitle_func
)(const struct unicode_info
*,
/*
** Pointer to another character set descriptor.
** NOTE(review): the UNICODE_SISO flag comment says that flag
** "replaces search_chset"; confirm whether this field is still consulted.
*/
const struct unicode_info *search_chset;
/*
** Character set descriptor objects.  Only the declarations are visible
** here; the objects themselves are defined elsewhere.
*/
extern const struct unicode_info unicode_ISO8859_1;
extern const struct unicode_info unicode_UTF8;
extern const struct unicode_info unicode_IMAP_MODUTF7;
88 extern char *unicode_iso8859_u2c(const unicode_char
*, int *,
89 const unicode_char
*);
91 extern char *unicode_windows874_u2c(const unicode_char
*, int *,
92 const unicode_char
*);
94 /* ISO8859 charsets all share the same functions */
96 extern unicode_char
*unicode_iso8859_c2u(const char *, int *,
97 const unicode_char
*);
99 extern char *unicode_iso8859_convert(const char *, int *,
102 /* IBM864 charset has some funkiness */
104 unicode_char
*unicode_ibm864_c2u(const char *, int *,
105 const unicode_char
*);
107 char *unicode_ibm864_u2c(const unicode_char
*, int *,
108 const unicode_char
*);
111 struct unicode_chsetlist
{
112 const char *chsetname
;
113 const struct unicode_info
*ptr
;
116 extern const struct unicode_chsetlist unicode_chsetlist
[];
/*
** Presumably returns the name of the default character set (inferred
** from the function name; confirm against the definition).
**
** Declared with (void) rather than an empty parameter list so that this
** is a true prototype: calls that pass stray arguments are now rejected
** at compile time instead of being silently accepted.
*/
extern const char *unicode_default_chset(void);
/*
** Takes a string and returns a pointer to a character set descriptor.
** NOTE(review): presumably a lookup by character set name, with NULL
** when nothing matches -- confirm against the definition.
*/
extern const struct unicode_info *unicode_find(const char *);
124 /* Convert Unicode to/from UTF-8 */
126 extern char *unicode_toutf8(const unicode_char
*);
127 extern unicode_char
*unicode_fromutf8(const char *);
129 /* Unicode upper/lower/title case conversion functions */
131 extern unicode_char
unicode_uc(unicode_char
);
132 extern unicode_char
unicode_lc(unicode_char
);
133 extern unicode_char
unicode_tc(unicode_char
);
135 /* Convert charsets to/from UTF-8 */
137 extern char *unicode_ctoutf8(const struct unicode_info
*, const char *,
139 extern char *unicode_cfromutf8(const struct unicode_info
*, const char *,
143 /* Return width of unicode character */
145 extern int unicode_wcwidth(unicode_char c
);
147 /* Internal functions: */
149 extern unicode_char
*unicode_utf8_tou(const char *, int *);
150 extern char *unicode_utf8_fromu(const unicode_char
*, int *);
152 size_t unicode_utf8_fromu_pass(const unicode_char
*, char *);
/*
** NOTE(review): presumably the largest number of bytes a single
** character can occupy in UTF-8 as handled by this library -- confirm.
*/
#define UNICODE_UTF8_MAXLEN 6
/*
** Convert txt from the character set `from` to the character set `to`.
** errno=EINVAL if conversion could not be performed.
** NOTE(review): ownership of the returned string is not stated here;
** confirm against the definition.
*/
extern char *unicode_convert(const char *txt,
			     const struct unicode_info *from,
			     const struct unicode_info *to);
/*
** Like unicode_convert(), except unconvertible characters are replaced
** by periods (or something similar), instead of aborting with EINVAL.
**
** This comment is placed ahead of the declaration and explicitly
** terminated so it cannot swallow the declarations that follow.
*/
extern char *unicode_xconvert(const char *txt,
			      const struct unicode_info *from,
			      const struct unicode_info *to);
169 extern char *unicode_convert_fromchset(const char *txt
,
171 const struct unicode_info
*to
);
172 /* Like, unicode_convert, except that we search for a character set
173 ** from a list of chsets we support.
174 ** errno=EINVAL if 'to' character set does not exist.
178 ** Convert between unicode and modified-UTF7 encoding used for
179 ** IMAP folder names.
182 unicode_char
*unicode_modutf7touc(const char *s
, int *err
);
184 /* err < 0 if out of memory, else ptr to first illegal modutf7-char */
185 /* This can be used to test if string is a valid mod-utf7 string */
187 char *unicode_uctomodutf7(const unicode_char
*);
189 char *unicode_uctomodutf7x(const unicode_char
*, const unicode_char
*);