Commit | Line | Data |
---|---|---|
8d138742 CE |
1 | #ifndef unicode_h |
2 | #define unicode_h | |
3 | ||
4 | /* | |
5 | ** Copyright 2000-2001 Double Precision, Inc. | |
6 | ** See COPYING for distribution information. | |
7 | ** | |
8 | ** $Id: unicode.h,v 1.18 2008/07/20 16:24:52 mrsam Exp $ | |
9 | */ | |
10 | ||
11 | #ifdef __cplusplus | |
12 | extern "C" { | |
13 | #endif | |
14 | ||
15 | #include "../unicode/unicode_config.h" /* VPATH build */ | |
16 | ||
17 | #include <stdlib.h> | |
18 | ||
19 | #include <stdio.h> | |
20 | #if HAVE_WCHAR_H | |
21 | #include <wchar.h> | |
22 | #endif | |
23 | ||
24 | #if HAVE_STDDEF_H | |
25 | #include <stddef.h> | |
26 | #endif | |
27 | ||
28 | typedef wchar_t unicode_char; | |
29 | ||
30 | struct unicode_info { /* Describes one character set: name, flags, and conversion/case-mapping callbacks */ |
31 | const char *chset; /* Official character set */ | |
32 | int flags; /* Flags: UNICODE_* values below (distinct bits; presumably OR-ed together - confirm) */ | |
33 | ||
34 | #define UNICODE_UTF 1 /* Direct UTF mapping */ | |
35 | #define UNICODE_MB 2 /* Multibyte characters present */ | |
36 | #define UNICODE_SISO 4 /* | |
37 | ** Composite mapping, using shift in/out | |
38 | ** (verbatim text comparison may not work, | |
39 | ** must convert to UTF, or something). | |
40 | ** (replaces search_chset). | |
41 | */ | |
42 | ||
43 | #define UNICODE_USASCII 8 /* Character set is a US-ASCII superset */ | |
44 | #define UNICODE_REPLACEABLE 16 /* | |
45 | * Conversion errors can be replaced by | |
46 | * adequate placeholders (replacement | |
47 | * characters). | |
48 | */ | |
49 | #define UNICODE_HEADER_QUOPRI 32 /* | |
50 | ** Quoted-printable (Q) encoding is preferred | |
51 | ** for MIME message headers. | |
52 | */ | |
53 | #define UNICODE_HEADER_BASE64 64 /* | |
54 | ** Base64 (B) encoding is preferred | |
55 | ** for MIME message headers. | |
56 | */ | |
57 | #define UNICODE_BODY_QUOPRI 128 /* | |
58 | ** Quoted-printable (Q) encoding is preferred | |
59 | ** for MIME message body. | |
60 | */ | |
61 | #define UNICODE_BODY_BASE64 256 /* | |
62 | ** Base64 (B) encoding is preferred | |
63 | ** for MIME message body. | |
64 | */ | |
65 | ||
66 | unicode_char *(*c2u)(const struct unicode_info *, const char *, int *); | |
67 | /* Convert character string in this charset to unicode */ | |
68 | ||
69 | char *(*u2c)(const struct unicode_info *, const unicode_char *, int *); | |
70 | /* Convert unicode to character string in this charset */ | |
71 | ||
72 | /* Convert the string in this character set to upper/lower/titlecase */ | |
73 | ||
74 | char *(*toupper_func)(const struct unicode_info *, | |
75 | const char *, int *); | |
76 | char *(*tolower_func)(const struct unicode_info *, | |
77 | const char *, int *); | |
78 | char *(*totitle_func)(const struct unicode_info *, | |
79 | const char *, int *); | |
80 | ||
81 | const struct unicode_info *search_chset; /* NOTE(review): purpose not documented here; the UNICODE_SISO comment says that flag replaces it - confirm */ | |
82 | } ; | |
83 | ||
84 | extern const struct unicode_info unicode_ISO8859_1; | |
85 | extern const struct unicode_info unicode_UTF8; | |
86 | extern const struct unicode_info unicode_IMAP_MODUTF7; | |
87 | ||
88 | extern char *unicode_iso8859_u2c(const unicode_char *, int *, | |
89 | const unicode_char *); | |
90 | ||
91 | extern char *unicode_windows874_u2c(const unicode_char *, int *, | |
92 | const unicode_char *); | |
93 | ||
94 | /* ISO8859 charsets all share the same functions */ | |
95 | ||
96 | extern unicode_char *unicode_iso8859_c2u(const char *, int *, | |
97 | const unicode_char *); | |
98 | ||
99 | extern char *unicode_iso8859_convert(const char *, int *, | |
100 | const char *); | |
101 | ||
102 | /* IBM864 charset has some funkiness */ | |
103 | ||
104 | unicode_char *unicode_ibm864_c2u(const char *, int *, | |
105 | const unicode_char *); | |
106 | ||
107 | char *unicode_ibm864_u2c(const unicode_char *, int *, | |
108 | const unicode_char *); | |
109 | ||
110 | ||
111 | struct unicode_chsetlist { /* One entry pairing a name string with a unicode_info descriptor */ |
112 | const char *chsetname; /* Name string for this entry */ | |
113 | const struct unicode_info *ptr; /* Descriptor this entry refers to */ | |
114 | } ; | |
115 | ||
116 | extern const struct unicode_chsetlist unicode_chsetlist[]; | |
117 | extern const char *unicode_default_chset(void); /* takes no arguments; (void) makes this a real prototype in C */ |
118 | extern const struct unicode_info *unicode_find(const char *); | |
119 | ||
120 | /* | |
121 | ** UTF8 functions | |
122 | */ | |
123 | ||
124 | /* Convert Unicode to/from UTF-8 */ | |
125 | ||
126 | extern char *unicode_toutf8(const unicode_char *); | |
127 | extern unicode_char *unicode_fromutf8(const char *); | |
128 | ||
129 | /* Unicode upper/lower/title case conversion functions */ | |
130 | ||
131 | extern unicode_char unicode_uc(unicode_char); | |
132 | extern unicode_char unicode_lc(unicode_char); | |
133 | extern unicode_char unicode_tc(unicode_char); | |
134 | ||
135 | /* Convert charsets to/from UTF-8 */ | |
136 | ||
137 | extern char *unicode_ctoutf8(const struct unicode_info *, const char *, | |
138 | int *); | |
139 | extern char *unicode_cfromutf8(const struct unicode_info *, const char *, | |
140 | int *); | |
141 | ||
142 | ||
143 | /* Return width of unicode character */ | |
144 | ||
145 | extern int unicode_wcwidth(unicode_char c); | |
146 | ||
147 | /* Internal functions: */ | |
148 | ||
149 | extern unicode_char *unicode_utf8_tou(const char *, int *); | |
150 | extern char *unicode_utf8_fromu(const unicode_char *, int *); | |
151 | ||
152 | size_t unicode_utf8_fromu_pass(const unicode_char *, char *); | |
153 | ||
154 | #define UNICODE_UTF8_MAXLEN 6 | |
155 | ||
156 | extern char *unicode_convert(const char *txt, | |
157 | const struct unicode_info *from, | |
158 | const struct unicode_info *to); | |
159 | /* errno=EINVAL if conversion could not be performed */ | |
160 | ||
161 | extern char *unicode_xconvert(const char *txt, | |
162 | const struct unicode_info *from, | |
163 | const struct unicode_info *to); | |
164 | /* Like unicode_convert(), except unconvertible chars are replaced | |
165 | ** by periods (or something similar), instead of aborting with EINVAL | |
166 | */ | |
167 | ||
168 | ||
169 | extern char *unicode_convert_fromchset(const char *txt, | |
170 | const char *from, | |
171 | const struct unicode_info *to); | |
172 | /* Like unicode_convert(), except that the source character set is | |
173 | ** looked up by name ('from') in the list of chsets we support. | |
174 | ** errno=EINVAL if 'to' character set does not exist. | |
175 | */ | |
176 | ||
177 | /* | |
178 | ** Convert between unicode and modified-UTF7 encoding used for | |
179 | ** IMAP folder names. | |
180 | */ | |
181 | ||
182 | unicode_char *unicode_modutf7touc(const char *s, int *err); | |
183 | ||
184 | /* err < 0 if out of memory, else ptr to first illegal modutf7-char */ | |
185 | /* This can be used to test if string is a valid mod-utf7 string */ | |
186 | ||
187 | char *unicode_uctomodutf7(const unicode_char *); | |
188 | ||
189 | char *unicode_uctomodutf7x(const unicode_char *, const unicode_char *); | |
190 | ||
191 | #ifdef __cplusplus | |
192 | } | |
193 | #endif | |
194 | ||
195 | #endif |