Imported Upstream version 0.63.0
[hcoop/debian/courier-authlib.git] / unicode / unicode.h
1 #ifndef unicode_h
2 #define unicode_h
3
4 /*
5 ** Copyright 2000-2001 Double Precision, Inc.
6 ** See COPYING for distribution information.
7 **
8 ** $Id: unicode.h,v 1.18 2008/07/20 16:24:52 mrsam Exp $
9 */
10
11 #ifdef __cplusplus
12 extern "C" {
13 #endif
14
15 #include "../unicode/unicode_config.h" /* VPATH build */
16
17 #include <stdlib.h>
18
19 #include <stdio.h>
20 #if HAVE_WCHAR_H
21 #include <wchar.h>
22 #endif
23
24 #if HAVE_STDDEF_H
25 #include <stddef.h>
26 #endif
27
/*
** Character type used for all Unicode strings declared in this header.
** It is a plain alias for the platform's wchar_t.
** NOTE(review): the width of wchar_t is implementation-defined -- confirm
** it is wide enough for every code point callers expect to store.
*/
28 typedef wchar_t unicode_char;
29
/*
** Descriptor for one character set: its official name, a word of
** capability flags, and the callbacks that convert text between this
** character set and unicode_char strings or change its case.
**
** The UNICODE_* macros below are ordinary preprocessor definitions (they
** are not scoped to the struct); they are written here only to sit next to
** the flags member. Each one is a distinct bit value.
**
** NOTE(review): this header does not say what the trailing "int *"
** argument of each callback means, nor who owns the returned buffers --
** presumably an error indicator and a heap allocation; confirm against
** the implementations.
*/
30 struct unicode_info {
31 const char *chset; /* Official character set */
32 int flags; /* Flags */
33
34 #define UNICODE_UTF 1 /* Direct UTF mapping */
35 #define UNICODE_MB 2 /* Multibyte characters present */
36 #define UNICODE_SISO 4 /*
37 ** Composite mapping, using shift in/out
38 ** (verbatim text comparison may not work,
39 ** must convert to UTF, or something).
40 ** (replaces search_chset).
41 */
42
43 #define UNICODE_USASCII 8 /* Character set is a US-ASCII superset */
44 #define UNICODE_REPLACEABLE 16 /*
45 * Conversion errors can be replaced by
46 * adequate placeholders (replacement
47 * characters).
48 */
49 #define UNICODE_HEADER_QUOPRI 32 /*
50 ** Quoted-printable (Q) encoding is preferred
51 ** for MIME message headers.
52 */
53 #define UNICODE_HEADER_BASE64 64 /*
54 ** Base64 (B) encoding is preferred
55 ** for MIME message headers.
56 */
57 #define UNICODE_BODY_QUOPRI 128 /*
58 ** Quoted-printable (Q) encoding is preferred
59 ** for MIME message body.
60 */
61 #define UNICODE_BODY_BASE64 256 /*
62 ** Base64 (B) encoding is preferred
63 ** for MIME message body.
64 */
65
66 unicode_char *(*c2u)(const struct unicode_info *, const char *, int *);
67 /* Convert character string in this charset to unicode */
68
69 char *(*u2c)(const struct unicode_info *, const unicode_char *, int *);
70 /* Convert unicode to character string in this charset */
71
72 /* Convert the string in this character set to upper/lower/titlecase */
73
74 char *(*toupper_func)(const struct unicode_info *,
75 const char *, int *);
76 char *(*tolower_func)(const struct unicode_info *,
77 const char *, int *);
78 char *(*totitle_func)(const struct unicode_info *,
79 const char *, int *);
80
/*
** NOTE(review): the purpose of search_chset is not documented here; the
** UNICODE_SISO comment above says that flag replaces it -- confirm
** whether this member is still used.
*/
81 const struct unicode_info *search_chset;
82 } ;
83
/*
** Character set descriptors defined elsewhere in the library and exported
** by name. Judging by the identifiers these describe ISO-8859-1, UTF-8 and
** the IMAP modified UTF-7 encoding -- confirm against the definitions.
*/
84 extern const struct unicode_info unicode_ISO8859_1;
85 extern const struct unicode_info unicode_UTF8;
86 extern const struct unicode_info unicode_IMAP_MODUTF7;
87
/*
** Shared conversion helpers. Unlike the unicode_info callbacks these do
** not take a descriptor; the last argument is an extra table or string.
** NOTE(review): presumably the const unicode_char * argument is the
** per-charset mapping table, and the int * an error indicator -- neither
** is stated in this header; confirm against the implementations.
*/
88 extern char *unicode_iso8859_u2c(const unicode_char *, int *,
89 const unicode_char *);
90
91 extern char *unicode_windows874_u2c(const unicode_char *, int *,
92 const unicode_char *);
93
94 /* ISO8859 charsets all share the same functions */
95
96 extern unicode_char *unicode_iso8859_c2u(const char *, int *,
97 const unicode_char *);
98
/*
** NOTE(review): char-to-char variant; the meaning of the trailing
** const char * argument is not documented here -- confirm.
*/
99 extern char *unicode_iso8859_convert(const char *, int *,
100 const char *);
101
102 /* IBM864 charset has some funkiness */
103
/*
** IBM864-specific counterparts of the c2u/u2c helpers, with the same
** parameter shapes as the ISO8859 versions. They are declared without the
** explicit "extern" keyword, which makes no difference for function
** declarations.
*/
104 unicode_char *unicode_ibm864_c2u(const char *, int *,
105 const unicode_char *);
106
107 char *unicode_ibm864_u2c(const unicode_char *, int *,
108 const unicode_char *);
109
110
/*
** One entry of the table of known character sets: a name paired with a
** pointer to the matching unicode_info descriptor.
** NOTE(review): how the end of the table is marked is not visible in this
** header -- confirm before iterating over it.
*/
111 struct unicode_chsetlist {
112 const char *chsetname;
113 const struct unicode_info *ptr;
114 } ;
115
/*
** Table of known character sets (defined elsewhere), the accessor for the
** default character set name, and lookup of a descriptor by name.
**
** unicode_default_chset is declared with an explicit (void) so that it is
** a real prototype: in C before C23 an empty parameter list "()" leaves the
** arguments unchecked. The definition and all correct callers are
** unaffected.
**
** NOTE(review): what unicode_find returns for an unknown name is not
** stated in this header -- presumably NULL; confirm.
*/
116 extern const struct unicode_chsetlist unicode_chsetlist[];
117 extern const char *unicode_default_chset(void);
118 extern const struct unicode_info *unicode_find(const char *);
119
120 /*
121 ** UTF8 functions
122 */
123
124 /* Convert Unicode to/from UTF-8 */
125
/*
** Whole-string conversions between unicode_char strings and UTF-8.
** NOTE(review): ownership of the returned buffer and the failure return
** value are not documented here -- presumably heap-allocated and NULL on
** error; confirm.
*/
126 extern char *unicode_toutf8(const unicode_char *);
127 extern unicode_char *unicode_fromutf8(const char *);
128
129 /* Unicode upper/lower/title case conversion functions */
130
/*
** Single-character case mapping: each takes one unicode_char and returns
** one unicode_char. Presumably uc/lc/tc are upper, lower and title case
** respectively, in the order of the section comment -- confirm.
*/
131 extern unicode_char unicode_uc(unicode_char);
132 extern unicode_char unicode_lc(unicode_char);
133 extern unicode_char unicode_tc(unicode_char);
134
135 /* Convert charsets to/from UTF-8 */
136
/*
** Convert a string between the character set described by the
** unicode_info argument and UTF-8 (ctoutf8: charset to UTF-8, cfromutf8:
** UTF-8 to charset, per the section comment).
** NOTE(review): the int * argument and buffer ownership are not
** documented in this header -- confirm against the implementation.
*/
137 extern char *unicode_ctoutf8(const struct unicode_info *, const char *,
138 int *);
139 extern char *unicode_cfromutf8(const struct unicode_info *, const char *,
140 int *);
141
142
143 /* Return width of unicode character */
144
/*
** NOTE(review): the unit of the returned width (presumably display
** columns) and the result for non-printable characters are not stated
** here -- confirm.
*/
145 extern int unicode_wcwidth(unicode_char c);
146
147 /* Internal functions: */
148
/*
** Lower-level UTF-8 conversions; the section comment marks them as
** internal to the library.
** NOTE(review): the int * argument is presumably an error indicator --
** confirm.
*/
149 extern unicode_char *unicode_utf8_tou(const char *, int *);
150 extern char *unicode_utf8_fromu(const unicode_char *, int *);
151
/*
** NOTE(review): takes a unicode string and an output buffer and returns a
** size_t; presumably a size or count, and possibly usable as a sizing pass
** when the buffer is NULL -- none of this is stated here; confirm.
*/
152 size_t unicode_utf8_fromu_pass(const unicode_char *, char *);
153
/*
** NOTE(review): presumably the maximum number of bytes one character can
** occupy when encoded as UTF-8 -- confirm.
*/
154 #define UNICODE_UTF8_MAXLEN 6
155
/*
** Convert txt from the character set described by 'from' to the one
** described by 'to'.
** NOTE(review): the failure return value and ownership of the result are
** not stated here -- presumably NULL and heap-allocated; confirm.
*/
156 extern char *unicode_convert(const char *txt,
157 const struct unicode_info *from,
158 const struct unicode_info *to);
159 /* errno=EINVAL if conversion could not be performed */
160
161 extern char *unicode_xconvert(const char *txt,
162 const struct unicode_info *from,
163 const struct unicode_info *to);
164 /* Like unicode_convert(), except unconvertible chars are replaced
165 ** by periods (or something similar), instead of aborting with EINVAL
166 */
167
168
169 extern char *unicode_convert_fromchset(const char *txt,
170 const char *from,
171 const struct unicode_info *to);
172 /* Like unicode_convert(), except that we search for a character set
173 ** from a list of chsets we support.
174 ** errno=EINVAL if 'to' character set does not exist.
** NOTE(review): 'to' is already a descriptor here and only 'from' is
** passed by name, so the line above presumably refers to the 'from'
** character set -- confirm against the implementation.
175 */
176
177 /*
178 ** Convert between unicode and modified-UTF7 encoding used for
179 ** IMAP folder names.
180 */
181
/*
** Decode the modified-UTF7 string s into a unicode_char string, reporting
** problems through *err as described below.
*/
182 unicode_char *unicode_modutf7touc(const char *s, int *err);
183
184 /* err < 0 if out of memory, else ptr to first illegal modutf7-char */
185 /* This can be used to test if string is a valid mod-utf7 string */
/*
** NOTE(review): err is an int *, so "ptr" above cannot be a real pointer;
** presumably *err receives an offset into s -- confirm.
*/
186
/*
** Encode a unicode_char string as modified UTF7 (the reverse direction of
** unicode_modutf7touc, per the section comment).
*/
187 char *unicode_uctomodutf7(const unicode_char *);
188
/*
** NOTE(review): variant taking a second unicode_char string; presumably a
** set of extra characters to treat specially when encoding -- not
** documented here; confirm.
*/
189 char *unicode_uctomodutf7x(const unicode_char *, const unicode_char *);
190
191 #ifdef __cplusplus
192 }
193 #endif
194
195 #endif