Imported Upstream version 0.63.0
[hcoop/debian/courier-authlib.git] / unicode / utf8.c
1
2 /*
3 ** Copyright 2000-2002 Double Precision, Inc.
4 ** See COPYING for distribution information.
5 **
6 ** $Id: utf8.c,v 1.4 2002/11/18 00:54:22 mrsam Exp $
7 */
8
9 #include "unicode.h"
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13
14 unicode_char *unicode_utf8_tou(const char *cp, int *ip)
15 {
16 size_t l;
17 size_t n=1;
18 unicode_char *p, uc;
19
20 for (l=0; cp[l]; ++n)
21 {
22 if ((cp[l] & 0x80) == 0)
23 {
24 ++l;
25 continue;
26 }
27
28 if ((cp[l] & 0xE0) == 0xC0)
29 {
30 if ((cp[l+1] & 0xC0) == 0x80)
31 {
32 l += 2;
33 continue;
34 }
35 }
36
37 if ((cp[l] & 0xF0) == 0xE0)
38 {
39 if ((cp[l+1] & 0xC0) == 0x80 &&
40 (cp[l+2] & 0xC0) == 0x80)
41 {
42 l += 3;
43 continue;
44 }
45 }
46
47 if ((cp[l] & 0xF8) == 0xF0)
48 {
49 if ((cp[l+1] & 0xC0) == 0x80 &&
50 (cp[l+2] & 0xC0) == 0x80 &&
51 (cp[l+3] & 0xC0) == 0x80)
52 {
53 l += 4;
54 continue;
55 }
56 }
57
58 if ((cp[l] & 0xFC) == 0xF8)
59 {
60 if ((cp[l+1] & 0xC0) == 0x80 &&
61 (cp[l+2] & 0xC0) == 0x80 &&
62 (cp[l+3] & 0xC0) == 0x80 &&
63 (cp[l+4] & 0xC0) == 0x80)
64 {
65 l += 5;
66 continue;
67 }
68 }
69
70 if ((cp[l] & 0xFE) == 0xFC)
71 {
72 if ((cp[l+1] & 0xC0) == 0x80 &&
73 (cp[l+2] & 0xC0) == 0x80 &&
74 (cp[l+3] & 0xC0) == 0x80 &&
75 (cp[l+4] & 0xC0) == 0x80 &&
76 (cp[l+5] & 0xC0) == 0x80)
77 {
78 l += 6;
79 continue;
80 }
81 }
82
83 if (ip)
84 {
85 *ip= l;
86 return (0);
87 }
88 ++l;
89 }
90 if (ip)
91 *ip = -1;
92 if ((p=malloc(n*sizeof(unicode_char))) == 0)
93 return (0);
94 n=0;
95
96 for (l=0; cp[l]; p[n++]=uc)
97 {
98 if ((cp[l] & 0x80) == 0)
99 {
100 uc=cp[l];
101 ++l;
102 continue;
103 }
104
105 if ((cp[l] & 0xE0) == 0xC0)
106 {
107 if ((cp[l+1] & 0xC0) == 0x80)
108 {
109 uc=cp[l] & 0x1F;
110 uc <<= 6; uc |= cp[l+1] & 0x3F;
111 l += 2;
112 continue;
113 }
114 }
115
116 if ((cp[l] & 0xF0) == 0xE0)
117 {
118 if ((cp[l+1] & 0xC0) == 0x80 &&
119 (cp[l+2] & 0xC0) == 0x80)
120 {
121 uc=cp[l] & 0x0F;
122 uc <<= 6; uc |= cp[l+1] & 0x3F;
123 uc <<= 6; uc |= cp[l+2] & 0x3F;
124 l += 3;
125 continue;
126 }
127 }
128
129 if ((cp[l] & 0xF8) == 0xF0)
130 {
131 if ((cp[l+1] & 0xC0) == 0x80 &&
132 (cp[l+2] & 0xC0) == 0x80 &&
133 (cp[l+3] & 0xC0) == 0x80)
134 {
135 uc=cp[l] & 0x07;
136 uc <<= 6; uc |= cp[l+1] & 0x3F;
137 uc <<= 6; uc |= cp[l+2] & 0x3F;
138 uc <<= 6; uc |= cp[l+3] & 0x3F;
139 l += 4;
140 continue;
141 }
142 }
143
144 if ((cp[l] & 0xFC) == 0xF8)
145 {
146 if ((cp[l+1] & 0xC0) == 0x80 &&
147 (cp[l+2] & 0xC0) == 0x80 &&
148 (cp[l+3] & 0xC0) == 0x80 &&
149 (cp[l+4] & 0xC0) == 0x80)
150 {
151 uc=cp[l] & 0x03;
152 uc <<= 6; uc |= cp[l+1] & 0x3F;
153 uc <<= 6; uc |= cp[l+2] & 0x3F;
154 uc <<= 6; uc |= cp[l+3] & 0x3F;
155 uc <<= 6; uc |= cp[l+4] & 0x3F;
156 l += 5;
157 continue;
158 }
159 }
160
161 if ((cp[l] & 0xFE) == 0xFC)
162 {
163 if ((cp[l+1] & 0xC0) == 0x80 &&
164 (cp[l+2] & 0xC0) == 0x80 &&
165 (cp[l+3] & 0xC0) == 0x80 &&
166 (cp[l+4] & 0xC0) == 0x80 &&
167 (cp[l+5] & 0xC0) == 0x80)
168 {
169 uc=cp[l] & 0x01;
170 uc <<= 6; uc |= cp[l+1] & 0x3F;
171 uc <<= 6; uc |= cp[l+2] & 0x3F;
172 uc <<= 6; uc |= cp[l+3] & 0x3F;
173 uc <<= 6; uc |= cp[l+4] & 0x3F;
174 uc <<= 6; uc |= cp[l+5] & 0x3F;
175 l += 6;
176 continue;
177 }
178 }
179 uc=cp[l];
180 ++l;
181 }
182 p[n]=0;
183 return (p);
184 }
185
186 char *unicode_utf8_fromu(const unicode_char *cp, int *ip)
187 {
188 char *p=0;
189 int pass;
190 size_t l=0;
191
192 for (pass=0; pass<2; pass++)
193 {
194 if (pass)
195 {
196 p=malloc(l+1);
197 if (!p)
198 {
199 if (ip) *ip= -1;
200 return (0);
201 }
202 }
203
204 l=unicode_utf8_fromu_pass(cp, p);
205 if (pass)
206 p[l]=0;
207 }
208 return (p);
209 }
210
211
212 size_t unicode_utf8_fromu_pass(const unicode_char *cp, char *p)
213 {
214 size_t l=0;
215 unicode_char uc;
216
217 l=0;
218
219 while (cp && *cp)
220 {
221 uc= *cp++;
222
223 if ((unicode_char)uc ==
224 (unicode_char)(uc & 0x007F))
225 {
226 if (p)
227 {
228 p[l]= (char)uc;
229 }
230 ++l;
231 continue;
232 }
233
234 if ((unicode_char)uc ==
235 (unicode_char)(uc & 0x07FF))
236 {
237 if (p)
238 {
239 p[l+1]=(char)(uc & 0x3F) | 0x80;
240 uc >>= 6;
241 p[l]= (char)(uc & 0x1F) | 0xC0;
242 }
243 l += 2;
244 continue;
245 }
246
247 if ((unicode_char)uc ==
248 (unicode_char)(uc & 0x00FFFF))
249 {
250 if (p)
251 {
252 p[l+2]=(char)(uc & 0x3F) | 0x80;
253 uc >>= 6;
254 p[l+1]=(char)(uc & 0x3F) | 0x80;
255 uc >>= 6;
256 p[l]= (char)(uc & 0x0F) | 0xE0;
257 }
258 l += 3;
259 continue;
260 }
261
262 if ((unicode_char)uc ==
263 (unicode_char)(uc & 0x001FFFFF))
264 {
265 if (p)
266 {
267 p[l+3]=(char)(uc & 0x3F) | 0x80;
268 uc >>= 6;
269 p[l+2]=(char)(uc & 0x3F) | 0x80;
270 uc >>= 6;
271 p[l+1]=(char)(uc & 0x3F) | 0x80;
272 uc >>= 6;
273 p[l]= (char)(uc & 0x07) | 0xF0;
274 }
275 l += 4;
276 continue;
277 }
278
279 if ((unicode_char)uc ==
280 (unicode_char)(uc & 0x03FFFFFF))
281 {
282 if (p)
283 {
284 p[l+4]=(char)(uc & 0x3F) | 0x80;
285 uc >>= 6;
286 p[l+3]=(char)(uc & 0x3F) | 0x80;
287 uc >>= 6;
288 p[l+2]=(char)(uc & 0x3F) | 0x80;
289 uc >>= 6;
290 p[l+1]=(char)(uc & 0x3F) | 0x80;
291 uc >>= 6;
292 p[l]= (char)(uc & 0x03) | 0xF8;
293 }
294 l += 5;
295 continue;
296 }
297
298 if (p)
299 {
300 p[l+5]=(char)(uc & 0x3F) | 0x80;
301 uc >>= 6;
302 p[l+4]=(char)(uc & 0x3F) | 0x80;
303 uc >>= 6;
304 p[l+3]=(char)(uc & 0x3F) | 0x80;
305 uc >>= 6;
306 p[l+2]=(char)(uc & 0x3F) | 0x80;
307 uc >>= 6;
308 p[l+1]=(char)(uc & 0x3F) | 0x80;
309 uc >>= 6;
310 p[l]= (char)(uc & 0x01) | 0xFC;
311 }
312 l += 6;
313 }
314 return l;
315 }