3 ** Copyright 2000-2002 Double Precision, Inc.
4 ** See COPYING for distribution information.
6 ** $Id: utf8.c,v 1.4 2002/11/18 00:54:22 mrsam Exp $
14 unicode_char
*unicode_utf8_tou(const char *cp
, int *ip
)
22 if ((cp
[l
] & 0x80) == 0)
28 if ((cp
[l
] & 0xE0) == 0xC0)
30 if ((cp
[l
+1] & 0xC0) == 0x80)
37 if ((cp
[l
] & 0xF0) == 0xE0)
39 if ((cp
[l
+1] & 0xC0) == 0x80 &&
40 (cp
[l
+2] & 0xC0) == 0x80)
47 if ((cp
[l
] & 0xF8) == 0xF0)
49 if ((cp
[l
+1] & 0xC0) == 0x80 &&
50 (cp
[l
+2] & 0xC0) == 0x80 &&
51 (cp
[l
+3] & 0xC0) == 0x80)
58 if ((cp
[l
] & 0xFC) == 0xF8)
60 if ((cp
[l
+1] & 0xC0) == 0x80 &&
61 (cp
[l
+2] & 0xC0) == 0x80 &&
62 (cp
[l
+3] & 0xC0) == 0x80 &&
63 (cp
[l
+4] & 0xC0) == 0x80)
70 if ((cp
[l
] & 0xFE) == 0xFC)
72 if ((cp
[l
+1] & 0xC0) == 0x80 &&
73 (cp
[l
+2] & 0xC0) == 0x80 &&
74 (cp
[l
+3] & 0xC0) == 0x80 &&
75 (cp
[l
+4] & 0xC0) == 0x80 &&
76 (cp
[l
+5] & 0xC0) == 0x80)
92 if ((p
=malloc(n
*sizeof(unicode_char
))) == 0)
96 for (l
=0; cp
[l
]; p
[n
++]=uc
)
98 if ((cp
[l
] & 0x80) == 0)
105 if ((cp
[l
] & 0xE0) == 0xC0)
107 if ((cp
[l
+1] & 0xC0) == 0x80)
110 uc
<<= 6; uc
|= cp
[l
+1] & 0x3F;
116 if ((cp
[l
] & 0xF0) == 0xE0)
118 if ((cp
[l
+1] & 0xC0) == 0x80 &&
119 (cp
[l
+2] & 0xC0) == 0x80)
122 uc
<<= 6; uc
|= cp
[l
+1] & 0x3F;
123 uc
<<= 6; uc
|= cp
[l
+2] & 0x3F;
129 if ((cp
[l
] & 0xF8) == 0xF0)
131 if ((cp
[l
+1] & 0xC0) == 0x80 &&
132 (cp
[l
+2] & 0xC0) == 0x80 &&
133 (cp
[l
+3] & 0xC0) == 0x80)
136 uc
<<= 6; uc
|= cp
[l
+1] & 0x3F;
137 uc
<<= 6; uc
|= cp
[l
+2] & 0x3F;
138 uc
<<= 6; uc
|= cp
[l
+3] & 0x3F;
144 if ((cp
[l
] & 0xFC) == 0xF8)
146 if ((cp
[l
+1] & 0xC0) == 0x80 &&
147 (cp
[l
+2] & 0xC0) == 0x80 &&
148 (cp
[l
+3] & 0xC0) == 0x80 &&
149 (cp
[l
+4] & 0xC0) == 0x80)
152 uc
<<= 6; uc
|= cp
[l
+1] & 0x3F;
153 uc
<<= 6; uc
|= cp
[l
+2] & 0x3F;
154 uc
<<= 6; uc
|= cp
[l
+3] & 0x3F;
155 uc
<<= 6; uc
|= cp
[l
+4] & 0x3F;
161 if ((cp
[l
] & 0xFE) == 0xFC)
163 if ((cp
[l
+1] & 0xC0) == 0x80 &&
164 (cp
[l
+2] & 0xC0) == 0x80 &&
165 (cp
[l
+3] & 0xC0) == 0x80 &&
166 (cp
[l
+4] & 0xC0) == 0x80 &&
167 (cp
[l
+5] & 0xC0) == 0x80)
170 uc
<<= 6; uc
|= cp
[l
+1] & 0x3F;
171 uc
<<= 6; uc
|= cp
[l
+2] & 0x3F;
172 uc
<<= 6; uc
|= cp
[l
+3] & 0x3F;
173 uc
<<= 6; uc
|= cp
[l
+4] & 0x3F;
174 uc
<<= 6; uc
|= cp
[l
+5] & 0x3F;
186 char *unicode_utf8_fromu(const unicode_char
*cp
, int *ip
)
192 for (pass
=0; pass
<2; pass
++)
204 l
=unicode_utf8_fromu_pass(cp
, p
);
212 size_t unicode_utf8_fromu_pass(const unicode_char
*cp
, char *p
)
223 if ((unicode_char
)uc
==
224 (unicode_char
)(uc
& 0x007F))
234 if ((unicode_char
)uc
==
235 (unicode_char
)(uc
& 0x07FF))
239 p
[l
+1]=(char)(uc
& 0x3F) | 0x80;
241 p
[l
]= (char)(uc
& 0x1F) | 0xC0;
247 if ((unicode_char
)uc
==
248 (unicode_char
)(uc
& 0x00FFFF))
252 p
[l
+2]=(char)(uc
& 0x3F) | 0x80;
254 p
[l
+1]=(char)(uc
& 0x3F) | 0x80;
256 p
[l
]= (char)(uc
& 0x0F) | 0xE0;
262 if ((unicode_char
)uc
==
263 (unicode_char
)(uc
& 0x001FFFFF))
267 p
[l
+3]=(char)(uc
& 0x3F) | 0x80;
269 p
[l
+2]=(char)(uc
& 0x3F) | 0x80;
271 p
[l
+1]=(char)(uc
& 0x3F) | 0x80;
273 p
[l
]= (char)(uc
& 0x07) | 0xF0;
279 if ((unicode_char
)uc
==
280 (unicode_char
)(uc
& 0x03FFFFFF))
284 p
[l
+4]=(char)(uc
& 0x3F) | 0x80;
286 p
[l
+3]=(char)(uc
& 0x3F) | 0x80;
288 p
[l
+2]=(char)(uc
& 0x3F) | 0x80;
290 p
[l
+1]=(char)(uc
& 0x3F) | 0x80;
292 p
[l
]= (char)(uc
& 0x03) | 0xF8;
300 p
[l
+5]=(char)(uc
& 0x3F) | 0x80;
302 p
[l
+4]=(char)(uc
& 0x3F) | 0x80;
304 p
[l
+3]=(char)(uc
& 0x3F) | 0x80;
306 p
[l
+2]=(char)(uc
& 0x3F) | 0x80;
308 p
[l
+1]=(char)(uc
& 0x3F) | 0x80;
310 p
[l
]= (char)(uc
& 0x01) | 0xFC;