3786802856ea47c163592345517b8c1326077289
[bpt/guile.git] / lib / unistr / u8-mbtouc-aux.c
1 /* Conversion UTF-8 to UCS-4.
2 Copyright (C) 2001-2002, 2006-2007, 2009-2010 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2001.
4
5 This program is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Lesser General Public License as published
7 by the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include <config.h>
19
20 /* Specification. */
21 #include "unistr.h"
22
23 #if defined IN_LIBUNISTRING || HAVE_INLINE
24
/* Return nonzero if BYTE has the bit pattern 10xxxxxx, i.e. lies in the
   range 0x80..0xBF used by the second and later bytes of a multibyte
   UTF-8 sequence.  */
static int
is_trail_byte (uint8_t byte)
{
  return (byte & 0xc0) == 0x80;
}

/* Decode one multibyte UTF-8 character starting at S, of which N bytes
   are available, and store the result in *PUC.

   On success the decoded code point is stored and the length of the
   sequence (2, 3 or 4) is returned.

   If the sequence is invalid or incomplete, U+FFFD is stored instead and
   the return value is the number of bytes to skip: 1 for the lead byte,
   plus one for every directly following trail byte that is available, up
   to the length announced by the lead byte.  A sequence with the full
   number of trail bytes that is overlong, encodes a surrogate, or exceeds
   U+10FFFF is skipped as a whole.

   Lead bytes below 0xC2 (which includes ASCII bytes and stray trail
   bytes) and lead bytes 0xF8 and above are always rejected with a skip
   length of 1.  */
int
u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n)
{
  const uint8_t lead = s[0];
  /* Number of bytes reported as consumed if decoding fails.  It grows as
     further trail bytes are found to be present.  */
  int skip = 1;

  if (lead < 0xc2)
    {
      /* Never the start of a multibyte character: fall through to the
         error exit.  */
    }
  else if (lead < 0xe0)
    {
      /* 110xxxxx 10xxxxxx */
      if (n >= 2 && is_trail_byte (s[1]))
        {
          *puc = ((unsigned int) (lead & 0x1f) << 6)
                 | (unsigned int) (s[1] & 0x3f);
          return 2;
        }
    }
  else if (lead < 0xf0)
    {
      /* 1110xxxx 10xxxxxx 10xxxxxx */
      if (n != 1 && is_trail_byte (s[1]))
        {
          skip = 2;
          if (n >= 3 && is_trail_byte (s[2]))
            {
              /* E0 80..9F would be an overlong encoding, ED A0..BF would
                 encode a surrogate (U+D800..U+DFFF).  */
              int overlong = (lead == 0xe0 && s[1] < 0xa0);
              int surrogate = (lead == 0xed && s[1] >= 0xa0);

              skip = 3;
              if (!overlong && !surrogate)
                {
                  *puc = ((unsigned int) (lead & 0x0f) << 12)
                         | ((unsigned int) (s[1] & 0x3f) << 6)
                         | (unsigned int) (s[2] & 0x3f);
                  return 3;
                }
            }
        }
    }
  else if (lead < 0xf8)
    {
      /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
      if (n != 1 && is_trail_byte (s[1]))
        {
          skip = 2;
          if (n != 2 && is_trail_byte (s[2]))
            {
              skip = 3;
              if (n >= 4 && is_trail_byte (s[3]))
                {
                  /* F0 80..8F would be an overlong encoding; F4 90..BF
                     and F5..F7 would yield a value above U+10FFFF.  */
                  int overlong = (lead == 0xf0 && s[1] < 0x90);
                  int too_large = (lead > 0xf4
                                   || (lead == 0xf4 && s[1] >= 0x90));

                  skip = 4;
                  if (!overlong && !too_large)
                    {
                      *puc = ((unsigned int) (lead & 0x07) << 18)
                             | ((unsigned int) (s[1] & 0x3f) << 12)
                             | ((unsigned int) (s[2] & 0x3f) << 6)
                             | (unsigned int) (s[3] & 0x3f);
                      return 4;
                    }
                }
            }
        }
    }

  /* Invalid or incomplete multibyte character.  */
  *puc = 0xfffd;
  return skip;
}
239
240 #endif