Update Gnulib to v0.1-77-gd9361da
[bpt/guile.git] / lib / unistr / u8-mbtoucr.c
CommitLineData
/* Look at first character in UTF-8 string, returning an error code.
   Copyright (C) 1999-2002, 2006-2007, 2009-2014 Free Software Foundation, Inc.
   Written by Bruno Haible <bruno@clisp.org>, 2001.

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU Lesser General Public License as published
   by the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
17
18#include <config.h>
19
20/* Specification. */
21#include "unistr.h"
22
/* Decode the first character of the UTF-8 string S, of which N units are
   available, and store it in *PUC.

   The first unit is read unconditionally, so N must be at least 1.

   Return value:
     1..4  the number of units making up the character; *PUC is the
           decoded code point.
     -1    the units at S cannot start a well-formed character (stray
           continuation byte, overlong form, surrogate, value above
           U+10FFFF, or a lead byte that is never valid); *PUC is 0xfffd.
     -2    the units seen so far are a valid prefix but N is too small to
           hold the whole character; *PUC is 0xfffd.  */
int
u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n)
{
  uint8_t c = *s;

  if (c < 0x80)
    {
      /* Single-unit (ASCII) character.  */
      *puc = c;
      return 1;
    }
  else if (c >= 0xc2)
    {
      /* 0x80..0xbf are continuation bytes and 0xc0..0xc1 could only start
         overlong two-unit forms, so valid lead bytes begin at 0xc2.  */
      if (c < 0xe0)
        {
          /* Two-unit sequence.  */
          if (n < 2)
            goto incomplete;
          if ((s[1] ^ 0x80) < 0x40)
            {
              *puc = ((unsigned int) (c & 0x1f) << 6)
                     | (unsigned int) (s[1] ^ 0x80);
              return 2;
            }
        }
      else if (c < 0xf0)
        {
          /* Three-unit sequence.  The second unit must be a continuation
             byte that is neither an overlong form (0xe0 followed by
             < 0xa0) nor a surrogate (0xed followed by >= 0xa0).  */
          if (n < 2)
            goto incomplete;
          if ((s[1] ^ 0x80) < 0x40
              && (c >= 0xe1 || s[1] >= 0xa0)
              && (c != 0xed || s[1] < 0xa0))
            {
              if (n < 3)
                goto incomplete;
              if ((s[2] ^ 0x80) < 0x40)
                {
                  *puc = ((unsigned int) (c & 0x0f) << 12)
                         | ((unsigned int) (s[1] ^ 0x80) << 6)
                         | (unsigned int) (s[2] ^ 0x80);
                  return 3;
                }
            }
        }
      else if (c <= 0xf4)
        {
          /* Four-unit sequence.  Lead bytes above 0xf4 are rejected
             outright, even when N == 1: no continuation could make them
             valid, so they are invalid rather than incomplete.  The second
             unit must be a continuation byte that is neither an overlong
             form (0xf0 followed by < 0x90) nor beyond U+10FFFF (0xf4
             followed by >= 0x90).  */
          if (n < 2)
            goto incomplete;
          if ((s[1] ^ 0x80) < 0x40
              && (c >= 0xf1 || s[1] >= 0x90)
              && (c < 0xf4 || s[1] < 0x90))
            {
              if (n < 3)
                goto incomplete;
              if ((s[2] ^ 0x80) < 0x40)
                {
                  if (n < 4)
                    goto incomplete;
                  if ((s[3] ^ 0x80) < 0x40)
                    {
                      *puc = ((unsigned int) (c & 0x07) << 18)
                             | ((unsigned int) (s[1] ^ 0x80) << 12)
                             | ((unsigned int) (s[2] ^ 0x80) << 6)
                             | (unsigned int) (s[3] ^ 0x80);
                      return 4;
                    }
                }
            }
        }
    }

  /* invalid multibyte character */
  *puc = 0xfffd;
  return -1;

 incomplete:
  /* incomplete multibyte character */
  *puc = 0xfffd;
  return -2;
}