Update Gnulib to v0.1-77-gd9361da
[bpt/guile.git] / lib / unistr / u8-mbtouc-unsafe.c
CommitLineData
/* Look at first character in UTF-8 string.
   Copyright (C) 1999-2002, 2006-2007, 2009-2014 Free Software Foundation, Inc.
   Written by Bruno Haible <bruno@clisp.org>, 2001.

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU Lesser General Public License as published
   by the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
17
18#include <config.h>
19
20#if defined IN_LIBUNISTRING
21/* Tell unistr.h to declare u8_mbtouc_unsafe as 'extern', not
22 'static inline'. */
23# include "unistring-notinline.h"
24#endif
25
26/* Specification. */
27#include "unistr.h"
28
29#if !HAVE_INLINE
30
31int
32u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)
33{
34 uint8_t c = *s;
35
36 if (c < 0x80)
37 {
38 *puc = c;
39 return 1;
40 }
41 else if (c >= 0xc2)
42 {
43 if (c < 0xe0)
1cd4fffc
LC
44 {
45 if (n >= 2)
46 {
24d56127 47#if CONFIG_UNICODE_SAFETY
1cd4fffc 48 if ((s[1] ^ 0x80) < 0x40)
24d56127 49#endif
1cd4fffc
LC
50 {
51 *puc = ((unsigned int) (c & 0x1f) << 6)
52 | (unsigned int) (s[1] ^ 0x80);
53 return 2;
54 }
0f00f2c3 55#if CONFIG_UNICODE_SAFETY
1cd4fffc 56 /* invalid multibyte character */
0f00f2c3 57#endif
1cd4fffc
LC
58 }
59 else
60 {
61 /* incomplete multibyte character */
62 *puc = 0xfffd;
0f00f2c3 63 return 1;
1cd4fffc
LC
64 }
65 }
24d56127 66 else if (c < 0xf0)
1cd4fffc
LC
67 {
68 if (n >= 3)
69 {
24d56127 70#if CONFIG_UNICODE_SAFETY
0f00f2c3 71 if ((s[1] ^ 0x80) < 0x40)
1cd4fffc 72 {
0f00f2c3
LC
73 if ((s[2] ^ 0x80) < 0x40)
74 {
75 if ((c >= 0xe1 || s[1] >= 0xa0)
76 && (c != 0xed || s[1] < 0xa0))
77#endif
78 {
79 *puc = ((unsigned int) (c & 0x0f) << 12)
80 | ((unsigned int) (s[1] ^ 0x80) << 6)
81 | (unsigned int) (s[2] ^ 0x80);
82 return 3;
83 }
84#if CONFIG_UNICODE_SAFETY
85 /* invalid multibyte character */
86 *puc = 0xfffd;
87 return 3;
88 }
89 /* invalid multibyte character */
90 *puc = 0xfffd;
91 return 2;
1cd4fffc
LC
92 }
93 /* invalid multibyte character */
0f00f2c3 94#endif
1cd4fffc
LC
95 }
96 else
97 {
98 /* incomplete multibyte character */
99 *puc = 0xfffd;
0f00f2c3
LC
100 if (n == 1 || (s[1] ^ 0x80) >= 0x40)
101 return 1;
102 else
103 return 2;
1cd4fffc
LC
104 }
105 }
24d56127 106 else if (c < 0xf8)
1cd4fffc
LC
107 {
108 if (n >= 4)
109 {
24d56127 110#if CONFIG_UNICODE_SAFETY
0f00f2c3
LC
111 if ((s[1] ^ 0x80) < 0x40)
112 {
113 if ((s[2] ^ 0x80) < 0x40)
114 {
115 if ((s[3] ^ 0x80) < 0x40)
116 {
117 if ((c >= 0xf1 || s[1] >= 0x90)
24d56127 118#if 1
0f00f2c3 119 && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
24d56127 120#endif
0f00f2c3 121 )
24d56127 122#endif
0f00f2c3
LC
123 {
124 *puc = ((unsigned int) (c & 0x07) << 18)
125 | ((unsigned int) (s[1] ^ 0x80) << 12)
126 | ((unsigned int) (s[2] ^ 0x80) << 6)
127 | (unsigned int) (s[3] ^ 0x80);
128 return 4;
129 }
130#if CONFIG_UNICODE_SAFETY
131 /* invalid multibyte character */
132 *puc = 0xfffd;
133 return 4;
134 }
135 /* invalid multibyte character */
136 *puc = 0xfffd;
137 return 3;
138 }
139 /* invalid multibyte character */
140 *puc = 0xfffd;
141 return 2;
1cd4fffc
LC
142 }
143 /* invalid multibyte character */
0f00f2c3 144#endif
1cd4fffc
LC
145 }
146 else
147 {
148 /* incomplete multibyte character */
149 *puc = 0xfffd;
0f00f2c3
LC
150 if (n == 1 || (s[1] ^ 0x80) >= 0x40)
151 return 1;
152 else if (n == 2 || (s[2] ^ 0x80) >= 0x40)
153 return 2;
154 else
155 return 3;
1cd4fffc
LC
156 }
157 }
24d56127
LC
158#if 0
159 else if (c < 0xfc)
1cd4fffc
LC
160 {
161 if (n >= 5)
162 {
24d56127 163#if CONFIG_UNICODE_SAFETY
0f00f2c3 164 if ((s[1] ^ 0x80) < 0x40)
1cd4fffc 165 {
0f00f2c3
LC
166 if ((s[2] ^ 0x80) < 0x40)
167 {
168 if ((s[3] ^ 0x80) < 0x40)
169 {
170 if ((s[4] ^ 0x80) < 0x40)
171 {
172 if (c >= 0xf9 || s[1] >= 0x88)
173#endif
174 {
175 *puc = ((unsigned int) (c & 0x03) << 24)
176 | ((unsigned int) (s[1] ^ 0x80) << 18)
177 | ((unsigned int) (s[2] ^ 0x80) << 12)
178 | ((unsigned int) (s[3] ^ 0x80) << 6)
179 | (unsigned int) (s[4] ^ 0x80);
180 return 5;
181 }
182#if CONFIG_UNICODE_SAFETY
183 /* invalid multibyte character */
184 *puc = 0xfffd;
185 return 5;
186 }
187 /* invalid multibyte character */
188 *puc = 0xfffd;
189 return 4;
190 }
191 /* invalid multibyte character */
192 *puc = 0xfffd;
193 return 3;
194 }
195 /* invalid multibyte character */
196 return 2;
1cd4fffc
LC
197 }
198 /* invalid multibyte character */
0f00f2c3 199#endif
1cd4fffc
LC
200 }
201 else
202 {
203 /* incomplete multibyte character */
204 *puc = 0xfffd;
205 return n;
206 }
207 }
24d56127 208 else if (c < 0xfe)
1cd4fffc
LC
209 {
210 if (n >= 6)
211 {
24d56127 212#if CONFIG_UNICODE_SAFETY
0f00f2c3 213 if ((s[1] ^ 0x80) < 0x40)
1cd4fffc 214 {
0f00f2c3
LC
215 if ((s[2] ^ 0x80) < 0x40)
216 {
217 if ((s[3] ^ 0x80) < 0x40)
218 {
219 if ((s[4] ^ 0x80) < 0x40)
220 {
221 if ((s[5] ^ 0x80) < 0x40)
222 {
223 if (c >= 0xfd || s[1] >= 0x84)
224#endif
225 {
226 *puc = ((unsigned int) (c & 0x01) << 30)
227 | ((unsigned int) (s[1] ^ 0x80) << 24)
228 | ((unsigned int) (s[2] ^ 0x80) << 18)
229 | ((unsigned int) (s[3] ^ 0x80) << 12)
230 | ((unsigned int) (s[4] ^ 0x80) << 6)
231 | (unsigned int) (s[5] ^ 0x80);
232 return 6;
233 }
234#if CONFIG_UNICODE_SAFETY
235 /* invalid multibyte character */
236 *puc = 0xfffd;
237 return 6;
238 }
239 /* invalid multibyte character */
240 *puc = 0xfffd;
241 return 5;
242 }
243 /* invalid multibyte character */
244 *puc = 0xfffd;
245 return 4;
246 }
247 /* invalid multibyte character */
248 *puc = 0xfffd;
249 return 3;
250 }
251 /* invalid multibyte character */
252 return 2;
1cd4fffc
LC
253 }
254 /* invalid multibyte character */
0f00f2c3 255#endif
1cd4fffc
LC
256 }
257 else
258 {
259 /* incomplete multibyte character */
260 *puc = 0xfffd;
261 return n;
262 }
263 }
24d56127
LC
264#endif
265 }
266 /* invalid multibyte character */
267 *puc = 0xfffd;
268 return 1;
269}
270
271#endif