65f792c7f0dd1867714a5056ab42fb7b5887fce0
[bpt/guile.git] / lib / unistr / u8-mbtouc-unsafe-aux.c
/* Conversion UTF-8 to UCS-4.
   Copyright (C) 2001-2002, 2006-2007, 2009-2013 Free Software Foundation, Inc.
   Written by Bruno Haible <bruno@clisp.org>, 2001.

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU Lesser General Public License as published
   by the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include <config.h>
19
20 /* Specification. */
21 #include "unistr.h"
22
23 #if defined IN_LIBUNISTRING || HAVE_INLINE
24
25 int
26 u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n)
27 {
28 uint8_t c = *s;
29
30 if (c >= 0xc2)
31 {
32 if (c < 0xe0)
33 {
34 if (n >= 2)
35 {
36 #if CONFIG_UNICODE_SAFETY
37 if ((s[1] ^ 0x80) < 0x40)
38 #endif
39 {
40 *puc = ((unsigned int) (c & 0x1f) << 6)
41 | (unsigned int) (s[1] ^ 0x80);
42 return 2;
43 }
44 #if CONFIG_UNICODE_SAFETY
45 /* invalid multibyte character */
46 #endif
47 }
48 else
49 {
50 /* incomplete multibyte character */
51 *puc = 0xfffd;
52 return 1;
53 }
54 }
55 else if (c < 0xf0)
56 {
57 if (n >= 3)
58 {
59 #if CONFIG_UNICODE_SAFETY
60 if ((s[1] ^ 0x80) < 0x40)
61 {
62 if ((s[2] ^ 0x80) < 0x40)
63 {
64 if ((c >= 0xe1 || s[1] >= 0xa0)
65 && (c != 0xed || s[1] < 0xa0))
66 #endif
67 {
68 *puc = ((unsigned int) (c & 0x0f) << 12)
69 | ((unsigned int) (s[1] ^ 0x80) << 6)
70 | (unsigned int) (s[2] ^ 0x80);
71 return 3;
72 }
73 #if CONFIG_UNICODE_SAFETY
74 /* invalid multibyte character */
75 *puc = 0xfffd;
76 return 3;
77 }
78 /* invalid multibyte character */
79 *puc = 0xfffd;
80 return 2;
81 }
82 /* invalid multibyte character */
83 #endif
84 }
85 else
86 {
87 /* incomplete multibyte character */
88 *puc = 0xfffd;
89 if (n == 1 || (s[1] ^ 0x80) >= 0x40)
90 return 1;
91 else
92 return 2;
93 }
94 }
95 else if (c < 0xf8)
96 {
97 if (n >= 4)
98 {
99 #if CONFIG_UNICODE_SAFETY
100 if ((s[1] ^ 0x80) < 0x40)
101 {
102 if ((s[2] ^ 0x80) < 0x40)
103 {
104 if ((s[3] ^ 0x80) < 0x40)
105 {
106 if ((c >= 0xf1 || s[1] >= 0x90)
107 #if 1
108 && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
109 #endif
110 )
111 #endif
112 {
113 *puc = ((unsigned int) (c & 0x07) << 18)
114 | ((unsigned int) (s[1] ^ 0x80) << 12)
115 | ((unsigned int) (s[2] ^ 0x80) << 6)
116 | (unsigned int) (s[3] ^ 0x80);
117 return 4;
118 }
119 #if CONFIG_UNICODE_SAFETY
120 /* invalid multibyte character */
121 *puc = 0xfffd;
122 return 4;
123 }
124 /* invalid multibyte character */
125 *puc = 0xfffd;
126 return 3;
127 }
128 /* invalid multibyte character */
129 *puc = 0xfffd;
130 return 2;
131 }
132 /* invalid multibyte character */
133 #endif
134 }
135 else
136 {
137 /* incomplete multibyte character */
138 *puc = 0xfffd;
139 if (n == 1 || (s[1] ^ 0x80) >= 0x40)
140 return 1;
141 else if (n == 2 || (s[2] ^ 0x80) >= 0x40)
142 return 2;
143 else
144 return 3;
145 }
146 }
147 #if 0
148 else if (c < 0xfc)
149 {
150 if (n >= 5)
151 {
152 #if CONFIG_UNICODE_SAFETY
153 if ((s[1] ^ 0x80) < 0x40)
154 {
155 if ((s[2] ^ 0x80) < 0x40)
156 {
157 if ((s[3] ^ 0x80) < 0x40)
158 {
159 if ((s[4] ^ 0x80) < 0x40)
160 {
161 if (c >= 0xf9 || s[1] >= 0x88)
162 #endif
163 {
164 *puc = ((unsigned int) (c & 0x03) << 24)
165 | ((unsigned int) (s[1] ^ 0x80) << 18)
166 | ((unsigned int) (s[2] ^ 0x80) << 12)
167 | ((unsigned int) (s[3] ^ 0x80) << 6)
168 | (unsigned int) (s[4] ^ 0x80);
169 return 5;
170 }
171 #if CONFIG_UNICODE_SAFETY
172 /* invalid multibyte character */
173 *puc = 0xfffd;
174 return 5;
175 }
176 /* invalid multibyte character */
177 *puc = 0xfffd;
178 return 4;
179 }
180 /* invalid multibyte character */
181 *puc = 0xfffd;
182 return 3;
183 }
184 /* invalid multibyte character */
185 return 2;
186 }
187 /* invalid multibyte character */
188 #endif
189 }
190 else
191 {
192 /* incomplete multibyte character */
193 *puc = 0xfffd;
194 return n;
195 }
196 }
197 else if (c < 0xfe)
198 {
199 if (n >= 6)
200 {
201 #if CONFIG_UNICODE_SAFETY
202 if ((s[1] ^ 0x80) < 0x40)
203 {
204 if ((s[2] ^ 0x80) < 0x40)
205 {
206 if ((s[3] ^ 0x80) < 0x40)
207 {
208 if ((s[4] ^ 0x80) < 0x40)
209 {
210 if ((s[5] ^ 0x80) < 0x40)
211 {
212 if (c >= 0xfd || s[1] >= 0x84)
213 #endif
214 {
215 *puc = ((unsigned int) (c & 0x01) << 30)
216 | ((unsigned int) (s[1] ^ 0x80) << 24)
217 | ((unsigned int) (s[2] ^ 0x80) << 18)
218 | ((unsigned int) (s[3] ^ 0x80) << 12)
219 | ((unsigned int) (s[4] ^ 0x80) << 6)
220 | (unsigned int) (s[5] ^ 0x80);
221 return 6;
222 }
223 #if CONFIG_UNICODE_SAFETY
224 /* invalid multibyte character */
225 *puc = 0xfffd;
226 return 6;
227 }
228 /* invalid multibyte character */
229 *puc = 0xfffd;
230 return 5;
231 }
232 /* invalid multibyte character */
233 *puc = 0xfffd;
234 return 4;
235 }
236 /* invalid multibyte character */
237 *puc = 0xfffd;
238 return 3;
239 }
240 /* invalid multibyte character */
241 return 2;
242 }
243 /* invalid multibyte character */
244 #endif
245 }
246 else
247 {
248 /* incomplete multibyte character */
249 *puc = 0xfffd;
250 return n;
251 }
252 }
253 #endif
254 }
255 /* invalid multibyte character */
256 *puc = 0xfffd;
257 return 1;
258 }
259
260 #endif