Commit | Line | Data |
---|---|---|
24d56127 | 1 | /* Elementary Unicode string functions. |
5e69ceb7 | 2 | Copyright (C) 2001-2002, 2005-2014 Free Software Foundation, Inc. |
24d56127 LC |
3 | |
4 | This program is free software: you can redistribute it and/or modify it | |
5 | under the terms of the GNU Lesser General Public License as published | |
6 | by the Free Software Foundation; either version 3 of the License, or | |
7 | (at your option) any later version. | |
8 | ||
9 | This program is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | Lesser General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU Lesser General Public License | |
15 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ | |
16 | ||
17 | #ifndef _UNISTR_H | |
18 | #define _UNISTR_H | |
19 | ||
20 | #include "unitypes.h" | |
21 | ||
61cd9dc9 LC |
22 | /* Get common macros for C. */ |
23 | #include "unused-parameter.h" | |
24 | ||
24d56127 LC |
25 | /* Get bool. */ |
26 | #include <stdbool.h> | |
27 | ||
28 | /* Get size_t. */ | |
29 | #include <stddef.h> | |
30 | ||
31 | #ifdef __cplusplus | |
32 | extern "C" { | |
33 | #endif | |
34 | ||
35 | ||
36 | /* Conventions: | |
37 | ||
38 | All functions prefixed with u8_ operate on UTF-8 encoded strings. | |
39 | Their unit is an uint8_t (1 byte). | |
40 | ||
41 | All functions prefixed with u16_ operate on UTF-16 encoded strings. | |
42 | Their unit is an uint16_t (a 2-byte word). | |
43 | ||
44 | All functions prefixed with u32_ operate on UCS-4 encoded strings. | |
45 | Their unit is an uint32_t (a 4-byte word). | |
46 | ||
47 | All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly | |
48 | n units. | |
49 | ||
50 | All arguments starting with "str" and the arguments of functions starting | |
51 | with u8_str/u16_str/u32_str denote a NUL terminated string, i.e. a string | |
52 | which terminates at the first NUL unit. This termination unit is | |
53 | considered part of the string for all memory allocation purposes, but | |
54 | is not considered part of the string for all other logical purposes. | |
55 | ||
56 | Functions returning a string result take a (resultbuf, lengthp) argument | |
57 | pair. If resultbuf is not NULL and the result fits into *lengthp units, | |
58 | it is put in resultbuf, and resultbuf is returned. Otherwise, a freshly | |
59 | allocated string is returned. In both cases, *lengthp is set to the | |
60 | length (number of units) of the returned string. In case of error, | |
61 | NULL is returned and errno is set. */ | |
62 | ||
63 | ||
64 | /* Elementary string checks. */ | |
65 | ||
66 | /* Check whether an UTF-8 string is well-formed. | |
67 | Return NULL if valid, or a pointer to the first invalid unit otherwise. */ | |
68 | extern const uint8_t * | |
005de2e8 LC |
69 | u8_check (const uint8_t *s, size_t n) |
70 | _UC_ATTRIBUTE_PURE; | |
24d56127 LC |
71 | |
72 | /* Check whether an UTF-16 string is well-formed. | |
73 | Return NULL if valid, or a pointer to the first invalid unit otherwise. */ | |
74 | extern const uint16_t * | |
005de2e8 LC |
75 | u16_check (const uint16_t *s, size_t n) |
76 | _UC_ATTRIBUTE_PURE; | |
24d56127 LC |
77 | |
78 | /* Check whether an UCS-4 string is well-formed. | |
79 | Return NULL if valid, or a pointer to the first invalid unit otherwise. */ | |
80 | extern const uint32_t * | |
005de2e8 LC |
81 | u32_check (const uint32_t *s, size_t n) |
82 | _UC_ATTRIBUTE_PURE; | |
24d56127 LC |
83 | |
84 | ||
85 | /* Elementary string conversions. */ | |
86 | ||
87 | /* Convert an UTF-8 string to an UTF-16 string. */ | |
88 | extern uint16_t * | |
89 | u8_to_u16 (const uint8_t *s, size_t n, uint16_t *resultbuf, | |
1cd4fffc | 90 | size_t *lengthp); |
24d56127 LC |
91 | |
92 | /* Convert an UTF-8 string to an UCS-4 string. */ | |
93 | extern uint32_t * | |
94 | u8_to_u32 (const uint8_t *s, size_t n, uint32_t *resultbuf, | |
1cd4fffc | 95 | size_t *lengthp); |
24d56127 LC |
96 | |
97 | /* Convert an UTF-16 string to an UTF-8 string. */ | |
98 | extern uint8_t * | |
99 | u16_to_u8 (const uint16_t *s, size_t n, uint8_t *resultbuf, | |
1cd4fffc | 100 | size_t *lengthp); |
24d56127 LC |
101 | |
102 | /* Convert an UTF-16 string to an UCS-4 string. */ | |
103 | extern uint32_t * | |
104 | u16_to_u32 (const uint16_t *s, size_t n, uint32_t *resultbuf, | |
1cd4fffc | 105 | size_t *lengthp); |
24d56127 LC |
106 | |
107 | /* Convert an UCS-4 string to an UTF-8 string. */ | |
108 | extern uint8_t * | |
109 | u32_to_u8 (const uint32_t *s, size_t n, uint8_t *resultbuf, | |
1cd4fffc | 110 | size_t *lengthp); |
24d56127 LC |
111 | |
112 | /* Convert an UCS-4 string to an UTF-16 string. */ | |
113 | extern uint16_t * | |
114 | u32_to_u16 (const uint32_t *s, size_t n, uint16_t *resultbuf, | |
1cd4fffc | 115 | size_t *lengthp); |
24d56127 LC |
116 | |
117 | ||
118 | /* Elementary string functions. */ | |
119 | ||
120 | /* Return the length (number of units) of the first character in S, which is | |
121 | no longer than N. Return 0 if it is the NUL character. Return -1 upon | |
122 | failure. */ | |
123 | /* Similar to mblen(), except that s must not be NULL. */ | |
124 | extern int | |
005de2e8 LC |
125 | u8_mblen (const uint8_t *s, size_t n) |
126 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 127 | extern int |
005de2e8 LC |
128 | u16_mblen (const uint16_t *s, size_t n) |
129 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 130 | extern int |
005de2e8 LC |
131 | u32_mblen (const uint32_t *s, size_t n) |
132 | _UC_ATTRIBUTE_PURE; | |
24d56127 LC |
133 | |
134 | /* Return the length (number of units) of the first character in S, putting | |
135 | its 'ucs4_t' representation in *PUC. Upon failure, *PUC is set to 0xfffd, | |
136 | and an appropriate number of units is returned. | |
137 | The number of available units, N, must be > 0. */ | |
138 | /* Similar to mbtowc(), except that puc and s must not be NULL, n must be > 0, | |
139 | and the NUL character is not treated specially. */ | |
140 | /* The variants without the _unsafe suffix are safe, even if the library is | |
141 | compiled without --enable-safety. */ | |
142 | ||
dd7d0148 | 143 | #if GNULIB_UNISTR_U8_MBTOUC_UNSAFE || HAVE_LIBUNISTRING |
24d56127 LC |
144 | # if !HAVE_INLINE |
145 | extern int | |
146 | u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n); | |
147 | # else | |
148 | extern int | |
149 | u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n); | |
150 | static inline int | |
151 | u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n) | |
152 | { | |
153 | uint8_t c = *s; | |
154 | ||
155 | if (c < 0x80) | |
156 | { | |
157 | *puc = c; | |
158 | return 1; | |
159 | } | |
160 | else | |
161 | return u8_mbtouc_unsafe_aux (puc, s, n); | |
162 | } | |
163 | # endif | |
164 | #endif | |
165 | ||
dd7d0148 | 166 | #if GNULIB_UNISTR_U16_MBTOUC_UNSAFE || HAVE_LIBUNISTRING |
24d56127 LC |
167 | # if !HAVE_INLINE |
168 | extern int | |
169 | u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n); | |
170 | # else | |
171 | extern int | |
172 | u16_mbtouc_unsafe_aux (ucs4_t *puc, const uint16_t *s, size_t n); | |
173 | static inline int | |
174 | u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n) | |
175 | { | |
176 | uint16_t c = *s; | |
177 | ||
178 | if (c < 0xd800 || c >= 0xe000) | |
179 | { | |
180 | *puc = c; | |
181 | return 1; | |
182 | } | |
183 | else | |
184 | return u16_mbtouc_unsafe_aux (puc, s, n); | |
185 | } | |
186 | # endif | |
187 | #endif | |
188 | ||
dd7d0148 | 189 | #if GNULIB_UNISTR_U32_MBTOUC_UNSAFE || HAVE_LIBUNISTRING |
24d56127 LC |
190 | # if !HAVE_INLINE |
191 | extern int | |
192 | u32_mbtouc_unsafe (ucs4_t *puc, const uint32_t *s, size_t n); | |
193 | # else | |
194 | static inline int | |
61cd9dc9 LC |
195 | u32_mbtouc_unsafe (ucs4_t *puc, |
196 | const uint32_t *s, size_t n _GL_UNUSED_PARAMETER) | |
24d56127 LC |
197 | { |
198 | uint32_t c = *s; | |
199 | ||
dd7d0148 | 200 | # if CONFIG_UNICODE_SAFETY |
24d56127 LC |
201 | if (c < 0xd800 || (c >= 0xe000 && c < 0x110000)) |
202 | # endif | |
203 | *puc = c; | |
dd7d0148 | 204 | # if CONFIG_UNICODE_SAFETY |
24d56127 LC |
205 | else |
206 | /* invalid multibyte character */ | |
207 | *puc = 0xfffd; | |
208 | # endif | |
209 | return 1; | |
210 | } | |
211 | # endif | |
212 | #endif | |
213 | ||
dd7d0148 | 214 | #if GNULIB_UNISTR_U8_MBTOUC || HAVE_LIBUNISTRING |
24d56127 LC |
215 | # if !HAVE_INLINE |
216 | extern int | |
217 | u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n); | |
218 | # else | |
219 | extern int | |
220 | u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n); | |
221 | static inline int | |
222 | u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n) | |
223 | { | |
224 | uint8_t c = *s; | |
225 | ||
226 | if (c < 0x80) | |
227 | { | |
228 | *puc = c; | |
229 | return 1; | |
230 | } | |
231 | else | |
232 | return u8_mbtouc_aux (puc, s, n); | |
233 | } | |
234 | # endif | |
235 | #endif | |
236 | ||
dd7d0148 | 237 | #if GNULIB_UNISTR_U16_MBTOUC || HAVE_LIBUNISTRING |
24d56127 LC |
238 | # if !HAVE_INLINE |
239 | extern int | |
240 | u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n); | |
241 | # else | |
242 | extern int | |
243 | u16_mbtouc_aux (ucs4_t *puc, const uint16_t *s, size_t n); | |
244 | static inline int | |
245 | u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n) | |
246 | { | |
247 | uint16_t c = *s; | |
248 | ||
249 | if (c < 0xd800 || c >= 0xe000) | |
250 | { | |
251 | *puc = c; | |
252 | return 1; | |
253 | } | |
254 | else | |
255 | return u16_mbtouc_aux (puc, s, n); | |
256 | } | |
257 | # endif | |
258 | #endif | |
259 | ||
dd7d0148 | 260 | #if GNULIB_UNISTR_U32_MBTOUC || HAVE_LIBUNISTRING |
24d56127 LC |
261 | # if !HAVE_INLINE |
262 | extern int | |
263 | u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n); | |
264 | # else | |
265 | static inline int | |
61cd9dc9 | 266 | u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n _GL_UNUSED_PARAMETER) |
24d56127 LC |
267 | { |
268 | uint32_t c = *s; | |
269 | ||
270 | if (c < 0xd800 || (c >= 0xe000 && c < 0x110000)) | |
271 | *puc = c; | |
272 | else | |
273 | /* invalid multibyte character */ | |
274 | *puc = 0xfffd; | |
275 | return 1; | |
276 | } | |
277 | # endif | |
278 | #endif | |
279 | ||
280 | /* Return the length (number of units) of the first character in S, putting | |
281 | its 'ucs4_t' representation in *PUC. Upon failure, *PUC is set to 0xfffd, | |
282 | and -1 is returned for an invalid sequence of units, -2 is returned for an | |
283 | incomplete sequence of units. | |
284 | The number of available units, N, must be > 0. */ | |
285 | /* Similar to u*_mbtouc(), except that the return value gives more details | |
286 | about the failure, similar to mbrtowc(). */ | |
287 | ||
dd7d0148 | 288 | #if GNULIB_UNISTR_U8_MBTOUCR || HAVE_LIBUNISTRING |
24d56127 LC |
289 | extern int |
290 | u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n); | |
291 | #endif | |
292 | ||
dd7d0148 | 293 | #if GNULIB_UNISTR_U16_MBTOUCR || HAVE_LIBUNISTRING |
24d56127 LC |
294 | extern int |
295 | u16_mbtoucr (ucs4_t *puc, const uint16_t *s, size_t n); | |
296 | #endif | |
297 | ||
dd7d0148 | 298 | #if GNULIB_UNISTR_U32_MBTOUCR || HAVE_LIBUNISTRING |
24d56127 LC |
299 | extern int |
300 | u32_mbtoucr (ucs4_t *puc, const uint32_t *s, size_t n); | |
301 | #endif | |
302 | ||
303 | /* Put the multibyte character represented by UC in S, returning its | |
304 | length. Return -1 upon failure, -2 if the number of available units, N, | |
305 | is too small. The latter case cannot occur if N >= 6/2/1 for the u8_/u16_/u32_ variants, respectively. */ | |
306 | /* Similar to wctomb(), except that s must not be NULL, and the argument n | |
307 | must be specified. */ | |
308 | ||
dd7d0148 | 309 | #if GNULIB_UNISTR_U8_UCTOMB || HAVE_LIBUNISTRING |
24d56127 LC |
310 | /* Auxiliary function, also used by u8_chr, u8_strchr, u8_strrchr. */ |
311 | extern int | |
312 | u8_uctomb_aux (uint8_t *s, ucs4_t uc, int n); | |
313 | # if !HAVE_INLINE | |
314 | extern int | |
315 | u8_uctomb (uint8_t *s, ucs4_t uc, int n); | |
316 | # else | |
317 | static inline int | |
318 | u8_uctomb (uint8_t *s, ucs4_t uc, int n) | |
319 | { | |
320 | if (uc < 0x80 && n > 0) | |
321 | { | |
322 | s[0] = uc; | |
323 | return 1; | |
324 | } | |
325 | else | |
326 | return u8_uctomb_aux (s, uc, n); | |
327 | } | |
328 | # endif | |
329 | #endif | |
330 | ||
dd7d0148 | 331 | #if GNULIB_UNISTR_U16_UCTOMB || HAVE_LIBUNISTRING |
24d56127 LC |
332 | /* Auxiliary function, also used by u16_chr, u16_strchr, u16_strrchr. */ |
333 | extern int | |
334 | u16_uctomb_aux (uint16_t *s, ucs4_t uc, int n); | |
335 | # if !HAVE_INLINE | |
336 | extern int | |
337 | u16_uctomb (uint16_t *s, ucs4_t uc, int n); | |
338 | # else | |
339 | static inline int | |
340 | u16_uctomb (uint16_t *s, ucs4_t uc, int n) | |
341 | { | |
342 | if (uc < 0xd800 && n > 0) | |
343 | { | |
344 | s[0] = uc; | |
345 | return 1; | |
346 | } | |
347 | else | |
348 | return u16_uctomb_aux (s, uc, n); | |
349 | } | |
350 | # endif | |
351 | #endif | |
352 | ||
dd7d0148 | 353 | #if GNULIB_UNISTR_U32_UCTOMB || HAVE_LIBUNISTRING |
24d56127 LC |
354 | # if !HAVE_INLINE |
355 | extern int | |
356 | u32_uctomb (uint32_t *s, ucs4_t uc, int n); | |
357 | # else | |
358 | static inline int | |
359 | u32_uctomb (uint32_t *s, ucs4_t uc, int n) | |
360 | { | |
361 | if (uc < 0xd800 || (uc >= 0xe000 && uc < 0x110000)) | |
362 | { | |
363 | if (n > 0) | |
1cd4fffc LC |
364 | { |
365 | *s = uc; | |
366 | return 1; | |
367 | } | |
24d56127 | 368 | else |
1cd4fffc | 369 | return -2; |
24d56127 LC |
370 | } |
371 | else | |
372 | return -1; | |
373 | } | |
374 | # endif | |
375 | #endif | |
376 | ||
377 | /* Copy N units from SRC to DEST. */ | |
378 | /* Similar to memcpy(). */ | |
379 | extern uint8_t * | |
380 | u8_cpy (uint8_t *dest, const uint8_t *src, size_t n); | |
381 | extern uint16_t * | |
382 | u16_cpy (uint16_t *dest, const uint16_t *src, size_t n); | |
383 | extern uint32_t * | |
384 | u32_cpy (uint32_t *dest, const uint32_t *src, size_t n); | |
385 | ||
386 | /* Copy N units from SRC to DEST, guaranteeing correct behavior for | |
387 | overlapping memory areas. */ | |
388 | /* Similar to memmove(). */ | |
389 | extern uint8_t * | |
390 | u8_move (uint8_t *dest, const uint8_t *src, size_t n); | |
391 | extern uint16_t * | |
392 | u16_move (uint16_t *dest, const uint16_t *src, size_t n); | |
393 | extern uint32_t * | |
394 | u32_move (uint32_t *dest, const uint32_t *src, size_t n); | |
395 | ||
396 | /* Set the first N characters of S to UC. UC should be a character that | |
397 | occupies only 1 unit. */ | |
398 | /* Similar to memset(). */ | |
399 | extern uint8_t * | |
400 | u8_set (uint8_t *s, ucs4_t uc, size_t n); | |
401 | extern uint16_t * | |
402 | u16_set (uint16_t *s, ucs4_t uc, size_t n); | |
403 | extern uint32_t * | |
404 | u32_set (uint32_t *s, ucs4_t uc, size_t n); | |
405 | ||
406 | /* Compare S1 and S2, each of length N. */ | |
407 | /* Similar to memcmp(). */ | |
408 | extern int | |
005de2e8 LC |
409 | u8_cmp (const uint8_t *s1, const uint8_t *s2, size_t n) |
410 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 411 | extern int |
005de2e8 LC |
412 | u16_cmp (const uint16_t *s1, const uint16_t *s2, size_t n) |
413 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 414 | extern int |
005de2e8 LC |
415 | u32_cmp (const uint32_t *s1, const uint32_t *s2, size_t n) |
416 | _UC_ATTRIBUTE_PURE; | |
24d56127 LC |
417 | |
418 | /* Compare S1 and S2. */ | |
419 | /* Similar to the gnulib function memcmp2(). */ | |
420 | extern int | |
005de2e8 LC |
421 | u8_cmp2 (const uint8_t *s1, size_t n1, const uint8_t *s2, size_t n2) |
422 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 423 | extern int |
005de2e8 LC |
424 | u16_cmp2 (const uint16_t *s1, size_t n1, const uint16_t *s2, size_t n2) |
425 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 426 | extern int |
005de2e8 LC |
427 | u32_cmp2 (const uint32_t *s1, size_t n1, const uint32_t *s2, size_t n2) |
428 | _UC_ATTRIBUTE_PURE; | |
24d56127 LC |
429 | |
430 | /* Search the string at S for UC. */ | |
431 | /* Similar to memchr(). */ | |
432 | extern uint8_t * | |
005de2e8 LC |
433 | u8_chr (const uint8_t *s, size_t n, ucs4_t uc) |
434 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 435 | extern uint16_t * |
005de2e8 LC |
436 | u16_chr (const uint16_t *s, size_t n, ucs4_t uc) |
437 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 438 | extern uint32_t * |
005de2e8 LC |
439 | u32_chr (const uint32_t *s, size_t n, ucs4_t uc) |
440 | _UC_ATTRIBUTE_PURE; | |
24d56127 LC |
441 | |
442 | /* Count the number of Unicode characters in the N units from S. */ | |
443 | /* Similar to mbsnlen(). */ | |
444 | extern size_t | |
005de2e8 LC |
445 | u8_mbsnlen (const uint8_t *s, size_t n) |
446 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 447 | extern size_t |
005de2e8 LC |
448 | u16_mbsnlen (const uint16_t *s, size_t n) |
449 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 450 | extern size_t |
005de2e8 LC |
451 | u32_mbsnlen (const uint32_t *s, size_t n) |
452 | _UC_ATTRIBUTE_PURE; | |
24d56127 LC |
453 | |
454 | /* Elementary string functions with memory allocation. */ | |
455 | ||
456 | /* Make a freshly allocated copy of S, of length N. */ | |
457 | extern uint8_t * | |
458 | u8_cpy_alloc (const uint8_t *s, size_t n); | |
459 | extern uint16_t * | |
460 | u16_cpy_alloc (const uint16_t *s, size_t n); | |
461 | extern uint32_t * | |
462 | u32_cpy_alloc (const uint32_t *s, size_t n); | |
463 | ||
464 | /* Elementary string functions on NUL terminated strings. */ | |
465 | ||
466 | /* Return the length (number of units) of the first character in S. | |
467 | Return 0 if it is the NUL character. Return -1 upon failure. */ | |
468 | extern int | |
005de2e8 LC |
469 | u8_strmblen (const uint8_t *s) |
470 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 471 | extern int |
005de2e8 LC |
472 | u16_strmblen (const uint16_t *s) |
473 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 474 | extern int |
005de2e8 LC |
475 | u32_strmblen (const uint32_t *s) |
476 | _UC_ATTRIBUTE_PURE; | |
24d56127 LC |
477 | |
478 | /* Return the length (number of units) of the first character in S, putting | |
479 | its 'ucs4_t' representation in *PUC. Return 0 if it is the NUL | |
480 | character. Return -1 upon failure. */ | |
481 | extern int | |
482 | u8_strmbtouc (ucs4_t *puc, const uint8_t *s); | |
483 | extern int | |
484 | u16_strmbtouc (ucs4_t *puc, const uint16_t *s); | |
485 | extern int | |
486 | u32_strmbtouc (ucs4_t *puc, const uint32_t *s); | |
487 | ||
488 | /* Forward iteration step. Advances the pointer past the next character, | |
489 | or returns NULL if the end of the string has been reached. Puts the | |
490 | character's 'ucs4_t' representation in *PUC. */ | |
491 | extern const uint8_t * | |
492 | u8_next (ucs4_t *puc, const uint8_t *s); | |
493 | extern const uint16_t * | |
494 | u16_next (ucs4_t *puc, const uint16_t *s); | |
495 | extern const uint32_t * | |
496 | u32_next (ucs4_t *puc, const uint32_t *s); | |
497 | ||
498 | /* Backward iteration step. Advances the pointer to point to the previous | |
499 | character, or returns NULL if the beginning of the string had been reached. | |
500 | Puts the character's 'ucs4_t' representation in *PUC. */ | |
501 | extern const uint8_t * | |
502 | u8_prev (ucs4_t *puc, const uint8_t *s, const uint8_t *start); | |
503 | extern const uint16_t * | |
504 | u16_prev (ucs4_t *puc, const uint16_t *s, const uint16_t *start); | |
505 | extern const uint32_t * | |
506 | u32_prev (ucs4_t *puc, const uint32_t *s, const uint32_t *start); | |
507 | ||
508 | /* Return the number of units in S. */ | |
509 | /* Similar to strlen(), wcslen(). */ | |
510 | extern size_t | |
005de2e8 LC |
511 | u8_strlen (const uint8_t *s) |
512 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 513 | extern size_t |
005de2e8 LC |
514 | u16_strlen (const uint16_t *s) |
515 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 516 | extern size_t |
005de2e8 LC |
517 | u32_strlen (const uint32_t *s) |
518 | _UC_ATTRIBUTE_PURE; | |
24d56127 LC |
519 | |
520 | /* Return the number of units in S, but at most MAXLEN. */ | |
521 | /* Similar to strnlen(), wcsnlen(). */ | |
522 | extern size_t | |
005de2e8 LC |
523 | u8_strnlen (const uint8_t *s, size_t maxlen) |
524 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 525 | extern size_t |
005de2e8 LC |
526 | u16_strnlen (const uint16_t *s, size_t maxlen) |
527 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 528 | extern size_t |
005de2e8 LC |
529 | u32_strnlen (const uint32_t *s, size_t maxlen) |
530 | _UC_ATTRIBUTE_PURE; | |
24d56127 LC |
531 | |
532 | /* Copy SRC to DEST. */ | |
533 | /* Similar to strcpy(), wcscpy(). */ | |
534 | extern uint8_t * | |
535 | u8_strcpy (uint8_t *dest, const uint8_t *src); | |
536 | extern uint16_t * | |
537 | u16_strcpy (uint16_t *dest, const uint16_t *src); | |
538 | extern uint32_t * | |
539 | u32_strcpy (uint32_t *dest, const uint32_t *src); | |
540 | ||
541 | /* Copy SRC to DEST, returning the address of the terminating NUL in DEST. */ | |
542 | /* Similar to stpcpy(). */ | |
543 | extern uint8_t * | |
544 | u8_stpcpy (uint8_t *dest, const uint8_t *src); | |
545 | extern uint16_t * | |
546 | u16_stpcpy (uint16_t *dest, const uint16_t *src); | |
547 | extern uint32_t * | |
548 | u32_stpcpy (uint32_t *dest, const uint32_t *src); | |
549 | ||
550 | /* Copy no more than N units of SRC to DEST. */ | |
551 | /* Similar to strncpy(), wcsncpy(). */ | |
552 | extern uint8_t * | |
553 | u8_strncpy (uint8_t *dest, const uint8_t *src, size_t n); | |
554 | extern uint16_t * | |
555 | u16_strncpy (uint16_t *dest, const uint16_t *src, size_t n); | |
556 | extern uint32_t * | |
557 | u32_strncpy (uint32_t *dest, const uint32_t *src, size_t n); | |
558 | ||
61cd9dc9 LC |
559 | /* Copy no more than N units of SRC to DEST. Return a pointer past the last |
560 | non-NUL unit written into DEST. */ | |
24d56127 LC |
561 | /* Similar to stpncpy(). */ |
562 | extern uint8_t * | |
563 | u8_stpncpy (uint8_t *dest, const uint8_t *src, size_t n); | |
564 | extern uint16_t * | |
565 | u16_stpncpy (uint16_t *dest, const uint16_t *src, size_t n); | |
566 | extern uint32_t * | |
567 | u32_stpncpy (uint32_t *dest, const uint32_t *src, size_t n); | |
568 | ||
569 | /* Append SRC onto DEST. */ | |
570 | /* Similar to strcat(), wcscat(). */ | |
571 | extern uint8_t * | |
572 | u8_strcat (uint8_t *dest, const uint8_t *src); | |
573 | extern uint16_t * | |
574 | u16_strcat (uint16_t *dest, const uint16_t *src); | |
575 | extern uint32_t * | |
576 | u32_strcat (uint32_t *dest, const uint32_t *src); | |
577 | ||
578 | /* Append no more than N units of SRC onto DEST. */ | |
579 | /* Similar to strncat(), wcsncat(). */ | |
580 | extern uint8_t * | |
581 | u8_strncat (uint8_t *dest, const uint8_t *src, size_t n); | |
582 | extern uint16_t * | |
583 | u16_strncat (uint16_t *dest, const uint16_t *src, size_t n); | |
584 | extern uint32_t * | |
585 | u32_strncat (uint32_t *dest, const uint32_t *src, size_t n); | |
586 | ||
587 | /* Compare S1 and S2. */ | |
588 | /* Similar to strcmp(), wcscmp(). */ | |
0f00f2c3 LC |
589 | #ifdef __sun |
590 | /* Avoid a collision with the u8_strcmp() function in Solaris 11 libc. */ | |
591 | extern int | |
005de2e8 LC |
592 | u8_strcmp_gnu (const uint8_t *s1, const uint8_t *s2) |
593 | _UC_ATTRIBUTE_PURE; | |
0f00f2c3 LC |
594 | # define u8_strcmp u8_strcmp_gnu |
595 | #else | |
24d56127 | 596 | extern int |
005de2e8 LC |
597 | u8_strcmp (const uint8_t *s1, const uint8_t *s2) |
598 | _UC_ATTRIBUTE_PURE; | |
0f00f2c3 | 599 | #endif |
24d56127 | 600 | extern int |
005de2e8 LC |
601 | u16_strcmp (const uint16_t *s1, const uint16_t *s2) |
602 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 603 | extern int |
005de2e8 LC |
604 | u32_strcmp (const uint32_t *s1, const uint32_t *s2) |
605 | _UC_ATTRIBUTE_PURE; | |
24d56127 LC |
606 | |
607 | /* Compare S1 and S2 using the collation rules of the current locale. | |
608 | Return -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2. | |
609 | Upon failure, set errno and return any value. */ | |
610 | /* Similar to strcoll(), wcscoll(). */ | |
611 | extern int | |
612 | u8_strcoll (const uint8_t *s1, const uint8_t *s2); | |
613 | extern int | |
614 | u16_strcoll (const uint16_t *s1, const uint16_t *s2); | |
615 | extern int | |
616 | u32_strcoll (const uint32_t *s1, const uint32_t *s2); | |
617 | ||
618 | /* Compare no more than N units of S1 and S2. */ | |
619 | /* Similar to strncmp(), wcsncmp(). */ | |
620 | extern int | |
005de2e8 LC |
621 | u8_strncmp (const uint8_t *s1, const uint8_t *s2, size_t n) |
622 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 623 | extern int |
005de2e8 LC |
624 | u16_strncmp (const uint16_t *s1, const uint16_t *s2, size_t n) |
625 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 626 | extern int |
005de2e8 LC |
627 | u32_strncmp (const uint32_t *s1, const uint32_t *s2, size_t n) |
628 | _UC_ATTRIBUTE_PURE; | |
24d56127 LC |
629 | |
630 | /* Duplicate S, returning an identical malloc'd string. */ | |
631 | /* Similar to strdup(), wcsdup(). */ | |
632 | extern uint8_t * | |
633 | u8_strdup (const uint8_t *s); | |
634 | extern uint16_t * | |
635 | u16_strdup (const uint16_t *s); | |
636 | extern uint32_t * | |
637 | u32_strdup (const uint32_t *s); | |
638 | ||
639 | /* Find the first occurrence of UC in STR. */ | |
640 | /* Similar to strchr(), wcschr(). */ | |
641 | extern uint8_t * | |
005de2e8 LC |
642 | u8_strchr (const uint8_t *str, ucs4_t uc) |
643 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 644 | extern uint16_t * |
005de2e8 LC |
645 | u16_strchr (const uint16_t *str, ucs4_t uc) |
646 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 647 | extern uint32_t * |
005de2e8 LC |
648 | u32_strchr (const uint32_t *str, ucs4_t uc) |
649 | _UC_ATTRIBUTE_PURE; | |
24d56127 LC |
650 | |
651 | /* Find the last occurrence of UC in STR. */ | |
652 | /* Similar to strrchr(), wcsrchr(). */ | |
653 | extern uint8_t * | |
005de2e8 LC |
654 | u8_strrchr (const uint8_t *str, ucs4_t uc) |
655 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 656 | extern uint16_t * |
005de2e8 LC |
657 | u16_strrchr (const uint16_t *str, ucs4_t uc) |
658 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 659 | extern uint32_t * |
005de2e8 LC |
660 | u32_strrchr (const uint32_t *str, ucs4_t uc) |
661 | _UC_ATTRIBUTE_PURE; | |
24d56127 LC |
662 | |
663 | /* Return the length of the initial segment of STR which consists entirely | |
664 | of Unicode characters not in REJECT. */ | |
665 | /* Similar to strcspn(), wcscspn(). */ | |
666 | extern size_t | |
005de2e8 LC |
667 | u8_strcspn (const uint8_t *str, const uint8_t *reject) |
668 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 669 | extern size_t |
005de2e8 LC |
670 | u16_strcspn (const uint16_t *str, const uint16_t *reject) |
671 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 672 | extern size_t |
005de2e8 LC |
673 | u32_strcspn (const uint32_t *str, const uint32_t *reject) |
674 | _UC_ATTRIBUTE_PURE; | |
24d56127 LC |
675 | |
676 | /* Return the length of the initial segment of STR which consists entirely | |
677 | of Unicode characters in ACCEPT. */ | |
678 | /* Similar to strspn(), wcsspn(). */ | |
679 | extern size_t | |
005de2e8 LC |
680 | u8_strspn (const uint8_t *str, const uint8_t *accept) |
681 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 682 | extern size_t |
005de2e8 LC |
683 | u16_strspn (const uint16_t *str, const uint16_t *accept) |
684 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 685 | extern size_t |
005de2e8 LC |
686 | u32_strspn (const uint32_t *str, const uint32_t *accept) |
687 | _UC_ATTRIBUTE_PURE; | |
24d56127 LC |
688 | |
689 | /* Find the first occurrence in STR of any character in ACCEPT. */ | |
690 | /* Similar to strpbrk(), wcspbrk(). */ | |
691 | extern uint8_t * | |
005de2e8 LC |
692 | u8_strpbrk (const uint8_t *str, const uint8_t *accept) |
693 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 694 | extern uint16_t * |
005de2e8 LC |
695 | u16_strpbrk (const uint16_t *str, const uint16_t *accept) |
696 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 697 | extern uint32_t * |
005de2e8 LC |
698 | u32_strpbrk (const uint32_t *str, const uint32_t *accept) |
699 | _UC_ATTRIBUTE_PURE; | |
24d56127 LC |
700 | |
701 | /* Find the first occurrence of NEEDLE in HAYSTACK. */ | |
702 | /* Similar to strstr(), wcsstr(). */ | |
703 | extern uint8_t * | |
005de2e8 LC |
704 | u8_strstr (const uint8_t *haystack, const uint8_t *needle) |
705 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 706 | extern uint16_t * |
005de2e8 LC |
707 | u16_strstr (const uint16_t *haystack, const uint16_t *needle) |
708 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 709 | extern uint32_t * |
005de2e8 LC |
710 | u32_strstr (const uint32_t *haystack, const uint32_t *needle) |
711 | _UC_ATTRIBUTE_PURE; | |
24d56127 LC |
712 | |
713 | /* Test whether STR starts with PREFIX. */ | |
714 | extern bool | |
005de2e8 LC |
715 | u8_startswith (const uint8_t *str, const uint8_t *prefix) |
716 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 717 | extern bool |
005de2e8 LC |
718 | u16_startswith (const uint16_t *str, const uint16_t *prefix) |
719 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 720 | extern bool |
005de2e8 LC |
721 | u32_startswith (const uint32_t *str, const uint32_t *prefix) |
722 | _UC_ATTRIBUTE_PURE; | |
24d56127 LC |
723 | |
724 | /* Test whether STR ends with SUFFIX. */ | |
725 | extern bool | |
005de2e8 LC |
726 | u8_endswith (const uint8_t *str, const uint8_t *suffix) |
727 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 728 | extern bool |
005de2e8 LC |
729 | u16_endswith (const uint16_t *str, const uint16_t *suffix) |
730 | _UC_ATTRIBUTE_PURE; | |
24d56127 | 731 | extern bool |
005de2e8 LC |
732 | u32_endswith (const uint32_t *str, const uint32_t *suffix) |
733 | _UC_ATTRIBUTE_PURE; | |
24d56127 LC |
734 | |
735 | /* Divide STR into tokens separated by characters in DELIM. | |
736 | This interface is actually more similar to wcstok than to strtok. */ | |
737 | /* Similar to strtok_r(), wcstok(). */ | |
738 | extern uint8_t * | |
739 | u8_strtok (uint8_t *str, const uint8_t *delim, uint8_t **ptr); | |
740 | extern uint16_t * | |
741 | u16_strtok (uint16_t *str, const uint16_t *delim, uint16_t **ptr); | |
742 | extern uint32_t * | |
743 | u32_strtok (uint32_t *str, const uint32_t *delim, uint32_t **ptr); | |
744 | ||
745 | ||
746 | #ifdef __cplusplus | |
747 | } | |
748 | #endif | |
749 | ||
750 | #endif /* _UNISTR_H */ |