Commit | Line | Data |
---|---|---|
24d56127 LC |
1 | /* Elementary Unicode string functions. |
2 | Copyright (C) 2001-2002, 2005-2009 Free Software Foundation, Inc. | |
3 | ||
4 | This program is free software: you can redistribute it and/or modify it | |
5 | under the terms of the GNU Lesser General Public License as published | |
6 | by the Free Software Foundation; either version 3 of the License, or | |
7 | (at your option) any later version. | |
8 | ||
9 | This program is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | Lesser General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU Lesser General Public License | |
15 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ | |
16 | ||
17 | #ifndef _UNISTR_H | |
18 | #define _UNISTR_H | |
19 | ||
20 | #include "unitypes.h" | |
21 | ||
22 | /* Get bool. */ | |
23 | #include <stdbool.h> | |
24 | ||
25 | /* Get size_t. */ | |
26 | #include <stddef.h> | |
27 | ||
28 | #ifdef __cplusplus | |
29 | extern "C" { | |
30 | #endif | |
31 | ||
32 | ||
33 | /* Conventions: | |
34 | ||
35 | All functions prefixed with u8_ operate on UTF-8 encoded strings. | |
36 | Their unit is a uint8_t (1 byte). | |
37 | ||
38 | All functions prefixed with u16_ operate on UTF-16 encoded strings. | |
39 | Their unit is a uint16_t (a 2-byte word). | |
40 | ||
41 | All functions prefixed with u32_ operate on UCS-4 encoded strings. | |
42 | Their unit is a uint32_t (a 4-byte word). | |
43 | ||
44 | All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly | |
45 | n units. | |
46 | ||
47 | All arguments starting with "str" and the arguments of functions starting | |
48 | with u8_str/u16_str/u32_str denote a NUL terminated string, i.e. a string | |
49 | which terminates at the first NUL unit. This termination unit is | |
50 | considered part of the string for all memory allocation purposes, but | |
51 | is not considered part of the string for all other logical purposes. | |
52 | ||
53 | Functions returning a string result take a (resultbuf, lengthp) argument | |
54 | pair. If resultbuf is not NULL and the result fits into *lengthp units, | |
55 | it is put in resultbuf, and resultbuf is returned. Otherwise, a freshly | |
56 | allocated string is returned. In both cases, *lengthp is set to the | |
57 | length (number of units) of the returned string. In case of error, | |
58 | NULL is returned and errno is set. */ | |
59 | ||
60 | ||
61 | /* Elementary string checks. */ | |
62 | ||
63 | /* Check whether a UTF-8 string is well-formed. | |
64 | Return NULL if valid, or a pointer to the first invalid unit otherwise. */ | |
65 | extern const uint8_t * | |
66 | u8_check (const uint8_t *s, size_t n); | |
67 | ||
68 | /* Check whether a UTF-16 string is well-formed. | |
69 | Return NULL if valid, or a pointer to the first invalid unit otherwise. */ | |
70 | extern const uint16_t * | |
71 | u16_check (const uint16_t *s, size_t n); | |
72 | ||
73 | /* Check whether a UCS-4 string is well-formed. | |
74 | Return NULL if valid, or a pointer to the first invalid unit otherwise. */ | |
75 | extern const uint32_t * | |
76 | u32_check (const uint32_t *s, size_t n); | |
77 | ||
78 | ||
79 | /* Elementary string conversions. */ | |
80 | ||
81 | /* Convert a UTF-8 string to a UTF-16 string. */ | |
82 | extern uint16_t * | |
83 | u8_to_u16 (const uint8_t *s, size_t n, uint16_t *resultbuf, | |
84 | size_t *lengthp); | |
85 | ||
86 | /* Convert a UTF-8 string to a UCS-4 string. */ | |
87 | extern uint32_t * | |
88 | u8_to_u32 (const uint8_t *s, size_t n, uint32_t *resultbuf, | |
89 | size_t *lengthp); | |
90 | ||
91 | /* Convert a UTF-16 string to a UTF-8 string. */ | |
92 | extern uint8_t * | |
93 | u16_to_u8 (const uint16_t *s, size_t n, uint8_t *resultbuf, | |
94 | size_t *lengthp); | |
95 | ||
96 | /* Convert a UTF-16 string to a UCS-4 string. */ | |
97 | extern uint32_t * | |
98 | u16_to_u32 (const uint16_t *s, size_t n, uint32_t *resultbuf, | |
99 | size_t *lengthp); | |
100 | ||
101 | /* Convert a UCS-4 string to a UTF-8 string. */ | |
102 | extern uint8_t * | |
103 | u32_to_u8 (const uint32_t *s, size_t n, uint8_t *resultbuf, | |
104 | size_t *lengthp); | |
105 | ||
106 | /* Convert a UCS-4 string to a UTF-16 string. */ | |
107 | extern uint16_t * | |
108 | u32_to_u16 (const uint32_t *s, size_t n, uint16_t *resultbuf, | |
109 | size_t *lengthp); | |
110 | ||
111 | ||
112 | /* Elementary string functions. */ | |
113 | ||
114 | /* Return the length (number of units) of the first character in S, which is | |
115 | no longer than N. Return 0 if it is the NUL character. Return -1 upon | |
116 | failure. */ | |
117 | /* Similar to mblen(), except that s must not be NULL. */ | |
118 | extern int | |
119 | u8_mblen (const uint8_t *s, size_t n); | |
120 | extern int | |
121 | u16_mblen (const uint16_t *s, size_t n); | |
122 | extern int | |
123 | u32_mblen (const uint32_t *s, size_t n); | |
124 | ||
125 | /* Return the length (number of units) of the first character in S, putting | |
126 | its 'ucs4_t' representation in *PUC. Upon failure, *PUC is set to 0xfffd, | |
127 | and an appropriate number of units is returned. | |
128 | The number of available units, N, must be > 0. */ | |
129 | /* Similar to mbtowc(), except that puc and s must not be NULL, n must be > 0, | |
130 | and the NUL character is not treated specially. */ | |
131 | /* The variants with _safe suffix are safe, even if the library is compiled | |
132 | without --enable-safety. */ | |
133 | ||
134 | #ifdef GNULIB_UNISTR_U8_MBTOUC_UNSAFE | |
135 | # if !HAVE_INLINE | |
136 | extern int | |
137 | u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n); | |
138 | # else | |
139 | extern int | |
140 | u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n); | |
141 | static inline int | |
142 | u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n) | |
143 | { | |
144 | uint8_t c = *s; | |
145 | ||
146 | if (c < 0x80) | |
147 | { | |
148 | *puc = c; | |
149 | return 1; | |
150 | } | |
151 | else | |
152 | return u8_mbtouc_unsafe_aux (puc, s, n); | |
153 | } | |
154 | # endif | |
155 | #endif | |
156 | ||
157 | #ifdef GNULIB_UNISTR_U16_MBTOUC_UNSAFE | |
158 | # if !HAVE_INLINE | |
159 | extern int | |
160 | u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n); | |
161 | # else | |
162 | extern int | |
163 | u16_mbtouc_unsafe_aux (ucs4_t *puc, const uint16_t *s, size_t n); | |
164 | static inline int | |
165 | u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n) | |
166 | { | |
167 | uint16_t c = *s; | |
168 | ||
169 | if (c < 0xd800 || c >= 0xe000) | |
170 | { | |
171 | *puc = c; | |
172 | return 1; | |
173 | } | |
174 | else | |
175 | return u16_mbtouc_unsafe_aux (puc, s, n); | |
176 | } | |
177 | # endif | |
178 | #endif | |
179 | ||
180 | #ifdef GNULIB_UNISTR_U32_MBTOUC_UNSAFE | |
181 | # if !HAVE_INLINE | |
182 | extern int | |
183 | u32_mbtouc_unsafe (ucs4_t *puc, const uint32_t *s, size_t n); | |
184 | # else | |
185 | static inline int | |
186 | u32_mbtouc_unsafe (ucs4_t *puc, const uint32_t *s, size_t n _UNUSED_PARAMETER_) | |
187 | { | |
188 | uint32_t c = *s; | |
189 | ||
190 | # if CONFIG_UNICODE_SAFETY | |
191 | if (c < 0xd800 || (c >= 0xe000 && c < 0x110000)) | |
192 | # endif | |
193 | *puc = c; | |
194 | # if CONFIG_UNICODE_SAFETY | |
195 | else | |
196 | /* invalid multibyte character */ | |
197 | *puc = 0xfffd; | |
198 | # endif | |
199 | return 1; | |
200 | } | |
201 | # endif | |
202 | #endif | |
203 | ||
204 | #ifdef GNULIB_UNISTR_U8_MBTOUC | |
205 | # if !HAVE_INLINE | |
206 | extern int | |
207 | u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n); | |
208 | # else | |
209 | extern int | |
210 | u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n); | |
211 | static inline int | |
212 | u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n) | |
213 | { | |
214 | uint8_t c = *s; | |
215 | ||
216 | if (c < 0x80) | |
217 | { | |
218 | *puc = c; | |
219 | return 1; | |
220 | } | |
221 | else | |
222 | return u8_mbtouc_aux (puc, s, n); | |
223 | } | |
224 | # endif | |
225 | #endif | |
226 | ||
227 | #ifdef GNULIB_UNISTR_U16_MBTOUC | |
228 | # if !HAVE_INLINE | |
229 | extern int | |
230 | u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n); | |
231 | # else | |
232 | extern int | |
233 | u16_mbtouc_aux (ucs4_t *puc, const uint16_t *s, size_t n); | |
234 | static inline int | |
235 | u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n) | |
236 | { | |
237 | uint16_t c = *s; | |
238 | ||
239 | if (c < 0xd800 || c >= 0xe000) | |
240 | { | |
241 | *puc = c; | |
242 | return 1; | |
243 | } | |
244 | else | |
245 | return u16_mbtouc_aux (puc, s, n); | |
246 | } | |
247 | # endif | |
248 | #endif | |
249 | ||
250 | #ifdef GNULIB_UNISTR_U32_MBTOUC | |
251 | # if !HAVE_INLINE | |
252 | extern int | |
253 | u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n); | |
254 | # else | |
255 | static inline int | |
256 | u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n _UNUSED_PARAMETER_) | |
257 | { | |
258 | uint32_t c = *s; | |
259 | ||
260 | if (c < 0xd800 || (c >= 0xe000 && c < 0x110000)) | |
261 | *puc = c; | |
262 | else | |
263 | /* invalid multibyte character */ | |
264 | *puc = 0xfffd; | |
265 | return 1; | |
266 | } | |
267 | # endif | |
268 | #endif | |
269 | ||
270 | /* Return the length (number of units) of the first character in S, putting | |
271 | its 'ucs4_t' representation in *PUC. Upon failure, *PUC is set to 0xfffd, | |
272 | and -1 is returned for an invalid sequence of units, -2 is returned for an | |
273 | incomplete sequence of units. | |
274 | The number of available units, N, must be > 0. */ | |
275 | /* Similar to u*_mbtouc(), except that the return value gives more details | |
276 | about the failure, similar to mbrtowc(). */ | |
277 | ||
278 | #ifdef GNULIB_UNISTR_U8_MBTOUCR | |
279 | extern int | |
280 | u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n); | |
281 | #endif | |
282 | ||
283 | #ifdef GNULIB_UNISTR_U16_MBTOUCR | |
284 | extern int | |
285 | u16_mbtoucr (ucs4_t *puc, const uint16_t *s, size_t n); | |
286 | #endif | |
287 | ||
288 | #ifdef GNULIB_UNISTR_U32_MBTOUCR | |
289 | extern int | |
290 | u32_mbtoucr (ucs4_t *puc, const uint32_t *s, size_t n); | |
291 | #endif | |
292 | ||
293 | /* Put the multibyte character represented by UC in S, returning its | |
294 | length. Return -1 upon failure, -2 if the number of available units, N, | |
295 | is too small. The latter case cannot occur if N >= 6/2/1, respectively. */ | |
296 | /* Similar to wctomb(), except that s must not be NULL, and the argument n | |
297 | must be specified. */ | |
298 | ||
299 | #ifdef GNULIB_UNISTR_U8_UCTOMB | |
300 | /* Auxiliary function, also used by u8_chr, u8_strchr, u8_strrchr. */ | |
301 | extern int | |
302 | u8_uctomb_aux (uint8_t *s, ucs4_t uc, int n); | |
303 | # if !HAVE_INLINE | |
304 | extern int | |
305 | u8_uctomb (uint8_t *s, ucs4_t uc, int n); | |
306 | # else | |
307 | static inline int | |
308 | u8_uctomb (uint8_t *s, ucs4_t uc, int n) | |
309 | { | |
310 | if (uc < 0x80 && n > 0) | |
311 | { | |
312 | s[0] = uc; | |
313 | return 1; | |
314 | } | |
315 | else | |
316 | return u8_uctomb_aux (s, uc, n); | |
317 | } | |
318 | # endif | |
319 | #endif | |
320 | ||
321 | #ifdef GNULIB_UNISTR_U16_UCTOMB | |
322 | /* Auxiliary function, also used by u16_chr, u16_strchr, u16_strrchr. */ | |
323 | extern int | |
324 | u16_uctomb_aux (uint16_t *s, ucs4_t uc, int n); | |
325 | # if !HAVE_INLINE | |
326 | extern int | |
327 | u16_uctomb (uint16_t *s, ucs4_t uc, int n); | |
328 | # else | |
329 | static inline int | |
330 | u16_uctomb (uint16_t *s, ucs4_t uc, int n) | |
331 | { | |
332 | if (uc < 0xd800 && n > 0) | |
333 | { | |
334 | s[0] = uc; | |
335 | return 1; | |
336 | } | |
337 | else | |
338 | return u16_uctomb_aux (s, uc, n); | |
339 | } | |
340 | # endif | |
341 | #endif | |
342 | ||
343 | #ifdef GNULIB_UNISTR_U32_UCTOMB | |
344 | # if !HAVE_INLINE | |
345 | extern int | |
346 | u32_uctomb (uint32_t *s, ucs4_t uc, int n); | |
347 | # else | |
348 | static inline int | |
349 | u32_uctomb (uint32_t *s, ucs4_t uc, int n) | |
350 | { | |
351 | if (uc < 0xd800 || (uc >= 0xe000 && uc < 0x110000)) | |
352 | { | |
353 | if (n > 0) | |
354 | { | |
355 | *s = uc; | |
356 | return 1; | |
357 | } | |
358 | else | |
359 | return -2; | |
360 | } | |
361 | else | |
362 | return -1; | |
363 | } | |
364 | # endif | |
365 | #endif | |
366 | ||
367 | /* Copy N units from SRC to DEST. */ | |
368 | /* Similar to memcpy(). */ | |
369 | extern uint8_t * | |
370 | u8_cpy (uint8_t *dest, const uint8_t *src, size_t n); | |
371 | extern uint16_t * | |
372 | u16_cpy (uint16_t *dest, const uint16_t *src, size_t n); | |
373 | extern uint32_t * | |
374 | u32_cpy (uint32_t *dest, const uint32_t *src, size_t n); | |
375 | ||
376 | /* Copy N units from SRC to DEST, guaranteeing correct behavior for | |
377 | overlapping memory areas. */ | |
378 | /* Similar to memmove(). */ | |
379 | extern uint8_t * | |
380 | u8_move (uint8_t *dest, const uint8_t *src, size_t n); | |
381 | extern uint16_t * | |
382 | u16_move (uint16_t *dest, const uint16_t *src, size_t n); | |
383 | extern uint32_t * | |
384 | u32_move (uint32_t *dest, const uint32_t *src, size_t n); | |
385 | ||
386 | /* Set the first N characters of S to UC. UC should be a character that | |
387 | occupies only 1 unit. */ | |
388 | /* Similar to memset(). */ | |
389 | extern uint8_t * | |
390 | u8_set (uint8_t *s, ucs4_t uc, size_t n); | |
391 | extern uint16_t * | |
392 | u16_set (uint16_t *s, ucs4_t uc, size_t n); | |
393 | extern uint32_t * | |
394 | u32_set (uint32_t *s, ucs4_t uc, size_t n); | |
395 | ||
396 | /* Compare S1 and S2, each of length N. */ | |
397 | /* Similar to memcmp(). */ | |
398 | extern int | |
399 | u8_cmp (const uint8_t *s1, const uint8_t *s2, size_t n); | |
400 | extern int | |
401 | u16_cmp (const uint16_t *s1, const uint16_t *s2, size_t n); | |
402 | extern int | |
403 | u32_cmp (const uint32_t *s1, const uint32_t *s2, size_t n); | |
404 | ||
405 | /* Compare S1 and S2. */ | |
406 | /* Similar to the gnulib function memcmp2(). */ | |
407 | extern int | |
408 | u8_cmp2 (const uint8_t *s1, size_t n1, const uint8_t *s2, size_t n2); | |
409 | extern int | |
410 | u16_cmp2 (const uint16_t *s1, size_t n1, const uint16_t *s2, size_t n2); | |
411 | extern int | |
412 | u32_cmp2 (const uint32_t *s1, size_t n1, const uint32_t *s2, size_t n2); | |
413 | ||
414 | /* Search the string at S for UC. */ | |
415 | /* Similar to memchr(). */ | |
416 | extern uint8_t * | |
417 | u8_chr (const uint8_t *s, size_t n, ucs4_t uc); | |
418 | extern uint16_t * | |
419 | u16_chr (const uint16_t *s, size_t n, ucs4_t uc); | |
420 | extern uint32_t * | |
421 | u32_chr (const uint32_t *s, size_t n, ucs4_t uc); | |
422 | ||
423 | /* Count the number of Unicode characters in the N units from S. */ | |
424 | /* Similar to mbsnlen(). */ | |
425 | extern size_t | |
426 | u8_mbsnlen (const uint8_t *s, size_t n); | |
427 | extern size_t | |
428 | u16_mbsnlen (const uint16_t *s, size_t n); | |
429 | extern size_t | |
430 | u32_mbsnlen (const uint32_t *s, size_t n); | |
431 | ||
432 | /* Elementary string functions with memory allocation. */ | |
433 | ||
434 | /* Make a freshly allocated copy of S, of length N. */ | |
435 | extern uint8_t * | |
436 | u8_cpy_alloc (const uint8_t *s, size_t n); | |
437 | extern uint16_t * | |
438 | u16_cpy_alloc (const uint16_t *s, size_t n); | |
439 | extern uint32_t * | |
440 | u32_cpy_alloc (const uint32_t *s, size_t n); | |
441 | ||
442 | /* Elementary string functions on NUL terminated strings. */ | |
443 | ||
444 | /* Return the length (number of units) of the first character in S. | |
445 | Return 0 if it is the NUL character. Return -1 upon failure. */ | |
446 | extern int | |
447 | u8_strmblen (const uint8_t *s); | |
448 | extern int | |
449 | u16_strmblen (const uint16_t *s); | |
450 | extern int | |
451 | u32_strmblen (const uint32_t *s); | |
452 | ||
453 | /* Return the length (number of units) of the first character in S, putting | |
454 | its 'ucs4_t' representation in *PUC. Return 0 if it is the NUL | |
455 | character. Return -1 upon failure. */ | |
456 | extern int | |
457 | u8_strmbtouc (ucs4_t *puc, const uint8_t *s); | |
458 | extern int | |
459 | u16_strmbtouc (ucs4_t *puc, const uint16_t *s); | |
460 | extern int | |
461 | u32_strmbtouc (ucs4_t *puc, const uint32_t *s); | |
462 | ||
463 | /* Forward iteration step. Advances the pointer past the next character, | |
464 | or returns NULL if the end of the string has been reached. Puts the | |
465 | character's 'ucs4_t' representation in *PUC. */ | |
466 | extern const uint8_t * | |
467 | u8_next (ucs4_t *puc, const uint8_t *s); | |
468 | extern const uint16_t * | |
469 | u16_next (ucs4_t *puc, const uint16_t *s); | |
470 | extern const uint32_t * | |
471 | u32_next (ucs4_t *puc, const uint32_t *s); | |
472 | ||
473 | /* Backward iteration step. Advances the pointer to point to the previous | |
474 | character, or returns NULL if the beginning of the string had been reached. | |
475 | Puts the character's 'ucs4_t' representation in *PUC. */ | |
476 | extern const uint8_t * | |
477 | u8_prev (ucs4_t *puc, const uint8_t *s, const uint8_t *start); | |
478 | extern const uint16_t * | |
479 | u16_prev (ucs4_t *puc, const uint16_t *s, const uint16_t *start); | |
480 | extern const uint32_t * | |
481 | u32_prev (ucs4_t *puc, const uint32_t *s, const uint32_t *start); | |
482 | ||
483 | /* Return the number of units in S. */ | |
484 | /* Similar to strlen(), wcslen(). */ | |
485 | extern size_t | |
486 | u8_strlen (const uint8_t *s); | |
487 | extern size_t | |
488 | u16_strlen (const uint16_t *s); | |
489 | extern size_t | |
490 | u32_strlen (const uint32_t *s); | |
491 | ||
492 | /* Return the number of units in S, but at most MAXLEN. */ | |
493 | /* Similar to strnlen(), wcsnlen(). */ | |
494 | extern size_t | |
495 | u8_strnlen (const uint8_t *s, size_t maxlen); | |
496 | extern size_t | |
497 | u16_strnlen (const uint16_t *s, size_t maxlen); | |
498 | extern size_t | |
499 | u32_strnlen (const uint32_t *s, size_t maxlen); | |
500 | ||
501 | /* Copy SRC to DEST. */ | |
502 | /* Similar to strcpy(), wcscpy(). */ | |
503 | extern uint8_t * | |
504 | u8_strcpy (uint8_t *dest, const uint8_t *src); | |
505 | extern uint16_t * | |
506 | u16_strcpy (uint16_t *dest, const uint16_t *src); | |
507 | extern uint32_t * | |
508 | u32_strcpy (uint32_t *dest, const uint32_t *src); | |
509 | ||
510 | /* Copy SRC to DEST, returning the address of the terminating NUL in DEST. */ | |
511 | /* Similar to stpcpy(). */ | |
512 | extern uint8_t * | |
513 | u8_stpcpy (uint8_t *dest, const uint8_t *src); | |
514 | extern uint16_t * | |
515 | u16_stpcpy (uint16_t *dest, const uint16_t *src); | |
516 | extern uint32_t * | |
517 | u32_stpcpy (uint32_t *dest, const uint32_t *src); | |
518 | ||
519 | /* Copy no more than N units of SRC to DEST. */ | |
520 | /* Similar to strncpy(), wcsncpy(). */ | |
521 | extern uint8_t * | |
522 | u8_strncpy (uint8_t *dest, const uint8_t *src, size_t n); | |
523 | extern uint16_t * | |
524 | u16_strncpy (uint16_t *dest, const uint16_t *src, size_t n); | |
525 | extern uint32_t * | |
526 | u32_strncpy (uint32_t *dest, const uint32_t *src, size_t n); | |
527 | ||
528 | /* Copy no more than N units of SRC to DEST, returning the address of | |
529 | the last unit written into DEST. */ | |
530 | /* Similar to stpncpy(). */ | |
531 | extern uint8_t * | |
532 | u8_stpncpy (uint8_t *dest, const uint8_t *src, size_t n); | |
533 | extern uint16_t * | |
534 | u16_stpncpy (uint16_t *dest, const uint16_t *src, size_t n); | |
535 | extern uint32_t * | |
536 | u32_stpncpy (uint32_t *dest, const uint32_t *src, size_t n); | |
537 | ||
538 | /* Append SRC onto DEST. */ | |
539 | /* Similar to strcat(), wcscat(). */ | |
540 | extern uint8_t * | |
541 | u8_strcat (uint8_t *dest, const uint8_t *src); | |
542 | extern uint16_t * | |
543 | u16_strcat (uint16_t *dest, const uint16_t *src); | |
544 | extern uint32_t * | |
545 | u32_strcat (uint32_t *dest, const uint32_t *src); | |
546 | ||
547 | /* Append no more than N units of SRC onto DEST. */ | |
548 | /* Similar to strncat(), wcsncat(). */ | |
549 | extern uint8_t * | |
550 | u8_strncat (uint8_t *dest, const uint8_t *src, size_t n); | |
551 | extern uint16_t * | |
552 | u16_strncat (uint16_t *dest, const uint16_t *src, size_t n); | |
553 | extern uint32_t * | |
554 | u32_strncat (uint32_t *dest, const uint32_t *src, size_t n); | |
555 | ||
556 | /* Compare S1 and S2. */ | |
557 | /* Similar to strcmp(), wcscmp(). */ | |
558 | extern int | |
559 | u8_strcmp (const uint8_t *s1, const uint8_t *s2); | |
560 | extern int | |
561 | u16_strcmp (const uint16_t *s1, const uint16_t *s2); | |
562 | extern int | |
563 | u32_strcmp (const uint32_t *s1, const uint32_t *s2); | |
564 | ||
565 | /* Compare S1 and S2 using the collation rules of the current locale. | |
566 | Return -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2. | |
567 | Upon failure, set errno and return any value. */ | |
568 | /* Similar to strcoll(), wcscoll(). */ | |
569 | extern int | |
570 | u8_strcoll (const uint8_t *s1, const uint8_t *s2); | |
571 | extern int | |
572 | u16_strcoll (const uint16_t *s1, const uint16_t *s2); | |
573 | extern int | |
574 | u32_strcoll (const uint32_t *s1, const uint32_t *s2); | |
575 | ||
576 | /* Compare no more than N units of S1 and S2. */ | |
577 | /* Similar to strncmp(), wcsncmp(). */ | |
578 | extern int | |
579 | u8_strncmp (const uint8_t *s1, const uint8_t *s2, size_t n); | |
580 | extern int | |
581 | u16_strncmp (const uint16_t *s1, const uint16_t *s2, size_t n); | |
582 | extern int | |
583 | u32_strncmp (const uint32_t *s1, const uint32_t *s2, size_t n); | |
584 | ||
585 | /* Duplicate S, returning an identical malloc'd string. */ | |
586 | /* Similar to strdup(), wcsdup(). */ | |
587 | extern uint8_t * | |
588 | u8_strdup (const uint8_t *s); | |
589 | extern uint16_t * | |
590 | u16_strdup (const uint16_t *s); | |
591 | extern uint32_t * | |
592 | u32_strdup (const uint32_t *s); | |
593 | ||
594 | /* Find the first occurrence of UC in STR. */ | |
595 | /* Similar to strchr(), wcschr(). */ | |
596 | extern uint8_t * | |
597 | u8_strchr (const uint8_t *str, ucs4_t uc); | |
598 | extern uint16_t * | |
599 | u16_strchr (const uint16_t *str, ucs4_t uc); | |
600 | extern uint32_t * | |
601 | u32_strchr (const uint32_t *str, ucs4_t uc); | |
602 | ||
603 | /* Find the last occurrence of UC in STR. */ | |
604 | /* Similar to strrchr(), wcsrchr(). */ | |
605 | extern uint8_t * | |
606 | u8_strrchr (const uint8_t *str, ucs4_t uc); | |
607 | extern uint16_t * | |
608 | u16_strrchr (const uint16_t *str, ucs4_t uc); | |
609 | extern uint32_t * | |
610 | u32_strrchr (const uint32_t *str, ucs4_t uc); | |
611 | ||
612 | /* Return the length of the initial segment of STR which consists entirely | |
613 | of Unicode characters not in REJECT. */ | |
614 | /* Similar to strcspn(), wcscspn(). */ | |
615 | extern size_t | |
616 | u8_strcspn (const uint8_t *str, const uint8_t *reject); | |
617 | extern size_t | |
618 | u16_strcspn (const uint16_t *str, const uint16_t *reject); | |
619 | extern size_t | |
620 | u32_strcspn (const uint32_t *str, const uint32_t *reject); | |
621 | ||
622 | /* Return the length of the initial segment of STR which consists entirely | |
623 | of Unicode characters in ACCEPT. */ | |
624 | /* Similar to strspn(), wcsspn(). */ | |
625 | extern size_t | |
626 | u8_strspn (const uint8_t *str, const uint8_t *accept); | |
627 | extern size_t | |
628 | u16_strspn (const uint16_t *str, const uint16_t *accept); | |
629 | extern size_t | |
630 | u32_strspn (const uint32_t *str, const uint32_t *accept); | |
631 | ||
632 | /* Find the first occurrence in STR of any character in ACCEPT. */ | |
633 | /* Similar to strpbrk(), wcspbrk(). */ | |
634 | extern uint8_t * | |
635 | u8_strpbrk (const uint8_t *str, const uint8_t *accept); | |
636 | extern uint16_t * | |
637 | u16_strpbrk (const uint16_t *str, const uint16_t *accept); | |
638 | extern uint32_t * | |
639 | u32_strpbrk (const uint32_t *str, const uint32_t *accept); | |
640 | ||
641 | /* Find the first occurrence of NEEDLE in HAYSTACK. */ | |
642 | /* Similar to strstr(), wcsstr(). */ | |
643 | extern uint8_t * | |
644 | u8_strstr (const uint8_t *haystack, const uint8_t *needle); | |
645 | extern uint16_t * | |
646 | u16_strstr (const uint16_t *haystack, const uint16_t *needle); | |
647 | extern uint32_t * | |
648 | u32_strstr (const uint32_t *haystack, const uint32_t *needle); | |
649 | ||
650 | /* Test whether STR starts with PREFIX. */ | |
651 | extern bool | |
652 | u8_startswith (const uint8_t *str, const uint8_t *prefix); | |
653 | extern bool | |
654 | u16_startswith (const uint16_t *str, const uint16_t *prefix); | |
655 | extern bool | |
656 | u32_startswith (const uint32_t *str, const uint32_t *prefix); | |
657 | ||
658 | /* Test whether STR ends with SUFFIX. */ | |
659 | extern bool | |
660 | u8_endswith (const uint8_t *str, const uint8_t *suffix); | |
661 | extern bool | |
662 | u16_endswith (const uint16_t *str, const uint16_t *suffix); | |
663 | extern bool | |
664 | u32_endswith (const uint32_t *str, const uint32_t *suffix); | |
665 | ||
666 | /* Divide STR into tokens separated by characters in DELIM. | |
667 | This interface is actually more similar to wcstok than to strtok. */ | |
668 | /* Similar to strtok_r(), wcstok(). */ | |
669 | extern uint8_t * | |
670 | u8_strtok (uint8_t *str, const uint8_t *delim, uint8_t **ptr); | |
671 | extern uint16_t * | |
672 | u16_strtok (uint16_t *str, const uint16_t *delim, uint16_t **ptr); | |
673 | extern uint32_t * | |
674 | u32_strtok (uint32_t *str, const uint32_t *delim, uint32_t **ptr); | |
675 | ||
676 | ||
677 | #ifdef __cplusplus | |
678 | } | |
679 | #endif | |
680 | ||
681 | #endif /* _UNISTR_H */ |