Commit | Line | Data |
---|---|---|
24d56127 | 1 | /* Elementary Unicode string functions. |
61cd9dc9 | 2 | Copyright (C) 2001-2002, 2005-2010 Free Software Foundation, Inc. |
24d56127 LC |
3 | |
4 | This program is free software: you can redistribute it and/or modify it | |
5 | under the terms of the GNU Lesser General Public License as published | |
6 | by the Free Software Foundation; either version 3 of the License, or | |
7 | (at your option) any later version. | |
8 | ||
9 | This program is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | Lesser General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU Lesser General Public License | |
15 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ | |
16 | ||
17 | #ifndef _UNISTR_H | |
18 | #define _UNISTR_H | |
19 | ||
20 | #include "unitypes.h" | |
21 | ||
61cd9dc9 LC |
22 | /* Get common macros for C. */ |
23 | #include "unused-parameter.h" | |
24 | ||
24d56127 LC |
25 | /* Get bool. */ |
26 | #include <stdbool.h> | |
27 | ||
28 | /* Get size_t. */ | |
29 | #include <stddef.h> | |
30 | ||
31 | #ifdef __cplusplus | |
32 | extern "C" { | |
33 | #endif | |
34 | ||
35 | ||
36 | /* Conventions: | |
37 | ||
38 | All functions prefixed with u8_ operate on UTF-8 encoded strings. | |
39 | Their unit is a uint8_t (1 byte). | |
40 | ||
41 | All functions prefixed with u16_ operate on UTF-16 encoded strings. | |
42 | Their unit is a uint16_t (a 2-byte word). | |
43 | ||
44 | All functions prefixed with u32_ operate on UCS-4 encoded strings. | |
45 | Their unit is a uint32_t (a 4-byte word). | |
46 | ||
47 | All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly | |
48 | n units. | |
49 | ||
50 | All arguments starting with "str" and the arguments of functions starting | |
51 | with u8_str/u16_str/u32_str denote a NUL terminated string, i.e. a string | |
52 | which terminates at the first NUL unit. This termination unit is | |
53 | considered part of the string for all memory allocation purposes, but | |
54 | is not considered part of the string for all other logical purposes. | |
55 | ||
56 | Functions returning a string result take a (resultbuf, lengthp) argument | |
57 | pair. If resultbuf is not NULL and the result fits into *lengthp units, | |
58 | it is put in resultbuf, and resultbuf is returned. Otherwise, a freshly | |
59 | allocated string is returned. In both cases, *lengthp is set to the | |
60 | length (number of units) of the returned string. In case of error, | |
61 | NULL is returned and errno is set. */ | |
62 | ||
63 | ||
64 | /* Elementary string checks. */ | |
65 | ||
66 | /* Check whether a UTF-8 string is well-formed. | |
67 | Return NULL if valid, or a pointer to the first invalid unit otherwise. */ | |
68 | extern const uint8_t * | |
69 | u8_check (const uint8_t *s, size_t n); | |
70 | ||
71 | /* Check whether a UTF-16 string is well-formed. | |
72 | Return NULL if valid, or a pointer to the first invalid unit otherwise. */ | |
73 | extern const uint16_t * | |
74 | u16_check (const uint16_t *s, size_t n); | |
75 | ||
76 | /* Check whether a UCS-4 string is well-formed. | |
77 | Return NULL if valid, or a pointer to the first invalid unit otherwise. */ | |
78 | extern const uint32_t * | |
79 | u32_check (const uint32_t *s, size_t n); | |
80 | ||
81 | ||
82 | /* Elementary string conversions. */ | |
83 | ||
84 | /* Convert a UTF-8 string to a UTF-16 string. */ | |
85 | extern uint16_t * | |
86 | u8_to_u16 (const uint8_t *s, size_t n, uint16_t *resultbuf, | |
1cd4fffc | 87 | size_t *lengthp); |
24d56127 LC |
88 | |
89 | /* Convert a UTF-8 string to a UCS-4 string. */ | |
90 | extern uint32_t * | |
91 | u8_to_u32 (const uint8_t *s, size_t n, uint32_t *resultbuf, | |
1cd4fffc | 92 | size_t *lengthp); |
24d56127 LC |
93 | |
94 | /* Convert a UTF-16 string to a UTF-8 string. */ | |
95 | extern uint8_t * | |
96 | u16_to_u8 (const uint16_t *s, size_t n, uint8_t *resultbuf, | |
1cd4fffc | 97 | size_t *lengthp); |
24d56127 LC |
98 | |
99 | /* Convert a UTF-16 string to a UCS-4 string. */ | |
100 | extern uint32_t * | |
101 | u16_to_u32 (const uint16_t *s, size_t n, uint32_t *resultbuf, | |
1cd4fffc | 102 | size_t *lengthp); |
24d56127 LC |
103 | |
104 | /* Convert a UCS-4 string to a UTF-8 string. */ | |
105 | extern uint8_t * | |
106 | u32_to_u8 (const uint32_t *s, size_t n, uint8_t *resultbuf, | |
1cd4fffc | 107 | size_t *lengthp); |
24d56127 LC |
108 | |
109 | /* Convert a UCS-4 string to a UTF-16 string. */ | |
110 | extern uint16_t * | |
111 | u32_to_u16 (const uint32_t *s, size_t n, uint16_t *resultbuf, | |
1cd4fffc | 112 | size_t *lengthp); |
24d56127 LC |
113 | |
114 | ||
115 | /* Elementary string functions. */ | |
116 | ||
117 | /* Return the length (number of units) of the first character in S, which is | |
118 | no longer than N. Return 0 if it is the NUL character. Return -1 upon | |
119 | failure. */ | |
120 | /* Similar to mblen(), except that s must not be NULL. */ | |
121 | extern int | |
122 | u8_mblen (const uint8_t *s, size_t n); | |
123 | extern int | |
124 | u16_mblen (const uint16_t *s, size_t n); | |
125 | extern int | |
126 | u32_mblen (const uint32_t *s, size_t n); | |
127 | ||
128 | /* Return the length (number of units) of the first character in S, putting | |
129 | its 'ucs4_t' representation in *PUC. Upon failure, *PUC is set to 0xfffd, | |
130 | and an appropriate number of units is returned. | |
131 | The number of available units, N, must be > 0. */ | |
132 | /* Similar to mbtowc(), except that puc and s must not be NULL, n must be > 0, | |
133 | and the NUL character is not treated specially. */ | |
134 | /* The variants without the _unsafe suffix are safe, even if the library is | |
135 | compiled without --enable-safety. */ | |
136 | ||
#ifdef GNULIB_UNISTR_U8_MBTOUC_UNSAFE
# if HAVE_INLINE
/* Out-of-line helper that handles every sequence whose first unit is
   0x80 or above.  */
extern int
u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n);

/* Decode the first character of the UTF-8 string S (N > 0 units available)
   into *PUC and return the number of units it occupies.
   A first unit below 0x80 is stored directly and counts as one unit; any
   other first unit is handed to u8_mbtouc_unsafe_aux.  */
static inline int
u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)
{
  uint8_t first = s[0];

  if (first >= 0x80)
    return u8_mbtouc_unsafe_aux (puc, s, n);

  *puc = first;
  return 1;
}
# else
/* No usable 'inline': the function lives in the library.  */
extern int
u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n);
# endif
#endif
159 | ||
#ifdef GNULIB_UNISTR_U16_MBTOUC_UNSAFE
# if HAVE_INLINE
/* Out-of-line helper that handles every sequence whose first unit lies in
   0xd800..0xdfff.  */
extern int
u16_mbtouc_unsafe_aux (ucs4_t *puc, const uint16_t *s, size_t n);

/* Decode the first character of the UTF-16 string S (N > 0 units available)
   into *PUC and return the number of units it occupies.
   A first unit outside 0xd800..0xdfff is stored directly and counts as one
   unit; any other first unit is handed to u16_mbtouc_unsafe_aux.  */
static inline int
u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n)
{
  uint16_t first = s[0];

  if (first >= 0xd800 && first < 0xe000)
    return u16_mbtouc_unsafe_aux (puc, s, n);

  *puc = first;
  return 1;
}
# else
/* No usable 'inline': the function lives in the library.  */
extern int
u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n);
# endif
#endif
182 | ||
#ifdef GNULIB_UNISTR_U32_MBTOUC_UNSAFE
# if HAVE_INLINE
/* Store the first unit of the UCS-4 string S in *PUC and return 1.
   N is not consulted.  Only when CONFIG_UNICODE_SAFETY is enabled is the
   unit validated: a value in 0xd800..0xdfff or at/above 0x110000 is then
   replaced by 0xfffd.  */
static inline int
u32_mbtouc_unsafe (ucs4_t *puc,
                   const uint32_t *s, size_t n _GL_UNUSED_PARAMETER)
{
  uint32_t unit = s[0];

#  if CONFIG_UNICODE_SAFETY
  /* invalid multibyte character */
  if ((unit >= 0xd800 && unit < 0xe000) || unit >= 0x110000)
    unit = 0xfffd;
#  endif
  *puc = unit;
  return 1;
}
# else
/* No usable 'inline': the function lives in the library.  */
extern int
u32_mbtouc_unsafe (ucs4_t *puc, const uint32_t *s, size_t n);
# endif
#endif
207 | ||
#ifdef GNULIB_UNISTR_U8_MBTOUC
# if HAVE_INLINE
/* Out-of-line helper that handles every sequence whose first unit is
   0x80 or above.  */
extern int
u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n);

/* Decode the first character of the UTF-8 string S (N > 0 units available)
   into *PUC and return the number of units it occupies.
   A first unit below 0x80 is stored directly and counts as one unit; any
   other first unit is handed to u8_mbtouc_aux.  */
static inline int
u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n)
{
  uint8_t first = s[0];

  if (first >= 0x80)
    return u8_mbtouc_aux (puc, s, n);

  *puc = first;
  return 1;
}
# else
/* No usable 'inline': the function lives in the library.  */
extern int
u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n);
# endif
#endif
230 | ||
#ifdef GNULIB_UNISTR_U16_MBTOUC
# if HAVE_INLINE
/* Out-of-line helper that handles every sequence whose first unit lies in
   0xd800..0xdfff.  */
extern int
u16_mbtouc_aux (ucs4_t *puc, const uint16_t *s, size_t n);

/* Decode the first character of the UTF-16 string S (N > 0 units available)
   into *PUC and return the number of units it occupies.
   A first unit outside 0xd800..0xdfff is stored directly and counts as one
   unit; any other first unit is handed to u16_mbtouc_aux.  */
static inline int
u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n)
{
  uint16_t first = s[0];

  if (first >= 0xd800 && first < 0xe000)
    return u16_mbtouc_aux (puc, s, n);

  *puc = first;
  return 1;
}
# else
/* No usable 'inline': the function lives in the library.  */
extern int
u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n);
# endif
#endif
253 | ||
#ifdef GNULIB_UNISTR_U32_MBTOUC
# if HAVE_INLINE
/* Store the first unit of the UCS-4 string S in *PUC and return 1.
   N is not consulted.  A unit in 0xd800..0xdfff or at/above 0x110000 is
   not a valid character and is replaced by 0xfffd.  */
static inline int
u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n _GL_UNUSED_PARAMETER)
{
  uint32_t unit = s[0];
  int invalid = (unit >= 0xd800 && unit < 0xe000) || unit >= 0x110000;

  /* invalid multibyte character -> 0xfffd */
  *puc = invalid ? 0xfffd : unit;
  return 1;
}
# else
/* No usable 'inline': the function lives in the library.  */
extern int
u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n);
# endif
#endif
273 | ||
274 | /* Return the length (number of units) of the first character in S, putting | |
275 | its 'ucs4_t' representation in *PUC. Upon failure, *PUC is set to 0xfffd, | |
276 | and -1 is returned for an invalid sequence of units, -2 is returned for an | |
277 | incomplete sequence of units. | |
278 | The number of available units, N, must be > 0. */ | |
279 | /* Similar to u*_mbtouc(), except that the return value gives more details | |
280 | about the failure, similar to mbrtowc(). */ | |
281 | ||
282 | #ifdef GNULIB_UNISTR_U8_MBTOUCR | |
283 | extern int | |
284 | u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n); | |
285 | #endif | |
286 | ||
287 | #ifdef GNULIB_UNISTR_U16_MBTOUCR | |
288 | extern int | |
289 | u16_mbtoucr (ucs4_t *puc, const uint16_t *s, size_t n); | |
290 | #endif | |
291 | ||
292 | #ifdef GNULIB_UNISTR_U32_MBTOUCR | |
293 | extern int | |
294 | u32_mbtoucr (ucs4_t *puc, const uint32_t *s, size_t n); | |
295 | #endif | |
296 | ||
297 | /* Put the multibyte character represented by UC in S, returning its | |
298 | length. Return -1 upon failure, -2 if the number of available units, N, | |
299 | is too small. The latter case cannot occur if N >= 6/2/1 for the u8_/u16_/u32_ variant, respectively. */ | |
300 | /* Similar to wctomb(), except that s must not be NULL, and the argument n | |
301 | must be specified. */ | |
302 | ||
#ifdef GNULIB_UNISTR_U8_UCTOMB
/* Auxiliary function, also used by u8_chr, u8_strchr, u8_strrchr.  */
extern int
u8_uctomb_aux (uint8_t *s, ucs4_t uc, int n);
# if HAVE_INLINE
/* Write the UTF-8 form of UC to S, which has room for N units, and return
   the number of units written.
   The one-unit case (UC below 0x80 with N > 0) is handled inline; every
   other case, including N <= 0, is handed to u8_uctomb_aux, whose result is
   returned unchanged.  */
static inline int
u8_uctomb (uint8_t *s, ucs4_t uc, int n)
{
  if (n <= 0 || uc >= 0x80)
    return u8_uctomb_aux (s, uc, n);

  s[0] = uc;
  return 1;
}
# else
/* No usable 'inline': the function lives in the library.  */
extern int
u8_uctomb (uint8_t *s, ucs4_t uc, int n);
# endif
#endif
324 | ||
#ifdef GNULIB_UNISTR_U16_UCTOMB
/* Auxiliary function, also used by u16_chr, u16_strchr, u16_strrchr.  */
extern int
u16_uctomb_aux (uint16_t *s, ucs4_t uc, int n);
# if HAVE_INLINE
/* Write the UTF-16 form of UC to S, which has room for N units, and return
   the number of units written.
   Only UC below 0xd800 with N > 0 is handled inline (one unit); every other
   case, including N <= 0, is handed to u16_uctomb_aux, whose result is
   returned unchanged.  */
static inline int
u16_uctomb (uint16_t *s, ucs4_t uc, int n)
{
  if (n <= 0 || uc >= 0xd800)
    return u16_uctomb_aux (s, uc, n);

  s[0] = uc;
  return 1;
}
# else
/* No usable 'inline': the function lives in the library.  */
extern int
u16_uctomb (uint16_t *s, ucs4_t uc, int n);
# endif
#endif
346 | ||
#ifdef GNULIB_UNISTR_U32_UCTOMB
# if HAVE_INLINE
/* Store UC as a single UCS-4 unit at S, which has room for N units.
   Return 1 on success.
   Return -1 if UC lies in 0xd800..0xdfff or is at/above 0x110000.
   Return -2 if UC is acceptable but N <= 0; nothing is written then.  */
static inline int
u32_uctomb (uint32_t *s, ucs4_t uc, int n)
{
  /* The validity check comes first, so an invalid UC yields -1 even when
     N is too small.  */
  if ((uc >= 0xd800 && uc < 0xe000) || uc >= 0x110000)
    return -1;

  if (n <= 0)
    return -2;

  *s = uc;
  return 1;
}
# else
/* No usable 'inline': the function lives in the library.  */
extern int
u32_uctomb (uint32_t *s, ucs4_t uc, int n);
# endif
#endif
370 | ||
371 | /* Copy N units from SRC to DEST. */ | |
372 | /* Similar to memcpy(). */ | |
373 | extern uint8_t * | |
374 | u8_cpy (uint8_t *dest, const uint8_t *src, size_t n); | |
375 | extern uint16_t * | |
376 | u16_cpy (uint16_t *dest, const uint16_t *src, size_t n); | |
377 | extern uint32_t * | |
378 | u32_cpy (uint32_t *dest, const uint32_t *src, size_t n); | |
379 | ||
380 | /* Copy N units from SRC to DEST, guaranteeing correct behavior for | |
381 | overlapping memory areas. */ | |
382 | /* Similar to memmove(). */ | |
383 | extern uint8_t * | |
384 | u8_move (uint8_t *dest, const uint8_t *src, size_t n); | |
385 | extern uint16_t * | |
386 | u16_move (uint16_t *dest, const uint16_t *src, size_t n); | |
387 | extern uint32_t * | |
388 | u32_move (uint32_t *dest, const uint32_t *src, size_t n); | |
389 | ||
390 | /* Set the first N characters of S to UC. UC should be a character that | |
391 | occupies only 1 unit. */ | |
392 | /* Similar to memset(). */ | |
393 | extern uint8_t * | |
394 | u8_set (uint8_t *s, ucs4_t uc, size_t n); | |
395 | extern uint16_t * | |
396 | u16_set (uint16_t *s, ucs4_t uc, size_t n); | |
397 | extern uint32_t * | |
398 | u32_set (uint32_t *s, ucs4_t uc, size_t n); | |
399 | ||
400 | /* Compare S1 and S2, each of length N. */ | |
401 | /* Similar to memcmp(). */ | |
402 | extern int | |
403 | u8_cmp (const uint8_t *s1, const uint8_t *s2, size_t n); | |
404 | extern int | |
405 | u16_cmp (const uint16_t *s1, const uint16_t *s2, size_t n); | |
406 | extern int | |
407 | u32_cmp (const uint32_t *s1, const uint32_t *s2, size_t n); | |
408 | ||
409 | /* Compare S1 and S2. */ | |
410 | /* Similar to the gnulib function memcmp2(). */ | |
411 | extern int | |
412 | u8_cmp2 (const uint8_t *s1, size_t n1, const uint8_t *s2, size_t n2); | |
413 | extern int | |
414 | u16_cmp2 (const uint16_t *s1, size_t n1, const uint16_t *s2, size_t n2); | |
415 | extern int | |
416 | u32_cmp2 (const uint32_t *s1, size_t n1, const uint32_t *s2, size_t n2); | |
417 | ||
418 | /* Search the string at S for UC. */ | |
419 | /* Similar to memchr(). */ | |
420 | extern uint8_t * | |
421 | u8_chr (const uint8_t *s, size_t n, ucs4_t uc); | |
422 | extern uint16_t * | |
423 | u16_chr (const uint16_t *s, size_t n, ucs4_t uc); | |
424 | extern uint32_t * | |
425 | u32_chr (const uint32_t *s, size_t n, ucs4_t uc); | |
426 | ||
427 | /* Count the number of Unicode characters in the N units from S. */ | |
428 | /* Similar to mbsnlen(). */ | |
429 | extern size_t | |
430 | u8_mbsnlen (const uint8_t *s, size_t n); | |
431 | extern size_t | |
432 | u16_mbsnlen (const uint16_t *s, size_t n); | |
433 | extern size_t | |
434 | u32_mbsnlen (const uint32_t *s, size_t n); | |
435 | ||
436 | /* Elementary string functions with memory allocation. */ | |
437 | ||
438 | /* Make a freshly allocated copy of S, of length N. */ | |
439 | extern uint8_t * | |
440 | u8_cpy_alloc (const uint8_t *s, size_t n); | |
441 | extern uint16_t * | |
442 | u16_cpy_alloc (const uint16_t *s, size_t n); | |
443 | extern uint32_t * | |
444 | u32_cpy_alloc (const uint32_t *s, size_t n); | |
445 | ||
446 | /* Elementary string functions on NUL terminated strings. */ | |
447 | ||
448 | /* Return the length (number of units) of the first character in S. | |
449 | Return 0 if it is the NUL character. Return -1 upon failure. */ | |
450 | extern int | |
451 | u8_strmblen (const uint8_t *s); | |
452 | extern int | |
453 | u16_strmblen (const uint16_t *s); | |
454 | extern int | |
455 | u32_strmblen (const uint32_t *s); | |
456 | ||
457 | /* Return the length (number of units) of the first character in S, putting | |
458 | its 'ucs4_t' representation in *PUC. Return 0 if it is the NUL | |
459 | character. Return -1 upon failure. */ | |
460 | extern int | |
461 | u8_strmbtouc (ucs4_t *puc, const uint8_t *s); | |
462 | extern int | |
463 | u16_strmbtouc (ucs4_t *puc, const uint16_t *s); | |
464 | extern int | |
465 | u32_strmbtouc (ucs4_t *puc, const uint32_t *s); | |
466 | ||
467 | /* Forward iteration step. Advances the pointer past the next character, | |
468 | or returns NULL if the end of the string has been reached. Puts the | |
469 | character's 'ucs4_t' representation in *PUC. */ | |
470 | extern const uint8_t * | |
471 | u8_next (ucs4_t *puc, const uint8_t *s); | |
472 | extern const uint16_t * | |
473 | u16_next (ucs4_t *puc, const uint16_t *s); | |
474 | extern const uint32_t * | |
475 | u32_next (ucs4_t *puc, const uint32_t *s); | |
476 | ||
477 | /* Backward iteration step. Advances the pointer to point to the previous | |
478 | character, or returns NULL if the beginning of the string had been reached. | |
479 | Puts the character's 'ucs4_t' representation in *PUC. */ | |
480 | extern const uint8_t * | |
481 | u8_prev (ucs4_t *puc, const uint8_t *s, const uint8_t *start); | |
482 | extern const uint16_t * | |
483 | u16_prev (ucs4_t *puc, const uint16_t *s, const uint16_t *start); | |
484 | extern const uint32_t * | |
485 | u32_prev (ucs4_t *puc, const uint32_t *s, const uint32_t *start); | |
486 | ||
487 | /* Return the number of units in S. */ | |
488 | /* Similar to strlen(), wcslen(). */ | |
489 | extern size_t | |
490 | u8_strlen (const uint8_t *s); | |
491 | extern size_t | |
492 | u16_strlen (const uint16_t *s); | |
493 | extern size_t | |
494 | u32_strlen (const uint32_t *s); | |
495 | ||
496 | /* Return the number of units in S, but at most MAXLEN. */ | |
497 | /* Similar to strnlen(), wcsnlen(). */ | |
498 | extern size_t | |
499 | u8_strnlen (const uint8_t *s, size_t maxlen); | |
500 | extern size_t | |
501 | u16_strnlen (const uint16_t *s, size_t maxlen); | |
502 | extern size_t | |
503 | u32_strnlen (const uint32_t *s, size_t maxlen); | |
504 | ||
505 | /* Copy SRC to DEST. */ | |
506 | /* Similar to strcpy(), wcscpy(). */ | |
507 | extern uint8_t * | |
508 | u8_strcpy (uint8_t *dest, const uint8_t *src); | |
509 | extern uint16_t * | |
510 | u16_strcpy (uint16_t *dest, const uint16_t *src); | |
511 | extern uint32_t * | |
512 | u32_strcpy (uint32_t *dest, const uint32_t *src); | |
513 | ||
514 | /* Copy SRC to DEST, returning the address of the terminating NUL in DEST. */ | |
515 | /* Similar to stpcpy(). */ | |
516 | extern uint8_t * | |
517 | u8_stpcpy (uint8_t *dest, const uint8_t *src); | |
518 | extern uint16_t * | |
519 | u16_stpcpy (uint16_t *dest, const uint16_t *src); | |
520 | extern uint32_t * | |
521 | u32_stpcpy (uint32_t *dest, const uint32_t *src); | |
522 | ||
523 | /* Copy no more than N units of SRC to DEST. */ | |
524 | /* Similar to strncpy(), wcsncpy(). */ | |
525 | extern uint8_t * | |
526 | u8_strncpy (uint8_t *dest, const uint8_t *src, size_t n); | |
527 | extern uint16_t * | |
528 | u16_strncpy (uint16_t *dest, const uint16_t *src, size_t n); | |
529 | extern uint32_t * | |
530 | u32_strncpy (uint32_t *dest, const uint32_t *src, size_t n); | |
531 | ||
61cd9dc9 LC |
532 | /* Copy no more than N units of SRC to DEST. Return a pointer past the last |
533 | non-NUL unit written into DEST. */ | |
24d56127 LC |
534 | /* Similar to stpncpy(). */ |
535 | extern uint8_t * | |
536 | u8_stpncpy (uint8_t *dest, const uint8_t *src, size_t n); | |
537 | extern uint16_t * | |
538 | u16_stpncpy (uint16_t *dest, const uint16_t *src, size_t n); | |
539 | extern uint32_t * | |
540 | u32_stpncpy (uint32_t *dest, const uint32_t *src, size_t n); | |
541 | ||
542 | /* Append SRC onto DEST. */ | |
543 | /* Similar to strcat(), wcscat(). */ | |
544 | extern uint8_t * | |
545 | u8_strcat (uint8_t *dest, const uint8_t *src); | |
546 | extern uint16_t * | |
547 | u16_strcat (uint16_t *dest, const uint16_t *src); | |
548 | extern uint32_t * | |
549 | u32_strcat (uint32_t *dest, const uint32_t *src); | |
550 | ||
551 | /* Append no more than N units of SRC onto DEST. */ | |
552 | /* Similar to strncat(), wcsncat(). */ | |
553 | extern uint8_t * | |
554 | u8_strncat (uint8_t *dest, const uint8_t *src, size_t n); | |
555 | extern uint16_t * | |
556 | u16_strncat (uint16_t *dest, const uint16_t *src, size_t n); | |
557 | extern uint32_t * | |
558 | u32_strncat (uint32_t *dest, const uint32_t *src, size_t n); | |
559 | ||
560 | /* Compare S1 and S2. */ | |
561 | /* Similar to strcmp(), wcscmp(). */ | |
562 | extern int | |
563 | u8_strcmp (const uint8_t *s1, const uint8_t *s2); | |
564 | extern int | |
565 | u16_strcmp (const uint16_t *s1, const uint16_t *s2); | |
566 | extern int | |
567 | u32_strcmp (const uint32_t *s1, const uint32_t *s2); | |
568 | ||
569 | /* Compare S1 and S2 using the collation rules of the current locale. | |
570 | Return -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2. | |
571 | Upon failure, set errno and return any value. */ | |
572 | /* Similar to strcoll(), wcscoll(). */ | |
573 | extern int | |
574 | u8_strcoll (const uint8_t *s1, const uint8_t *s2); | |
575 | extern int | |
576 | u16_strcoll (const uint16_t *s1, const uint16_t *s2); | |
577 | extern int | |
578 | u32_strcoll (const uint32_t *s1, const uint32_t *s2); | |
579 | ||
580 | /* Compare no more than N units of S1 and S2. */ | |
581 | /* Similar to strncmp(), wcsncmp(). */ | |
582 | extern int | |
583 | u8_strncmp (const uint8_t *s1, const uint8_t *s2, size_t n); | |
584 | extern int | |
585 | u16_strncmp (const uint16_t *s1, const uint16_t *s2, size_t n); | |
586 | extern int | |
587 | u32_strncmp (const uint32_t *s1, const uint32_t *s2, size_t n); | |
588 | ||
589 | /* Duplicate S, returning an identical malloc'd string. */ | |
590 | /* Similar to strdup(), wcsdup(). */ | |
591 | extern uint8_t * | |
592 | u8_strdup (const uint8_t *s); | |
593 | extern uint16_t * | |
594 | u16_strdup (const uint16_t *s); | |
595 | extern uint32_t * | |
596 | u32_strdup (const uint32_t *s); | |
597 | ||
598 | /* Find the first occurrence of UC in STR. */ | |
599 | /* Similar to strchr(), wcschr(). */ | |
600 | extern uint8_t * | |
601 | u8_strchr (const uint8_t *str, ucs4_t uc); | |
602 | extern uint16_t * | |
603 | u16_strchr (const uint16_t *str, ucs4_t uc); | |
604 | extern uint32_t * | |
605 | u32_strchr (const uint32_t *str, ucs4_t uc); | |
606 | ||
607 | /* Find the last occurrence of UC in STR. */ | |
608 | /* Similar to strrchr(), wcsrchr(). */ | |
609 | extern uint8_t * | |
610 | u8_strrchr (const uint8_t *str, ucs4_t uc); | |
611 | extern uint16_t * | |
612 | u16_strrchr (const uint16_t *str, ucs4_t uc); | |
613 | extern uint32_t * | |
614 | u32_strrchr (const uint32_t *str, ucs4_t uc); | |
615 | ||
616 | /* Return the length of the initial segment of STR which consists entirely | |
617 | of Unicode characters not in REJECT. */ | |
618 | /* Similar to strcspn(), wcscspn(). */ | |
619 | extern size_t | |
620 | u8_strcspn (const uint8_t *str, const uint8_t *reject); | |
621 | extern size_t | |
622 | u16_strcspn (const uint16_t *str, const uint16_t *reject); | |
623 | extern size_t | |
624 | u32_strcspn (const uint32_t *str, const uint32_t *reject); | |
625 | ||
626 | /* Return the length of the initial segment of STR which consists entirely | |
627 | of Unicode characters in ACCEPT. */ | |
628 | /* Similar to strspn(), wcsspn(). */ | |
629 | extern size_t | |
630 | u8_strspn (const uint8_t *str, const uint8_t *accept); | |
631 | extern size_t | |
632 | u16_strspn (const uint16_t *str, const uint16_t *accept); | |
633 | extern size_t | |
634 | u32_strspn (const uint32_t *str, const uint32_t *accept); | |
635 | ||
636 | /* Find the first occurrence in STR of any character in ACCEPT. */ | |
637 | /* Similar to strpbrk(), wcspbrk(). */ | |
638 | extern uint8_t * | |
639 | u8_strpbrk (const uint8_t *str, const uint8_t *accept); | |
640 | extern uint16_t * | |
641 | u16_strpbrk (const uint16_t *str, const uint16_t *accept); | |
642 | extern uint32_t * | |
643 | u32_strpbrk (const uint32_t *str, const uint32_t *accept); | |
644 | ||
645 | /* Find the first occurrence of NEEDLE in HAYSTACK. */ | |
646 | /* Similar to strstr(), wcsstr(). */ | |
647 | extern uint8_t * | |
648 | u8_strstr (const uint8_t *haystack, const uint8_t *needle); | |
649 | extern uint16_t * | |
650 | u16_strstr (const uint16_t *haystack, const uint16_t *needle); | |
651 | extern uint32_t * | |
652 | u32_strstr (const uint32_t *haystack, const uint32_t *needle); | |
653 | ||
654 | /* Test whether STR starts with PREFIX. */ | |
655 | extern bool | |
656 | u8_startswith (const uint8_t *str, const uint8_t *prefix); | |
657 | extern bool | |
658 | u16_startswith (const uint16_t *str, const uint16_t *prefix); | |
659 | extern bool | |
660 | u32_startswith (const uint32_t *str, const uint32_t *prefix); | |
661 | ||
662 | /* Test whether STR ends with SUFFIX. */ | |
663 | extern bool | |
664 | u8_endswith (const uint8_t *str, const uint8_t *suffix); | |
665 | extern bool | |
666 | u16_endswith (const uint16_t *str, const uint16_t *suffix); | |
667 | extern bool | |
668 | u32_endswith (const uint32_t *str, const uint32_t *suffix); | |
669 | ||
670 | /* Divide STR into tokens separated by characters in DELIM. | |
671 | This interface is actually more similar to wcstok than to strtok. */ | |
672 | /* Similar to strtok_r(), wcstok(). */ | |
673 | extern uint8_t * | |
674 | u8_strtok (uint8_t *str, const uint8_t *delim, uint8_t **ptr); | |
675 | extern uint16_t * | |
676 | u16_strtok (uint16_t *str, const uint16_t *delim, uint16_t **ptr); | |
677 | extern uint32_t * | |
678 | u32_strtok (uint32_t *str, const uint32_t *delim, uint32_t **ptr); | |
679 | ||
680 | ||
681 | #ifdef __cplusplus | |
682 | } | |
683 | #endif | |
684 | ||
685 | #endif /* _UNISTR_H */ |