32-way branching in intmap.scm, not 16-way
[bpt/guile.git] / lib / unistr.in.h
CommitLineData
24d56127 1/* Elementary Unicode string functions.
5e69ceb7 2 Copyright (C) 2001-2002, 2005-2014 Free Software Foundation, Inc.
24d56127
LC
3
4 This program is free software: you can redistribute it and/or modify it
5 under the terms of the GNU Lesser General Public License as published
6 by the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17#ifndef _UNISTR_H
18#define _UNISTR_H
19
20#include "unitypes.h"
21
61cd9dc9
LC
22/* Get common macros for C. */
23#include "unused-parameter.h"
24
24d56127
LC
25/* Get bool. */
26#include <stdbool.h>
27
28/* Get size_t. */
29#include <stddef.h>
30
31#ifdef __cplusplus
32extern "C" {
33#endif
34
35
36/* Conventions:
37
38 All functions prefixed with u8_ operate on UTF-8 encoded strings.
39 Their unit is an uint8_t (1 byte).
40
41 All functions prefixed with u16_ operate on UTF-16 encoded strings.
42 Their unit is an uint16_t (a 2-byte word).
43
44 All functions prefixed with u32_ operate on UCS-4 encoded strings.
45 Their unit is an uint32_t (a 4-byte word).
46
47 All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly
48 n units.
49
50 All arguments starting with "str" and the arguments of functions starting
51 with u8_str/u16_str/u32_str denote a NUL terminated string, i.e. a string
52 which terminates at the first NUL unit. This termination unit is
53 considered part of the string for all memory allocation purposes, but
54 is not considered part of the string for all other logical purposes.
55
56 Functions returning a string result take a (resultbuf, lengthp) argument
57 pair. If resultbuf is not NULL and the result fits into *lengthp units,
58 it is put in resultbuf, and resultbuf is returned. Otherwise, a freshly
59 allocated string is returned. In both cases, *lengthp is set to the
60 length (number of units) of the returned string. In case of error,
61 NULL is returned and errno is set. */
62
63
64/* Elementary string checks. */
65
66/* Check whether an UTF-8 string is well-formed.
67 Return NULL if valid, or a pointer to the first invalid unit otherwise. */
68extern const uint8_t *
005de2e8
LC
69 u8_check (const uint8_t *s, size_t n)
70 _UC_ATTRIBUTE_PURE;
24d56127
LC
71
72/* Check whether an UTF-16 string is well-formed.
73 Return NULL if valid, or a pointer to the first invalid unit otherwise. */
74extern const uint16_t *
005de2e8
LC
75 u16_check (const uint16_t *s, size_t n)
76 _UC_ATTRIBUTE_PURE;
24d56127
LC
77
78/* Check whether an UCS-4 string is well-formed.
79 Return NULL if valid, or a pointer to the first invalid unit otherwise. */
80extern const uint32_t *
005de2e8
LC
81 u32_check (const uint32_t *s, size_t n)
82 _UC_ATTRIBUTE_PURE;
24d56127
LC
83
84
85/* Elementary string conversions. */
86
87/* Convert an UTF-8 string to an UTF-16 string. */
88extern uint16_t *
89 u8_to_u16 (const uint8_t *s, size_t n, uint16_t *resultbuf,
1cd4fffc 90 size_t *lengthp);
24d56127
LC
91
92/* Convert an UTF-8 string to an UCS-4 string. */
93extern uint32_t *
94 u8_to_u32 (const uint8_t *s, size_t n, uint32_t *resultbuf,
1cd4fffc 95 size_t *lengthp);
24d56127
LC
96
97/* Convert an UTF-16 string to an UTF-8 string. */
98extern uint8_t *
99 u16_to_u8 (const uint16_t *s, size_t n, uint8_t *resultbuf,
1cd4fffc 100 size_t *lengthp);
24d56127
LC
101
102/* Convert an UTF-16 string to an UCS-4 string. */
103extern uint32_t *
104 u16_to_u32 (const uint16_t *s, size_t n, uint32_t *resultbuf,
1cd4fffc 105 size_t *lengthp);
24d56127
LC
106
107/* Convert an UCS-4 string to an UTF-8 string. */
108extern uint8_t *
109 u32_to_u8 (const uint32_t *s, size_t n, uint8_t *resultbuf,
1cd4fffc 110 size_t *lengthp);
24d56127
LC
111
112/* Convert an UCS-4 string to an UTF-16 string. */
113extern uint16_t *
114 u32_to_u16 (const uint32_t *s, size_t n, uint16_t *resultbuf,
1cd4fffc 115 size_t *lengthp);
24d56127
LC
116
117
118/* Elementary string functions. */
119
120/* Return the length (number of units) of the first character in S, which is
121 no longer than N. Return 0 if it is the NUL character. Return -1 upon
122 failure. */
123/* Similar to mblen(), except that s must not be NULL. */
124extern int
005de2e8
LC
125 u8_mblen (const uint8_t *s, size_t n)
126 _UC_ATTRIBUTE_PURE;
24d56127 127extern int
005de2e8
LC
128 u16_mblen (const uint16_t *s, size_t n)
129 _UC_ATTRIBUTE_PURE;
24d56127 130extern int
005de2e8
LC
131 u32_mblen (const uint32_t *s, size_t n)
132 _UC_ATTRIBUTE_PURE;
24d56127
LC
133
134/* Return the length (number of units) of the first character in S, putting
135 its 'ucs4_t' representation in *PUC. Upon failure, *PUC is set to 0xfffd,
136 and an appropriate number of units is returned.
137 The number of available units, N, must be > 0. */
138/* Similar to mbtowc(), except that puc and s must not be NULL, n must be > 0,
139 and the NUL character is not treated specially. */
140/* The variants without the _unsafe suffix are safe, even if the library is
141 compiled without --enable-safety. */
142
dd7d0148 143#if GNULIB_UNISTR_U8_MBTOUC_UNSAFE || HAVE_LIBUNISTRING
24d56127
LC
144# if !HAVE_INLINE
145extern int
146 u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n);
147# else
148extern int
149 u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n);
150static inline int
151u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)
152{
153 uint8_t c = *s;
154
155 if (c < 0x80)
156 {
157 *puc = c;
158 return 1;
159 }
160 else
161 return u8_mbtouc_unsafe_aux (puc, s, n);
162}
163# endif
164#endif
165
dd7d0148 166#if GNULIB_UNISTR_U16_MBTOUC_UNSAFE || HAVE_LIBUNISTRING
24d56127
LC
167# if !HAVE_INLINE
168extern int
169 u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n);
170# else
171extern int
172 u16_mbtouc_unsafe_aux (ucs4_t *puc, const uint16_t *s, size_t n);
173static inline int
174u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n)
175{
176 uint16_t c = *s;
177
178 if (c < 0xd800 || c >= 0xe000)
179 {
180 *puc = c;
181 return 1;
182 }
183 else
184 return u16_mbtouc_unsafe_aux (puc, s, n);
185}
186# endif
187#endif
188
dd7d0148 189#if GNULIB_UNISTR_U32_MBTOUC_UNSAFE || HAVE_LIBUNISTRING
24d56127
LC
190# if !HAVE_INLINE
191extern int
192 u32_mbtouc_unsafe (ucs4_t *puc, const uint32_t *s, size_t n);
193# else
194static inline int
61cd9dc9
LC
195u32_mbtouc_unsafe (ucs4_t *puc,
196 const uint32_t *s, size_t n _GL_UNUSED_PARAMETER)
24d56127
LC
197{
198 uint32_t c = *s;
199
dd7d0148 200# if CONFIG_UNICODE_SAFETY
24d56127
LC
201 if (c < 0xd800 || (c >= 0xe000 && c < 0x110000))
202# endif
203 *puc = c;
dd7d0148 204# if CONFIG_UNICODE_SAFETY
24d56127
LC
205 else
206 /* invalid multibyte character */
207 *puc = 0xfffd;
208# endif
209 return 1;
210}
211# endif
212#endif
213
dd7d0148 214#if GNULIB_UNISTR_U8_MBTOUC || HAVE_LIBUNISTRING
24d56127
LC
215# if !HAVE_INLINE
216extern int
217 u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n);
218# else
219extern int
220 u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n);
221static inline int
222u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n)
223{
224 uint8_t c = *s;
225
226 if (c < 0x80)
227 {
228 *puc = c;
229 return 1;
230 }
231 else
232 return u8_mbtouc_aux (puc, s, n);
233}
234# endif
235#endif
236
dd7d0148 237#if GNULIB_UNISTR_U16_MBTOUC || HAVE_LIBUNISTRING
24d56127
LC
238# if !HAVE_INLINE
239extern int
240 u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n);
241# else
242extern int
243 u16_mbtouc_aux (ucs4_t *puc, const uint16_t *s, size_t n);
244static inline int
245u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n)
246{
247 uint16_t c = *s;
248
249 if (c < 0xd800 || c >= 0xe000)
250 {
251 *puc = c;
252 return 1;
253 }
254 else
255 return u16_mbtouc_aux (puc, s, n);
256}
257# endif
258#endif
259
dd7d0148 260#if GNULIB_UNISTR_U32_MBTOUC || HAVE_LIBUNISTRING
24d56127
LC
261# if !HAVE_INLINE
262extern int
263 u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n);
264# else
265static inline int
61cd9dc9 266u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n _GL_UNUSED_PARAMETER)
24d56127
LC
267{
268 uint32_t c = *s;
269
270 if (c < 0xd800 || (c >= 0xe000 && c < 0x110000))
271 *puc = c;
272 else
273 /* invalid multibyte character */
274 *puc = 0xfffd;
275 return 1;
276}
277# endif
278#endif
279
280/* Return the length (number of units) of the first character in S, putting
281 its 'ucs4_t' representation in *PUC. Upon failure, *PUC is set to 0xfffd,
282 and -1 is returned for an invalid sequence of units, -2 is returned for an
283 incomplete sequence of units.
284 The number of available units, N, must be > 0. */
285/* Similar to u*_mbtouc(), except that the return value gives more details
286 about the failure, similar to mbrtowc(). */
287
dd7d0148 288#if GNULIB_UNISTR_U8_MBTOUCR || HAVE_LIBUNISTRING
24d56127
LC
289extern int
290 u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n);
291#endif
292
dd7d0148 293#if GNULIB_UNISTR_U16_MBTOUCR || HAVE_LIBUNISTRING
24d56127
LC
294extern int
295 u16_mbtoucr (ucs4_t *puc, const uint16_t *s, size_t n);
296#endif
297
dd7d0148 298#if GNULIB_UNISTR_U32_MBTOUCR || HAVE_LIBUNISTRING
24d56127
LC
299extern int
300 u32_mbtoucr (ucs4_t *puc, const uint32_t *s, size_t n);
301#endif
302
303/* Put the multibyte character represented by UC in S, returning its
304 length. Return -1 upon failure, -2 if the number of available units, N,
305 is too small. The latter case cannot occur if N >= 6/2/1, respectively. */
306/* Similar to wctomb(), except that s must not be NULL, and the argument n
307 must be specified. */
308
dd7d0148 309#if GNULIB_UNISTR_U8_UCTOMB || HAVE_LIBUNISTRING
24d56127
LC
310/* Auxiliary function, also used by u8_chr, u8_strchr, u8_strrchr. */
311extern int
312 u8_uctomb_aux (uint8_t *s, ucs4_t uc, int n);
313# if !HAVE_INLINE
314extern int
315 u8_uctomb (uint8_t *s, ucs4_t uc, int n);
316# else
317static inline int
318u8_uctomb (uint8_t *s, ucs4_t uc, int n)
319{
320 if (uc < 0x80 && n > 0)
321 {
322 s[0] = uc;
323 return 1;
324 }
325 else
326 return u8_uctomb_aux (s, uc, n);
327}
328# endif
329#endif
330
dd7d0148 331#if GNULIB_UNISTR_U16_UCTOMB || HAVE_LIBUNISTRING
24d56127
LC
332/* Auxiliary function, also used by u16_chr, u16_strchr, u16_strrchr. */
333extern int
334 u16_uctomb_aux (uint16_t *s, ucs4_t uc, int n);
335# if !HAVE_INLINE
336extern int
337 u16_uctomb (uint16_t *s, ucs4_t uc, int n);
338# else
339static inline int
340u16_uctomb (uint16_t *s, ucs4_t uc, int n)
341{
342 if (uc < 0xd800 && n > 0)
343 {
344 s[0] = uc;
345 return 1;
346 }
347 else
348 return u16_uctomb_aux (s, uc, n);
349}
350# endif
351#endif
352
dd7d0148 353#if GNULIB_UNISTR_U32_UCTOMB || HAVE_LIBUNISTRING
24d56127
LC
354# if !HAVE_INLINE
355extern int
356 u32_uctomb (uint32_t *s, ucs4_t uc, int n);
357# else
358static inline int
359u32_uctomb (uint32_t *s, ucs4_t uc, int n)
360{
361 if (uc < 0xd800 || (uc >= 0xe000 && uc < 0x110000))
362 {
363 if (n > 0)
1cd4fffc
LC
364 {
365 *s = uc;
366 return 1;
367 }
24d56127 368 else
1cd4fffc 369 return -2;
24d56127
LC
370 }
371 else
372 return -1;
373}
374# endif
375#endif
376
377/* Copy N units from SRC to DEST. */
378/* Similar to memcpy(). */
379extern uint8_t *
380 u8_cpy (uint8_t *dest, const uint8_t *src, size_t n);
381extern uint16_t *
382 u16_cpy (uint16_t *dest, const uint16_t *src, size_t n);
383extern uint32_t *
384 u32_cpy (uint32_t *dest, const uint32_t *src, size_t n);
385
386/* Copy N units from SRC to DEST, guaranteeing correct behavior for
387 overlapping memory areas. */
388/* Similar to memmove(). */
389extern uint8_t *
390 u8_move (uint8_t *dest, const uint8_t *src, size_t n);
391extern uint16_t *
392 u16_move (uint16_t *dest, const uint16_t *src, size_t n);
393extern uint32_t *
394 u32_move (uint32_t *dest, const uint32_t *src, size_t n);
395
396/* Set the first N characters of S to UC. UC should be a character that
397 occupies only 1 unit. */
398/* Similar to memset(). */
399extern uint8_t *
400 u8_set (uint8_t *s, ucs4_t uc, size_t n);
401extern uint16_t *
402 u16_set (uint16_t *s, ucs4_t uc, size_t n);
403extern uint32_t *
404 u32_set (uint32_t *s, ucs4_t uc, size_t n);
405
406/* Compare S1 and S2, each of length N. */
407/* Similar to memcmp(). */
408extern int
005de2e8
LC
409 u8_cmp (const uint8_t *s1, const uint8_t *s2, size_t n)
410 _UC_ATTRIBUTE_PURE;
24d56127 411extern int
005de2e8
LC
412 u16_cmp (const uint16_t *s1, const uint16_t *s2, size_t n)
413 _UC_ATTRIBUTE_PURE;
24d56127 414extern int
005de2e8
LC
415 u32_cmp (const uint32_t *s1, const uint32_t *s2, size_t n)
416 _UC_ATTRIBUTE_PURE;
24d56127
LC
417
418/* Compare S1 and S2. */
419/* Similar to the gnulib function memcmp2(). */
420extern int
005de2e8
LC
421 u8_cmp2 (const uint8_t *s1, size_t n1, const uint8_t *s2, size_t n2)
422 _UC_ATTRIBUTE_PURE;
24d56127 423extern int
005de2e8
LC
424 u16_cmp2 (const uint16_t *s1, size_t n1, const uint16_t *s2, size_t n2)
425 _UC_ATTRIBUTE_PURE;
24d56127 426extern int
005de2e8
LC
427 u32_cmp2 (const uint32_t *s1, size_t n1, const uint32_t *s2, size_t n2)
428 _UC_ATTRIBUTE_PURE;
24d56127
LC
429
430/* Search the string at S for UC. */
431/* Similar to memchr(). */
432extern uint8_t *
005de2e8
LC
433 u8_chr (const uint8_t *s, size_t n, ucs4_t uc)
434 _UC_ATTRIBUTE_PURE;
24d56127 435extern uint16_t *
005de2e8
LC
436 u16_chr (const uint16_t *s, size_t n, ucs4_t uc)
437 _UC_ATTRIBUTE_PURE;
24d56127 438extern uint32_t *
005de2e8
LC
439 u32_chr (const uint32_t *s, size_t n, ucs4_t uc)
440 _UC_ATTRIBUTE_PURE;
24d56127
LC
441
442/* Count the number of Unicode characters in the N units from S. */
443/* Similar to mbsnlen(). */
444extern size_t
005de2e8
LC
445 u8_mbsnlen (const uint8_t *s, size_t n)
446 _UC_ATTRIBUTE_PURE;
24d56127 447extern size_t
005de2e8
LC
448 u16_mbsnlen (const uint16_t *s, size_t n)
449 _UC_ATTRIBUTE_PURE;
24d56127 450extern size_t
005de2e8
LC
451 u32_mbsnlen (const uint32_t *s, size_t n)
452 _UC_ATTRIBUTE_PURE;
24d56127
LC
453
454/* Elementary string functions with memory allocation. */
455
456/* Make a freshly allocated copy of S, of length N. */
457extern uint8_t *
458 u8_cpy_alloc (const uint8_t *s, size_t n);
459extern uint16_t *
460 u16_cpy_alloc (const uint16_t *s, size_t n);
461extern uint32_t *
462 u32_cpy_alloc (const uint32_t *s, size_t n);
463
464/* Elementary string functions on NUL terminated strings. */
465
466/* Return the length (number of units) of the first character in S.
467 Return 0 if it is the NUL character. Return -1 upon failure. */
468extern int
005de2e8
LC
469 u8_strmblen (const uint8_t *s)
470 _UC_ATTRIBUTE_PURE;
24d56127 471extern int
005de2e8
LC
472 u16_strmblen (const uint16_t *s)
473 _UC_ATTRIBUTE_PURE;
24d56127 474extern int
005de2e8
LC
475 u32_strmblen (const uint32_t *s)
476 _UC_ATTRIBUTE_PURE;
24d56127
LC
477
478/* Return the length (number of units) of the first character in S, putting
479 its 'ucs4_t' representation in *PUC. Return 0 if it is the NUL
480 character. Return -1 upon failure. */
481extern int
482 u8_strmbtouc (ucs4_t *puc, const uint8_t *s);
483extern int
484 u16_strmbtouc (ucs4_t *puc, const uint16_t *s);
485extern int
486 u32_strmbtouc (ucs4_t *puc, const uint32_t *s);
487
488/* Forward iteration step. Advances the pointer past the next character,
489 or returns NULL if the end of the string has been reached. Puts the
490 character's 'ucs4_t' representation in *PUC. */
491extern const uint8_t *
492 u8_next (ucs4_t *puc, const uint8_t *s);
493extern const uint16_t *
494 u16_next (ucs4_t *puc, const uint16_t *s);
495extern const uint32_t *
496 u32_next (ucs4_t *puc, const uint32_t *s);
497
498/* Backward iteration step. Advances the pointer to point to the previous
499 character, or returns NULL if the beginning of the string had been reached.
500 Puts the character's 'ucs4_t' representation in *PUC. */
501extern const uint8_t *
502 u8_prev (ucs4_t *puc, const uint8_t *s, const uint8_t *start);
503extern const uint16_t *
504 u16_prev (ucs4_t *puc, const uint16_t *s, const uint16_t *start);
505extern const uint32_t *
506 u32_prev (ucs4_t *puc, const uint32_t *s, const uint32_t *start);
507
508/* Return the number of units in S. */
509/* Similar to strlen(), wcslen(). */
510extern size_t
005de2e8
LC
511 u8_strlen (const uint8_t *s)
512 _UC_ATTRIBUTE_PURE;
24d56127 513extern size_t
005de2e8
LC
514 u16_strlen (const uint16_t *s)
515 _UC_ATTRIBUTE_PURE;
24d56127 516extern size_t
005de2e8
LC
517 u32_strlen (const uint32_t *s)
518 _UC_ATTRIBUTE_PURE;
24d56127
LC
519
520/* Return the number of units in S, but at most MAXLEN. */
521/* Similar to strnlen(), wcsnlen(). */
522extern size_t
005de2e8
LC
523 u8_strnlen (const uint8_t *s, size_t maxlen)
524 _UC_ATTRIBUTE_PURE;
24d56127 525extern size_t
005de2e8
LC
526 u16_strnlen (const uint16_t *s, size_t maxlen)
527 _UC_ATTRIBUTE_PURE;
24d56127 528extern size_t
005de2e8
LC
529 u32_strnlen (const uint32_t *s, size_t maxlen)
530 _UC_ATTRIBUTE_PURE;
24d56127
LC
531
532/* Copy SRC to DEST. */
533/* Similar to strcpy(), wcscpy(). */
534extern uint8_t *
535 u8_strcpy (uint8_t *dest, const uint8_t *src);
536extern uint16_t *
537 u16_strcpy (uint16_t *dest, const uint16_t *src);
538extern uint32_t *
539 u32_strcpy (uint32_t *dest, const uint32_t *src);
540
541/* Copy SRC to DEST, returning the address of the terminating NUL in DEST. */
542/* Similar to stpcpy(). */
543extern uint8_t *
544 u8_stpcpy (uint8_t *dest, const uint8_t *src);
545extern uint16_t *
546 u16_stpcpy (uint16_t *dest, const uint16_t *src);
547extern uint32_t *
548 u32_stpcpy (uint32_t *dest, const uint32_t *src);
549
550/* Copy no more than N units of SRC to DEST. */
551/* Similar to strncpy(), wcsncpy(). */
552extern uint8_t *
553 u8_strncpy (uint8_t *dest, const uint8_t *src, size_t n);
554extern uint16_t *
555 u16_strncpy (uint16_t *dest, const uint16_t *src, size_t n);
556extern uint32_t *
557 u32_strncpy (uint32_t *dest, const uint32_t *src, size_t n);
558
61cd9dc9
LC
559/* Copy no more than N units of SRC to DEST. Return a pointer past the last
560 non-NUL unit written into DEST. */
24d56127
LC
561/* Similar to stpncpy(). */
562extern uint8_t *
563 u8_stpncpy (uint8_t *dest, const uint8_t *src, size_t n);
564extern uint16_t *
565 u16_stpncpy (uint16_t *dest, const uint16_t *src, size_t n);
566extern uint32_t *
567 u32_stpncpy (uint32_t *dest, const uint32_t *src, size_t n);
568
569/* Append SRC onto DEST. */
570/* Similar to strcat(), wcscat(). */
571extern uint8_t *
572 u8_strcat (uint8_t *dest, const uint8_t *src);
573extern uint16_t *
574 u16_strcat (uint16_t *dest, const uint16_t *src);
575extern uint32_t *
576 u32_strcat (uint32_t *dest, const uint32_t *src);
577
578/* Append no more than N units of SRC onto DEST. */
579/* Similar to strncat(), wcsncat(). */
580extern uint8_t *
581 u8_strncat (uint8_t *dest, const uint8_t *src, size_t n);
582extern uint16_t *
583 u16_strncat (uint16_t *dest, const uint16_t *src, size_t n);
584extern uint32_t *
585 u32_strncat (uint32_t *dest, const uint32_t *src, size_t n);
586
587/* Compare S1 and S2. */
588/* Similar to strcmp(), wcscmp(). */
0f00f2c3
LC
589#ifdef __sun
590/* Avoid a collision with the u8_strcmp() function in Solaris 11 libc. */
591extern int
005de2e8
LC
592 u8_strcmp_gnu (const uint8_t *s1, const uint8_t *s2)
593 _UC_ATTRIBUTE_PURE;
0f00f2c3
LC
594# define u8_strcmp u8_strcmp_gnu
595#else
24d56127 596extern int
005de2e8
LC
597 u8_strcmp (const uint8_t *s1, const uint8_t *s2)
598 _UC_ATTRIBUTE_PURE;
0f00f2c3 599#endif
24d56127 600extern int
005de2e8
LC
601 u16_strcmp (const uint16_t *s1, const uint16_t *s2)
602 _UC_ATTRIBUTE_PURE;
24d56127 603extern int
005de2e8
LC
604 u32_strcmp (const uint32_t *s1, const uint32_t *s2)
605 _UC_ATTRIBUTE_PURE;
24d56127
LC
606
607/* Compare S1 and S2 using the collation rules of the current locale.
608 Return -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2.
609 Upon failure, set errno and return any value. */
610/* Similar to strcoll(), wcscoll(). */
611extern int
612 u8_strcoll (const uint8_t *s1, const uint8_t *s2);
613extern int
614 u16_strcoll (const uint16_t *s1, const uint16_t *s2);
615extern int
616 u32_strcoll (const uint32_t *s1, const uint32_t *s2);
617
618/* Compare no more than N units of S1 and S2. */
619/* Similar to strncmp(), wcsncmp(). */
620extern int
005de2e8
LC
621 u8_strncmp (const uint8_t *s1, const uint8_t *s2, size_t n)
622 _UC_ATTRIBUTE_PURE;
24d56127 623extern int
005de2e8
LC
624 u16_strncmp (const uint16_t *s1, const uint16_t *s2, size_t n)
625 _UC_ATTRIBUTE_PURE;
24d56127 626extern int
005de2e8
LC
627 u32_strncmp (const uint32_t *s1, const uint32_t *s2, size_t n)
628 _UC_ATTRIBUTE_PURE;
24d56127
LC
629
630/* Duplicate S, returning an identical malloc'd string. */
631/* Similar to strdup(), wcsdup(). */
632extern uint8_t *
633 u8_strdup (const uint8_t *s);
634extern uint16_t *
635 u16_strdup (const uint16_t *s);
636extern uint32_t *
637 u32_strdup (const uint32_t *s);
638
639/* Find the first occurrence of UC in STR. */
640/* Similar to strchr(), wcschr(). */
641extern uint8_t *
005de2e8
LC
642 u8_strchr (const uint8_t *str, ucs4_t uc)
643 _UC_ATTRIBUTE_PURE;
24d56127 644extern uint16_t *
005de2e8
LC
645 u16_strchr (const uint16_t *str, ucs4_t uc)
646 _UC_ATTRIBUTE_PURE;
24d56127 647extern uint32_t *
005de2e8
LC
648 u32_strchr (const uint32_t *str, ucs4_t uc)
649 _UC_ATTRIBUTE_PURE;
24d56127
LC
650
651/* Find the last occurrence of UC in STR. */
652/* Similar to strrchr(), wcsrchr(). */
653extern uint8_t *
005de2e8
LC
654 u8_strrchr (const uint8_t *str, ucs4_t uc)
655 _UC_ATTRIBUTE_PURE;
24d56127 656extern uint16_t *
005de2e8
LC
657 u16_strrchr (const uint16_t *str, ucs4_t uc)
658 _UC_ATTRIBUTE_PURE;
24d56127 659extern uint32_t *
005de2e8
LC
660 u32_strrchr (const uint32_t *str, ucs4_t uc)
661 _UC_ATTRIBUTE_PURE;
24d56127
LC
662
663/* Return the length of the initial segment of STR which consists entirely
664 of Unicode characters not in REJECT. */
665/* Similar to strcspn(), wcscspn(). */
666extern size_t
005de2e8
LC
667 u8_strcspn (const uint8_t *str, const uint8_t *reject)
668 _UC_ATTRIBUTE_PURE;
24d56127 669extern size_t
005de2e8
LC
670 u16_strcspn (const uint16_t *str, const uint16_t *reject)
671 _UC_ATTRIBUTE_PURE;
24d56127 672extern size_t
005de2e8
LC
673 u32_strcspn (const uint32_t *str, const uint32_t *reject)
674 _UC_ATTRIBUTE_PURE;
24d56127
LC
675
676/* Return the length of the initial segment of STR which consists entirely
677 of Unicode characters in ACCEPT. */
678/* Similar to strspn(), wcsspn(). */
679extern size_t
005de2e8
LC
680 u8_strspn (const uint8_t *str, const uint8_t *accept)
681 _UC_ATTRIBUTE_PURE;
24d56127 682extern size_t
005de2e8
LC
683 u16_strspn (const uint16_t *str, const uint16_t *accept)
684 _UC_ATTRIBUTE_PURE;
24d56127 685extern size_t
005de2e8
LC
686 u32_strspn (const uint32_t *str, const uint32_t *accept)
687 _UC_ATTRIBUTE_PURE;
24d56127
LC
688
689/* Find the first occurrence in STR of any character in ACCEPT. */
690/* Similar to strpbrk(), wcspbrk(). */
691extern uint8_t *
005de2e8
LC
692 u8_strpbrk (const uint8_t *str, const uint8_t *accept)
693 _UC_ATTRIBUTE_PURE;
24d56127 694extern uint16_t *
005de2e8
LC
695 u16_strpbrk (const uint16_t *str, const uint16_t *accept)
696 _UC_ATTRIBUTE_PURE;
24d56127 697extern uint32_t *
005de2e8
LC
698 u32_strpbrk (const uint32_t *str, const uint32_t *accept)
699 _UC_ATTRIBUTE_PURE;
24d56127
LC
700
701/* Find the first occurrence of NEEDLE in HAYSTACK. */
702/* Similar to strstr(), wcsstr(). */
703extern uint8_t *
005de2e8
LC
704 u8_strstr (const uint8_t *haystack, const uint8_t *needle)
705 _UC_ATTRIBUTE_PURE;
24d56127 706extern uint16_t *
005de2e8
LC
707 u16_strstr (const uint16_t *haystack, const uint16_t *needle)
708 _UC_ATTRIBUTE_PURE;
24d56127 709extern uint32_t *
005de2e8
LC
710 u32_strstr (const uint32_t *haystack, const uint32_t *needle)
711 _UC_ATTRIBUTE_PURE;
24d56127
LC
712
713/* Test whether STR starts with PREFIX. */
714extern bool
005de2e8
LC
715 u8_startswith (const uint8_t *str, const uint8_t *prefix)
716 _UC_ATTRIBUTE_PURE;
24d56127 717extern bool
005de2e8
LC
718 u16_startswith (const uint16_t *str, const uint16_t *prefix)
719 _UC_ATTRIBUTE_PURE;
24d56127 720extern bool
005de2e8
LC
721 u32_startswith (const uint32_t *str, const uint32_t *prefix)
722 _UC_ATTRIBUTE_PURE;
24d56127
LC
723
724/* Test whether STR ends with SUFFIX. */
725extern bool
005de2e8
LC
726 u8_endswith (const uint8_t *str, const uint8_t *suffix)
727 _UC_ATTRIBUTE_PURE;
24d56127 728extern bool
005de2e8
LC
729 u16_endswith (const uint16_t *str, const uint16_t *suffix)
730 _UC_ATTRIBUTE_PURE;
24d56127 731extern bool
005de2e8
LC
732 u32_endswith (const uint32_t *str, const uint32_t *suffix)
733 _UC_ATTRIBUTE_PURE;
24d56127
LC
734
735/* Divide STR into tokens separated by characters in DELIM.
736 This interface is actually more similar to wcstok than to strtok. */
737/* Similar to strtok_r(), wcstok(). */
738extern uint8_t *
739 u8_strtok (uint8_t *str, const uint8_t *delim, uint8_t **ptr);
740extern uint16_t *
741 u16_strtok (uint16_t *str, const uint16_t *delim, uint16_t **ptr);
742extern uint32_t *
743 u32_strtok (uint32_t *str, const uint32_t *delim, uint32_t **ptr);
744
745
746#ifdef __cplusplus
747}
748#endif
749
750#endif /* _UNISTR_H */