Merge remote-tracking branch 'origin/stable-2.0'
[bpt/guile.git] / lib / printf-parse.c
CommitLineData
c4b681fd 1/* Formatted output to strings.
f0007cad 2 Copyright (C) 1999-2000, 2002-2003, 2006-2012 Free Software Foundation, Inc.
c4b681fd
LC
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU Lesser General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public License along
15 with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
17
18/* This file can be parametrized with the following macros:
19 CHAR_T The element type of the format string.
20 CHAR_T_ONLY_ASCII Set to 1 to enable verification that all characters
21 in the format string are ASCII.
22 DIRECTIVE Structure denoting a format directive.
23 Depends on CHAR_T.
24 DIRECTIVES Structure denoting the set of format directives of a
25 format string. Depends on CHAR_T.
26 PRINTF_PARSE Function that parses a format string.
27 Depends on CHAR_T.
28 STATIC Set to 'static' to declare the function static.
29 ENABLE_UNISTDIO Set to 1 to enable the unistdio extensions. */
30
31#ifndef PRINTF_PARSE
32# include <config.h>
33#endif
34
35/* Specification. */
36#ifndef PRINTF_PARSE
37# include "printf-parse.h"
38#endif
39
40/* Default parameters. */
41#ifndef PRINTF_PARSE
42# define PRINTF_PARSE printf_parse
43# define CHAR_T char
44# define DIRECTIVE char_directive
45# define DIRECTIVES char_directives
46#endif
47
48/* Get size_t, NULL. */
49#include <stddef.h>
50
51/* Get intmax_t. */
52#if defined IN_LIBINTL || defined IN_LIBASPRINTF
53# if HAVE_STDINT_H_WITH_UINTMAX
54# include <stdint.h>
55# endif
56# if HAVE_INTTYPES_H_WITH_UINTMAX
57# include <inttypes.h>
58# endif
59#else
60# include <stdint.h>
61#endif
62
63/* malloc(), realloc(), free(). */
64#include <stdlib.h>
65
49114fd4
LC
66/* memcpy(). */
67#include <string.h>
68
c4b681fd
LC
69/* errno. */
70#include <errno.h>
71
72/* Checked size_t computations. */
73#include "xsize.h"
74
75#if CHAR_T_ONLY_ASCII
76/* c_isascii(). */
77# include "c-ctype.h"
78#endif
79
80#ifdef STATIC
81STATIC
82#endif
83int
84PRINTF_PARSE (const CHAR_T *format, DIRECTIVES *d, arguments *a)
85{
49114fd4 86 const CHAR_T *cp = format; /* pointer into format */
1cd4fffc 87 size_t arg_posn = 0; /* number of regular arguments consumed */
49114fd4
LC
88 size_t d_allocated; /* allocated elements of d->dir */
89 size_t a_allocated; /* allocated elements of a->arg */
c4b681fd
LC
90 size_t max_width_length = 0;
91 size_t max_precision_length = 0;
92
93 d->count = 0;
49114fd4
LC
94 d_allocated = N_DIRECT_ALLOC_DIRECTIVES;
95 d->dir = d->direct_alloc_dir;
c4b681fd
LC
96
97 a->count = 0;
49114fd4
LC
98 a_allocated = N_DIRECT_ALLOC_ARGUMENTS;
99 a->arg = a->direct_alloc_arg;
c4b681fd
LC
100
101#define REGISTER_ARG(_index_,_type_) \
1cd4fffc
LC
102 { \
103 size_t n = (_index_); \
104 if (n >= a_allocated) \
105 { \
106 size_t memory_size; \
107 argument *memory; \
108 \
109 a_allocated = xtimes (a_allocated, 2); \
110 if (a_allocated <= n) \
111 a_allocated = xsum (n, 1); \
112 memory_size = xtimes (a_allocated, sizeof (argument)); \
113 if (size_overflow_p (memory_size)) \
114 /* Overflow, would lead to out of memory. */ \
115 goto out_of_memory; \
49114fd4 116 memory = (argument *) (a->arg != a->direct_alloc_arg \
1cd4fffc
LC
117 ? realloc (a->arg, memory_size) \
118 : malloc (memory_size)); \
119 if (memory == NULL) \
120 /* Out of memory. */ \
121 goto out_of_memory; \
49114fd4
LC
122 if (a->arg == a->direct_alloc_arg) \
123 memcpy (memory, a->arg, a->count * sizeof (argument)); \
1cd4fffc
LC
124 a->arg = memory; \
125 } \
126 while (a->count <= n) \
127 a->arg[a->count++].type = TYPE_NONE; \
128 if (a->arg[n].type == TYPE_NONE) \
129 a->arg[n].type = (_type_); \
130 else if (a->arg[n].type != (_type_)) \
131 /* Ambiguous type for positional argument. */ \
132 goto error; \
c4b681fd
LC
133 }
134
135 while (*cp != '\0')
136 {
137 CHAR_T c = *cp++;
138 if (c == '%')
1cd4fffc
LC
139 {
140 size_t arg_index = ARG_NONE;
141 DIRECTIVE *dp = &d->dir[d->count]; /* pointer to next directive */
142
143 /* Initialize the next directive. */
144 dp->dir_start = cp - 1;
145 dp->flags = 0;
146 dp->width_start = NULL;
147 dp->width_end = NULL;
148 dp->width_arg_index = ARG_NONE;
149 dp->precision_start = NULL;
150 dp->precision_end = NULL;
151 dp->precision_arg_index = ARG_NONE;
152 dp->arg_index = ARG_NONE;
153
154 /* Test for positional argument. */
155 if (*cp >= '0' && *cp <= '9')
156 {
157 const CHAR_T *np;
158
159 for (np = cp; *np >= '0' && *np <= '9'; np++)
160 ;
161 if (*np == '$')
162 {
163 size_t n = 0;
164
165 for (np = cp; *np >= '0' && *np <= '9'; np++)
166 n = xsum (xtimes (n, 10), *np - '0');
167 if (n == 0)
168 /* Positional argument 0. */
169 goto error;
170 if (size_overflow_p (n))
171 /* n too large, would lead to out of memory later. */
172 goto error;
173 arg_index = n - 1;
174 cp = np + 1;
175 }
176 }
177
178 /* Read the flags. */
179 for (;;)
180 {
181 if (*cp == '\'')
182 {
183 dp->flags |= FLAG_GROUP;
184 cp++;
185 }
186 else if (*cp == '-')
187 {
188 dp->flags |= FLAG_LEFT;
189 cp++;
190 }
191 else if (*cp == '+')
192 {
193 dp->flags |= FLAG_SHOWSIGN;
194 cp++;
195 }
196 else if (*cp == ' ')
197 {
198 dp->flags |= FLAG_SPACE;
199 cp++;
200 }
201 else if (*cp == '#')
202 {
203 dp->flags |= FLAG_ALT;
204 cp++;
205 }
206 else if (*cp == '0')
207 {
208 dp->flags |= FLAG_ZERO;
209 cp++;
210 }
0f00f2c3
LC
211#if __GLIBC__ >= 2 && !defined __UCLIBC__
212 else if (*cp == 'I')
213 {
214 dp->flags |= FLAG_LOCALIZED;
215 cp++;
216 }
217#endif
1cd4fffc
LC
218 else
219 break;
220 }
221
222 /* Parse the field width. */
223 if (*cp == '*')
224 {
225 dp->width_start = cp;
226 cp++;
227 dp->width_end = cp;
228 if (max_width_length < 1)
229 max_width_length = 1;
230
231 /* Test for positional argument. */
232 if (*cp >= '0' && *cp <= '9')
233 {
234 const CHAR_T *np;
235
236 for (np = cp; *np >= '0' && *np <= '9'; np++)
237 ;
238 if (*np == '$')
239 {
240 size_t n = 0;
241
242 for (np = cp; *np >= '0' && *np <= '9'; np++)
243 n = xsum (xtimes (n, 10), *np - '0');
244 if (n == 0)
245 /* Positional argument 0. */
246 goto error;
247 if (size_overflow_p (n))
248 /* n too large, would lead to out of memory later. */
249 goto error;
250 dp->width_arg_index = n - 1;
251 cp = np + 1;
252 }
253 }
254 if (dp->width_arg_index == ARG_NONE)
255 {
256 dp->width_arg_index = arg_posn++;
257 if (dp->width_arg_index == ARG_NONE)
258 /* arg_posn wrapped around. */
259 goto error;
260 }
261 REGISTER_ARG (dp->width_arg_index, TYPE_INT);
262 }
263 else if (*cp >= '0' && *cp <= '9')
264 {
265 size_t width_length;
266
267 dp->width_start = cp;
268 for (; *cp >= '0' && *cp <= '9'; cp++)
269 ;
270 dp->width_end = cp;
271 width_length = dp->width_end - dp->width_start;
272 if (max_width_length < width_length)
273 max_width_length = width_length;
274 }
275
276 /* Parse the precision. */
277 if (*cp == '.')
278 {
279 cp++;
280 if (*cp == '*')
281 {
282 dp->precision_start = cp - 1;
283 cp++;
284 dp->precision_end = cp;
285 if (max_precision_length < 2)
286 max_precision_length = 2;
287
288 /* Test for positional argument. */
289 if (*cp >= '0' && *cp <= '9')
290 {
291 const CHAR_T *np;
292
293 for (np = cp; *np >= '0' && *np <= '9'; np++)
294 ;
295 if (*np == '$')
296 {
297 size_t n = 0;
298
299 for (np = cp; *np >= '0' && *np <= '9'; np++)
300 n = xsum (xtimes (n, 10), *np - '0');
301 if (n == 0)
302 /* Positional argument 0. */
303 goto error;
304 if (size_overflow_p (n))
305 /* n too large, would lead to out of memory
306 later. */
307 goto error;
308 dp->precision_arg_index = n - 1;
309 cp = np + 1;
310 }
311 }
312 if (dp->precision_arg_index == ARG_NONE)
313 {
314 dp->precision_arg_index = arg_posn++;
315 if (dp->precision_arg_index == ARG_NONE)
316 /* arg_posn wrapped around. */
317 goto error;
318 }
319 REGISTER_ARG (dp->precision_arg_index, TYPE_INT);
320 }
321 else
322 {
323 size_t precision_length;
324
325 dp->precision_start = cp - 1;
326 for (; *cp >= '0' && *cp <= '9'; cp++)
327 ;
328 dp->precision_end = cp;
329 precision_length = dp->precision_end - dp->precision_start;
330 if (max_precision_length < precision_length)
331 max_precision_length = precision_length;
332 }
333 }
334
335 {
336 arg_type type;
337
338 /* Parse argument type/size specifiers. */
339 {
340 int flags = 0;
341
342 for (;;)
343 {
344 if (*cp == 'h')
345 {
346 flags |= (1 << (flags & 1));
347 cp++;
348 }
349 else if (*cp == 'L')
350 {
351 flags |= 4;
352 cp++;
353 }
354 else if (*cp == 'l')
355 {
356 flags += 8;
357 cp++;
358 }
359 else if (*cp == 'j')
360 {
361 if (sizeof (intmax_t) > sizeof (long))
362 {
363 /* intmax_t = long long */
364 flags += 16;
365 }
366 else if (sizeof (intmax_t) > sizeof (int))
367 {
368 /* intmax_t = long */
369 flags += 8;
370 }
371 cp++;
372 }
373 else if (*cp == 'z' || *cp == 'Z')
374 {
375 /* 'z' is standardized in ISO C 99, but glibc uses 'Z'
376 because the warning facility in gcc-2.95.2 understands
377 only 'Z' (see gcc-2.95.2/gcc/c-common.c:1784). */
378 if (sizeof (size_t) > sizeof (long))
379 {
380 /* size_t = long long */
381 flags += 16;
382 }
383 else if (sizeof (size_t) > sizeof (int))
384 {
385 /* size_t = long */
386 flags += 8;
387 }
388 cp++;
389 }
390 else if (*cp == 't')
391 {
392 if (sizeof (ptrdiff_t) > sizeof (long))
393 {
394 /* ptrdiff_t = long long */
395 flags += 16;
396 }
397 else if (sizeof (ptrdiff_t) > sizeof (int))
398 {
399 /* ptrdiff_t = long */
400 flags += 8;
401 }
402 cp++;
403 }
c4b681fd 404#if defined __APPLE__ && defined __MACH__
1cd4fffc
LC
405 /* On MacOS X 10.3, PRIdMAX is defined as "qd".
406 We cannot change it to "lld" because PRIdMAX must also
407 be understood by the system's printf routines. */
408 else if (*cp == 'q')
409 {
410 if (64 / 8 > sizeof (long))
411 {
412 /* int64_t = long long */
413 flags += 16;
414 }
415 else
416 {
417 /* int64_t = long */
418 flags += 8;
419 }
420 cp++;
421 }
c4b681fd
LC
422#endif
423#if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
f0007cad 424 /* On native Windows, PRIdMAX is defined as "I64d".
1cd4fffc
LC
425 We cannot change it to "lld" because PRIdMAX must also
426 be understood by the system's printf routines. */
427 else if (*cp == 'I' && cp[1] == '6' && cp[2] == '4')
428 {
429 if (64 / 8 > sizeof (long))
430 {
431 /* __int64 = long long */
432 flags += 16;
433 }
434 else
435 {
436 /* __int64 = long */
437 flags += 8;
438 }
439 cp += 3;
440 }
c4b681fd 441#endif
1cd4fffc
LC
442 else
443 break;
444 }
445
446 /* Read the conversion character. */
447 c = *cp++;
448 switch (c)
449 {
450 case 'd': case 'i':
c4b681fd 451#if HAVE_LONG_LONG_INT
1cd4fffc
LC
452 /* If 'long long' exists and is larger than 'long': */
453 if (flags >= 16 || (flags & 4))
454 type = TYPE_LONGLONGINT;
455 else
c4b681fd 456#endif
1cd4fffc
LC
457 /* If 'long long' exists and is the same as 'long', we parse
458 "lld" into TYPE_LONGINT. */
459 if (flags >= 8)
460 type = TYPE_LONGINT;
461 else if (flags & 2)
462 type = TYPE_SCHAR;
463 else if (flags & 1)
464 type = TYPE_SHORT;
465 else
466 type = TYPE_INT;
467 break;
468 case 'o': case 'u': case 'x': case 'X':
c4b681fd 469#if HAVE_LONG_LONG_INT
1cd4fffc
LC
470 /* If 'long long' exists and is larger than 'long': */
471 if (flags >= 16 || (flags & 4))
472 type = TYPE_ULONGLONGINT;
473 else
c4b681fd 474#endif
1cd4fffc
LC
475 /* If 'unsigned long long' exists and is the same as
476 'unsigned long', we parse "llu" into TYPE_ULONGINT. */
477 if (flags >= 8)
478 type = TYPE_ULONGINT;
479 else if (flags & 2)
480 type = TYPE_UCHAR;
481 else if (flags & 1)
482 type = TYPE_USHORT;
483 else
484 type = TYPE_UINT;
485 break;
486 case 'f': case 'F': case 'e': case 'E': case 'g': case 'G':
487 case 'a': case 'A':
488 if (flags >= 16 || (flags & 4))
489 type = TYPE_LONGDOUBLE;
490 else
491 type = TYPE_DOUBLE;
492 break;
493 case 'c':
494 if (flags >= 8)
c4b681fd 495#if HAVE_WINT_T
1cd4fffc 496 type = TYPE_WIDE_CHAR;
c4b681fd 497#else
1cd4fffc 498 goto error;
c4b681fd 499#endif
1cd4fffc
LC
500 else
501 type = TYPE_CHAR;
502 break;
c4b681fd 503#if HAVE_WINT_T
1cd4fffc
LC
504 case 'C':
505 type = TYPE_WIDE_CHAR;
506 c = 'c';
507 break;
c4b681fd 508#endif
1cd4fffc
LC
509 case 's':
510 if (flags >= 8)
c4b681fd 511#if HAVE_WCHAR_T
1cd4fffc 512 type = TYPE_WIDE_STRING;
c4b681fd 513#else
1cd4fffc 514 goto error;
c4b681fd 515#endif
1cd4fffc
LC
516 else
517 type = TYPE_STRING;
518 break;
c4b681fd 519#if HAVE_WCHAR_T
1cd4fffc
LC
520 case 'S':
521 type = TYPE_WIDE_STRING;
522 c = 's';
523 break;
c4b681fd 524#endif
1cd4fffc
LC
525 case 'p':
526 type = TYPE_POINTER;
527 break;
528 case 'n':
c4b681fd 529#if HAVE_LONG_LONG_INT
1cd4fffc
LC
530 /* If 'long long' exists and is larger than 'long': */
531 if (flags >= 16 || (flags & 4))
532 type = TYPE_COUNT_LONGLONGINT_POINTER;
533 else
c4b681fd 534#endif
1cd4fffc
LC
535 /* If 'long long' exists and is the same as 'long', we parse
536 "lln" into TYPE_COUNT_LONGINT_POINTER. */
537 if (flags >= 8)
538 type = TYPE_COUNT_LONGINT_POINTER;
539 else if (flags & 2)
540 type = TYPE_COUNT_SCHAR_POINTER;
541 else if (flags & 1)
542 type = TYPE_COUNT_SHORT_POINTER;
543 else
544 type = TYPE_COUNT_INT_POINTER;
545 break;
c4b681fd 546#if ENABLE_UNISTDIO
1cd4fffc
LC
547 /* The unistdio extensions. */
548 case 'U':
549 if (flags >= 16)
550 type = TYPE_U32_STRING;
551 else if (flags >= 8)
552 type = TYPE_U16_STRING;
553 else
554 type = TYPE_U8_STRING;
555 break;
c4b681fd 556#endif
1cd4fffc
LC
557 case '%':
558 type = TYPE_NONE;
559 break;
560 default:
561 /* Unknown conversion character. */
562 goto error;
563 }
564 }
565
566 if (type != TYPE_NONE)
567 {
568 dp->arg_index = arg_index;
569 if (dp->arg_index == ARG_NONE)
570 {
571 dp->arg_index = arg_posn++;
572 if (dp->arg_index == ARG_NONE)
573 /* arg_posn wrapped around. */
574 goto error;
575 }
576 REGISTER_ARG (dp->arg_index, type);
577 }
578 dp->conversion = c;
579 dp->dir_end = cp;
580 }
581
582 d->count++;
583 if (d->count >= d_allocated)
584 {
585 size_t memory_size;
586 DIRECTIVE *memory;
587
588 d_allocated = xtimes (d_allocated, 2);
589 memory_size = xtimes (d_allocated, sizeof (DIRECTIVE));
590 if (size_overflow_p (memory_size))
591 /* Overflow, would lead to out of memory. */
592 goto out_of_memory;
49114fd4
LC
593 memory = (DIRECTIVE *) (d->dir != d->direct_alloc_dir
594 ? realloc (d->dir, memory_size)
595 : malloc (memory_size));
1cd4fffc
LC
596 if (memory == NULL)
597 /* Out of memory. */
598 goto out_of_memory;
49114fd4
LC
599 if (d->dir == d->direct_alloc_dir)
600 memcpy (memory, d->dir, d->count * sizeof (DIRECTIVE));
1cd4fffc
LC
601 d->dir = memory;
602 }
603 }
c4b681fd
LC
604#if CHAR_T_ONLY_ASCII
605 else if (!c_isascii (c))
1cd4fffc
LC
606 {
607 /* Non-ASCII character. Not supported. */
608 goto error;
609 }
c4b681fd
LC
610#endif
611 }
612 d->dir[d->count].dir_start = cp;
613
614 d->max_width_length = max_width_length;
615 d->max_precision_length = max_precision_length;
616 return 0;
617
618error:
49114fd4 619 if (a->arg != a->direct_alloc_arg)
c4b681fd 620 free (a->arg);
49114fd4 621 if (d->dir != d->direct_alloc_dir)
c4b681fd
LC
622 free (d->dir);
623 errno = EINVAL;
624 return -1;
625
626out_of_memory:
49114fd4 627 if (a->arg != a->direct_alloc_arg)
c4b681fd 628 free (a->arg);
49114fd4 629 if (d->dir != d->direct_alloc_dir)
c4b681fd 630 free (d->dir);
c4b681fd
LC
631 errno = ENOMEM;
632 return -1;
633}
634
635#undef PRINTF_PARSE
636#undef DIRECTIVES
637#undef DIRECTIVE
638#undef CHAR_T_ONLY_ASCII
639#undef CHAR_T