* doprnt.c (SIZE_MAX): Move defn after all includes, as they might #define it.
[bpt/emacs.git] / src / doprnt.c
1 /* Output like sprintf to a buffer of specified size.
2 Also takes args differently: pass one pointer to the end
3 of the format string in addition to the format string itself.
4 Copyright (C) 1985, 2001-2011 Free Software Foundation, Inc.
5
6 This file is part of GNU Emacs.
7
8 GNU Emacs is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
20
21 /* If you think about replacing this with some similar standard C function of
22 the printf family (such as vsnprintf), please note that this function
23 supports the following Emacs-specific features:
24
25 . For %c conversions, it produces a string with the multibyte representation
26 of the (`int') argument, suitable for display in an Emacs buffer.
27
28 . For %s and %c, when field width is specified (e.g., %25s), it accounts for
29 the display width of each character, according to char-width-table. That
30 is, it does not assume that each character takes one column on display.
31
32 . If the size of the buffer is not enough to produce the formatted string in
33 its entirety, it makes sure that truncation does not chop the last
34 character in the middle of its multibyte sequence, producing an invalid
35 sequence.
36
37 . It accepts a pointer to the end of the format string, so the format string
38 could include embedded null characters.
39
40 . It signals an error if the length of the formatted string is about to
41 overflow MOST_POSITIVE_FIXNUM, to avoid producing strings longer than what
42 Emacs can handle.
43
44 OTOH, this function supports only a small subset of the standard C formatted
45 output facilities. E.g., %u is not supported, and precision is
46 ignored in %s and %c conversions. (See below for the detailed documentation of
47 what is supported.) However, this is okay, as this function is supposed to
48 be called from `error' and similar functions, and thus does not need to
49 support features beyond those in `Fformat', which is used by `error' on the
50 Lisp level. */
51
52 /* This function supports the following %-sequences in the `format'
53 argument:
54
55 %s means print a string argument.
56 %S is silently treated as %s, for loose compatibility with `Fformat'.
57 %d means print a `signed int' argument in decimal.
58 %o means print an `unsigned int' argument in octal.
59 %x means print an `unsigned int' argument in hex.
60 %e means print a `double' argument in exponential notation.
61 %f means print a `double' argument in decimal-point notation.
62 %g means print a `double' argument in exponential notation
63 or in decimal-point notation, whichever uses fewer characters.
64 %c means print a `signed int' argument as a single character.
65 %% means produce a literal % character.
66
67 A %-sequence may contain optional flag, width, and precision specifiers, and
68 a length modifier, as follows:
69
70 %<flags><width><precision><length>character
71
72 where flags is [+ -0], width is [0-9]+, precision is .[0-9]+, and length
73 modifier is empty or l or ll.
74
75 The + flag character inserts a + before any positive number, while a space
76 inserts a space before any positive number; these flags only affect %d, %o,
77 %x, %e, %f, and %g sequences. The - and 0 flags affect the width specifier,
78 as described below. For signed numerical arguments only, the ` ' (space)
79 flag causes the result to be prefixed with a space character if it does not
80 start with a sign (+ or -).
81
82 The l (lower-case letter ell) length modifier is a `long' data type
83 modifier: it is supported for %d, %o, and %x conversions of integral
84 arguments, must immediately precede the conversion specifier, and means that
85 the respective argument is to be treated as `long int' or `unsigned long
86 int'. Similarly, ll (two letter ells) means to use `long long int' or
87 `unsigned long long int'; this can be used only on hosts that have
88 these two types. The empty length modifier means to use `int' or
89 `unsigned int'. EMACS_INT arguments should use the pI macro, which
90 expands to whatever length modifier is needed for the target host.
91
92 The width specifier supplies a lower limit for the length of the printed
93 representation. The padding, if any, normally goes on the left, but it goes
94 on the right if the - flag is present. The padding character is normally a
95 space, but (for numerical arguments only) it is 0 if the 0 flag is present.
96 The - flag takes precedence over the 0 flag.
97
98 For %e, %f, and %g sequences, the number after the "." in the precision
99 specifier says how many decimal places to show; if zero, the decimal point
100 itself is omitted. For %s and %S, the precision specifier is ignored. */
101
102 #include <config.h>
103 #include <stdio.h>
104 #include <ctype.h>
105 #include <setjmp.h>
106
107 #ifdef STDC_HEADERS
108 #include <float.h>
109 #endif
110
111 #include <unistd.h>
112
113 #include <limits.h>
114
115 #include "lisp.h"
116
117 /* Since we use the macro CHAR_HEAD_P, we have to include this, but
118 don't have to include others because CHAR_HEAD_P does not contain
119 another macro. */
120 #include "character.h"
121
122 #ifndef SIZE_MAX /* Fallback for hosts whose headers do not provide it.  */
123 # define SIZE_MAX ((size_t) -1) /* Largest value representable in size_t.  */
124 #endif
125
126 #ifndef DBL_MAX_10_EXP /* <float.h> is included above only under STDC_HEADERS.  */
127 #define DBL_MAX_10_EXP 308 /* IEEE double */
128 #endif
129
130 /* Generate output from a format-spec FORMAT,
131 terminated at position FORMAT_END.
132 Output goes in BUFFER, which has room for BUFSIZE chars.
133 If the output does not fit, truncate it to fit.
134 Returns the number of bytes stored into BUFFER, excluding
135 the terminating null byte. Output is always null-terminated.
136 String arguments are passed as C strings.
137 Integers are passed as C integers. */
138
139 size_t
140 doprnt (char *buffer, register size_t bufsize, const char *format,
141 const char *format_end, va_list ap)
142 {
143 const char *fmt = format; /* Pointer into format string */
144 register char *bufptr = buffer; /* Pointer into output buffer.. */
145
146 /* Use this for sprintf unless we need something really big. */
147 char tembuf[DBL_MAX_10_EXP + 100];
148
149 /* Size of sprintf_buffer. */
150 size_t size_allocated = sizeof (tembuf);
151
152 /* Buffer to use for sprintf. Either tembuf or same as BIG_BUFFER. */
153 char *sprintf_buffer = tembuf;
154
155 /* Buffer we have got with malloc. */
156 char *big_buffer = NULL;
157
158 register size_t tem;
159 char *string;
160 char fixed_buffer[20]; /* Default buffer for small formatting. */
161 char *fmtcpy;
162 int minlen;
163 char charbuf[MAX_MULTIBYTE_LENGTH + 1]; /* Used for %c. */
164 USE_SAFE_ALLOCA;
165
166 if (format_end == 0)
167 format_end = format + strlen (format);
168
169 if ((format_end - format + 1) < sizeof (fixed_buffer))
170 fmtcpy = fixed_buffer;
171 else
172 SAFE_ALLOCA (fmtcpy, char *, format_end - format + 1);
173
174 bufsize--;
175
176 /* Loop until end of format string or buffer full. */
177 while (fmt < format_end && bufsize > 0)
178 {
179 if (*fmt == '%') /* Check for a '%' character */
180 {
181 size_t size_bound = 0;
182 EMACS_INT width; /* Columns occupied by STRING on display. */
183 int long_flag = 0;
184
185 fmt++;
186 /* Copy this one %-spec into fmtcpy. */
187 string = fmtcpy;
188 *string++ = '%';
189 while (fmt < format_end)
190 {
191 *string++ = *fmt;
192 if ('0' <= *fmt && *fmt <= '9')
193 {
194 /* Get an idea of how much space we might need.
195 This might be a field width or a precision; e.g.
196 %1.1000f and %1000.1f both might need 1000+ bytes.
197 Parse the width or precision, checking for overflow. */
198 size_t n = *fmt - '0';
199 while (fmt < format_end
200 && '0' <= fmt[1] && fmt[1] <= '9')
201 {
202 /* Avoid int overflow, because many sprintfs seriously
203 mess up with widths or precisions greater than
204 INT_MAX. Avoid size_t overflow, since our counters
205 use size_t. This test is slightly conservative, for
206 speed and simplicity. */
207 if (n >= min (INT_MAX, SIZE_MAX) / 10)
208 error ("Format width or precision too large");
209 n = n * 10 + fmt[1] - '0';
210 *string++ = *++fmt;
211 }
212
213 if (size_bound < n)
214 size_bound = n;
215 }
216 else if (*fmt == '-' || *fmt == ' ' || *fmt == '.' || *fmt == '+')
217 ;
218 else if (*fmt == 'l')
219 {
220 long_flag = 1 + (fmt + 1 < format_end && fmt[1] == 'l');
221 fmt += long_flag;
222 break;
223 }
224 else
225 break;
226 fmt++;
227 }
228 if (fmt > format_end)
229 fmt = format_end;
230 *string = 0;
231
232 /* Make the size bound large enough to handle floating point formats
233 with large numbers. */
234 if (size_bound > SIZE_MAX - DBL_MAX_10_EXP - 50)
235 error ("Format width or precision too large");
236 size_bound += DBL_MAX_10_EXP + 50;
237
238 /* Make sure we have that much. */
239 if (size_bound > size_allocated)
240 {
241 if (big_buffer)
242 xfree (big_buffer);
243 big_buffer = (char *) xmalloc (size_bound);
244 sprintf_buffer = big_buffer;
245 size_allocated = size_bound;
246 }
247 minlen = 0;
248 switch (*fmt++)
249 {
250 default:
251 error ("Invalid format operation %%%s%c",
252 "ll" + 2 - long_flag, fmt[-1]);
253
254 /* case 'b': */
255 case 'l':
256 case 'd':
257 {
258 int i;
259 long l;
260
261 if (1 < long_flag)
262 {
263 #ifdef HAVE_LONG_LONG_INT
264 long long ll = va_arg (ap, long long);
265 sprintf (sprintf_buffer, fmtcpy, ll);
266 #else
267 abort ();
268 #endif
269 }
270 else if (long_flag)
271 {
272 l = va_arg(ap, long);
273 sprintf (sprintf_buffer, fmtcpy, l);
274 }
275 else
276 {
277 i = va_arg(ap, int);
278 sprintf (sprintf_buffer, fmtcpy, i);
279 }
280 /* Now copy into final output, truncating as necessary. */
281 string = sprintf_buffer;
282 goto doit;
283 }
284
285 case 'o':
286 case 'x':
287 {
288 unsigned u;
289 unsigned long ul;
290
291 if (1 < long_flag)
292 {
293 #ifdef HAVE_UNSIGNED_LONG_LONG_INT
294 unsigned long long ull = va_arg (ap, unsigned long long);
295 sprintf (sprintf_buffer, fmtcpy, ull);
296 #else
297 abort ();
298 #endif
299 }
300 else if (long_flag)
301 {
302 ul = va_arg(ap, unsigned long);
303 sprintf (sprintf_buffer, fmtcpy, ul);
304 }
305 else
306 {
307 u = va_arg(ap, unsigned);
308 sprintf (sprintf_buffer, fmtcpy, u);
309 }
310 /* Now copy into final output, truncating as necessary. */
311 string = sprintf_buffer;
312 goto doit;
313 }
314
315 case 'f':
316 case 'e':
317 case 'g':
318 {
319 double d = va_arg(ap, double);
320 sprintf (sprintf_buffer, fmtcpy, d);
321 /* Now copy into final output, truncating as necessary. */
322 string = sprintf_buffer;
323 goto doit;
324 }
325
326 case 'S':
327 string[-1] = 's';
328 case 's':
329 if (fmtcpy[1] != 's')
330 minlen = atoi (&fmtcpy[1]);
331 string = va_arg (ap, char *);
332 tem = strlen (string);
333 if (tem > MOST_POSITIVE_FIXNUM)
334 error ("String for %%s or %%S format is too long");
335 width = strwidth (string, tem);
336 goto doit1;
337
338 /* Copy string into final output, truncating if no room. */
339 doit:
340 /* Coming here means STRING contains ASCII only. */
341 tem = strlen (string);
342 if (tem > MOST_POSITIVE_FIXNUM)
343 error ("Format width or precision too large");
344 width = tem;
345 doit1:
346 /* We have already calculated:
347 TEM -- length of STRING,
348 WIDTH -- columns occupied by STRING when displayed, and
349 MINLEN -- minimum columns of the output. */
350 if (minlen > 0)
351 {
352 while (minlen > width && bufsize > 0)
353 {
354 *bufptr++ = ' ';
355 bufsize--;
356 minlen--;
357 }
358 minlen = 0;
359 }
360 if (tem > bufsize)
361 {
362 /* Truncate the string at character boundary. */
363 tem = bufsize;
364 while (!CHAR_HEAD_P (string[tem - 1])) tem--;
365 memcpy (bufptr, string, tem);
366 /* We must calculate WIDTH again. */
367 width = strwidth (bufptr, tem);
368 }
369 else
370 memcpy (bufptr, string, tem);
371 bufptr += tem;
372 bufsize -= tem;
373 if (minlen < 0)
374 {
375 while (minlen < - width && bufsize > 0)
376 {
377 *bufptr++ = ' ';
378 bufsize--;
379 minlen++;
380 }
381 minlen = 0;
382 }
383 continue;
384
385 case 'c':
386 {
387 int chr = va_arg(ap, int);
388 tem = CHAR_STRING (chr, (unsigned char *) charbuf);
389 string = charbuf;
390 string[tem] = 0;
391 width = strwidth (string, tem);
392 if (fmtcpy[1] != 'c')
393 minlen = atoi (&fmtcpy[1]);
394 goto doit1;
395 }
396
397 case '%':
398 fmt--; /* Drop thru and this % will be treated as normal */
399 }
400 }
401
402 {
403 /* Just some character; Copy it if the whole multi-byte form
404 fit in the buffer. */
405 char *save_bufptr = bufptr;
406
407 do { *bufptr++ = *fmt++; }
408 while (fmt < format_end && --bufsize > 0 && !CHAR_HEAD_P (*fmt));
409 if (!CHAR_HEAD_P (*fmt))
410 {
411 /* Truncate, but return value that will signal to caller
412 that the buffer was too small. */
413 *save_bufptr = 0;
414 break;
415 }
416 }
417 };
418
419 /* If we had to malloc something, free it. */
420 xfree (big_buffer);
421
422 *bufptr = 0; /* Make sure our string ends with a '\0' */
423
424 SAFE_FREE ();
425 return bufptr - buffer;
426 }