src/doprnt.c

   1 /* Output like sprintf to a buffer of specified size.
   2    Also takes args differently: pass one pointer to the end
   3    of the format string in addition to the format string itself.
   4    Copyright (C) 1985, 2001-2011  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21 /* If you think about replacing this with some similar standard C function of
  22    the printf family (such as vsnprintf), please note that this function
  23    supports the following Emacs-specific features:
  24
  25    . For %c conversions, it produces a string with the multibyte representation
  26      of the (`int') argument, suitable for display in an Emacs buffer.
  27
  28    . For %s and %c, when field width is specified (e.g., %25s), it accounts for
  29      the display width of each character, according to char-width-table.  That
  30      is, it does not assume that each character takes one column on display.
  31
  32    . If the size of the buffer is not enough to produce the formatted string in
  33      its entirety, it makes sure that truncation does not chop the last
  34      character in the middle of its multibyte sequence, producing an invalid
  35      sequence.
  36
  37    . It accepts a pointer to the end of the format string, so the format string
  38      could include embedded null characters.
  39
  40    . It signals an error if the length of the formatted string is about to
  41      overflow MOST_POSITIVE_FIXNUM, to avoid producing strings longer than what
  42      Emacs can handle.
  43
  44    OTOH, this function supports only a small subset of the standard C formatted
  45    output facilities.  E.g., %u is not supported, and precision is ignored
  46    for %s and %c conversions.  (See below for the detailed documentation of
  47    what is supported.)  However, this is okay, as this function is supposed to
  48    be called from `error' and similar functions, and thus does not need to
  49    support features beyond those in `Fformat', which is used by `error' on the
  50    Lisp level.  */
  51
  52 /* This function supports the following %-sequences in the `format'
  53    argument:
  54
  55    %s means print a string argument.
  56    %S is silently treated as %s, for loose compatibility with `Fformat'.
  57    %d means print a `signed int' argument in decimal.
  58    %o means print an `unsigned int' argument in octal.
  59    %x means print an `unsigned int' argument in hex.
  60    %e means print a `double' argument in exponential notation.
  61    %f means print a `double' argument in decimal-point notation.
  62    %g means print a `double' argument in exponential notation
  63       or in decimal-point notation, whichever uses fewer characters.
  64    %c means print a `signed int' argument as a single character.
  65    %% means produce a literal % character.
  66
  67    A %-sequence may contain optional flag, width, and precision specifiers, and
  68    a length modifier, as follows:
  69
  70      %<flags><width><precision><length>character
  71
  72    where flags is [+ -0], width is [0-9]+, precision is .[0-9]+, and length
  73    modifier is empty or l or ll.
  74
  75    The + flag character inserts a + before any positive number, while a space
  76    inserts a space before any positive number; these flags only affect %d, %o,
  77    %x, %e, %f, and %g sequences.  The - and 0 flags affect the width specifier,
  78    as described below.  For signed numerical arguments only, the ` ' (space)
  79    flag causes the result to be prefixed with a space character if it does not
  80    start with a sign (+ or -).
  81
  82    The l (lower-case letter ell) length modifier is a `long' data type
  83    modifier: it is supported for %d, %o, and %x conversions of integral
  84    arguments, must immediately precede the conversion specifier, and means that
  85    the respective argument is to be treated as `long int' or `unsigned long
  86    int'.  Similarly, ll (two letter ells) means to use `long long int' or
  87    `unsigned long long int'; this can be used only on hosts that have
  88    these two types.  The empty length modifier means to use `int' or
  89    `unsigned int'.  EMACS_INT arguments should use the pI macro, which
  90    expands to whatever length modifier is needed for the target host.
  91
  92    The width specifier supplies a lower limit for the length of the printed
  93    representation.  The padding, if any, normally goes on the left, but it goes
  94    on the right if the - flag is present.  The padding character is normally a
  95    space, but (for numerical arguments only) it is 0 if the 0 flag is present.
  96    The - flag takes precedence over the 0 flag.
  97
  98    For %e, %f, and %g sequences, the number after the "." in the precision
  99    specifier says how many decimal places to show; if zero, the decimal point
 100    itself is omitted.  For %s and %S, the precision specifier is ignored.  */
 101
 102 #include <config.h>
 103 #include <stdio.h>
 104 #include <ctype.h>
 105 #include <setjmp.h>
 106
 107 #ifdef STDC_HEADERS
 108 #include <float.h>
 109 #endif
 110
 111 #include <unistd.h>
 112
 113 #include <limits.h>
 114 #ifndef SIZE_MAX
 115 # define SIZE_MAX ((size_t) -1)
 116 #endif
 117
 118 #include "lisp.h"
 119
 120 /* Since we use the macro CHAR_HEAD_P, we have to include this, but
 121    don't have to include others because CHAR_HEAD_P does not contain
 122    another macro.  */
 123 #include "character.h"
 124
 125 #ifndef DBL_MAX_10_EXP
 126 #define DBL_MAX_10_EXP 308 /* IEEE double */
 127 #endif
 128
 129 /* Generate output from a format-spec FORMAT,
 130    terminated at position FORMAT_END.
 131    Output goes in BUFFER, which has room for BUFSIZE chars.
 132    If the output does not fit, truncate it to fit.
 133    Returns the number of bytes stored into BUFFER, excluding
 134    the terminating null byte.  Output is always null-terminated.
 135    String arguments are passed as C strings.
 136    Integers are passed as C integers.  */
 137
 138 size_t
 139 doprnt (char *buffer, register size_t bufsize, const char *format,
 140         const char *format_end, va_list ap)
 141 {
 142   const char *fmt = format;     /* Pointer into format string */
 143   register char *bufptr = buffer; /* Pointer into output buffer.. */
 144
 145   /* Use this for sprintf unless we need something really big.  */
 146   char tembuf[DBL_MAX_10_EXP + 100];
 147
 148   /* Size of sprintf_buffer.  */
 149   size_t size_allocated = sizeof (tembuf);
 150
 151   /* Buffer to use for sprintf.  Either tembuf or same as BIG_BUFFER.  */
 152   char *sprintf_buffer = tembuf;
 153
 154   /* Buffer we have got with malloc.  */
 155   char *big_buffer = NULL;
 156
 157   register size_t tem;
 158   char *string;
 159   char fixed_buffer[20];        /* Default buffer for small formatting. */
 160   char *fmtcpy;
 161   int minlen;
 162   char charbuf[MAX_MULTIBYTE_LENGTH + 1];       /* Used for %c.  */
 163   USE_SAFE_ALLOCA;
 164
 165   if (format_end == 0)
 166     format_end = format + strlen (format);
 167
 168   if ((format_end - format + 1) < sizeof (fixed_buffer))
 169     fmtcpy = fixed_buffer;
 170   else
 171     SAFE_ALLOCA (fmtcpy, char *, format_end - format + 1);
 172
 173   bufsize--;
 174
 175   /* Loop until end of format string or buffer full. */
 176   while (fmt < format_end && bufsize > 0)
 177     {
 178       if (*fmt == '%')  /* Check for a '%' character */
 179         {
 180           size_t size_bound = 0;
 181           EMACS_INT width;  /* Columns occupied by STRING on display.  */
 182           int long_flag = 0;
 183
 184           fmt++;
 185           /* Copy this one %-spec into fmtcpy.  */
 186           string = fmtcpy;
 187           *string++ = '%';
 188           while (fmt < format_end)
 189             {
 190               *string++ = *fmt;
 191               if ('0' <= *fmt && *fmt <= '9')
 192                 {
 193                   /* Get an idea of how much space we might need.
 194                      This might be a field width or a precision; e.g.
 195                      %1.1000f and %1000.1f both might need 1000+ bytes.
 196                      Parse the width or precision, checking for overflow.  */
 197                   size_t n = *fmt - '0';
 198                   while (fmt < format_end
 199                          && '0' <= fmt[1] && fmt[1] <= '9')
 200                     {
 201                       if (n >= SIZE_MAX / 10
 202                           || n * 10 > SIZE_MAX - (fmt[1] - '0'))
 203                         error ("Format width or precision too large");
 204                       n = n * 10 + fmt[1] - '0';
 205                       *string++ = *++fmt;
 206                     }
 207
 208                   if (size_bound < n)
 209                     size_bound = n;
 210                 }
 211               else if (*fmt == '-' || *fmt == ' ' || *fmt == '.' || *fmt == '+')
 212                 ;
 213               else if (*fmt == 'l')
 214                 {
 215                   long_flag = 1 + (fmt + 1 < format_end && fmt[1] == 'l');
 216                   fmt += long_flag;
 217                   break;
 218                 }
 219               else
 220                 break;
 221               fmt++;
 222             }
 223           if (fmt > format_end)
 224             fmt = format_end;
 225           *string = 0;
 226
 227           /* Make the size bound large enough to handle floating point formats
 228              with large numbers.  */
 229           if (size_bound > SIZE_MAX - DBL_MAX_10_EXP - 50)
 230             error ("Format width or precision too large");
 231           size_bound += DBL_MAX_10_EXP + 50;
 232
 233           /* Make sure we have that much.  */
 234           if (size_bound > size_allocated)
 235             {
 236               if (big_buffer)
 237                 xfree (big_buffer);
 238               big_buffer = (char *) xmalloc (size_bound);
 239               sprintf_buffer = big_buffer;
 240               size_allocated = size_bound;
 241             }
 242           minlen = 0;
 243           switch (*fmt++)
 244             {
 245             default:
 246               error ("Invalid format operation %%%s%c",
 247                      "ll" + 2 - long_flag, fmt[-1]);
 248
 249 /*          case 'b': */
 250             case 'l':
 251             case 'd':
 252               {
 253                 int i;
 254                 long l;
 255
 256                 if (1 < long_flag)
 257                   {
 258 #ifdef HAVE_LONG_LONG_INT
 259                     long long ll = va_arg (ap, long long);
 260                     sprintf (sprintf_buffer, fmtcpy, ll);
 261 #else
 262                     abort ();
 263 #endif
 264                   }
 265                 else if (long_flag)
 266                   {
 267                     l = va_arg(ap, long);
 268                     sprintf (sprintf_buffer, fmtcpy, l);
 269                   }
 270                 else
 271                   {
 272                     i = va_arg(ap, int);
 273                     sprintf (sprintf_buffer, fmtcpy, i);
 274                   }
 275                 /* Now copy into final output, truncating as necessary.  */
 276                 string = sprintf_buffer;
 277                 goto doit;
 278               }
 279
 280             case 'o':
 281             case 'x':
 282               {
 283                 unsigned u;
 284                 unsigned long ul;
 285
 286                 if (1 < long_flag)
 287                   {
 288 #ifdef HAVE_UNSIGNED_LONG_LONG_INT
 289                     unsigned long long ull = va_arg (ap, unsigned long long);
 290                     sprintf (sprintf_buffer, fmtcpy, ull);
 291 #else
 292                     abort ();
 293 #endif
 294                   }
 295                 else if (long_flag)
 296                   {
 297                     ul = va_arg(ap, unsigned long);
 298                     sprintf (sprintf_buffer, fmtcpy, ul);
 299                   }
 300                 else
 301                   {
 302                     u = va_arg(ap, unsigned);
 303                     sprintf (sprintf_buffer, fmtcpy, u);
 304                   }
 305                 /* Now copy into final output, truncating as necessary.  */
 306                 string = sprintf_buffer;
 307                 goto doit;
 308               }
 309
 310             case 'f':
 311             case 'e':
 312             case 'g':
 313               {
 314                 double d = va_arg(ap, double);
 315                 sprintf (sprintf_buffer, fmtcpy, d);
 316                 /* Now copy into final output, truncating as necessary.  */
 317                 string = sprintf_buffer;
 318                 goto doit;
 319               }
 320
 321             case 'S':
 322               string[-1] = 's';
 323             case 's':
 324               if (fmtcpy[1] != 's')
 325                 minlen = atoi (&fmtcpy[1]);
 326               string = va_arg (ap, char *);
 327               tem = strlen (string);
 328               if (tem > MOST_POSITIVE_FIXNUM)
 329                 error ("String for %%s or %%S format is too long");
 330               width = strwidth (string, tem);
 331               goto doit1;
 332
 333               /* Copy string into final output, truncating if no room.  */
 334             doit:
 335               /* Coming here means STRING contains ASCII only.  */
 336               tem = strlen (string);
 337               if (tem > MOST_POSITIVE_FIXNUM)
 338                 error ("Format width or precision too large");
 339               width = tem;
 340             doit1:
 341               /* We have already calculated:
 342                  TEM -- length of STRING,
 343                  WIDTH -- columns occupied by STRING when displayed, and
 344                  MINLEN -- minimum columns of the output.  */
 345               if (minlen > 0)
 346                 {
 347                   while (minlen > width && bufsize > 0)
 348                     {
 349                       *bufptr++ = ' ';
 350                       bufsize--;
 351                       minlen--;
 352                     }
 353                   minlen = 0;
 354                 }
 355               if (tem > bufsize)
 356                 {
 357                   /* Truncate the string at character boundary.  */
 358                   tem = bufsize;
 359                   while (!CHAR_HEAD_P (string[tem - 1])) tem--;
 360                   memcpy (bufptr, string, tem);
 361                   /* We must calculate WIDTH again.  */
 362                   width = strwidth (bufptr, tem);
 363                 }
 364               else
 365                 memcpy (bufptr, string, tem);
 366               bufptr += tem;
 367               bufsize -= tem;
 368               if (minlen < 0)
 369                 {
 370                   while (minlen < - width && bufsize > 0)
 371                     {
 372                       *bufptr++ = ' ';
 373                       bufsize--;
 374                       minlen++;
 375                     }
 376                   minlen = 0;
 377                 }
 378               continue;
 379
 380             case 'c':
 381               {
 382                 int chr = va_arg(ap, int);
 383                 tem = CHAR_STRING (chr, (unsigned char *) charbuf);
 384                 string = charbuf;
 385                 string[tem] = 0;
 386                 width = strwidth (string, tem);
 387                 if (fmtcpy[1] != 'c')
 388                   minlen = atoi (&fmtcpy[1]);
 389                 goto doit1;
 390               }
 391
 392             case '%':
 393               fmt--;    /* Drop thru and this % will be treated as normal */
 394             }
 395         }
 396
 397       {
 398         /* Just some character; Copy it if the whole multi-byte form
 399            fit in the buffer.  */
 400         char *save_bufptr = bufptr;
 401
 402         do { *bufptr++ = *fmt++; }
 403         while (fmt < format_end && --bufsize > 0 && !CHAR_HEAD_P (*fmt));
 404         if (!CHAR_HEAD_P (*fmt))
 405           {
 406             bufptr = save_bufptr;
 407             break;
 408           }
 409       }
 410     };
 411
 412   /* If we had to malloc something, free it.  */
 413   xfree (big_buffer);
 414
 415   *bufptr = 0;          /* Make sure our string ends with a '\0' */
 416
 417   SAFE_FREE ();
 418   return bufptr - buffer;
 419 }