src/doprnt.c

   1 /* Output like sprintf to a buffer of specified size.
   2    Also takes args differently: pass one pointer to the end
   3    of the format string in addition to the format string itself.
   4    Copyright (C) 1985, 2001-2011  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21 /* If you think about replacing this with some similar standard C function of
  22    the printf family (such as vsnprintf), please note that this function
  23    supports the following Emacs-specific features:
  24
  25    . For %c conversions, it produces a string with the multibyte representation
  26      of the (`int') argument, suitable for display in an Emacs buffer.
  27
  28    . For %s and %c, when field width is specified (e.g., %25s), it accounts for
  29      the display width of each character, according to char-width-table.  That
  30      is, it does not assume that each character takes one column on display.
  31
  32    . If the size of the buffer is not enough to produce the formatted string in
  33      its entirety, it makes sure that truncation does not chop the last
  34      character in the middle of its multibyte sequence, producing an invalid
  35      sequence.
  36
  37    . It accepts a pointer to the end of the format string, so the format string
  38      could include embedded null characters.
  39
  40    . It signals an error if the length of the formatted string is about to
  41      overflow MOST_POSITIVE_FIXNUM, to avoid producing strings longer than what
  42      Emacs can handle.
  43
  44    OTOH, this function supports only a small subset of the standard C formatted
  45    output facilities.  E.g., %u and %ll are not supported, and precision is
  46    ignored for %s and %c conversions.  (See below for the detailed documentation of
  47    what is supported.)  However, this is okay, as this function is supposed to
  48    be called from `error' and similar functions, and thus does not need to
  49    support features beyond those in `Fformat', which is used by `error' on the
  50    Lisp level.  */
  51
  52 /* This function supports the following %-sequences in the `format'
  53    argument:
  54
  55    %s means print a string argument.
  56    %S is silently treated as %s, for loose compatibility with `Fformat'.
  57    %d means print a `signed int' argument in decimal.
  58    %o means print an `unsigned int' argument in octal.
  59    %x means print an `unsigned int' argument in hex.
  60    %e means print a `double' argument in exponential notation.
  61    %f means print a `double' argument in decimal-point notation.
  62    %g means print a `double' argument in exponential notation
  63       or in decimal-point notation, whichever uses fewer characters.
  64    %c means print a `signed int' argument as a single character.
  65    %% means produce a literal % character.
  66
  67    A %-sequence may contain optional flag, width, and precision specifiers, and
  68    a length modifier, as follows:
  69
  70      %<flags><width><precision><length>character
  71
  72    where flags is [+ -0], width is [0-9]+, precision is .[0-9]+, and length
  73    is empty or l or the value of the pI macro.  Also, %% in a format
  74    stands for a single % in the output.  A % that does not introduce a
  75    valid %-sequence causes undefined behavior.
  76
  77    The + flag character inserts a + before any positive number, while a space
  78    inserts a space before any positive number; these flags only affect %d, %o,
  79    %x, %e, %f, and %g sequences.  The - and 0 flags affect the width specifier,
  80    as described below.  For signed numerical arguments only, the ` ' (space)
  81    flag causes the result to be prefixed with a space character if it does not
  82    start with a sign (+ or -).
  83
  84    The l (lower-case letter ell) length modifier is a `long' data type
  85    modifier: it is supported for %d, %o, and %x conversions of integral
  86    arguments, must immediately precede the conversion specifier, and means that
  87    the respective argument is to be treated as `long int' or `unsigned long
  88    int'.  Similarly, the value of the pI macro means to use EMACS_INT or
  89    EMACS_UINT and the empty length modifier means `int' or `unsigned int'.
  90
  91    The width specifier supplies a lower limit for the length of the printed
  92    representation.  The padding, if any, normally goes on the left, but it goes
  93    on the right if the - flag is present.  The padding character is normally a
  94    space, but (for numerical arguments only) it is 0 if the 0 flag is present.
  95    The - flag takes precedence over the 0 flag.
  96
  97    For %e, %f, and %g sequences, the number after the "." in the precision
  98    specifier says how many decimal places to show; if zero, the decimal point
  99    itself is omitted.  For %s and %S, the precision specifier is ignored.  */
 100
 101 #include <config.h>
 102 #include <stdio.h>
 103 #include <ctype.h>
 104 #include <setjmp.h>
 105 #include <float.h>
 106 #include <unistd.h>
 107 #include <limits.h>
 108
 109 #include "lisp.h"
 110
 111 /* Since we use the macro CHAR_HEAD_P, we have to include this, but
 112    don't have to include others because CHAR_HEAD_P does not contain
 113    another macro.  */
 114 #include "character.h"
 115
 116 #ifndef DBL_MAX_10_EXP
 117 #define DBL_MAX_10_EXP 308 /* IEEE double */
 118 #endif
 119
 120 /* Generate output from a format-spec FORMAT,
 121    terminated at position FORMAT_END.
 122    (*FORMAT_END is not part of the format, but must exist and be readable.)
 123    Output goes in BUFFER, which has room for BUFSIZE chars.
 124    BUFSIZE must be positive.  If the output does not fit, truncate it
 125    to fit and return BUFSIZE - 1; if this truncates a multibyte
 126    sequence, store '\0' into the sequence's first byte.
 127    Returns the number of bytes stored into BUFFER, excluding
 128    the terminating null byte.  Output is always null-terminated.
 129    String arguments are passed as C strings.
 130    Integers are passed as C integers.  */
 131
 132 size_t
 133 doprnt (char *buffer, register size_t bufsize, const char *format,
 134         const char *format_end, va_list ap)
 135 {
 136   const char *fmt = format;     /* Pointer into format string */
 137   register char *bufptr = buffer; /* Pointer into output buffer.. */
 138
 139   /* Use this for sprintf unless we need something really big.  */
 140   char tembuf[DBL_MAX_10_EXP + 100];
 141
 142   /* Size of sprintf_buffer.  */
 143   size_t size_allocated = sizeof (tembuf);
 144
 145   /* Buffer to use for sprintf.  Either tembuf or same as BIG_BUFFER.  */
 146   char *sprintf_buffer = tembuf;
 147
 148   /* Buffer we have got with malloc.  */
 149   char *big_buffer = NULL;
 150
 151   register size_t tem;
 152   char *string;
 153   char fixed_buffer[20];        /* Default buffer for small formatting. */
 154   char *fmtcpy;
 155   int minlen;
 156   char charbuf[MAX_MULTIBYTE_LENGTH + 1];       /* Used for %c.  */
 157   USE_SAFE_ALLOCA;
 158
 159   if (format_end == 0)
 160     format_end = format + strlen (format);
 161
 162   if ((format_end - format + 1) < sizeof (fixed_buffer))
 163     fmtcpy = fixed_buffer;
 164   else
 165     SAFE_ALLOCA (fmtcpy, char *, format_end - format + 1);
 166
 167   bufsize--;
 168
 169   /* Loop until end of format string or buffer full. */
 170   while (fmt < format_end && bufsize > 0)
 171     {
 172       if (*fmt == '%')  /* Check for a '%' character */
 173         {
 174           size_t size_bound = 0;
 175           EMACS_INT width;  /* Columns occupied by STRING on display.  */
 176           int long_flag = 0;
 177           int pIlen = sizeof pI - 1;
 178
 179           fmt++;
 180           /* Copy this one %-spec into fmtcpy.  */
 181           string = fmtcpy;
 182           *string++ = '%';
 183           while (fmt < format_end)
 184             {
 185               *string++ = *fmt;
 186               if ('0' <= *fmt && *fmt <= '9')
 187                 {
 188                   /* Get an idea of how much space we might need.
 189                      This might be a field width or a precision; e.g.
 190                      %1.1000f and %1000.1f both might need 1000+ bytes.
 191                      Parse the width or precision, checking for overflow.  */
 192                   size_t n = *fmt - '0';
 193                   while (fmt + 1 < format_end
 194                          && '0' <= fmt[1] && fmt[1] <= '9')
 195                     {
 196                       /* Avoid size_t overflow.  Avoid int overflow too, as
 197                          many sprintfs mishandle widths greater than INT_MAX.
 198                          This test is simple but slightly conservative: e.g.,
 199                          (INT_MAX - INT_MAX % 10) is reported as an overflow
 200                          even when it's not.  */
 201                       if (n >= min (INT_MAX, SIZE_MAX) / 10)
 202                         error ("Format width or precision too large");
 203                       n = n * 10 + fmt[1] - '0';
 204                       *string++ = *++fmt;
 205                     }
 206
 207                   if (size_bound < n)
 208                     size_bound = n;
 209                 }
 210               else if (! (*fmt == '-' || *fmt == ' ' || *fmt == '.'
 211                           || *fmt == '+'))
 212                 break;
 213               fmt++;
 214             }
 215
 216           if (0 < pIlen && pIlen <= format_end - fmt
 217               && memcmp (fmt, pI, pIlen) == 0)
 218             {
 219               long_flag = 2;
 220               memcpy (string, fmt + 1, pIlen);
 221               string += pIlen;
 222               fmt += pIlen;
 223             }
 224           else if (fmt < format_end && *fmt == 'l')
 225             {
 226               long_flag = 1;
 227               *string++ = *++fmt;
 228             }
 229           *string = 0;
 230
 231           /* Make the size bound large enough to handle floating point formats
 232              with large numbers.  */
 233           if (size_bound > SIZE_MAX - DBL_MAX_10_EXP - 50)
 234             error ("Format width or precision too large");
 235           size_bound += DBL_MAX_10_EXP + 50;
 236
 237           /* Make sure we have that much.  */
 238           if (size_bound > size_allocated)
 239             {
 240               if (big_buffer)
 241                 xfree (big_buffer);
 242               big_buffer = (char *) xmalloc (size_bound);
 243               sprintf_buffer = big_buffer;
 244               size_allocated = size_bound;
 245             }
 246           minlen = 0;
 247           switch (*fmt++)
 248             {
 249             default:
 250               error ("Invalid format operation %s", fmtcpy);
 251
 252 /*          case 'b': */
 253             case 'l':
 254             case 'd':
 255               {
 256                 int i;
 257                 long l;
 258
 259                 if (1 < long_flag)
 260                   {
 261                     EMACS_INT ll = va_arg (ap, EMACS_INT);
 262                     sprintf (sprintf_buffer, fmtcpy, ll);
 263                   }
 264                 else if (long_flag)
 265                   {
 266                     l = va_arg(ap, long);
 267                     sprintf (sprintf_buffer, fmtcpy, l);
 268                   }
 269                 else
 270                   {
 271                     i = va_arg(ap, int);
 272                     sprintf (sprintf_buffer, fmtcpy, i);
 273                   }
 274                 /* Now copy into final output, truncating as necessary.  */
 275                 string = sprintf_buffer;
 276                 goto doit;
 277               }
 278
 279             case 'o':
 280             case 'x':
 281               {
 282                 unsigned u;
 283                 unsigned long ul;
 284
 285                 if (1 < long_flag)
 286                   {
 287                     EMACS_UINT ull = va_arg (ap, EMACS_UINT);
 288                     sprintf (sprintf_buffer, fmtcpy, ull);
 289                   }
 290                 else if (long_flag)
 291                   {
 292                     ul = va_arg(ap, unsigned long);
 293                     sprintf (sprintf_buffer, fmtcpy, ul);
 294                   }
 295                 else
 296                   {
 297                     u = va_arg(ap, unsigned);
 298                     sprintf (sprintf_buffer, fmtcpy, u);
 299                   }
 300                 /* Now copy into final output, truncating as necessary.  */
 301                 string = sprintf_buffer;
 302                 goto doit;
 303               }
 304
 305             case 'f':
 306             case 'e':
 307             case 'g':
 308               {
 309                 double d = va_arg(ap, double);
 310                 sprintf (sprintf_buffer, fmtcpy, d);
 311                 /* Now copy into final output, truncating as necessary.  */
 312                 string = sprintf_buffer;
 313                 goto doit;
 314               }
 315
 316             case 'S':
 317               string[-1] = 's';
 318             case 's':
 319               if (fmtcpy[1] != 's')
 320                 minlen = atoi (&fmtcpy[1]);
 321               string = va_arg (ap, char *);
 322               tem = strlen (string);
 323               if (STRING_BYTES_BOUND < tem)
 324                 error ("String for %%s or %%S format is too long");
 325               width = strwidth (string, tem);
 326               goto doit1;
 327
 328               /* Copy string into final output, truncating if no room.  */
 329             doit:
 330               /* Coming here means STRING contains ASCII only.  */
 331               tem = strlen (string);
 332               if (STRING_BYTES_BOUND < tem)
 333                 error ("Format width or precision too large");
 334               width = tem;
 335             doit1:
 336               /* We have already calculated:
 337                  TEM -- length of STRING,
 338                  WIDTH -- columns occupied by STRING when displayed, and
 339                  MINLEN -- minimum columns of the output.  */
 340               if (minlen > 0)
 341                 {
 342                   while (minlen > width && bufsize > 0)
 343                     {
 344                       *bufptr++ = ' ';
 345                       bufsize--;
 346                       minlen--;
 347                     }
 348                   minlen = 0;
 349                 }
 350               if (tem > bufsize)
 351                 {
 352                   /* Truncate the string at character boundary.  */
 353                   tem = bufsize;
 354                   while (!CHAR_HEAD_P (string[tem - 1])) tem--;
 355                   /* If the multibyte sequence of this character is
 356                      too long for the space we have left in the
 357                      buffer, truncate before it.  */
 358                   if (tem > 0
 359                       && BYTES_BY_CHAR_HEAD (string[tem - 1]) > bufsize)
 360                     tem--;
 361                   if (tem > 0)
 362                     memcpy (bufptr, string, tem);
 363                   bufptr[tem] = 0;
 364                   /* Trigger exit from the loop, but make sure we
 365                      return to the caller a value which will indicate
 366                      that the buffer was too small.  */
 367                   bufptr += bufsize;
 368                   bufsize = 0;
 369                   continue;
 370                 }
 371               else
 372                 memcpy (bufptr, string, tem);
 373               bufptr += tem;
 374               bufsize -= tem;
 375               if (minlen < 0)
 376                 {
 377                   while (minlen < - width && bufsize > 0)
 378                     {
 379                       *bufptr++ = ' ';
 380                       bufsize--;
 381                       minlen++;
 382                     }
 383                   minlen = 0;
 384                 }
 385               continue;
 386
 387             case 'c':
 388               {
 389                 int chr = va_arg(ap, int);
 390                 tem = CHAR_STRING (chr, (unsigned char *) charbuf);
 391                 string = charbuf;
 392                 string[tem] = 0;
 393                 width = strwidth (string, tem);
 394                 if (fmtcpy[1] != 'c')
 395                   minlen = atoi (&fmtcpy[1]);
 396                 goto doit1;
 397               }
 398
 399             case '%':
 400               fmt--;    /* Drop thru and this % will be treated as normal */
 401             }
 402         }
 403
 404       {
 405         /* Just some character; Copy it if the whole multi-byte form
 406            fit in the buffer.  */
 407         char *save_bufptr = bufptr;
 408
 409         do { *bufptr++ = *fmt++; }
 410         while (fmt < format_end && --bufsize > 0 && !CHAR_HEAD_P (*fmt));
 411         if (!CHAR_HEAD_P (*fmt))
 412           {
 413             /* Truncate, but return value that will signal to caller
 414                that the buffer was too small.  */
 415             *save_bufptr = 0;
 416             break;
 417           }
 418       }
 419     };
 420
 421   /* If we had to malloc something, free it.  */
 422   xfree (big_buffer);
 423
 424   *bufptr = 0;          /* Make sure our string ends with a '\0' */
 425
 426   SAFE_FREE ();
 427   return bufptr - buffer;
 428 }