src/doprnt.c

   1 /* Output like sprintf to a buffer of specified size.
   2    Also takes args differently: pass one pointer to the end
   3    of the format string in addition to the format string itself.
   4    Copyright (C) 1985, 2001-2012  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21 /* If you think about replacing this with some similar standard C function of
  22    the printf family (such as vsnprintf), please note that this function
  23    supports the following Emacs-specific features:
  24
  25    . For %c conversions, it produces a string with the multibyte representation
  26      of the (`int') argument, suitable for display in an Emacs buffer.
  27
  28    . For %s and %c, when field width is specified (e.g., %25s), it accounts for
  29      the display width of each character, according to char-width-table.  That
  30      is, it does not assume that each character takes one column on display.
  31
  32    . If the size of the buffer is not enough to produce the formatted string in
  33      its entirety, it makes sure that truncation does not chop the last
  34      character in the middle of its multibyte sequence, producing an invalid
  35      sequence.
  36
  37    . It accepts a pointer to the end of the format string, so the format string
  38      could include embedded null characters.
  39
  40    . It signals an error if the length of the formatted string is about to
  41      overflow MOST_POSITIVE_FIXNUM, to avoid producing strings longer than what
  42      Emacs can handle.
  43
  44    OTOH, this function supports only a small subset of the standard C formatted
  45    output facilities.  E.g., %u and %ll are not supported, and precision is
  46    ignored for %s and %c conversions.  (See below for the detailed documentation of
  47    what is supported.)  However, this is okay, as this function is supposed to
  48    be called from `error' and similar functions, and thus does not need to
  49    support features beyond those in `Fformat', which is used by `error' on the
  50    Lisp level.  */
  51
  52 /* This function supports the following %-sequences in the `format'
  53    argument:
  54
  55    %s means print a string argument.
  56    %S is silently treated as %s, for loose compatibility with `Fformat'.
  57    %d means print a `signed int' argument in decimal.
  58    %o means print an `unsigned int' argument in octal.
  59    %x means print an `unsigned int' argument in hex.
  60    %e means print a `double' argument in exponential notation.
  61    %f means print a `double' argument in decimal-point notation.
  62    %g means print a `double' argument in exponential notation
  63       or in decimal-point notation, whichever uses fewer characters.
  64    %c means print a `signed int' argument as a single character.
  65    %% means produce a literal % character.
  66
  67    A %-sequence may contain optional flag, width, and precision specifiers, and
  68    a length modifier, as follows:
  69
  70      %<flags><width><precision><length>character
  71
  72    where flags is [+ -0], width is [0-9]+, precision is .[0-9]+, and length
  73    is empty or l or the value of the pD or pI or pMd (sans "d") macros.
  74    Also, %% in a format stands for a single % in the output.  A % that
  75    does not introduce a valid %-sequence causes undefined behavior.
  76
  77    The + flag character inserts a + before any positive number, while a space
  78    inserts a space before any positive number; these flags only affect %d, %o,
  79    %x, %e, %f, and %g sequences.  The - and 0 flags affect the width specifier,
  80    as described below.  For signed numerical arguments only, the ` ' (space)
  81    flag causes the result to be prefixed with a space character if it does not
  82    start with a sign (+ or -).
  83
  84    The l (lower-case letter ell) length modifier is a `long' data type
  85    modifier: it is supported for %d, %o, and %x conversions of integral
  86    arguments, must immediately precede the conversion specifier, and means that
  87    the respective argument is to be treated as `long int' or `unsigned long
  88    int'.  Similarly, the value of the pD macro means to use ptrdiff_t,
  89    the value of the pI macro means to use EMACS_INT or EMACS_UINT, the
  90    value of the pMd etc. macros means to use intmax_t or uintmax_t,
  91    and the empty length modifier means `int' or `unsigned int'.
  92
  93    The width specifier supplies a lower limit for the length of the printed
  94    representation.  The padding, if any, normally goes on the left, but it goes
  95    on the right if the - flag is present.  The padding character is normally a
  96    space, but (for numerical arguments only) it is 0 if the 0 flag is present.
  97    The - flag takes precedence over the 0 flag.
  98
  99    For %e, %f, and %g sequences, the number after the "." in the precision
 100    specifier says how many decimal places to show; if zero, the decimal point
 101    itself is omitted.  For %s and %S, the precision specifier is ignored.  */
 102
 103 #include <config.h>
 104 #include <stdio.h>
 105 #include <ctype.h>
 106 #include <setjmp.h>
 107 #include <float.h>
 108 #include <unistd.h>
 109 #include <limits.h>
 110
 111 #include "lisp.h"
 112
 113 /* Since we use the macro CHAR_HEAD_P, we have to include this, but
 114    don't have to include others because CHAR_HEAD_P does not contain
 115    another macro.  */
 116 #include "character.h"
 117
 118 #ifndef DBL_MAX_10_EXP
 119 #define DBL_MAX_10_EXP 308 /* IEEE double */
 120 #endif
 121
 122 /* Generate output from a format-spec FORMAT,
 123    terminated at position FORMAT_END.
 124    (*FORMAT_END is not part of the format, but must exist and be readable.)
 125    Output goes in BUFFER, which has room for BUFSIZE chars.
 126    BUFSIZE must be positive.  If the output does not fit, truncate it
 127    to fit and return BUFSIZE - 1; if this truncates a multibyte
 128    sequence, store '\0' into the sequence's first byte.
 129    Returns the number of bytes stored into BUFFER, excluding
 130    the terminating null byte.  Output is always null-terminated.
 131    String arguments are passed as C strings.
 132    Integers are passed as C integers.  */
 133
 134 ptrdiff_t
 135 doprnt (char *buffer, ptrdiff_t bufsize, const char *format,
 136         const char *format_end, va_list ap)
 137 {
 138   const char *fmt = format;     /* Pointer into format string.  */
 139   char *bufptr = buffer;        /* Pointer into output buffer.  */
 140
 141   /* Use this for sprintf unless we need something really big.  */
 142   char tembuf[DBL_MAX_10_EXP + 100];
 143
 144   /* Size of sprintf_buffer.  */
 145   ptrdiff_t size_allocated = sizeof (tembuf);
 146
 147   /* Buffer to use for sprintf.  Either tembuf or same as BIG_BUFFER.  */
 148   char *sprintf_buffer = tembuf;
 149
 150   /* Buffer we have got with malloc.  */
 151   char *big_buffer = NULL;
 152
 153   ptrdiff_t tem = -1;
 154   char *string;
 155   char fixed_buffer[20];        /* Default buffer for small formatting. */
 156   char *fmtcpy;
 157   int minlen;
 158   char charbuf[MAX_MULTIBYTE_LENGTH + 1];       /* Used for %c.  */
 159   USE_SAFE_ALLOCA;
 160
 161   if (format_end == 0)
 162     format_end = format + strlen (format);
 163
 164   fmtcpy = (format_end - format < sizeof (fixed_buffer) - 1
 165             ? fixed_buffer
 166             : SAFE_ALLOCA (format_end - format + 1));
 167
 168   bufsize--;
 169
 170   /* Loop until end of format string or buffer full. */
 171   while (fmt < format_end && bufsize > 0)
 172     {
 173       if (*fmt == '%')  /* Check for a '%' character */
 174         {
 175           ptrdiff_t size_bound = 0;
 176           ptrdiff_t width;  /* Columns occupied by STRING on display.  */
 177           enum {
 178             pDlen = sizeof pD - 1,
 179             pIlen = sizeof pI - 1,
 180             pMlen = sizeof pMd - 2
 181           };
 182           enum {
 183             no_modifier, long_modifier, pD_modifier, pI_modifier, pM_modifier
 184           } length_modifier = no_modifier;
 185           static char const modifier_len[] = { 0, 1, pDlen, pIlen, pMlen };
 186           int maxmlen = max (max (1, pDlen), max (pIlen, pMlen));
 187           int mlen;
 188
 189           fmt++;
 190           /* Copy this one %-spec into fmtcpy.  */
 191           string = fmtcpy;
 192           *string++ = '%';
 193           while (fmt < format_end)
 194             {
 195               *string++ = *fmt;
 196               if ('0' <= *fmt && *fmt <= '9')
 197                 {
 198                   /* Get an idea of how much space we might need.
 199                      This might be a field width or a precision; e.g.
 200                      %1.1000f and %1000.1f both might need 1000+ bytes.
 201                      Parse the width or precision, checking for overflow.  */
 202                   ptrdiff_t n = *fmt - '0';
 203                   while (fmt + 1 < format_end
 204                          && '0' <= fmt[1] && fmt[1] <= '9')
 205                     {
 206                       /* Avoid ptrdiff_t, size_t, and int overflow, as
 207                          many sprintfs mishandle widths greater than INT_MAX.
 208                          This test is simple but slightly conservative: e.g.,
 209                          (INT_MAX - INT_MAX % 10) is reported as an overflow
 210                          even when it's not.  */
 211                       if (n >= min (INT_MAX, min (PTRDIFF_MAX, SIZE_MAX)) / 10)
 212                         error ("Format width or precision too large");
 213                       n = n * 10 + fmt[1] - '0';
 214                       *string++ = *++fmt;
 215                     }
 216
 217                   if (size_bound < n)
 218                     size_bound = n;
 219                 }
 220               else if (! (*fmt == '-' || *fmt == ' ' || *fmt == '.'
 221                           || *fmt == '+'))
 222                 break;
 223               fmt++;
 224             }
 225
 226           /* Check for the length modifiers in textual length order, so
 227              that longer modifiers override shorter ones.  */
 228           for (mlen = 1; mlen <= maxmlen; mlen++)
 229             {
 230               if (format_end - fmt < mlen)
 231                 break;
 232               if (mlen == 1 && *fmt == 'l')
 233                 length_modifier = long_modifier;
 234               if (mlen == pDlen && memcmp (fmt, pD, pDlen) == 0)
 235                 length_modifier = pD_modifier;
 236               if (mlen == pIlen && memcmp (fmt, pI, pIlen) == 0)
 237                 length_modifier = pI_modifier;
 238               if (mlen == pMlen && memcmp (fmt, pMd, pMlen) == 0)
 239                 length_modifier = pM_modifier;
 240             }
 241
 242           mlen = modifier_len[length_modifier];
 243           memcpy (string, fmt + 1, mlen);
 244           string += mlen;
 245           fmt += mlen;
 246           *string = 0;
 247
 248           /* Make the size bound large enough to handle floating point formats
 249              with large numbers.  */
 250           if (size_bound > min (PTRDIFF_MAX, SIZE_MAX) - DBL_MAX_10_EXP - 50)
 251             error ("Format width or precision too large");
 252           size_bound += DBL_MAX_10_EXP + 50;
 253
 254           /* Make sure we have that much.  */
 255           if (size_bound > size_allocated)
 256             {
 257               if (big_buffer)
 258                 xfree (big_buffer);
 259               big_buffer = xmalloc (size_bound);
 260               sprintf_buffer = big_buffer;
 261               size_allocated = size_bound;
 262             }
 263           minlen = 0;
 264           switch (*fmt++)
 265             {
 266             default:
 267               error ("Invalid format operation %s", fmtcpy);
 268
 269 /*          case 'b': */
 270             case 'l':
 271             case 'd':
 272               switch (length_modifier)
 273                 {
 274                 case no_modifier:
 275                   {
 276                     int v = va_arg (ap, int);
 277                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 278                   }
 279                   break;
 280                 case long_modifier:
 281                   {
 282                     long v = va_arg (ap, long);
 283                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 284                   }
 285                   break;
 286                 case pD_modifier:
 287                 signed_pD_modifier:
 288                   {
 289                     ptrdiff_t v = va_arg (ap, ptrdiff_t);
 290                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 291                   }
 292                   break;
 293                 case pI_modifier:
 294                   {
 295                     EMACS_INT v = va_arg (ap, EMACS_INT);
 296                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 297                   }
 298                   break;
 299                 case pM_modifier:
 300                   {
 301                     intmax_t v = va_arg (ap, intmax_t);
 302                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 303                   }
 304                   break;
 305                 }
 306               /* Now copy into final output, truncating as necessary.  */
 307               string = sprintf_buffer;
 308               goto doit;
 309
 310             case 'o':
 311             case 'x':
 312               switch (length_modifier)
 313                 {
 314                 case no_modifier:
 315                   {
 316                     unsigned v = va_arg (ap, unsigned);
 317                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 318                   }
 319                   break;
 320                 case long_modifier:
 321                   {
 322                     unsigned long v = va_arg (ap, unsigned long);
 323                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 324                   }
 325                   break;
 326                 case pD_modifier:
 327                   goto signed_pD_modifier;
 328                 case pI_modifier:
 329                   {
 330                     EMACS_UINT v = va_arg (ap, EMACS_UINT);
 331                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 332                   }
 333                   break;
 334                 case pM_modifier:
 335                   {
 336                     uintmax_t v = va_arg (ap, uintmax_t);
 337                     tem = sprintf (sprintf_buffer, fmtcpy, v);
 338                   }
 339                   break;
 340                 }
 341               /* Now copy into final output, truncating as necessary.  */
 342               string = sprintf_buffer;
 343               goto doit;
 344
 345             case 'f':
 346             case 'e':
 347             case 'g':
 348               {
 349                 double d = va_arg (ap, double);
 350                 tem = sprintf (sprintf_buffer, fmtcpy, d);
 351                 /* Now copy into final output, truncating as necessary.  */
 352                 string = sprintf_buffer;
 353                 goto doit;
 354               }
 355
 356             case 'S':
 357               string[-1] = 's';
 358             case 's':
 359               if (fmtcpy[1] != 's')
 360                 minlen = atoi (&fmtcpy[1]);
 361               string = va_arg (ap, char *);
 362               tem = strlen (string);
 363               if (STRING_BYTES_BOUND < tem)
 364                 error ("String for %%s or %%S format is too long");
 365               width = strwidth (string, tem);
 366               goto doit1;
 367
 368               /* Copy string into final output, truncating if no room.  */
 369             doit:
 370               eassert (0 <= tem);
 371               /* Coming here means STRING contains ASCII only.  */
 372               if (STRING_BYTES_BOUND < tem)
 373                 error ("Format width or precision too large");
 374               width = tem;
 375             doit1:
 376               /* We have already calculated:
 377                  TEM -- length of STRING,
 378                  WIDTH -- columns occupied by STRING when displayed, and
 379                  MINLEN -- minimum columns of the output.  */
 380               if (minlen > 0)
 381                 {
 382                   while (minlen > width && bufsize > 0)
 383                     {
 384                       *bufptr++ = ' ';
 385                       bufsize--;
 386                       minlen--;
 387                     }
 388                   minlen = 0;
 389                 }
 390               if (tem > bufsize)
 391                 {
 392                   /* Truncate the string at character boundary.  */
 393                   tem = bufsize;
 394                   do
 395                     {
 396                       tem--;
 397                       if (CHAR_HEAD_P (string[tem]))
 398                         {
 399                           if (BYTES_BY_CHAR_HEAD (string[tem]) <= bufsize - tem)
 400                             tem = bufsize;
 401                           break;
 402                         }
 403                     }
 404                   while (tem != 0);
 405
 406                   memcpy (bufptr, string, tem);
 407                   bufptr[tem] = 0;
 408                   /* Trigger exit from the loop, but make sure we
 409                      return to the caller a value which will indicate
 410                      that the buffer was too small.  */
 411                   bufptr += bufsize;
 412                   bufsize = 0;
 413                   continue;
 414                 }
 415               memcpy (bufptr, string, tem);
 416               bufptr += tem;
 417               bufsize -= tem;
 418               if (minlen < 0)
 419                 {
 420                   while (minlen < - width && bufsize > 0)
 421                     {
 422                       *bufptr++ = ' ';
 423                       bufsize--;
 424                       minlen++;
 425                     }
 426                   minlen = 0;
 427                 }
 428               continue;
 429
 430             case 'c':
 431               {
 432                 int chr = va_arg (ap, int);
 433                 tem = CHAR_STRING (chr, (unsigned char *) charbuf);
 434                 string = charbuf;
 435                 string[tem] = 0;
 436                 width = strwidth (string, tem);
 437                 if (fmtcpy[1] != 'c')
 438                   minlen = atoi (&fmtcpy[1]);
 439                 goto doit1;
 440               }
 441
 442             case '%':
 443               fmt--;    /* Drop thru and this % will be treated as normal */
 444             }
 445         }
 446
 447       {
 448         /* Just some character; Copy it if the whole multi-byte form
 449            fit in the buffer.  */
 450         char *save_bufptr = bufptr;
 451
 452         do { *bufptr++ = *fmt++; }
 453         while (fmt < format_end && --bufsize > 0 && !CHAR_HEAD_P (*fmt));
 454         if (!CHAR_HEAD_P (*fmt))
 455           {
 456             /* Truncate, but return value that will signal to caller
 457                that the buffer was too small.  */
 458             *save_bufptr = 0;
 459             break;
 460           }
 461       }
 462     };
 463
 464   /* If we had to malloc something, free it.  */
 465   xfree (big_buffer);
 466
 467   *bufptr = 0;          /* Make sure our string ends with a '\0' */
 468
 469   SAFE_FREE ();
 470   return bufptr - buffer;
 471 }
 472
 473 /* Format to an unbounded buffer BUF.  This is like sprintf, except it
 474    is not limited to returning an 'int' so it doesn't have a silly 2
 475    GiB limit on typical 64-bit hosts.  However, it is limited to the
 476    Emacs-style formats that doprnt supports.
 477
 478    Return the number of bytes put into BUF, excluding the terminating
 479    '\0'.  */
 480 ptrdiff_t
 481 esprintf (char *buf, char const *format, ...)
 482 {
 483   ptrdiff_t nbytes;
 484   va_list ap;
 485   va_start (ap, format);
 486   nbytes = doprnt (buf, TYPE_MAXIMUM (ptrdiff_t), format, 0, ap);
 487   va_end (ap);
 488   return nbytes;
 489 }
 490
 491 #if defined HAVE_X_WINDOWS && defined USE_X_TOOLKIT
 492
 493 /* Format to buffer *BUF of positive size *BUFSIZE, reallocating *BUF
 494    and updating *BUFSIZE if the buffer is too small, and otherwise
 495    behaving line esprintf.  When reallocating, free *BUF unless it is
 496    equal to NONHEAPBUF, and if BUFSIZE_MAX is nonnegative then signal
 497    memory exhaustion instead of growing the buffer size past
 498    BUFSIZE_MAX.  */
 499 ptrdiff_t
 500 exprintf (char **buf, ptrdiff_t *bufsize,
 501           char const *nonheapbuf, ptrdiff_t bufsize_max,
 502           char const *format, ...)
 503 {
 504   ptrdiff_t nbytes;
 505   va_list ap;
 506   va_start (ap, format);
 507   nbytes = evxprintf (buf, bufsize, nonheapbuf, bufsize_max, format, ap);
 508   va_end (ap);
 509   return nbytes;
 510 }
 511
 512 #endif
 513
 514 /* Act like exprintf, except take a va_list.  */
 515 ptrdiff_t
 516 evxprintf (char **buf, ptrdiff_t *bufsize,
 517            char const *nonheapbuf, ptrdiff_t bufsize_max,
 518            char const *format, va_list ap)
 519 {
 520   for (;;)
 521     {
 522       ptrdiff_t nbytes;
 523       va_list ap_copy;
 524       va_copy (ap_copy, ap);
 525       nbytes = doprnt (*buf, *bufsize, format, 0, ap_copy);
 526       va_end (ap_copy);
 527       if (nbytes < *bufsize - 1)
 528         return nbytes;
 529       if (*buf != nonheapbuf)
 530         xfree (*buf);
 531       *buf = xpalloc (NULL, bufsize, 1, bufsize_max, 1);
 532     }
 533 }