src/doprnt.c

   1 /* Output like sprintf to a buffer of specified size.
   2    Also takes args differently: pass one pointer to the end
   3    of the format string in addition to the format string itself.
   4    Copyright (C) 1985, 2001-2011  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21 /* If you think about replacing this with some similar standard C function of
  22    the printf family (such as vsnprintf), please note that this function
  23    supports the following Emacs-specific features:
  24
  25    . For %c conversions, it produces a string with the multibyte representation
  26      of the (`int') argument, suitable for display in an Emacs buffer.
  27
  28    . For %s and %c, when field width is specified (e.g., %25s), it accounts for
  29      the display width of each character, according to char-width-table.  That
  30      is, it does not assume that each character takes one column on display.
  31
  32    . If the size of the buffer is not enough to produce the formatted string in
  33      its entirety, it makes sure that truncation does not chop the last
  34      character in the middle of its multibyte sequence, producing an invalid
  35      sequence.
  36
  37    . It accepts a pointer to the end of the format string, so the format string
  38      could include embedded null characters.
  39
  40    . It signals an error if the length of the formatted string is about to
  41      overflow MOST_POSITIVE_FIXNUM, to avoid producing strings longer than what
  42      Emacs can handle.
  43
  44    OTOH, this function supports only a small subset of the standard C formatted
  45    output facilities.  E.g., %u is not supported, and precision is ignored
  46    for %s and %c conversions.  (See below for the detailed documentation of
  47    what is supported.)  However, this is okay, as this function is supposed to
  48    be called from `error' and similar functions, and thus does not need to
  49    support features beyond those in `Fformat', which is used by `error' on the
  50    Lisp level.  */
  51
  52 /* This function supports the following %-sequences in the `format'
  53    argument:
  54
  55    %s means print a string argument.
  56    %S is silently treated as %s, for loose compatibility with `Fformat'.
  57    %d means print a `signed int' argument in decimal.
  58    %o means print an `unsigned int' argument in octal.
  59    %x means print an `unsigned int' argument in hex.
  60    %e means print a `double' argument in exponential notation.
  61    %f means print a `double' argument in decimal-point notation.
  62    %g means print a `double' argument in exponential notation
  63       or in decimal-point notation, whichever uses fewer characters.
  64    %c means print a `signed int' argument as a single character.
  65    %% means produce a literal % character.
  66
  67    A %-sequence may contain optional flag, width, and precision specifiers, and
  68    a length modifier, as follows:
  69
  70      %<flags><width><precision><length>character
  71
  72    where flags is [+ -0], width is [0-9]+, precision is .[0-9]+, and length
  73    modifier is empty or l or ll.
  74
  75    The + flag character inserts a + before any positive number, while a space
  76    inserts a space before any positive number; these flags only affect %d, %o,
  77    %x, %e, %f, and %g sequences.  The - and 0 flags affect the width specifier,
  78    as described below.  For signed numerical arguments only, the ` ' (space)
  79    flag causes the result to be prefixed with a space character if it does not
  80    start with a sign (+ or -).
  81
  82    The l (lower-case letter ell) length modifier is a `long' data type
  83    modifier: it is supported for %d, %o, and %x conversions of integral
  84    arguments, must immediately precede the conversion specifier, and means that
  85    the respective argument is to be treated as `long int' or `unsigned long
  86    int'.  Similarly, ll (two letter ells) means to use `long long int' or
  87    `unsigned long long int'; this can be used only on hosts that have
  88    these two types.  The empty length modifier means to use `int' or
  89    `unsigned int'.  EMACS_INT arguments should use the pI macro, which
  90    expands to whatever length modifier is needed for the target host.
  91
  92    The width specifier supplies a lower limit for the length of the printed
  93    representation.  The padding, if any, normally goes on the left, but it goes
  94    on the right if the - flag is present.  The padding character is normally a
  95    space, but (for numerical arguments only) it is 0 if the 0 flag is present.
  96    The - flag takes precedence over the 0 flag.
  97
  98    For %e, %f, and %g sequences, the number after the "." in the precision
  99    specifier says how many decimal places to show; if zero, the decimal point
 100    itself is omitted.  For %s and %S, the precision specifier is ignored.  */
 101
 102 #include <config.h>
 103 #include <stdio.h>
 104 #include <ctype.h>
 105 #include <setjmp.h>
 106
 107 #ifdef STDC_HEADERS
 108 #include <float.h>
 109 #endif
 110
 111 #include <unistd.h>
 112
 113 #include <limits.h>
 114
 115 #include "lisp.h"
 116
 117 /* Since we use the macro CHAR_HEAD_P, we have to include this, but
 118    don't have to include others because CHAR_HEAD_P does not contain
 119    another macro.  */
 120 #include "character.h"
 121
 122 #ifndef SIZE_MAX
 123 # define SIZE_MAX ((size_t) -1)
 124 #endif
 125
 126 #ifndef DBL_MAX_10_EXP
 127 #define DBL_MAX_10_EXP 308 /* IEEE double */
 128 #endif
 129
 130 /* Generate output from a format-spec FORMAT,
 131    terminated at position FORMAT_END.
 132    Output goes in BUFFER, which has room for BUFSIZE chars.
 133    If the output does not fit, truncate it to fit.
 134    Returns the number of bytes stored into BUFFER, excluding
 135    the terminating null byte.  Output is always null-terminated.
 136    String arguments are passed as C strings.
 137    Integers are passed as C integers.  */
 138
 139 size_t
 140 doprnt (char *buffer, register size_t bufsize, const char *format,
 141         const char *format_end, va_list ap)
 142 {
 143   const char *fmt = format;     /* Pointer into format string */
 144   register char *bufptr = buffer; /* Pointer into output buffer.. */
 145
 146   /* Use this for sprintf unless we need something really big.  */
 147   char tembuf[DBL_MAX_10_EXP + 100];
 148
 149   /* Size of sprintf_buffer.  */
 150   size_t size_allocated = sizeof (tembuf);
 151
 152   /* Buffer to use for sprintf.  Either tembuf or same as BIG_BUFFER.  */
 153   char *sprintf_buffer = tembuf;
 154
 155   /* Buffer we have got with malloc.  */
 156   char *big_buffer = NULL;
 157
 158   register size_t tem;
 159   char *string;
 160   char fixed_buffer[20];        /* Default buffer for small formatting. */
 161   char *fmtcpy;
 162   int minlen;
 163   char charbuf[MAX_MULTIBYTE_LENGTH + 1];       /* Used for %c.  */
 164   USE_SAFE_ALLOCA;
 165
 166   if (format_end == 0)
 167     format_end = format + strlen (format);
 168
 169   if ((format_end - format + 1) < sizeof (fixed_buffer))
 170     fmtcpy = fixed_buffer;
 171   else
 172     SAFE_ALLOCA (fmtcpy, char *, format_end - format + 1);
 173
 174   bufsize--;
 175
 176   /* Loop until end of format string or buffer full. */
 177   while (fmt < format_end && bufsize > 0)
 178     {
 179       if (*fmt == '%')  /* Check for a '%' character */
 180         {
 181           size_t size_bound = 0;
 182           EMACS_INT width;  /* Columns occupied by STRING on display.  */
 183           int long_flag = 0;
 184
 185           fmt++;
 186           /* Copy this one %-spec into fmtcpy.  */
 187           string = fmtcpy;
 188           *string++ = '%';
 189           while (fmt < format_end)
 190             {
 191               *string++ = *fmt;
 192               if ('0' <= *fmt && *fmt <= '9')
 193                 {
 194                   /* Get an idea of how much space we might need.
 195                      This might be a field width or a precision; e.g.
 196                      %1.1000f and %1000.1f both might need 1000+ bytes.
 197                      Parse the width or precision, checking for overflow.  */
 198                   size_t n = *fmt - '0';
 199                   while (fmt < format_end
 200                          && '0' <= fmt[1] && fmt[1] <= '9')
 201                     {
 202                       /* Avoid int overflow, because many sprintfs seriously
 203                          mess up with widths or precisions greater than
 204                          INT_MAX.  Avoid size_t overflow, since our counters
 205                          use size_t.  This test is slightly conservative, for
 206                          speed and simplicity.  */
 207                       if (n >= min (INT_MAX, SIZE_MAX) / 10)
 208                         error ("Format width or precision too large");
 209                       n = n * 10 + fmt[1] - '0';
 210                       *string++ = *++fmt;
 211                     }
 212
 213                   if (size_bound < n)
 214                     size_bound = n;
 215                 }
 216               else if (*fmt == '-' || *fmt == ' ' || *fmt == '.' || *fmt == '+')
 217                 ;
 218               else if (*fmt == 'l')
 219                 {
 220                   long_flag = 1 + (fmt + 1 < format_end && fmt[1] == 'l');
 221                   fmt += long_flag;
 222                   break;
 223                 }
 224               else
 225                 break;
 226               fmt++;
 227             }
 228           if (fmt > format_end)
 229             fmt = format_end;
 230           *string = 0;
 231
 232           /* Make the size bound large enough to handle floating point formats
 233              with large numbers.  */
 234           if (size_bound > SIZE_MAX - DBL_MAX_10_EXP - 50)
 235             error ("Format width or precision too large");
 236           size_bound += DBL_MAX_10_EXP + 50;
 237
 238           /* Make sure we have that much.  */
 239           if (size_bound > size_allocated)
 240             {
 241               if (big_buffer)
 242                 xfree (big_buffer);
 243               big_buffer = (char *) xmalloc (size_bound);
 244               sprintf_buffer = big_buffer;
 245               size_allocated = size_bound;
 246             }
 247           minlen = 0;
 248           switch (*fmt++)
 249             {
 250             default:
 251               error ("Invalid format operation %%%s%c",
 252                      "ll" + 2 - long_flag, fmt[-1]);
 253
 254 /*          case 'b': */
 255             case 'l':
 256             case 'd':
 257               {
 258                 int i;
 259                 long l;
 260
 261                 if (1 < long_flag)
 262                   {
 263 #ifdef HAVE_LONG_LONG_INT
 264                     long long ll = va_arg (ap, long long);
 265                     sprintf (sprintf_buffer, fmtcpy, ll);
 266 #else
 267                     abort ();
 268 #endif
 269                   }
 270                 else if (long_flag)
 271                   {
 272                     l = va_arg(ap, long);
 273                     sprintf (sprintf_buffer, fmtcpy, l);
 274                   }
 275                 else
 276                   {
 277                     i = va_arg(ap, int);
 278                     sprintf (sprintf_buffer, fmtcpy, i);
 279                   }
 280                 /* Now copy into final output, truncating as necessary.  */
 281                 string = sprintf_buffer;
 282                 goto doit;
 283               }
 284
 285             case 'o':
 286             case 'x':
 287               {
 288                 unsigned u;
 289                 unsigned long ul;
 290
 291                 if (1 < long_flag)
 292                   {
 293 #ifdef HAVE_UNSIGNED_LONG_LONG_INT
 294                     unsigned long long ull = va_arg (ap, unsigned long long);
 295                     sprintf (sprintf_buffer, fmtcpy, ull);
 296 #else
 297                     abort ();
 298 #endif
 299                   }
 300                 else if (long_flag)
 301                   {
 302                     ul = va_arg(ap, unsigned long);
 303                     sprintf (sprintf_buffer, fmtcpy, ul);
 304                   }
 305                 else
 306                   {
 307                     u = va_arg(ap, unsigned);
 308                     sprintf (sprintf_buffer, fmtcpy, u);
 309                   }
 310                 /* Now copy into final output, truncating as necessary.  */
 311                 string = sprintf_buffer;
 312                 goto doit;
 313               }
 314
 315             case 'f':
 316             case 'e':
 317             case 'g':
 318               {
 319                 double d = va_arg(ap, double);
 320                 sprintf (sprintf_buffer, fmtcpy, d);
 321                 /* Now copy into final output, truncating as necessary.  */
 322                 string = sprintf_buffer;
 323                 goto doit;
 324               }
 325
 326             case 'S':
 327               string[-1] = 's';
 328             case 's':
 329               if (fmtcpy[1] != 's')
 330                 minlen = atoi (&fmtcpy[1]);
 331               string = va_arg (ap, char *);
 332               tem = strlen (string);
 333               if (tem > MOST_POSITIVE_FIXNUM)
 334                 error ("String for %%s or %%S format is too long");
 335               width = strwidth (string, tem);
 336               goto doit1;
 337
 338               /* Copy string into final output, truncating if no room.  */
 339             doit:
 340               /* Coming here means STRING contains ASCII only.  */
 341               tem = strlen (string);
 342               if (tem > MOST_POSITIVE_FIXNUM)
 343                 error ("Format width or precision too large");
 344               width = tem;
 345             doit1:
 346               /* We have already calculated:
 347                  TEM -- length of STRING,
 348                  WIDTH -- columns occupied by STRING when displayed, and
 349                  MINLEN -- minimum columns of the output.  */
 350               if (minlen > 0)
 351                 {
 352                   while (minlen > width && bufsize > 0)
 353                     {
 354                       *bufptr++ = ' ';
 355                       bufsize--;
 356                       minlen--;
 357                     }
 358                   minlen = 0;
 359                 }
 360               if (tem > bufsize)
 361                 {
 362                   /* Truncate the string at character boundary.  */
 363                   tem = bufsize;
 364                   while (!CHAR_HEAD_P (string[tem - 1])) tem--;
 365                   memcpy (bufptr, string, tem);
 366                   /* We must calculate WIDTH again.  */
 367                   width = strwidth (bufptr, tem);
 368                 }
 369               else
 370                 memcpy (bufptr, string, tem);
 371               bufptr += tem;
 372               bufsize -= tem;
 373               if (minlen < 0)
 374                 {
 375                   while (minlen < - width && bufsize > 0)
 376                     {
 377                       *bufptr++ = ' ';
 378                       bufsize--;
 379                       minlen++;
 380                     }
 381                   minlen = 0;
 382                 }
 383               continue;
 384
 385             case 'c':
 386               {
 387                 int chr = va_arg(ap, int);
 388                 tem = CHAR_STRING (chr, (unsigned char *) charbuf);
 389                 string = charbuf;
 390                 string[tem] = 0;
 391                 width = strwidth (string, tem);
 392                 if (fmtcpy[1] != 'c')
 393                   minlen = atoi (&fmtcpy[1]);
 394                 goto doit1;
 395               }
 396
 397             case '%':
 398               fmt--;    /* Drop thru and this % will be treated as normal */
 399             }
 400         }
 401
 402       {
 403         /* Just some character; Copy it if the whole multi-byte form
 404            fit in the buffer.  */
 405         char *save_bufptr = bufptr;
 406
 407         do { *bufptr++ = *fmt++; }
 408         while (fmt < format_end && --bufsize > 0 && !CHAR_HEAD_P (*fmt));
 409         if (!CHAR_HEAD_P (*fmt))
 410           {
 411             /* Truncate, but return value that will signal to caller
 412                that the buffer was too small.  */
 413             *save_bufptr = 0;
 414             break;
 415           }
 416       }
 417     };
 418
 419   /* If we had to malloc something, free it.  */
 420   xfree (big_buffer);
 421
 422   *bufptr = 0;          /* Make sure our string ends with a '\0' */
 423
 424   SAFE_FREE ();
 425   return bufptr - buffer;
 426 }