lib/striconveh.c

   1 /* Character set conversion with error handling.
   2    Copyright (C) 2001-2010 Free Software Foundation, Inc.
   3    Written by Bruno Haible and Simon Josefsson.
   4
   5    This program is free software: you can redistribute it and/or modify
   6    it under the terms of the GNU Lesser General Public License as published by
   7    the Free Software Foundation; either version 3 of the License, or
   8    (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public License
  16    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  17
  18 #include <config.h>
  19
  20 /* Specification.  */
  21 #include "striconveh.h"
  22
  23 #include <errno.h>
  24 #include <stdbool.h>
  25 #include <stdlib.h>
  26 #include <string.h>
  27
  28 #if HAVE_ICONV
  29 # include <iconv.h>
  30 # include "unistr.h"
  31 #endif
  32
  33 #include "c-strcase.h"
  34 #include "c-strcaseeq.h"
  35
  36 #ifndef SIZE_MAX
  37 # define SIZE_MAX ((size_t) -1)
  38 #endif
  39
  40
  41 #if HAVE_ICONV
  42
  43 /* The caller must provide an iconveh_t, not just an iconv_t, because when a
  44    conversion error occurs, we may have to determine the Unicode representation
  45    of the inconvertible character.  */
  46
  47 int
  48 iconveh_open (const char *to_codeset, const char *from_codeset, iconveh_t *cdp)
  49 {
  50   iconv_t cd;
  51   iconv_t cd1;
  52   iconv_t cd2;
  53
  54   /* Avoid glibc-2.1 bug with EUC-KR.  */
  55 # if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
  56   if (c_strcasecmp (from_codeset, "EUC-KR") == 0
  57       || c_strcasecmp (to_codeset, "EUC-KR") == 0)
  58     {
  59       errno = EINVAL;
  60       return -1;
  61     }
  62 # endif
  63
  64   cd = iconv_open (to_codeset, from_codeset);
  65
  66   if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0))
  67     cd1 = (iconv_t)(-1);
  68   else
  69     {
  70       cd1 = iconv_open ("UTF-8", from_codeset);
  71       if (cd1 == (iconv_t)(-1))
  72         {
  73           int saved_errno = errno;
  74           if (cd != (iconv_t)(-1))
  75             iconv_close (cdp->cd);
  76           errno = saved_errno;
  77           return -1;
  78         }
  79     }
  80
  81   if (STRCASEEQ (to_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0)
  82 # if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105
  83       || c_strcasecmp (to_codeset, "UTF-8//TRANSLIT") == 0
  84 # endif
  85      )
  86     cd2 = (iconv_t)(-1);
  87   else
  88     {
  89       cd2 = iconv_open (to_codeset, "UTF-8");
  90       if (cd2 == (iconv_t)(-1))
  91         {
  92           int saved_errno = errno;
  93           if (cd1 != (iconv_t)(-1))
  94             iconv_close (cd1);
  95           if (cd != (iconv_t)(-1))
  96             iconv_close (cd);
  97           errno = saved_errno;
  98           return -1;
  99         }
 100     }
 101
 102   cdp->cd = cd;
 103   cdp->cd1 = cd1;
 104   cdp->cd2 = cd2;
 105   return 0;
 106 }
 107
 108 int
 109 iconveh_close (const iconveh_t *cd)
 110 {
 111   if (cd->cd2 != (iconv_t)(-1) && iconv_close (cd->cd2) < 0)
 112     {
 113       /* Return -1, but preserve the errno from iconv_close.  */
 114       int saved_errno = errno;
 115       if (cd->cd1 != (iconv_t)(-1))
 116         iconv_close (cd->cd1);
 117       if (cd->cd != (iconv_t)(-1))
 118         iconv_close (cd->cd);
 119       errno = saved_errno;
 120       return -1;
 121     }
 122   if (cd->cd1 != (iconv_t)(-1) && iconv_close (cd->cd1) < 0)
 123     {
 124       /* Return -1, but preserve the errno from iconv_close.  */
 125       int saved_errno = errno;
 126       if (cd->cd != (iconv_t)(-1))
 127         iconv_close (cd->cd);
 128       errno = saved_errno;
 129       return -1;
 130     }
 131   if (cd->cd != (iconv_t)(-1) && iconv_close (cd->cd) < 0)
 132     return -1;
 133   return 0;
 134 }
 135
 136 /* iconv_carefully is like iconv, except that it stops as soon as it encounters
 137    a conversion error, and it returns in *INCREMENTED a boolean telling whether
 138    it has incremented the input pointers past the error location.  */
 139 # if !defined _LIBICONV_VERSION && !defined __GLIBC__
 140 /* Irix iconv() inserts a NUL byte if it cannot convert.
 141    NetBSD iconv() inserts a question mark if it cannot convert.
 142    Only GNU libiconv and GNU libc are known to prefer to fail rather
 143    than doing a lossy conversion.  */
 144 static size_t
 145 iconv_carefully (iconv_t cd,
 146                  const char **inbuf, size_t *inbytesleft,
 147                  char **outbuf, size_t *outbytesleft,
 148                  bool *incremented)
 149 {
 150   const char *inptr = *inbuf;
 151   const char *inptr_end = inptr + *inbytesleft;
 152   char *outptr = *outbuf;
 153   size_t outsize = *outbytesleft;
 154   const char *inptr_before;
 155   size_t res;
 156
 157   do
 158     {
 159       size_t insize;
 160
 161       inptr_before = inptr;
 162       res = (size_t)(-1);
 163
 164       for (insize = 1; inptr + insize <= inptr_end; insize++)
 165         {
 166           res = iconv (cd,
 167                        (ICONV_CONST char **) &inptr, &insize,
 168                        &outptr, &outsize);
 169           if (!(res == (size_t)(-1) && errno == EINVAL))
 170             break;
 171           /* iconv can eat up a shift sequence but give EINVAL while attempting
 172              to convert the first character.  E.g. libiconv does this.  */
 173           if (inptr > inptr_before)
 174             {
 175               res = 0;
 176               break;
 177             }
 178         }
 179
 180       if (res == 0)
 181         {
 182           *outbuf = outptr;
 183           *outbytesleft = outsize;
 184         }
 185     }
 186   while (res == 0 && inptr < inptr_end);
 187
 188   *inbuf = inptr;
 189   *inbytesleft = inptr_end - inptr;
 190   if (res != (size_t)(-1) && res > 0)
 191     {
 192       /* iconv() has already incremented INPTR.  We cannot go back to a
 193          previous INPTR, otherwise the state inside CD would become invalid,
 194          if FROM_CODESET is a stateful encoding.  So, tell the caller that
 195          *INBUF has already been incremented.  */
 196       *incremented = (inptr > inptr_before);
 197       errno = EILSEQ;
 198       return (size_t)(-1);
 199     }
 200   else
 201     {
 202       *incremented = false;
 203       return res;
 204     }
 205 }
 206 # else
 207 #  define iconv_carefully(cd, inbuf, inbytesleft, outbuf, outbytesleft, incremented) \
 208      (*(incremented) = false, \
 209       iconv (cd, (ICONV_CONST char **) (inbuf), inbytesleft, outbuf, outbytesleft))
 210 # endif
 211
 212 /* iconv_carefully_1 is like iconv_carefully, except that it stops after
 213    converting one character or one shift sequence.  */
 214 static size_t
 215 iconv_carefully_1 (iconv_t cd,
 216                    const char **inbuf, size_t *inbytesleft,
 217                    char **outbuf, size_t *outbytesleft,
 218                    bool *incremented)
 219 {
 220   const char *inptr_before = *inbuf;
 221   const char *inptr = inptr_before;
 222   const char *inptr_end = inptr_before + *inbytesleft;
 223   char *outptr = *outbuf;
 224   size_t outsize = *outbytesleft;
 225   size_t res = (size_t)(-1);
 226   size_t insize;
 227
 228   for (insize = 1; inptr_before + insize <= inptr_end; insize++)
 229     {
 230       inptr = inptr_before;
 231       res = iconv (cd,
 232                    (ICONV_CONST char **) &inptr, &insize,
 233                    &outptr, &outsize);
 234       if (!(res == (size_t)(-1) && errno == EINVAL))
 235         break;
 236       /* iconv can eat up a shift sequence but give EINVAL while attempting
 237          to convert the first character.  E.g. libiconv does this.  */
 238       if (inptr > inptr_before)
 239         {
 240           res = 0;
 241           break;
 242         }
 243     }
 244
 245   *inbuf = inptr;
 246   *inbytesleft = inptr_end - inptr;
 247 # if !defined _LIBICONV_VERSION && !defined __GLIBC__
 248   /* Irix iconv() inserts a NUL byte if it cannot convert.
 249      NetBSD iconv() inserts a question mark if it cannot convert.
 250      Only GNU libiconv and GNU libc are known to prefer to fail rather
 251      than doing a lossy conversion.  */
 252   if (res != (size_t)(-1) && res > 0)
 253     {
 254       /* iconv() has already incremented INPTR.  We cannot go back to a
 255          previous INPTR, otherwise the state inside CD would become invalid,
 256          if FROM_CODESET is a stateful encoding.  So, tell the caller that
 257          *INBUF has already been incremented.  */
 258       *incremented = (inptr > inptr_before);
 259       errno = EILSEQ;
 260       return (size_t)(-1);
 261     }
 262 # endif
 263
 264   if (res != (size_t)(-1))
 265     {
 266       *outbuf = outptr;
 267       *outbytesleft = outsize;
 268     }
 269   *incremented = false;
 270   return res;
 271 }
 272
 273 /* utf8conv_carefully is like iconv, except that
 274      - it converts from UTF-8 to UTF-8,
 275      - it stops as soon as it encounters a conversion error, and it returns
 276        in *INCREMENTED a boolean telling whether it has incremented the input
 277        pointers past the error location,
 278      - if one_character_only is true, it stops after converting one
 279        character.  */
 280 static size_t
 281 utf8conv_carefully (bool one_character_only,
 282                     const char **inbuf, size_t *inbytesleft,
 283                     char **outbuf, size_t *outbytesleft,
 284                     bool *incremented)
 285 {
 286   const char *inptr = *inbuf;
 287   size_t insize = *inbytesleft;
 288   char *outptr = *outbuf;
 289   size_t outsize = *outbytesleft;
 290   size_t res;
 291
 292   res = 0;
 293   do
 294     {
 295       ucs4_t uc;
 296       int n;
 297       int m;
 298
 299       n = u8_mbtoucr (&uc, (const uint8_t *) inptr, insize);
 300       if (n < 0)
 301         {
 302           errno = (n == -2 ? EINVAL : EILSEQ);
 303           n = u8_mbtouc (&uc, (const uint8_t *) inptr, insize);
 304           inptr += n;
 305           insize -= n;
 306           res = (size_t)(-1);
 307           *incremented = true;
 308           break;
 309         }
 310       if (outsize == 0)
 311         {
 312           errno = E2BIG;
 313           res = (size_t)(-1);
 314           *incremented = false;
 315           break;
 316         }
 317       m = u8_uctomb ((uint8_t *) outptr, uc, outsize);
 318       if (m == -2)
 319         {
 320           errno = E2BIG;
 321           res = (size_t)(-1);
 322           *incremented = false;
 323           break;
 324         }
 325       inptr += n;
 326       insize -= n;
 327       if (m == -1)
 328         {
 329           errno = EILSEQ;
 330           res = (size_t)(-1);
 331           *incremented = true;
 332           break;
 333         }
 334       outptr += m;
 335       outsize -= m;
 336     }
 337   while (!one_character_only && insize > 0);
 338
 339   *inbuf = inptr;
 340   *inbytesleft = insize;
 341   *outbuf = outptr;
 342   *outbytesleft = outsize;
 343   return res;
 344 }
 345
 346 static int
 347 mem_cd_iconveh_internal (const char *src, size_t srclen,
 348                          iconv_t cd, iconv_t cd1, iconv_t cd2,
 349                          enum iconv_ilseq_handler handler,
 350                          size_t extra_alloc,
 351                          size_t *offsets,
 352                          char **resultp, size_t *lengthp)
 353 {
 354   /* When a conversion error occurs, we cannot start using CD1 and CD2 at
 355      this point: FROM_CODESET may be a stateful encoding like ISO-2022-KR.
 356      Instead, we have to start afresh from the beginning of SRC.  */
 357   /* Use a temporary buffer, so that for small strings, a single malloc()
 358      call will be sufficient.  */
 359 # define tmpbufsize 4096
 360   /* The alignment is needed when converting e.g. to glibc's WCHAR_T or
 361      libiconv's UCS-4-INTERNAL encoding.  */
 362   union { unsigned int align; char buf[tmpbufsize]; } tmp;
 363 # define tmpbuf tmp.buf
 364
 365   char *initial_result;
 366   char *result;
 367   size_t allocated;
 368   size_t length;
 369   size_t last_length = (size_t)(-1); /* only needed if offsets != NULL */
 370
 371   if (*resultp != NULL && *lengthp >= sizeof (tmpbuf))
 372     {
 373       initial_result = *resultp;
 374       allocated = *lengthp;
 375     }
 376   else
 377     {
 378       initial_result = tmpbuf;
 379       allocated = sizeof (tmpbuf);
 380     }
 381   result = initial_result;
 382
 383   /* Test whether a direct conversion is possible at all.  */
 384   if (cd == (iconv_t)(-1))
 385     goto indirectly;
 386
 387   if (offsets != NULL)
 388     {
 389       size_t i;
 390
 391       for (i = 0; i < srclen; i++)
 392         offsets[i] = (size_t)(-1);
 393
 394       last_length = (size_t)(-1);
 395     }
 396   length = 0;
 397
 398   /* First, try a direct conversion, and see whether a conversion error
 399      occurs at all.  */
 400   {
 401     const char *inptr = src;
 402     size_t insize = srclen;
 403
 404     /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
 405 # if defined _LIBICONV_VERSION \
 406      || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
 407     /* Set to the initial state.  */
 408     iconv (cd, NULL, NULL, NULL, NULL);
 409 # endif
 410
 411     while (insize > 0)
 412       {
 413         char *outptr = result + length;
 414         size_t outsize = allocated - extra_alloc - length;
 415         bool incremented;
 416         size_t res;
 417         bool grow;
 418
 419         if (offsets != NULL)
 420           {
 421             if (length != last_length) /* ensure that offset[] be increasing */
 422               {
 423                 offsets[inptr - src] = length;
 424                 last_length = length;
 425               }
 426             res = iconv_carefully_1 (cd,
 427                                      &inptr, &insize,
 428                                      &outptr, &outsize,
 429                                      &incremented);
 430           }
 431         else
 432           /* Use iconv_carefully instead of iconv here, because:
 433              - If TO_CODESET is UTF-8, we can do the error handling in this
 434                loop, no need for a second loop,
 435              - With iconv() implementations other than GNU libiconv and GNU
 436                libc, if we use iconv() in a big swoop, checking for an E2BIG
 437                return, we lose the number of irreversible conversions.  */
 438           res = iconv_carefully (cd,
 439                                  &inptr, &insize,
 440                                  &outptr, &outsize,
 441                                  &incremented);
 442
 443         length = outptr - result;
 444         grow = (length + extra_alloc > allocated / 2);
 445         if (res == (size_t)(-1))
 446           {
 447             if (errno == E2BIG)
 448               grow = true;
 449             else if (errno == EINVAL)
 450               break;
 451             else if (errno == EILSEQ && handler != iconveh_error)
 452               {
 453                 if (cd2 == (iconv_t)(-1))
 454                   {
 455                     /* TO_CODESET is UTF-8.  */
 456                     /* Error handling can produce up to 1 byte of output.  */
 457                     if (length + 1 + extra_alloc > allocated)
 458                       {
 459                         char *memory;
 460
 461                         allocated = 2 * allocated;
 462                         if (length + 1 + extra_alloc > allocated)
 463                           abort ();
 464                         if (result == initial_result)
 465                           memory = (char *) malloc (allocated);
 466                         else
 467                           memory = (char *) realloc (result, allocated);
 468                         if (memory == NULL)
 469                           {
 470                             if (result != initial_result)
 471                               free (result);
 472                             errno = ENOMEM;
 473                             return -1;
 474                           }
 475                         if (result == initial_result)
 476                           memcpy (memory, initial_result, length);
 477                         result = memory;
 478                         grow = false;
 479                       }
 480                     /* The input is invalid in FROM_CODESET.  Eat up one byte
 481                        and emit a question mark.  */
 482                     if (!incremented)
 483                       {
 484                         if (insize == 0)
 485                           abort ();
 486                         inptr++;
 487                         insize--;
 488                       }
 489                     result[length] = '?';
 490                     length++;
 491                   }
 492                 else
 493                   goto indirectly;
 494               }
 495             else
 496               {
 497                 if (result != initial_result)
 498                   {
 499                     int saved_errno = errno;
 500                     free (result);
 501                     errno = saved_errno;
 502                   }
 503                 return -1;
 504               }
 505           }
 506         if (insize == 0)
 507           break;
 508         if (grow)
 509           {
 510             char *memory;
 511
 512             allocated = 2 * allocated;
 513             if (result == initial_result)
 514               memory = (char *) malloc (allocated);
 515             else
 516               memory = (char *) realloc (result, allocated);
 517             if (memory == NULL)
 518               {
 519                 if (result != initial_result)
 520                   free (result);
 521                 errno = ENOMEM;
 522                 return -1;
 523               }
 524             if (result == initial_result)
 525               memcpy (memory, initial_result, length);
 526             result = memory;
 527           }
 528       }
 529   }
 530
 531   /* Now get the conversion state back to the initial state.
 532      But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
 533 #if defined _LIBICONV_VERSION \
 534     || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun)
 535   for (;;)
 536     {
 537       char *outptr = result + length;
 538       size_t outsize = allocated - extra_alloc - length;
 539       size_t res;
 540
 541       res = iconv (cd, NULL, NULL, &outptr, &outsize);
 542       length = outptr - result;
 543       if (res == (size_t)(-1))
 544         {
 545           if (errno == E2BIG)
 546             {
 547               char *memory;
 548
 549               allocated = 2 * allocated;
 550               if (result == initial_result)
 551                 memory = (char *) malloc (allocated);
 552               else
 553                 memory = (char *) realloc (result, allocated);
 554               if (memory == NULL)
 555                 {
 556                   if (result != initial_result)
 557                     free (result);
 558                   errno = ENOMEM;
 559                   return -1;
 560                 }
 561               if (result == initial_result)
 562                 memcpy (memory, initial_result, length);
 563               result = memory;
 564             }
 565           else
 566             {
 567               if (result != initial_result)
 568                 {
 569                   int saved_errno = errno;
 570                   free (result);
 571                   errno = saved_errno;
 572                 }
 573               return -1;
 574             }
 575         }
 576       else
 577         break;
 578     }
 579 #endif
 580
 581   /* The direct conversion succeeded.  */
 582   goto done;
 583
 584  indirectly:
 585   /* The direct conversion failed.
 586      Use a conversion through UTF-8.  */
 587   if (offsets != NULL)
 588     {
 589       size_t i;
 590
 591       for (i = 0; i < srclen; i++)
 592         offsets[i] = (size_t)(-1);
 593
 594       last_length = (size_t)(-1);
 595     }
 596   length = 0;
 597   {
 598     const bool slowly = (offsets != NULL || handler == iconveh_error);
 599 # define utf8bufsize 4096 /* may also be smaller or larger than tmpbufsize */
 600     char utf8buf[utf8bufsize + 1];
 601     size_t utf8len = 0;
 602     const char *in1ptr = src;
 603     size_t in1size = srclen;
 604     bool do_final_flush1 = true;
 605     bool do_final_flush2 = true;
 606
 607     /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
 608 # if defined _LIBICONV_VERSION \
 609      || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
 610     /* Set to the initial state.  */
 611     if (cd1 != (iconv_t)(-1))
 612       iconv (cd1, NULL, NULL, NULL, NULL);
 613     if (cd2 != (iconv_t)(-1))
 614       iconv (cd2, NULL, NULL, NULL, NULL);
 615 # endif
 616
 617     while (in1size > 0 || do_final_flush1 || utf8len > 0 || do_final_flush2)
 618       {
 619         char *out1ptr = utf8buf + utf8len;
 620         size_t out1size = utf8bufsize - utf8len;
 621         bool incremented1;
 622         size_t res1;
 623         int errno1;
 624
 625         /* Conversion step 1: from FROM_CODESET to UTF-8.  */
 626         if (in1size > 0)
 627           {
 628             if (offsets != NULL
 629                 && length != last_length) /* ensure that offset[] be increasing */
 630               {
 631                 offsets[in1ptr - src] = length;
 632                 last_length = length;
 633               }
 634             if (cd1 != (iconv_t)(-1))
 635               {
 636                 if (slowly)
 637                   res1 = iconv_carefully_1 (cd1,
 638                                             &in1ptr, &in1size,
 639                                             &out1ptr, &out1size,
 640                                             &incremented1);
 641                 else
 642                   res1 = iconv_carefully (cd1,
 643                                           &in1ptr, &in1size,
 644                                           &out1ptr, &out1size,
 645                                           &incremented1);
 646               }
 647             else
 648               {
 649                 /* FROM_CODESET is UTF-8.  */
 650                 res1 = utf8conv_carefully (slowly,
 651                                            &in1ptr, &in1size,
 652                                            &out1ptr, &out1size,
 653                                            &incremented1);
 654               }
 655           }
 656         else if (do_final_flush1)
 657           {
 658             /* Now get the conversion state of CD1 back to the initial state.
 659                But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
 660 # if defined _LIBICONV_VERSION \
 661      || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun)
 662             if (cd1 != (iconv_t)(-1))
 663               res1 = iconv (cd1, NULL, NULL, &out1ptr, &out1size);
 664             else
 665 # endif
 666               res1 = 0;
 667             do_final_flush1 = false;
 668             incremented1 = true;
 669           }
 670         else
 671           {
 672             res1 = 0;
 673             incremented1 = true;
 674           }
 675         if (res1 == (size_t)(-1)
 676             && !(errno == E2BIG || errno == EINVAL || errno == EILSEQ))
 677           {
 678             if (result != initial_result)
 679               {
 680                 int saved_errno = errno;
 681                 free (result);
 682                 errno = saved_errno;
 683               }
 684             return -1;
 685           }
 686         if (res1 == (size_t)(-1)
 687             && errno == EILSEQ && handler != iconveh_error)
 688           {
 689             /* The input is invalid in FROM_CODESET.  Eat up one byte and
 690                emit a question mark.  Room for the question mark was allocated
 691                at the end of utf8buf.  */
 692             if (!incremented1)
 693               {
 694                 if (in1size == 0)
 695                   abort ();
 696                 in1ptr++;
 697                 in1size--;
 698               }
 699             *out1ptr++ = '?';
 700             res1 = 0;
 701           }
 702         errno1 = errno;
 703         utf8len = out1ptr - utf8buf;
 704
 705         if (offsets != NULL
 706             || in1size == 0
 707             || utf8len > utf8bufsize / 2
 708             || (res1 == (size_t)(-1) && errno1 == E2BIG))
 709           {
 710             /* Conversion step 2: from UTF-8 to TO_CODESET.  */
 711             const char *in2ptr = utf8buf;
 712             size_t in2size = utf8len;
 713
 714             while (in2size > 0
 715                    || (in1size == 0 && !do_final_flush1 && do_final_flush2))
 716               {
 717                 char *out2ptr = result + length;
 718                 size_t out2size = allocated - extra_alloc - length;
 719                 bool incremented2;
 720                 size_t res2;
 721                 bool grow;
 722
 723                 if (in2size > 0)
 724                   {
 725                     if (cd2 != (iconv_t)(-1))
 726                       res2 = iconv_carefully (cd2,
 727                                               &in2ptr, &in2size,
 728                                               &out2ptr, &out2size,
 729                                               &incremented2);
 730                     else
 731                       /* TO_CODESET is UTF-8.  */
 732                       res2 = utf8conv_carefully (false,
 733                                                  &in2ptr, &in2size,
 734                                                  &out2ptr, &out2size,
 735                                                  &incremented2);
 736                   }
 737                 else /* in1size == 0 && !do_final_flush1
 738                         && in2size == 0 && do_final_flush2 */
 739                   {
 740                     /* Now get the conversion state of CD1 back to the initial
 741                        state.  But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
 742 # if defined _LIBICONV_VERSION \
 743      || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun)
 744                     if (cd2 != (iconv_t)(-1))
 745                       res2 = iconv (cd2, NULL, NULL, &out2ptr, &out2size);
 746                     else
 747 # endif
 748                       res2 = 0;
 749                     do_final_flush2 = false;
 750                     incremented2 = true;
 751                   }
 752
 753                 length = out2ptr - result;
 754                 grow = (length + extra_alloc > allocated / 2);
 755                 if (res2 == (size_t)(-1))
 756                   {
 757                     if (errno == E2BIG)
 758                       grow = true;
 759                     else if (errno == EINVAL)
 760                       break;
 761                     else if (errno == EILSEQ && handler != iconveh_error)
 762                       {
 763                         /* Error handling can produce up to 10 bytes of ASCII
 764                            output.  But TO_CODESET may be UCS-2, UTF-16 or
 765                            UCS-4, so use CD2 here as well.  */
 766                         char scratchbuf[10];
 767                         size_t scratchlen;
 768                         ucs4_t uc;
 769                         const char *inptr;
 770                         size_t insize;
 771                         size_t res;
 772
 773                         if (incremented2)
 774                           {
 775                             if (u8_prev (&uc, (const uint8_t *) in2ptr,
 776                                          (const uint8_t *) utf8buf)
 777                                 == NULL)
 778                               abort ();
 779                           }
 780                         else
 781                           {
 782                             int n;
 783                             if (in2size == 0)
 784                               abort ();
 785                             n = u8_mbtouc_unsafe (&uc, (const uint8_t *) in2ptr,
 786                                                   in2size);
 787                             in2ptr += n;
 788                             in2size -= n;
 789                           }
 790
 791                         if (handler == iconveh_escape_sequence)
 792                           {
 793                             static char hex[16] = "0123456789ABCDEF";
 794                             scratchlen = 0;
 795                             scratchbuf[scratchlen++] = '\\';
 796                             if (uc < 0x10000)
 797                               scratchbuf[scratchlen++] = 'u';
 798                             else
 799                               {
 800                                 scratchbuf[scratchlen++] = 'U';
 801                                 scratchbuf[scratchlen++] = hex[(uc>>28) & 15];
 802                                 scratchbuf[scratchlen++] = hex[(uc>>24) & 15];
 803                                 scratchbuf[scratchlen++] = hex[(uc>>20) & 15];
 804                                 scratchbuf[scratchlen++] = hex[(uc>>16) & 15];
 805                               }
 806                             scratchbuf[scratchlen++] = hex[(uc>>12) & 15];
 807                             scratchbuf[scratchlen++] = hex[(uc>>8) & 15];
 808                             scratchbuf[scratchlen++] = hex[(uc>>4) & 15];
 809                             scratchbuf[scratchlen++] = hex[uc & 15];
 810                           }
 811                         else
 812                           {
 813                             scratchbuf[0] = '?';
 814                             scratchlen = 1;
 815                           }
 816
 817                         inptr = scratchbuf;
 818                         insize = scratchlen;
 819                         if (cd2 != (iconv_t)(-1))
 820                           res = iconv (cd2,
 821                                        (ICONV_CONST char **) &inptr, &insize,
 822                                        &out2ptr, &out2size);
 823                         else
 824                           {
 825                             /* TO_CODESET is UTF-8.  */
 826                             if (out2size >= insize)
 827                               {
 828                                 memcpy (out2ptr, inptr, insize);
 829                                 out2ptr += insize;
 830                                 out2size -= insize;
 831                                 inptr += insize;
 832                                 insize = 0;
 833                                 res = 0;
 834                               }
 835                             else
 836                               {
 837                                 errno = E2BIG;
 838                                 res = (size_t)(-1);
 839                               }
 840                           }
 841                         length = out2ptr - result;
 842                         if (res == (size_t)(-1) && errno == E2BIG)
 843                           {
 844                             char *memory;
 845
 846                             allocated = 2 * allocated;
 847                             if (length + 1 + extra_alloc > allocated)
 848                               abort ();
 849                             if (result == initial_result)
 850                               memory = (char *) malloc (allocated);
 851                             else
 852                               memory = (char *) realloc (result, allocated);
 853                             if (memory == NULL)
 854                               {
 855                                 if (result != initial_result)
 856                                   free (result);
 857                                 errno = ENOMEM;
 858                                 return -1;
 859                               }
 860                             if (result == initial_result)
 861                               memcpy (memory, initial_result, length);
 862                             result = memory;
 863                             grow = false;
 864
 865                             out2ptr = result + length;
 866                             out2size = allocated - extra_alloc - length;
 867                             if (cd2 != (iconv_t)(-1))
 868                               res = iconv (cd2,
 869                                            (ICONV_CONST char **) &inptr,
 870                                            &insize,
 871                                            &out2ptr, &out2size);
 872                             else
 873                               {
 874                                 /* TO_CODESET is UTF-8.  */
 875                                 if (!(out2size >= insize))
 876                                   abort ();
 877                                 memcpy (out2ptr, inptr, insize);
 878                                 out2ptr += insize;
 879                                 out2size -= insize;
 880                                 inptr += insize;
 881                                 insize = 0;
 882                                 res = 0;
 883                               }
 884                             length = out2ptr - result;
 885                           }
 886 # if !defined _LIBICONV_VERSION && !defined __GLIBC__
 887                         /* Irix iconv() inserts a NUL byte if it cannot convert.
 888                            NetBSD iconv() inserts a question mark if it cannot
 889                            convert.
 890                            Only GNU libiconv and GNU libc are known to prefer
 891                            to fail rather than doing a lossy conversion.  */
 892                         if (res != (size_t)(-1) && res > 0)
 893                           {
 894                             errno = EILSEQ;
 895                             res = (size_t)(-1);
 896                           }
 897 # endif
 898                         if (res == (size_t)(-1))
 899                           {
 900                             /* Failure converting the ASCII replacement.  */
 901                             if (result != initial_result)
 902                               {
 903                                 int saved_errno = errno;
 904                                 free (result);
 905                                 errno = saved_errno;
 906                               }
 907                             return -1;
 908                           }
 909                       }
 910                     else
 911                       {
 912                         if (result != initial_result)
 913                           {
 914                             int saved_errno = errno;
 915                             free (result);
 916                             errno = saved_errno;
 917                           }
 918                         return -1;
 919                       }
 920                   }
 921                 if (!(in2size > 0
 922                       || (in1size == 0 && !do_final_flush1 && do_final_flush2)))
 923                   break;
 924                 if (grow)
 925                   {
 926                     char *memory;
 927
 928                     allocated = 2 * allocated;
 929                     if (result == initial_result)
 930                       memory = (char *) malloc (allocated);
 931                     else
 932                       memory = (char *) realloc (result, allocated);
 933                     if (memory == NULL)
 934                       {
 935                         if (result != initial_result)
 936                           free (result);
 937                         errno = ENOMEM;
 938                         return -1;
 939                       }
 940                     if (result == initial_result)
 941                       memcpy (memory, initial_result, length);
 942                     result = memory;
 943                   }
 944               }
 945
 946             /* Move the remaining bytes to the beginning of utf8buf.  */
 947             if (in2size > 0)
 948               memmove (utf8buf, in2ptr, in2size);
 949             utf8len = in2size;
 950           }
 951
 952         if (res1 == (size_t)(-1))
 953           {
 954             if (errno1 == EINVAL)
 955               in1size = 0;
 956             else if (errno1 == EILSEQ)
 957               {
 958                 if (result != initial_result)
 959                   free (result);
 960                 errno = errno1;
 961                 return -1;
 962               }
 963           }
 964       }
 965 # undef utf8bufsize
 966   }
 967
 968  done:
 969   /* Now the final memory allocation.  */
 970   if (result == tmpbuf)
 971     {
 972       size_t memsize = length + extra_alloc;
 973
 974       if (*resultp != NULL && *lengthp >= memsize)
 975         result = *resultp;
 976       else
 977         {
 978           char *memory;
 979
 980           memory = (char *) malloc (memsize > 0 ? memsize : 1);
 981           if (memory != NULL)
 982             result = memory;
 983           else
 984             {
 985               errno = ENOMEM;
 986               return -1;
 987             }
 988         }
 989       memcpy (result, tmpbuf, length);
 990     }
 991   else if (result != *resultp && length + extra_alloc < allocated)
 992     {
 993       /* Shrink the allocated memory if possible.  */
 994       size_t memsize = length + extra_alloc;
 995       char *memory;
 996
 997       memory = (char *) realloc (result, memsize > 0 ? memsize : 1);
 998       if (memory != NULL)
 999         result = memory;
1000     }
1001   *resultp = result;
1002   *lengthp = length;
1003   return 0;
1004 # undef tmpbuf
1005 # undef tmpbufsize
1006 }
1007
1008 int
1009 mem_cd_iconveh (const char *src, size_t srclen,
1010                 const iconveh_t *cd,
1011                 enum iconv_ilseq_handler handler,
1012                 size_t *offsets,
1013                 char **resultp, size_t *lengthp)
1014 {
1015   return mem_cd_iconveh_internal (src, srclen, cd->cd, cd->cd1, cd->cd2,
1016                                   handler, 0, offsets, resultp, lengthp);
1017 }
1018
1019 char *
1020 str_cd_iconveh (const char *src,
1021                 const iconveh_t *cd,
1022                 enum iconv_ilseq_handler handler)
1023 {
1024   /* For most encodings, a trailing NUL byte in the input will be converted
1025      to a trailing NUL byte in the output.  But not for UTF-7.  So that this
1026      function is usable for UTF-7, we have to exclude the NUL byte from the
1027      conversion and add it by hand afterwards.  */
1028   char *result = NULL;
1029   size_t length = 0;
1030   int retval = mem_cd_iconveh_internal (src, strlen (src),
1031                                         cd->cd, cd->cd1, cd->cd2, handler, 1,
1032                                         NULL, &result, &length);
1033
1034   if (retval < 0)
1035     {
1036       if (result != NULL)
1037         {
1038           int saved_errno = errno;
1039           free (result);
1040           errno = saved_errno;
1041         }
1042       return NULL;
1043     }
1044
1045   /* Add the terminating NUL byte.  */
1046   result[length] = '\0';
1047
1048   return result;
1049 }
1050
1051 #endif
1052
1053 int
1054 mem_iconveh (const char *src, size_t srclen,
1055              const char *from_codeset, const char *to_codeset,
1056              enum iconv_ilseq_handler handler,
1057              size_t *offsets,
1058              char **resultp, size_t *lengthp)
1059 {
1060   if (srclen == 0)
1061     {
1062       /* Nothing to convert.  */
1063       *lengthp = 0;
1064       return 0;
1065     }
1066   else if (offsets == NULL && c_strcasecmp (from_codeset, to_codeset) == 0)
1067     {
1068       char *result;
1069
1070       if (*resultp != NULL && *lengthp >= srclen)
1071         result = *resultp;
1072       else
1073         {
1074           result = (char *) malloc (srclen);
1075           if (result == NULL)
1076             {
1077               errno = ENOMEM;
1078               return -1;
1079             }
1080         }
1081       memcpy (result, src, srclen);
1082       *resultp = result;
1083       *lengthp = srclen;
1084       return 0;
1085     }
1086   else
1087     {
1088 #if HAVE_ICONV
1089       iconveh_t cd;
1090       char *result;
1091       size_t length;
1092       int retval;
1093
1094       if (iconveh_open (to_codeset, from_codeset, &cd) < 0)
1095         return -1;
1096
1097       result = *resultp;
1098       length = *lengthp;
1099       retval = mem_cd_iconveh (src, srclen, &cd, handler, offsets,
1100                                &result, &length);
1101
1102       if (retval < 0)
1103         {
1104           /* Close cd, but preserve the errno from str_cd_iconv.  */
1105           int saved_errno = errno;
1106           iconveh_close (&cd);
1107           errno = saved_errno;
1108         }
1109       else
1110         {
1111           if (iconveh_close (&cd) < 0)
1112             {
1113               /* Return -1, but free the allocated memory, and while doing
1114                  that, preserve the errno from iconveh_close.  */
1115               int saved_errno = errno;
1116               if (result != *resultp && result != NULL)
1117                 free (result);
1118               errno = saved_errno;
1119               return -1;
1120             }
1121           *resultp = result;
1122           *lengthp = length;
1123         }
1124       return retval;
1125 #else
1126       /* This is a different error code than if iconv_open existed but didn't
1127          support from_codeset and to_codeset, so that the caller can emit
1128          an error message such as
1129            "iconv() is not supported. Installing GNU libiconv and
1130             then reinstalling this package would fix this."  */
1131       errno = ENOSYS;
1132       return -1;
1133 #endif
1134     }
1135 }
1136
1137 char *
1138 str_iconveh (const char *src,
1139              const char *from_codeset, const char *to_codeset,
1140              enum iconv_ilseq_handler handler)
1141 {
1142   if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)
1143     {
1144       char *result = strdup (src);
1145
1146       if (result == NULL)
1147         errno = ENOMEM;
1148       return result;
1149     }
1150   else
1151     {
1152 #if HAVE_ICONV
1153       iconveh_t cd;
1154       char *result;
1155
1156       if (iconveh_open (to_codeset, from_codeset, &cd) < 0)
1157         return NULL;
1158
1159       result = str_cd_iconveh (src, &cd, handler);
1160
1161       if (result == NULL)
1162         {
1163           /* Close cd, but preserve the errno from str_cd_iconv.  */
1164           int saved_errno = errno;
1165           iconveh_close (&cd);
1166           errno = saved_errno;
1167         }
1168       else
1169         {
1170           if (iconveh_close (&cd) < 0)
1171             {
1172               /* Return NULL, but free the allocated memory, and while doing
1173                  that, preserve the errno from iconveh_close.  */
1174               int saved_errno = errno;
1175               free (result);
1176               errno = saved_errno;
1177               return NULL;
1178             }
1179         }
1180       return result;
1181 #else
1182       /* This is a different error code than if iconv_open existed but didn't
1183          support from_codeset and to_codeset, so that the caller can emit
1184          an error message such as
1185            "iconv() is not supported. Installing GNU libiconv and
1186             then reinstalling this package would fix this."  */
1187       errno = ENOSYS;
1188       return NULL;
1189 #endif
1190     }
1191 }