apt-pkg/contrib/strutl.cc

   1 // -*- mode: cpp; mode: fold -*-
   2 // Description                                                          /*{{{*/
   3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
   4 /* ######################################################################
   5
   6    String Util - Some useful string functions.
   7
   8    These have been collected from here and there to do all sorts of useful
   9    things to strings. They are useful in file parsers, URI handlers and
  10    especially in APT methods.
  11
  12    This source is placed in the Public Domain, do with it what you will
  13    It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
  14
  15    ##################################################################### */
  16                                                                         /*}}}*/
  17 // Includes                                                             /*{{{*/
  18 #include <apt-pkg/strutl.h>
  19 #include <apt-pkg/fileutl.h>
  20 #include <apt-pkg/error.h>
  21
  22 #include <apti18n.h>
  23
  24 #include <ctype.h>
  25 #include <string.h>
  26 #include <stdio.h>
  27 #include <algorithm>
  28 #include <unistd.h>
  29 #include <regex.h>
  30 #include <errno.h>
  31 #include <stdarg.h>
  32 #include <iconv.h>
  33
  34 #include "config.h"
  35
  36 using namespace std;
  37                                                                         /*}}}*/
  38
  39 // UTF8ToCodeset - Convert some UTF-8 string for some codeset           /*{{{*/
  40 // ---------------------------------------------------------------------
   41 /* This is handy to use before displaying information to the end user */
  42 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
  43 {
  44   iconv_t cd;
  45   const char *inbuf;
  46   char *inptr, *outbuf;
  47   size_t insize, bufsize;
  48   dest->clear();
  49
  50   cd = iconv_open(codeset, "UTF-8");
  51   if (cd == (iconv_t)(-1)) {
  52      // Something went wrong
  53      if (errno == EINVAL)
  54         _error->Error("conversion from 'UTF-8' to '%s' not available",
  55                codeset);
  56      else
  57         perror("iconv_open");
  58
  59      return false;
  60   }
  61
  62   insize = bufsize = orig.size();
  63   inbuf = orig.data();
  64   inptr = (char *)inbuf;
  65   outbuf = new char[bufsize];
  66   size_t lastError = -1;
  67
  68   while (insize != 0)
  69   {
  70      char *outptr = outbuf;
  71      size_t outsize = bufsize;
  72      size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
  73      dest->append(outbuf, outptr - outbuf);
  74      if (err == (size_t)(-1))
  75      {
  76         switch (errno)
  77         {
  78         case EILSEQ:
  79            insize--;
  80            inptr++;
  81            // replace a series of unknown multibytes with a single "?"
  82            if (lastError != insize) {
  83               lastError = insize - 1;
  84               dest->append("?");
  85            }
  86            break;
  87         case EINVAL:
  88            insize = 0;
  89            break;
  90         case E2BIG:
  91            if (outptr == outbuf)
  92            {
  93               bufsize *= 2;
  94               delete[] outbuf;
  95               outbuf = new char[bufsize];
  96            }
  97            break;
  98         }
  99      }
 100   }
 101
 102   delete[] outbuf;
 103
 104   iconv_close(cd);
 105
 106   return true;
 107 }
 108                                                                         /*}}}*/
 109 // strstrip - Remove white space from the front and back of a string    /*{{{*/
 110 // ---------------------------------------------------------------------
 111 /* This is handy to use when parsing a file. It also removes \n's left
 112    over from fgets and company */
 113 char *_strstrip(char *String)
 114 {
 115    for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
 116
 117    if (*String == 0)
 118       return String;
 119
 120    char *End = String + strlen(String) - 1;
 121    for (;End != String - 1 && (*End == ' ' || *End == '\t' || *End == '\n' ||
 122                                *End == '\r'); End--);
 123    End++;
 124    *End = 0;
 125    return String;
 126 };
 127                                                                         /*}}}*/
 128 // strtabexpand - Converts tabs into 8 spaces                           /*{{{*/
 129 // ---------------------------------------------------------------------
 130 /* */
 131 char *_strtabexpand(char *String,size_t Len)
 132 {
 133    for (char *I = String; I != I + Len && *I != 0; I++)
 134    {
 135       if (*I != '\t')
 136          continue;
 137       if (I + 8 > String + Len)
 138       {
 139          *I = 0;
 140          return String;
 141       }
 142
 143       /* Assume the start of the string is 0 and find the next 8 char
 144          division */
 145       int Len;
 146       if (String == I)
 147          Len = 1;
 148       else
 149          Len = 8 - ((String - I) % 8);
 150       Len -= 2;
 151       if (Len <= 0)
 152       {
 153          *I = ' ';
 154          continue;
 155       }
 156
 157       memmove(I + Len,I + 1,strlen(I) + 1);
 158       for (char *J = I; J + Len != I; *I = ' ', I++);
 159    }
 160    return String;
 161 }
 162                                                                         /*}}}*/
 163 // ParseQuoteWord - Parse a single word out of a string                 /*{{{*/
 164 // ---------------------------------------------------------------------
 165 /* This grabs a single word, converts any % escaped characters to their
 166    proper values and advances the pointer. Double quotes are understood
  167    and stripped out as well. This is for URI/URL parsing. It also can
 168    understand [] brackets.*/
 169 bool ParseQuoteWord(const char *&String,string &Res)
 170 {
 171    // Skip leading whitespace
 172    const char *C = String;
 173    for (;*C != 0 && *C == ' '; C++);
 174    if (*C == 0)
 175       return false;
 176
 177    // Jump to the next word
 178    for (;*C != 0 && isspace(*C) == 0; C++)
 179    {
 180       if (*C == '"')
 181       {
 182          for (C++; *C != 0 && *C != '"'; C++);
 183          if (*C == 0)
 184             return false;
 185       }
 186       if (*C == '[')
 187       {
 188          for (C++; *C != 0 && *C != ']'; C++);
 189          if (*C == 0)
 190             return false;
 191       }
 192    }
 193
 194    // Now de-quote characters
 195    char Buffer[1024];
 196    char Tmp[3];
 197    const char *Start = String;
 198    char *I;
 199    for (I = Buffer; I < Buffer + sizeof(Buffer) && Start != C; I++)
 200    {
 201       if (*Start == '%' && Start + 2 < C)
 202       {
 203          Tmp[0] = Start[1];
 204          Tmp[1] = Start[2];
 205          Tmp[2] = 0;
 206          *I = (char)strtol(Tmp,0,16);
 207          Start += 3;
 208          continue;
 209       }
 210       if (*Start != '"')
 211          *I = *Start;
 212       else
 213          I--;
 214       Start++;
 215    }
 216    *I = 0;
 217    Res = Buffer;
 218
 219    // Skip ending white space
 220    for (;*C != 0 && isspace(*C) != 0; C++);
 221    String = C;
 222    return true;
 223 }
 224                                                                         /*}}}*/
 225 // ParseCWord - Parses a string like a C "" expression                  /*{{{*/
 226 // ---------------------------------------------------------------------
 227 /* This expects a series of space separated strings enclosed in ""'s.
 228    It concatenates the ""'s into a single string. */
 229 bool ParseCWord(const char *&String,string &Res)
 230 {
 231    // Skip leading whitespace
 232    const char *C = String;
 233    for (;*C != 0 && *C == ' '; C++);
 234    if (*C == 0)
 235       return false;
 236
 237    char Buffer[1024];
 238    char *Buf = Buffer;
 239    if (strlen(String) >= sizeof(Buffer))
 240        return false;
 241
 242    for (; *C != 0; C++)
 243    {
 244       if (*C == '"')
 245       {
 246          for (C++; *C != 0 && *C != '"'; C++)
 247             *Buf++ = *C;
 248
 249          if (*C == 0)
 250             return false;
 251
 252          continue;
 253       }
 254
 255       if (C != String && isspace(*C) != 0 && isspace(C[-1]) != 0)
 256          continue;
 257       if (isspace(*C) == 0)
 258          return false;
 259       *Buf++ = ' ';
 260    }
 261    *Buf = 0;
 262    Res = Buffer;
 263    String = C;
 264    return true;
 265 }
 266                                                                         /*}}}*/
  267 // QuoteString - Convert a string into quoted form                      /*{{{*/
 268 // ---------------------------------------------------------------------
 269 /* */
 270 string QuoteString(const string &Str, const char *Bad)
 271 {
 272    string Res;
 273    for (string::const_iterator I = Str.begin(); I != Str.end(); I++)
 274    {
 275       if (strchr(Bad,*I) != 0 || isprint(*I) == 0 ||
 276           *I <= 0x20 || *I >= 0x7F)
 277       {
 278          char Buf[10];
 279          sprintf(Buf,"%%%02x",(int)*I);
 280          Res += Buf;
 281       }
 282       else
 283          Res += *I;
 284    }
 285    return Res;
 286 }
 287                                                                         /*}}}*/
  288 // DeQuoteString - Convert a string from quoted form                    /*{{{*/
 289 // ---------------------------------------------------------------------
 290 /* This undoes QuoteString */
 291 string DeQuoteString(const string &Str)
 292 {
 293    string Res;
 294    for (string::const_iterator I = Str.begin(); I != Str.end(); I++)
 295    {
 296       if (*I == '%' && I + 2 < Str.end())
 297       {
 298          char Tmp[3];
 299          Tmp[0] = I[1];
 300          Tmp[1] = I[2];
 301          Tmp[2] = 0;
 302          Res += (char)strtol(Tmp,0,16);
 303          I += 2;
 304          continue;
 305       }
 306       else
 307          Res += *I;
 308    }
 309    return Res;
 310 }
 311
 312                                                                         /*}}}*/
 313 // SizeToStr - Convert a long into a human readable size                /*{{{*/
 314 // ---------------------------------------------------------------------
 315 /* A max of 4 digits are shown before conversion to the next highest unit.
 316    The max length of the string will be 5 chars unless the size is > 10
 317    YottaBytes (E24) */
 318 string SizeToStr(double Size)
 319 {
 320    char S[300];
 321    double ASize;
 322    if (Size >= 0)
 323       ASize = Size;
 324    else
 325       ASize = -1*Size;
 326
 327    /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
 328       ExaBytes, ZettaBytes, YottaBytes */
 329    char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
 330    int I = 0;
 331    while (I <= 8)
 332    {
 333       if (ASize < 100 && I != 0)
 334       {
 335          sprintf(S,"%'.1f%c",ASize,Ext[I]);
 336          break;
 337       }
 338
 339       if (ASize < 10000)
 340       {
 341          sprintf(S,"%'.0f%c",ASize,Ext[I]);
 342          break;
 343       }
 344       ASize /= 1000.0;
 345       I++;
 346    }
 347
 348    return S;
 349 }
 350                                                                         /*}}}*/
 351 // TimeToStr - Convert the time into a string                           /*{{{*/
 352 // ---------------------------------------------------------------------
 353 /* Converts a number of seconds to a hms format */
 354 string TimeToStr(unsigned long Sec)
 355 {
 356    char S[300];
 357
 358    while (1)
 359    {
 360       if (Sec > 60*60*24)
 361       {
 362          //d means days, h means hours, min means minutes, s means seconds
 363          sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
 364          break;
 365       }
 366
 367       if (Sec > 60*60)
 368       {
 369          //h means hours, min means minutes, s means seconds
 370          sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
 371          break;
 372       }
 373
 374       if (Sec > 60)
 375       {
 376          //min means minutes, s means seconds
 377          sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
 378          break;
 379       }
 380
 381       //s means seconds
 382       sprintf(S,_("%lis"),Sec);
 383       break;
 384    }
 385
 386    return S;
 387 }
 388                                                                         /*}}}*/
 389 // SubstVar - Substitute a string for another string                    /*{{{*/
 390 // ---------------------------------------------------------------------
  391 /* This replaces all occurrences of Subst with Contents in Str. */
 392 string SubstVar(const string &Str,const string &Subst,const string &Contents)
 393 {
 394    string::size_type Pos = 0;
 395    string::size_type OldPos = 0;
 396    string Temp;
 397
 398    while (OldPos < Str.length() &&
 399           (Pos = Str.find(Subst,OldPos)) != string::npos)
 400    {
 401       Temp += string(Str,OldPos,Pos) + Contents;
 402       OldPos = Pos + Subst.length();
 403    }
 404
 405    if (OldPos == 0)
 406       return Str;
 407
 408    return Temp + string(Str,OldPos);
 409 }
 410
 411 string SubstVar(string Str,const struct SubstVar *Vars)
 412 {
 413    for (; Vars->Subst != 0; Vars++)
 414       Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
 415    return Str;
 416 }
 417                                                                         /*}}}*/
 418 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
 419 // ---------------------------------------------------------------------
 420 /* Returns a string with the supplied separator depth + 1 times in it */
 421 std::string OutputInDepth(const unsigned long Depth, const char* Separator)
 422 {
 423    std::string output = "";
 424    for(unsigned long d=Depth+1; d > 0; d--)
 425       output.append(Separator);
 426    return output;
 427 }
 428                                                                         /*}}}*/
 429 // URItoFileName - Convert the uri into a unique file name              /*{{{*/
 430 // ---------------------------------------------------------------------
 431 /* This converts a URI into a safe filename. It quotes all unsafe characters
 432    and converts / to _ and removes the scheme identifier. The resulting
 433    file name should be unique and never occur again for a different file */
 434 string URItoFileName(const string &URI)
 435 {
 436    // Nuke 'sensitive' items
 437    ::URI U(URI);
 438    U.User.clear();
 439    U.Password.clear();
 440    U.Access.clear();
 441
 442    // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
 443    string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
 444    replace(NewURI.begin(),NewURI.end(),'/','_');
 445    return NewURI;
 446 }
 447                                                                         /*}}}*/
 448 // Base64Encode - Base64 Encoding routine for short strings             /*{{{*/
 449 // ---------------------------------------------------------------------
 450 /* This routine performs a base64 transformation on a string. It was ripped
 451    from wget and then patched and bug fixed.
 452
 453    This spec can be found in rfc2045 */
 454 string Base64Encode(const string &S)
 455 {
 456    // Conversion table.
 457    static char tbl[64] = {'A','B','C','D','E','F','G','H',
 458                           'I','J','K','L','M','N','O','P',
 459                           'Q','R','S','T','U','V','W','X',
 460                           'Y','Z','a','b','c','d','e','f',
 461                           'g','h','i','j','k','l','m','n',
 462                           'o','p','q','r','s','t','u','v',
 463                           'w','x','y','z','0','1','2','3',
 464                           '4','5','6','7','8','9','+','/'};
 465
 466    // Pre-allocate some space
 467    string Final;
 468    Final.reserve((4*S.length() + 2)/3 + 2);
 469
 470    /* Transform the 3x8 bits to 4x6 bits, as required by
 471       base64.  */
 472    for (string::const_iterator I = S.begin(); I < S.end(); I += 3)
 473    {
 474       char Bits[3] = {0,0,0};
 475       Bits[0] = I[0];
 476       if (I + 1 < S.end())
 477          Bits[1] = I[1];
 478       if (I + 2 < S.end())
 479          Bits[2] = I[2];
 480
 481       Final += tbl[Bits[0] >> 2];
 482       Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];
 483
 484       if (I + 1 >= S.end())
 485          break;
 486
 487       Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];
 488
 489       if (I + 2 >= S.end())
 490          break;
 491
 492       Final += tbl[Bits[2] & 0x3f];
 493    }
 494
 495    /* Apply the padding elements, this tells how many bytes the remote
 496       end should discard */
 497    if (S.length() % 3 == 2)
 498       Final += '=';
 499    if (S.length() % 3 == 1)
 500       Final += "==";
 501
 502    return Final;
 503 }
 504                                                                         /*}}}*/
 505 // stringcmp - Arbitrary string compare                                 /*{{{*/
 506 // ---------------------------------------------------------------------
 507 /* This safely compares two non-null terminated strings of arbitrary
 508    length */
 509 int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 510 {
 511    for (; A != AEnd && B != BEnd; A++, B++)
 512       if (*A != *B)
 513          break;
 514
 515    if (A == AEnd && B == BEnd)
 516       return 0;
 517    if (A == AEnd)
 518       return 1;
 519    if (B == BEnd)
 520       return -1;
 521    if (*A < *B)
 522       return -1;
 523    return 1;
 524 }
 525
 526 #if __GNUC__ >= 3
 527 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 528               const char *B,const char *BEnd)
 529 {
 530    for (; A != AEnd && B != BEnd; A++, B++)
 531       if (*A != *B)
 532          break;
 533
 534    if (A == AEnd && B == BEnd)
 535       return 0;
 536    if (A == AEnd)
 537       return 1;
 538    if (B == BEnd)
 539       return -1;
 540    if (*A < *B)
 541       return -1;
 542    return 1;
 543 }
 544 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 545               string::const_iterator B,string::const_iterator BEnd)
 546 {
 547    for (; A != AEnd && B != BEnd; A++, B++)
 548       if (*A != *B)
 549          break;
 550
 551    if (A == AEnd && B == BEnd)
 552       return 0;
 553    if (A == AEnd)
 554       return 1;
 555    if (B == BEnd)
 556       return -1;
 557    if (*A < *B)
 558       return -1;
 559    return 1;
 560 }
 561 #endif
 562                                                                         /*}}}*/
 563 // stringcasecmp - Arbitrary case insensitive string compare            /*{{{*/
 564 // ---------------------------------------------------------------------
 565 /* */
 566 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 567 {
 568    for (; A != AEnd && B != BEnd; A++, B++)
 569       if (toupper(*A) != toupper(*B))
 570          break;
 571
 572    if (A == AEnd && B == BEnd)
 573       return 0;
 574    if (A == AEnd)
 575       return 1;
 576    if (B == BEnd)
 577       return -1;
 578    if (toupper(*A) < toupper(*B))
 579       return -1;
 580    return 1;
 581 }
 582 #if __GNUC__ >= 3
 583 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 584                   const char *B,const char *BEnd)
 585 {
 586    for (; A != AEnd && B != BEnd; A++, B++)
 587       if (toupper(*A) != toupper(*B))
 588          break;
 589
 590    if (A == AEnd && B == BEnd)
 591       return 0;
 592    if (A == AEnd)
 593       return 1;
 594    if (B == BEnd)
 595       return -1;
 596    if (toupper(*A) < toupper(*B))
 597       return -1;
 598    return 1;
 599 }
 600 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 601                   string::const_iterator B,string::const_iterator BEnd)
 602 {
 603    for (; A != AEnd && B != BEnd; A++, B++)
 604       if (toupper(*A) != toupper(*B))
 605          break;
 606
 607    if (A == AEnd && B == BEnd)
 608       return 0;
 609    if (A == AEnd)
 610       return 1;
 611    if (B == BEnd)
 612       return -1;
 613    if (toupper(*A) < toupper(*B))
 614       return -1;
 615    return 1;
 616 }
 617 #endif
 618                                                                         /*}}}*/
  619 // LookupTag - Lookup the value of a tag in a tagged string             /*{{{*/
 620 // ---------------------------------------------------------------------
 621 /* The format is like those used in package files and the method
 622    communication system */
 623 string LookupTag(const string &Message,const char *Tag,const char *Default)
 624 {
 625    // Look for a matching tag.
 626    int Length = strlen(Tag);
 627    for (string::const_iterator I = Message.begin(); I + Length < Message.end(); I++)
 628    {
 629       // Found the tag
 630       if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
 631       {
 632          // Find the end of line and strip the leading/trailing spaces
 633          string::const_iterator J;
 634          I += Length + 1;
 635          for (; isspace(*I) != 0 && I < Message.end(); I++);
 636          for (J = I; *J != '\n' && J < Message.end(); J++);
 637          for (; J > I && isspace(J[-1]) != 0; J--);
 638
 639          return string(I,J);
 640       }
 641
 642       for (; *I != '\n' && I < Message.end(); I++);
 643    }
 644
 645    // Failed to find a match
 646    if (Default == 0)
 647       return string();
 648    return Default;
 649 }
 650                                                                         /*}}}*/
 651 // StringToBool - Converts a string into a boolean                      /*{{{*/
 652 // ---------------------------------------------------------------------
 653 /* This inspects the string to see if it is true or if it is false and
  654    then returns the result. Several variants on true/false are checked. */
 655 int StringToBool(const string &Text,int Default)
 656 {
 657    char *End;
 658    int Res = strtol(Text.c_str(),&End,0);
 659    if (End != Text.c_str() && Res >= 0 && Res <= 1)
 660       return Res;
 661
 662    // Check for positives
 663    if (strcasecmp(Text.c_str(),"no") == 0 ||
 664        strcasecmp(Text.c_str(),"false") == 0 ||
 665        strcasecmp(Text.c_str(),"without") == 0 ||
 666        strcasecmp(Text.c_str(),"off") == 0 ||
 667        strcasecmp(Text.c_str(),"disable") == 0)
 668       return 0;
 669
 670    // Check for negatives
 671    if (strcasecmp(Text.c_str(),"yes") == 0 ||
 672        strcasecmp(Text.c_str(),"true") == 0 ||
 673        strcasecmp(Text.c_str(),"with") == 0 ||
 674        strcasecmp(Text.c_str(),"on") == 0 ||
 675        strcasecmp(Text.c_str(),"enable") == 0)
 676       return 1;
 677
 678    return Default;
 679 }
 680                                                                         /*}}}*/
 681 // TimeRFC1123 - Convert a time_t into RFC1123 format                   /*{{{*/
 682 // ---------------------------------------------------------------------
 683 /* This converts a time_t into a string time representation that is
  684    year 2000 compliant and timezone neutral */
 685 string TimeRFC1123(time_t Date)
 686 {
 687    struct tm Conv = *gmtime(&Date);
 688    char Buf[300];
 689
 690    const char *Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
 691    const char *Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
 692                           "Aug","Sep","Oct","Nov","Dec"};
 693
 694    sprintf(Buf,"%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
 695            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
 696            Conv.tm_min,Conv.tm_sec);
 697    return Buf;
 698 }
 699                                                                         /*}}}*/
 700 // ReadMessages - Read messages from the FD                             /*{{{*/
 701 // ---------------------------------------------------------------------
 702 /* This pulls full messages from the input FD into the message buffer.
 703    It assumes that messages will not pause during transit so no
 704    fancy buffering is used.
 705
 706    In particular: this reads blocks from the input until it believes
 707    that it's run out of input text.  Each block is terminated by a
 708    double newline ('\n' followed by '\n').  As noted below, there is a
 709    bug in this code: it assumes that all the blocks have been read if
 710    it doesn't see additional text in the buffer after the last one is
 711    parsed, which will cause it to lose blocks if the last block
 712    coincides with the end of the buffer.
 713  */
 714 bool ReadMessages(int Fd, vector<string> &List)
 715 {
 716    char Buffer[64000];
 717    char *End = Buffer;
 718    // Represents any left-over from the previous iteration of the
 719    // parse loop.  (i.e., if a message is split across the end
 720    // of the buffer, it goes here)
 721    string PartialMessage;
 722
 723    while (1)
 724    {
 725       int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
 726       if (Res < 0 && errno == EINTR)
 727          continue;
 728
 729       // Process is dead, this is kind of bad..
 730       if (Res == 0)
 731          return false;
 732
 733       // No data
 734       if (Res < 0 && errno == EAGAIN)
 735          return true;
 736       if (Res < 0)
 737          return false;
 738
 739       End += Res;
 740
 741       // Look for the end of the message
 742       for (char *I = Buffer; I + 1 < End; I++)
 743       {
 744          if (I[0] != '\n' || I[1] != '\n')
 745             continue;
 746
 747          // Pull the message out
 748          string Message(Buffer,I-Buffer);
 749          PartialMessage += Message;
 750
 751          // Fix up the buffer
 752          for (; I < End && *I == '\n'; I++);
 753          End -= I-Buffer;
 754          memmove(Buffer,I,End-Buffer);
 755          I = Buffer;
 756
 757          List.push_back(PartialMessage);
 758          PartialMessage.clear();
 759       }
 760       if (End != Buffer)
 761         {
 762           // If there's text left in the buffer, store it
 763           // in PartialMessage and throw the rest of the buffer
 764           // away.  This allows us to handle messages that
 765           // are longer than the static buffer size.
 766           PartialMessage += string(Buffer, End);
 767           End = Buffer;
 768         }
 769       else
 770         {
 771           // BUG ALERT: if a message block happens to end at a
 772           // multiple of 64000 characters, this will cause it to
 773           // terminate early, leading to a badly formed block and
 774           // probably crashing the method.  However, this is the only
 775           // way we have to find the end of the message block.  I have
 776           // an idea of how to fix this, but it will require changes
 777           // to the protocol (essentially to mark the beginning and
 778           // end of the block).
 779           //
 780           //  -- dburrows 2008-04-02
 781           return true;
 782         }
 783
 784       if (WaitFd(Fd) == false)
 785          return false;
 786    }
 787 }
 788                                                                         /*}}}*/
 789 // MonthConv - Converts a month string into a number                    /*{{{*/
 790 // ---------------------------------------------------------------------
 791 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
 792    Made it a bit more robust with a few touppers though. */
 793 static int MonthConv(char *Month)
 794 {
 795    switch (toupper(*Month))
 796    {
 797       case 'A':
 798       return toupper(Month[1]) == 'P'?3:7;
 799       case 'D':
 800       return 11;
 801       case 'F':
 802       return 1;
 803       case 'J':
 804       if (toupper(Month[1]) == 'A')
 805          return 0;
 806       return toupper(Month[2]) == 'N'?5:6;
 807       case 'M':
 808       return toupper(Month[2]) == 'R'?2:4;
 809       case 'N':
 810       return 10;
 811       case 'O':
 812       return 9;
 813       case 'S':
 814       return 8;
 815
 816       // Pretend it is January..
 817       default:
 818       return 0;
 819    }
 820 }
 821                                                                         /*}}}*/
 822 // timegm - Internal timegm function if gnu is not available            /*{{{*/
 823 // ---------------------------------------------------------------------
 824 /* Ripped this evil little function from wget - I prefer the use of
 825    GNU timegm if possible as this technique will have interesting problems
 826    with leap seconds, timezones and other.
 827
 828    Converts struct tm to time_t, assuming the data in tm is UTC rather
 829    than local timezone (mktime assumes the latter).
 830
 831    Contributed by Roger Beeman <beeman@cisco.com>, with the help of
 832    Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO. */
 833
 834 /* Turned it into an autoconf check, because GNU is not the only thing which
 835    can provide timegm. -- 2002-09-22, Joel Baker */
 836
 837 #ifndef HAVE_TIMEGM // Now with autoconf!
 838 static time_t timegm(struct tm *t)
 839 {
 840    time_t tl, tb;
 841
 842    tl = mktime (t);
 843    if (tl == -1)
 844       return -1;
 845    tb = mktime (gmtime (&tl));
 846    return (tl <= tb ? (tl + (tl - tb)) : (tl - (tb - tl)));
 847 }
 848 #endif
 849                                                                         /*}}}*/
 850 // StrToTime - Converts a string into a time_t                          /*{{{*/
 851 // ---------------------------------------------------------------------
  852 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
  853    and the C library asctime format. It requires the GNU library function
  854    'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
  855    reason the C library does not provide any such function :< This also
  856    handles the weird, but unambiguous FTP time format */
 857 bool StrToTime(const string &Val,time_t &Result)
 858 {
 859    struct tm Tm;
 860    char Month[10];
 861    const char *I = Val.c_str();
 862
 863    // Skip the day of the week
 864    for (;*I != 0  && *I != ' '; I++);
 865
 866    // Handle RFC 1123 time
 867    Month[0] = 0;
 868    if (sscanf(I," %d %3s %d %d:%d:%d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
 869               &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 870    {
 871       // Handle RFC 1036 time
 872       if (sscanf(I," %d-%3s-%d %d:%d:%d GMT",&Tm.tm_mday,Month,
 873                  &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
 874          Tm.tm_year += 1900;
 875       else
 876       {
 877          // asctime format
 878          if (sscanf(I," %3s %d %d:%d:%d %d",Month,&Tm.tm_mday,
 879                     &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
 880          {
 881             // 'ftp' time
 882             if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
 883                        &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 884                return false;
 885             Tm.tm_mon--;
 886          }
 887       }
 888    }
 889
 890    Tm.tm_isdst = 0;
 891    if (Month[0] != 0)
 892       Tm.tm_mon = MonthConv(Month);
 893    Tm.tm_year -= 1900;
 894
 895    // Convert to local time and then to GMT
 896    Result = timegm(&Tm);
 897    return true;
 898 }
 899                                                                         /*}}}*/
 900 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
 901 // ---------------------------------------------------------------------
 902 /* This is used in decoding the crazy fixed length string headers in
 903    tar and ar files. */
 904 bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
 905 {
 906    char S[30];
 907    if (Len >= sizeof(S))
 908       return false;
 909    memcpy(S,Str,Len);
 910    S[Len] = 0;
 911
 912    // All spaces is a zero
 913    Res = 0;
 914    unsigned I;
 915    for (I = 0; S[I] == ' '; I++);
 916    if (S[I] == 0)
 917       return true;
 918
 919    char *End;
 920    Res = strtoul(S,&End,Base);
 921    if (End == S)
 922       return false;
 923
 924    return true;
 925 }
 926                                                                         /*}}}*/
 927 // HexDigit - Convert a hex character into an integer                   /*{{{*/
 928 // ---------------------------------------------------------------------
 929 /* Helper for Hex2Num */
 930 static int HexDigit(int c)
 931 {
 932    if (c >= '0' && c <= '9')
 933       return c - '0';
 934    if (c >= 'a' && c <= 'f')
 935       return c - 'a' + 10;
 936    if (c >= 'A' && c <= 'F')
 937       return c - 'A' + 10;
 938    return 0;
 939 }
 940                                                                         /*}}}*/
 941 // Hex2Num - Convert a long hex number into a buffer                    /*{{{*/
 942 // ---------------------------------------------------------------------
 943 /* The length of the buffer must be exactly 1/2 the length of the string. */
 944 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
 945 {
 946    if (Str.length() != Length*2)
 947       return false;
 948
 949    // Convert each digit. We store it in the same order as the string
 950    int J = 0;
 951    for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
 952    {
 953       if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
 954          return false;
 955
 956       Num[J] = HexDigit(I[0]) << 4;
 957       Num[J] += HexDigit(I[1]);
 958    }
 959
 960    return true;
 961 }
 962                                                                         /*}}}*/
 963 // TokSplitString - Split a string up by a given token                  /*{{{*/
 964 // ---------------------------------------------------------------------
 965 /* This is intended to be a faster splitter, it does not use dynamic
 966    memories. Input is changed to insert nulls at each token location. */
 967 bool TokSplitString(char Tok,char *Input,char **List,
 968                     unsigned long ListMax)
 969 {
 970    // Strip any leading spaces
 971    char *Start = Input;
 972    char *Stop = Start + strlen(Start);
 973    for (; *Start != 0 && isspace(*Start) != 0; Start++);
 974
 975    unsigned long Count = 0;
 976    char *Pos = Start;
 977    while (Pos != Stop)
 978    {
 979       // Skip to the next Token
 980       for (; Pos != Stop && *Pos != Tok; Pos++);
 981
 982       // Back remove spaces
 983       char *End = Pos;
 984       for (; End > Start && (End[-1] == Tok || isspace(End[-1]) != 0); End--);
 985       *End = 0;
 986
 987       List[Count++] = Start;
 988       if (Count >= ListMax)
 989       {
 990          List[Count-1] = 0;
 991          return false;
 992       }
 993
 994       // Advance pos
 995       for (; Pos != Stop && (*Pos == Tok || isspace(*Pos) != 0 || *Pos == 0); Pos++);
 996       Start = Pos;
 997    }
 998
 999    List[Count] = 0;
1000    return true;
1001 }
1002                                                                         /*}}}*/
1003 // RegexChoice - Simple regex list/list matcher                         /*{{{*/
1004 // ---------------------------------------------------------------------
1005 /* */
1006 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1007                       const char **ListEnd)
1008 {
1009    for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1010       R->Hit = false;
1011
1012    unsigned long Hits = 0;
1013    for (; ListBegin != ListEnd; ListBegin++)
1014    {
1015       // Check if the name is a regex
1016       const char *I;
1017       bool Regex = true;
1018       for (I = *ListBegin; *I != 0; I++)
1019          if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1020             break;
1021       if (*I == 0)
1022          Regex = false;
1023
1024       // Compile the regex pattern
1025       regex_t Pattern;
1026       if (Regex == true)
1027          if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1028                      REG_NOSUB) != 0)
1029             Regex = false;
1030
1031       // Search the list
1032       bool Done = false;
1033       for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1034       {
1035          if (R->Str[0] == 0)
1036             continue;
1037
1038          if (strcasecmp(R->Str,*ListBegin) != 0)
1039          {
1040             if (Regex == false)
1041                continue;
1042             if (regexec(&Pattern,R->Str,0,0,0) != 0)
1043                continue;
1044          }
1045          Done = true;
1046
1047          if (R->Hit == false)
1048             Hits++;
1049
1050          R->Hit = true;
1051       }
1052
1053       if (Regex == true)
1054          regfree(&Pattern);
1055
1056       if (Done == false)
1057          _error->Warning(_("Selection %s not found"),*ListBegin);
1058    }
1059
1060    return Hits;
1061 }
1062                                                                         /*}}}*/
1063 // ioprintf - C format string outputter to C++ iostreams                /*{{{*/
1064 // ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters. The printf style format is
   expanded into a fixed 4096 byte buffer which is then written to out, so
   at most 4095 characters are produced and anything beyond that is
   silently cut off. Nothing is written if the format cannot be expanded. */
void ioprintf(ostream &out,const char *format,...)
{
   va_list args;
   va_start(args,format);

   // sprintf the description
   char S[4096];
   const int Res = vsnprintf(S,sizeof(S),format,args);

   // Every va_start has to be paired with a va_end before returning
   va_end(args);

   // The buffer contents are not defined after a failure
   if (Res < 0)
      S[0] = 0;
   out << S;
}
1077                                                                         /*}}}*/
1078 // strprintf - C format string outputter to C++ strings                 /*{{{*/
1079 // ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters. The printf style format is
   expanded into a fixed 4096 byte buffer whose contents replace out, so
   the result holds at most 4095 characters and anything beyond that is
   silently cut off. out is set to the empty string if the format cannot
   be expanded. */
void strprintf(string &out,const char *format,...)
{
   va_list args;
   va_start(args,format);

   // sprintf the description
   char S[4096];
   const int Res = vsnprintf(S,sizeof(S),format,args);

   // Every va_start has to be paired with a va_end before returning
   va_end(args);

   // The buffer contents are not defined after a failure
   if (Res < 0)
      S[0] = 0;
   out = string(S);
}
1092                                                                         /*}}}*/
1093 // safe_snprintf - Safer snprintf                                       /*{{{*/
1094 // ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string, ready to be passed as Buffer to
   the next call. The returned string is always null terminated unless
   Buffer == End. End itself is returned if the output had to be cut off,
   if there was no room to begin with or if the format could not be
   expanded. This is a better alternative to using consecutive snprintfs. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   // Nothing can be written, not even the terminator
   if (End <= Buffer)
      return End;

   const unsigned long Room = End - Buffer;

   va_list args;
   va_start(args,Format);

   /* Kept signed, an unsigned variable could never show the negative
      result vsnprintf uses to report a failure */
   const int Did = vsnprintf(Buffer,Room,Format,args);
   va_end(args);

   if (Did < 0)
   {
      // Keep the promise of a terminated string
      *Buffer = 0;
      return End;
   }

   // The text, or all that fitted of it, reaches up to the end
   if ((unsigned long)Did >= Room)
      return End;
   return Buffer + Did;
}
1114                                                                         /*}}}*/
1115
1116 // tolower_ascii - tolower() function that ignores the locale           /*{{{*/
1117 // ---------------------------------------------------------------------
/* Maps the letters 'A' to 'Z' onto 'a' to 'z' by adding the fixed offset
   of 32. Every other value is handed back unchanged and the current
   locale is never consulted. */
int tolower_ascii(int c)
{
   const bool NotUpper = (c < 'A' || c > 'Z');
   return NotUpper ? c : c + 32;
}
1125                                                                         /*}}}*/
1126
// CheckDomainList - See if Host is in a , separated list               /*{{{*/
1128 // ---------------------------------------------------------------------
1129 /* The domain list is a comma seperate list of domains that are suffix
1130    matched against the argument */
1131 bool CheckDomainList(const string &Host,const string &List)
1132 {
1133    string::const_iterator Start = List.begin();
1134    for (string::const_iterator Cur = List.begin(); Cur <= List.end(); Cur++)
1135    {
1136       if (Cur < List.end() && *Cur != ',')
1137          continue;
1138
1139       // Match the end of the string..
1140       if ((Host.size() >= (unsigned)(Cur - Start)) &&
1141           Cur - Start != 0 &&
1142           stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1143          return true;
1144
1145       Start = Cur + 1;
1146    }
1147    return false;
1148 }
1149                                                                         /*}}}*/
1150
1151 // URI::CopyFrom - Copy from an object                                  /*{{{*/
1152 // ---------------------------------------------------------------------
1153 /* This parses the URI into all of its components */
1154 void URI::CopyFrom(const string &U)
1155 {
1156    string::const_iterator I = U.begin();
1157
1158    // Locate the first colon, this separates the scheme
1159    for (; I < U.end() && *I != ':' ; I++);
1160    string::const_iterator FirstColon = I;
1161
1162    /* Determine if this is a host type URI with a leading double //
1163       and then search for the first single / */
1164    string::const_iterator SingleSlash = I;
1165    if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1166       SingleSlash += 3;
1167
1168    /* Find the / indicating the end of the hostname, ignoring /'s in the
1169       square brackets */
1170    bool InBracket = false;
1171    for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); SingleSlash++)
1172    {
1173       if (*SingleSlash == '[')
1174          InBracket = true;
1175       if (InBracket == true && *SingleSlash == ']')
1176          InBracket = false;
1177    }
1178
1179    if (SingleSlash > U.end())
1180       SingleSlash = U.end();
1181
1182    // We can now write the access and path specifiers
1183    Access.assign(U.begin(),FirstColon);
1184    if (SingleSlash != U.end())
1185       Path.assign(SingleSlash,U.end());
1186    if (Path.empty() == true)
1187       Path = "/";
1188
1189    // Now we attempt to locate a user:pass@host fragment
1190    if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1191       FirstColon += 3;
1192    else
1193       FirstColon += 1;
1194    if (FirstColon >= U.end())
1195       return;
1196
1197    if (FirstColon > SingleSlash)
1198       FirstColon = SingleSlash;
1199
1200    // Find the colon...
1201    I = FirstColon + 1;
1202    if (I > SingleSlash)
1203       I = SingleSlash;
1204    for (; I < SingleSlash && *I != ':'; I++);
1205    string::const_iterator SecondColon = I;
1206
1207    // Search for the @ after the colon
1208    for (; I < SingleSlash && *I != '@'; I++);
1209    string::const_iterator At = I;
1210
1211    // Now write the host and user/pass
1212    if (At == SingleSlash)
1213    {
1214       if (FirstColon < SingleSlash)
1215          Host.assign(FirstColon,SingleSlash);
1216    }
1217    else
1218    {
1219       Host.assign(At+1,SingleSlash);
1220       User.assign(FirstColon,SecondColon);
1221       if (SecondColon < At)
1222          Password.assign(SecondColon+1,At);
1223    }
1224
1225    // Now we parse the RFC 2732 [] hostnames.
1226    unsigned long PortEnd = 0;
1227    InBracket = false;
1228    for (unsigned I = 0; I != Host.length();)
1229    {
1230       if (Host[I] == '[')
1231       {
1232          InBracket = true;
1233          Host.erase(I,1);
1234          continue;
1235       }
1236
1237       if (InBracket == true && Host[I] == ']')
1238       {
1239          InBracket = false;
1240          Host.erase(I,1);
1241          PortEnd = I;
1242          continue;
1243       }
1244       I++;
1245    }
1246
1247    // Tsk, weird.
1248    if (InBracket == true)
1249    {
1250       Host.clear();
1251       return;
1252    }
1253
1254    // Now we parse off a port number from the hostname
1255    Port = 0;
1256    string::size_type Pos = Host.rfind(':');
1257    if (Pos == string::npos || Pos < PortEnd)
1258       return;
1259
1260    Port = atoi(string(Host,Pos+1).c_str());
1261    Host.assign(Host,0,Pos);
1262 }
1263                                                                         /*}}}*/
1264 // URI::operator string - Convert the URI to a string                   /*{{{*/
1265 // ---------------------------------------------------------------------
1266 /* */
1267 URI::operator string()
1268 {
1269    string Res;
1270
1271    if (Access.empty() == false)
1272       Res = Access + ':';
1273
1274    if (Host.empty() == false)
1275    {
1276       if (Access.empty() == false)
1277          Res += "//";
1278
1279       if (User.empty() == false)
1280       {
1281          Res +=  User;
1282          if (Password.empty() == false)
1283             Res += ":" + Password;
1284          Res += "@";
1285       }
1286
1287       // Add RFC 2732 escaping characters
1288       if (Access.empty() == false &&
1289           (Host.find('/') != string::npos || Host.find(':') != string::npos))
1290          Res += '[' + Host + ']';
1291       else
1292          Res += Host;
1293
1294       if (Port != 0)
1295       {
1296          char S[30];
1297          sprintf(S,":%u",Port);
1298          Res += S;
1299       }
1300    }
1301
1302    if (Path.empty() == false)
1303    {
1304       if (Path[0] != '/')
1305          Res += "/" + Path;
1306       else
1307          Res += Path;
1308    }
1309
1310    return Res;
1311 }
1312                                                                         /*}}}*/
1313 // URI::SiteOnly - Return the schema and site for the URI               /*{{{*/
1314 // ---------------------------------------------------------------------
1315 /* */
1316 string URI::SiteOnly(const string &URI)
1317 {
1318    ::URI U(URI);
1319    U.User.clear();
1320    U.Password.clear();
1321    U.Path.clear();
1322    U.Port = 0;
1323    return U;
1324 }
1325                                                                         /*}}}*/