apt-pkg/contrib/strutl.cc

   1 // -*- mode: cpp; mode: fold -*-
   2 // Description                                                          /*{{{*/
   3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
   4 /* ######################################################################
   5
   6    String Util - Some useful string functions.
   7
   8    These have been collected from here and there to do all sorts of useful
   9    things to strings. They are useful in file parsers, URI handlers and
  10    especially in APT methods.
  11
  12    This source is placed in the Public Domain, do with it what you will
  13    It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
  14
  15    ##################################################################### */
  16                                                                         /*}}}*/
  17 // Includes                                                             /*{{{*/
  18 #include <apt-pkg/strutl.h>
  19 #include <apt-pkg/fileutl.h>
  20 #include <apt-pkg/error.h>
  21
  22 #include <apti18n.h>
  23
  24 #include <ctype.h>
  25 #include <string.h>
  26 #include <stdio.h>
  27 #include <algorithm>
  28 #include <unistd.h>
  29 #include <regex.h>
  30 #include <errno.h>
  31 #include <stdarg.h>
  32 #include <iconv.h>
  33
  34 #include "config.h"
  35
  36 using namespace std;
  37                                                                         /*}}}*/
  38
  39 // UTF8ToCodeset - Convert some UTF-8 string for some codeset           /*{{{*/
  40 // ---------------------------------------------------------------------
  41 /* This is handy to use before display some information for enduser  */
  42 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
  43 {
  44   iconv_t cd;
  45   const char *inbuf;
  46   char *inptr, *outbuf;
  47   size_t insize, bufsize;
  48   dest->clear();
  49
  50   cd = iconv_open(codeset, "UTF-8");
  51   if (cd == (iconv_t)(-1)) {
  52      // Something went wrong
  53      if (errno == EINVAL)
  54         _error->Error("conversion from 'UTF-8' to '%s' not available",
  55                codeset);
  56      else
  57         perror("iconv_open");
  58
  59      return false;
  60   }
  61
  62   insize = bufsize = orig.size();
  63   inbuf = orig.data();
  64   inptr = (char *)inbuf;
  65   outbuf = new char[bufsize];
  66   size_t lastError = -1;
  67
  68   while (insize != 0)
  69   {
  70      char *outptr = outbuf;
  71      size_t outsize = bufsize;
  72      size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
  73      dest->append(outbuf, outptr - outbuf);
  74      if (err == (size_t)(-1))
  75      {
  76         switch (errno)
  77         {
  78         case EILSEQ:
  79            insize--;
  80            inptr++;
  81            // replace a series of unknown multibytes with a single "?"
  82            if (lastError != insize) {
  83               lastError = insize - 1;
  84               dest->append("?");
  85            }
  86            break;
  87         case EINVAL:
  88            insize = 0;
  89            break;
  90         case E2BIG:
  91            if (outptr == outbuf)
  92            {
  93               bufsize *= 2;
  94               delete[] outbuf;
  95               outbuf = new char[bufsize];
  96            }
  97            break;
  98         }
  99      }
 100   }
 101
 102   delete[] outbuf;
 103
 104   iconv_close(cd);
 105
 106   return true;
 107 }
 108                                                                         /*}}}*/
 109 // strstrip - Remove white space from the front and back of a string    /*{{{*/
 110 // ---------------------------------------------------------------------
 111 /* This is handy to use when parsing a file. It also removes \n's left
 112    over from fgets and company */
 113 char *_strstrip(char *String)
 114 {
 115    for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
 116
 117    if (*String == 0)
 118       return String;
 119
 120    char *End = String + strlen(String) - 1;
 121    for (;End != String - 1 && (*End == ' ' || *End == '\t' || *End == '\n' ||
 122                                *End == '\r'); End--);
 123    End++;
 124    *End = 0;
 125    return String;
 126 };
 127                                                                         /*}}}*/
 128 // strtabexpand - Converts tabs into 8 spaces                           /*{{{*/
 129 // ---------------------------------------------------------------------
 130 /* */
 131 char *_strtabexpand(char *String,size_t Len)
 132 {
 133    for (char *I = String; I != I + Len && *I != 0; I++)
 134    {
 135       if (*I != '\t')
 136          continue;
 137       if (I + 8 > String + Len)
 138       {
 139          *I = 0;
 140          return String;
 141       }
 142
 143       /* Assume the start of the string is 0 and find the next 8 char
 144          division */
 145       int Len;
 146       if (String == I)
 147          Len = 1;
 148       else
 149          Len = 8 - ((String - I) % 8);
 150       Len -= 2;
 151       if (Len <= 0)
 152       {
 153          *I = ' ';
 154          continue;
 155       }
 156
 157       memmove(I + Len,I + 1,strlen(I) + 1);
 158       for (char *J = I; J + Len != I; *I = ' ', I++);
 159    }
 160    return String;
 161 }
 162                                                                         /*}}}*/
 163 // ParseQuoteWord - Parse a single word out of a string                 /*{{{*/
 164 // ---------------------------------------------------------------------
 165 /* This grabs a single word, converts any % escaped characters to their
 166    proper values and advances the pointer. Double quotes are understood
 167    and striped out as well. This is for URI/URL parsing. It also can
 168    understand [] brackets.*/
 169 bool ParseQuoteWord(const char *&String,string &Res)
 170 {
 171    // Skip leading whitespace
 172    const char *C = String;
 173    for (;*C != 0 && *C == ' '; C++);
 174    if (*C == 0)
 175       return false;
 176
 177    // Jump to the next word
 178    for (;*C != 0 && isspace(*C) == 0; C++)
 179    {
 180       if (*C == '"')
 181       {
 182          for (C++; *C != 0 && *C != '"'; C++);
 183          if (*C == 0)
 184             return false;
 185       }
 186       if (*C == '[')
 187       {
 188          for (C++; *C != 0 && *C != ']'; C++);
 189          if (*C == 0)
 190             return false;
 191       }
 192    }
 193
 194    // Now de-quote characters
 195    char Buffer[1024];
 196    char Tmp[3];
 197    const char *Start = String;
 198    char *I;
 199    for (I = Buffer; I < Buffer + sizeof(Buffer) && Start != C; I++)
 200    {
 201       if (*Start == '%' && Start + 2 < C)
 202       {
 203          Tmp[0] = Start[1];
 204          Tmp[1] = Start[2];
 205          Tmp[2] = 0;
 206          *I = (char)strtol(Tmp,0,16);
 207          Start += 3;
 208          continue;
 209       }
 210       if (*Start != '"')
 211          *I = *Start;
 212       else
 213          I--;
 214       Start++;
 215    }
 216    *I = 0;
 217    Res = Buffer;
 218
 219    // Skip ending white space
 220    for (;*C != 0 && isspace(*C) != 0; C++);
 221    String = C;
 222    return true;
 223 }
 224                                                                         /*}}}*/
 225 // ParseCWord - Parses a string like a C "" expression                  /*{{{*/
 226 // ---------------------------------------------------------------------
 227 /* This expects a series of space separated strings enclosed in ""'s.
 228    It concatenates the ""'s into a single string. */
 229 bool ParseCWord(const char *&String,string &Res)
 230 {
 231    // Skip leading whitespace
 232    const char *C = String;
 233    for (;*C != 0 && *C == ' '; C++);
 234    if (*C == 0)
 235       return false;
 236
 237    char Buffer[1024];
 238    char *Buf = Buffer;
 239    if (strlen(String) >= sizeof(Buffer))
 240        return false;
 241
 242    for (; *C != 0; C++)
 243    {
 244       if (*C == '"')
 245       {
 246          for (C++; *C != 0 && *C != '"'; C++)
 247             *Buf++ = *C;
 248
 249          if (*C == 0)
 250             return false;
 251
 252          continue;
 253       }
 254
 255       if (C != String && isspace(*C) != 0 && isspace(C[-1]) != 0)
 256          continue;
 257       if (isspace(*C) == 0)
 258          return false;
 259       *Buf++ = ' ';
 260    }
 261    *Buf = 0;
 262    Res = Buffer;
 263    String = C;
 264    return true;
 265 }
 266                                                                         /*}}}*/
 267 // QuoteString - Convert a string into quoted from                      /*{{{*/
 268 // ---------------------------------------------------------------------
 269 /* */
 270 string QuoteString(const string &Str, const char *Bad)
 271 {
 272    string Res;
 273    for (string::const_iterator I = Str.begin(); I != Str.end(); I++)
 274    {
 275       if (strchr(Bad,*I) != 0 || isprint(*I) == 0 ||
 276           *I <= 0x20 || *I >= 0x7F)
 277       {
 278          char Buf[10];
 279          sprintf(Buf,"%%%02x",(int)*I);
 280          Res += Buf;
 281       }
 282       else
 283          Res += *I;
 284    }
 285    return Res;
 286 }
 287                                                                         /*}}}*/
 288 // DeQuoteString - Convert a string from quoted from                    /*{{{*/
 289 // ---------------------------------------------------------------------
 290 /* This undoes QuoteString */
 291 string DeQuoteString(const string &Str)
 292 {
 293    string Res;
 294    for (string::const_iterator I = Str.begin(); I != Str.end(); I++)
 295    {
 296       if (*I == '%' && I + 2 < Str.end())
 297       {
 298          char Tmp[3];
 299          Tmp[0] = I[1];
 300          Tmp[1] = I[2];
 301          Tmp[2] = 0;
 302          Res += (char)strtol(Tmp,0,16);
 303          I += 2;
 304          continue;
 305       }
 306       else
 307          Res += *I;
 308    }
 309    return Res;
 310 }
 311
 312                                                                         /*}}}*/
 313 // SizeToStr - Convert a long into a human readable size                /*{{{*/
 314 // ---------------------------------------------------------------------
 315 /* A max of 4 digits are shown before conversion to the next highest unit.
 316    The max length of the string will be 5 chars unless the size is > 10
 317    YottaBytes (E24) */
 318 string SizeToStr(double Size)
 319 {
 320    char S[300];
 321    double ASize;
 322    if (Size >= 0)
 323       ASize = Size;
 324    else
 325       ASize = -1*Size;
 326
 327    /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
 328       ExaBytes, ZettaBytes, YottaBytes */
 329    char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
 330    int I = 0;
 331    while (I <= 8)
 332    {
 333       if (ASize < 100 && I != 0)
 334       {
 335          sprintf(S,"%'.1f%c",ASize,Ext[I]);
 336          break;
 337       }
 338
 339       if (ASize < 10000)
 340       {
 341          sprintf(S,"%'.0f%c",ASize,Ext[I]);
 342          break;
 343       }
 344       ASize /= 1000.0;
 345       I++;
 346    }
 347
 348    return S;
 349 }
 350                                                                         /*}}}*/
 351 // TimeToStr - Convert the time into a string                           /*{{{*/
 352 // ---------------------------------------------------------------------
 353 /* Converts a number of seconds to a hms format */
 354 string TimeToStr(unsigned long Sec)
 355 {
 356    char S[300];
 357
 358    while (1)
 359    {
 360       if (Sec > 60*60*24)
 361       {
 362          //d means days, h means hours, min means minutes, s means seconds
 363          sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
 364          break;
 365       }
 366
 367       if (Sec > 60*60)
 368       {
 369          //h means hours, min means minutes, s means seconds
 370          sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
 371          break;
 372       }
 373
 374       if (Sec > 60)
 375       {
 376          //min means minutes, s means seconds
 377          sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
 378          break;
 379       }
 380
 381       //s means seconds
 382       sprintf(S,_("%lis"),Sec);
 383       break;
 384    }
 385
 386    return S;
 387 }
 388                                                                         /*}}}*/
 389 // SubstVar - Substitute a string for another string                    /*{{{*/
 390 // ---------------------------------------------------------------------
 391 /* This replaces all occurances of Subst with Contents in Str. */
 392 string SubstVar(const string &Str,const string &Subst,const string &Contents)
 393 {
 394    string::size_type Pos = 0;
 395    string::size_type OldPos = 0;
 396    string Temp;
 397
 398    while (OldPos < Str.length() &&
 399           (Pos = Str.find(Subst,OldPos)) != string::npos)
 400    {
 401       Temp += string(Str,OldPos,Pos) + Contents;
 402       OldPos = Pos + Subst.length();
 403    }
 404
 405    if (OldPos == 0)
 406       return Str;
 407
 408    return Temp + string(Str,OldPos);
 409 }
 410
 411 string SubstVar(string Str,const struct SubstVar *Vars)
 412 {
 413    for (; Vars->Subst != 0; Vars++)
 414       Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
 415    return Str;
 416 }
 417                                                                         /*}}}*/
 418 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
 419 // ---------------------------------------------------------------------
 420 /* Returns a string with the supplied separator depth + 1 times in it */
 421 std::string OutputInDepth(const unsigned long Depth, const char* Separator)
 422 {
 423    std::string output = "";
 424    for(unsigned long d=Depth+1; d > 0; d--)
 425       output.append(Separator);
 426    return output;
 427 }
 428                                                                         /*}}}*/
 429 // URItoFileName - Convert the uri into a unique file name              /*{{{*/
 430 // ---------------------------------------------------------------------
 431 /* This converts a URI into a safe filename. It quotes all unsafe characters
 432    and converts / to _ and removes the scheme identifier. The resulting
 433    file name should be unique and never occur again for a different file */
 434 string URItoFileName(const string &URI)
 435 {
 436    // Nuke 'sensitive' items
 437    ::URI U(URI);
 438    U.User.clear();
 439    U.Password.clear();
 440    U.Access.clear();
 441
 442    // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
 443    string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
 444    replace(NewURI.begin(),NewURI.end(),'/','_');
 445    return NewURI;
 446 }
 447                                                                         /*}}}*/
 448 // Base64Encode - Base64 Encoding routine for short strings             /*{{{*/
 449 // ---------------------------------------------------------------------
 450 /* This routine performs a base64 transformation on a string. It was ripped
 451    from wget and then patched and bug fixed.
 452
 453    This spec can be found in rfc2045 */
 454 string Base64Encode(const string &S)
 455 {
 456    // Conversion table.
 457    static char tbl[64] = {'A','B','C','D','E','F','G','H',
 458                           'I','J','K','L','M','N','O','P',
 459                           'Q','R','S','T','U','V','W','X',
 460                           'Y','Z','a','b','c','d','e','f',
 461                           'g','h','i','j','k','l','m','n',
 462                           'o','p','q','r','s','t','u','v',
 463                           'w','x','y','z','0','1','2','3',
 464                           '4','5','6','7','8','9','+','/'};
 465
 466    // Pre-allocate some space
 467    string Final;
 468    Final.reserve((4*S.length() + 2)/3 + 2);
 469
 470    /* Transform the 3x8 bits to 4x6 bits, as required by
 471       base64.  */
 472    for (string::const_iterator I = S.begin(); I < S.end(); I += 3)
 473    {
 474       char Bits[3] = {0,0,0};
 475       Bits[0] = I[0];
 476       if (I + 1 < S.end())
 477          Bits[1] = I[1];
 478       if (I + 2 < S.end())
 479          Bits[2] = I[2];
 480
 481       Final += tbl[Bits[0] >> 2];
 482       Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];
 483
 484       if (I + 1 >= S.end())
 485          break;
 486
 487       Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];
 488
 489       if (I + 2 >= S.end())
 490          break;
 491
 492       Final += tbl[Bits[2] & 0x3f];
 493    }
 494
 495    /* Apply the padding elements, this tells how many bytes the remote
 496       end should discard */
 497    if (S.length() % 3 == 2)
 498       Final += '=';
 499    if (S.length() % 3 == 1)
 500       Final += "==";
 501
 502    return Final;
 503 }
 504                                                                         /*}}}*/
 505 // stringcmp - Arbitrary string compare                                 /*{{{*/
 506 // ---------------------------------------------------------------------
 507 /* This safely compares two non-null terminated strings of arbitrary
 508    length */
 509 int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 510 {
 511    for (; A != AEnd && B != BEnd; A++, B++)
 512       if (*A != *B)
 513          break;
 514
 515    if (A == AEnd && B == BEnd)
 516       return 0;
 517    if (A == AEnd)
 518       return 1;
 519    if (B == BEnd)
 520       return -1;
 521    if (*A < *B)
 522       return -1;
 523    return 1;
 524 }
 525
 526 #if __GNUC__ >= 3
 527 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 528               const char *B,const char *BEnd)
 529 {
 530    for (; A != AEnd && B != BEnd; A++, B++)
 531       if (*A != *B)
 532          break;
 533
 534    if (A == AEnd && B == BEnd)
 535       return 0;
 536    if (A == AEnd)
 537       return 1;
 538    if (B == BEnd)
 539       return -1;
 540    if (*A < *B)
 541       return -1;
 542    return 1;
 543 }
 544 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 545               string::const_iterator B,string::const_iterator BEnd)
 546 {
 547    for (; A != AEnd && B != BEnd; A++, B++)
 548       if (*A != *B)
 549          break;
 550
 551    if (A == AEnd && B == BEnd)
 552       return 0;
 553    if (A == AEnd)
 554       return 1;
 555    if (B == BEnd)
 556       return -1;
 557    if (*A < *B)
 558       return -1;
 559    return 1;
 560 }
 561 #endif
 562                                                                         /*}}}*/
 563 // stringcasecmp - Arbitrary case insensitive string compare            /*{{{*/
 564 // ---------------------------------------------------------------------
 565 /* */
 566 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 567 {
 568    for (; A != AEnd && B != BEnd; A++, B++)
 569       if (toupper(*A) != toupper(*B))
 570          break;
 571
 572    if (A == AEnd && B == BEnd)
 573       return 0;
 574    if (A == AEnd)
 575       return 1;
 576    if (B == BEnd)
 577       return -1;
 578    if (toupper(*A) < toupper(*B))
 579       return -1;
 580    return 1;
 581 }
 582 #if __GNUC__ >= 3
 583 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 584                   const char *B,const char *BEnd)
 585 {
 586    for (; A != AEnd && B != BEnd; A++, B++)
 587       if (toupper(*A) != toupper(*B))
 588          break;
 589
 590    if (A == AEnd && B == BEnd)
 591       return 0;
 592    if (A == AEnd)
 593       return 1;
 594    if (B == BEnd)
 595       return -1;
 596    if (toupper(*A) < toupper(*B))
 597       return -1;
 598    return 1;
 599 }
 600 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 601                   string::const_iterator B,string::const_iterator BEnd)
 602 {
 603    for (; A != AEnd && B != BEnd; A++, B++)
 604       if (toupper(*A) != toupper(*B))
 605          break;
 606
 607    if (A == AEnd && B == BEnd)
 608       return 0;
 609    if (A == AEnd)
 610       return 1;
 611    if (B == BEnd)
 612       return -1;
 613    if (toupper(*A) < toupper(*B))
 614       return -1;
 615    return 1;
 616 }
 617 #endif
 618                                                                         /*}}}*/
 619 // LookupTag - Lookup the value of a tag in a taged string              /*{{{*/
 620 // ---------------------------------------------------------------------
 621 /* The format is like those used in package files and the method
 622    communication system */
 623 string LookupTag(const string &Message,const char *Tag,const char *Default)
 624 {
 625    // Look for a matching tag.
 626    int Length = strlen(Tag);
 627    for (string::const_iterator I = Message.begin(); I + Length < Message.end(); I++)
 628    {
 629       // Found the tag
 630       if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
 631       {
 632          // Find the end of line and strip the leading/trailing spaces
 633          string::const_iterator J;
 634          I += Length + 1;
 635          for (; isspace(*I) != 0 && I < Message.end(); I++);
 636          for (J = I; *J != '\n' && J < Message.end(); J++);
 637          for (; J > I && isspace(J[-1]) != 0; J--);
 638
 639          return string(I,J);
 640       }
 641
 642       for (; *I != '\n' && I < Message.end(); I++);
 643    }
 644
 645    // Failed to find a match
 646    if (Default == 0)
 647       return string();
 648    return Default;
 649 }
 650                                                                         /*}}}*/
 651 // StringToBool - Converts a string into a boolean                      /*{{{*/
 652 // ---------------------------------------------------------------------
 653 /* This inspects the string to see if it is true or if it is false and
 654    then returns the result. Several varients on true/false are checked. */
 655 int StringToBool(const string &Text,int Default)
 656 {
 657    char *End;
 658    int Res = strtol(Text.c_str(),&End,0);
 659    if (End != Text.c_str() && Res >= 0 && Res <= 1)
 660       return Res;
 661
 662    // Check for positives
 663    if (strcasecmp(Text.c_str(),"no") == 0 ||
 664        strcasecmp(Text.c_str(),"false") == 0 ||
 665        strcasecmp(Text.c_str(),"without") == 0 ||
 666        strcasecmp(Text.c_str(),"off") == 0 ||
 667        strcasecmp(Text.c_str(),"disable") == 0)
 668       return 0;
 669
 670    // Check for negatives
 671    if (strcasecmp(Text.c_str(),"yes") == 0 ||
 672        strcasecmp(Text.c_str(),"true") == 0 ||
 673        strcasecmp(Text.c_str(),"with") == 0 ||
 674        strcasecmp(Text.c_str(),"on") == 0 ||
 675        strcasecmp(Text.c_str(),"enable") == 0)
 676       return 1;
 677
 678    return Default;
 679 }
 680                                                                         /*}}}*/
 681 // TimeRFC1123 - Convert a time_t into RFC1123 format                   /*{{{*/
 682 // ---------------------------------------------------------------------
 683 /* This converts a time_t into a string time representation that is
 684    year 2000 complient and timezone neutral */
 685 string TimeRFC1123(time_t Date)
 686 {
 687    struct tm Conv = *gmtime(&Date);
 688    char Buf[300];
 689
 690    const char *Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
 691    const char *Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
 692                           "Aug","Sep","Oct","Nov","Dec"};
 693
 694    sprintf(Buf,"%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
 695            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
 696            Conv.tm_min,Conv.tm_sec);
 697    return Buf;
 698 }
 699                                                                         /*}}}*/
 700 // ReadMessages - Read messages from the FD                             /*{{{*/
 701 // ---------------------------------------------------------------------
 702 /* This pulls full messages from the input FD into the message buffer.
 703    It assumes that messages will not pause during transit so no
 704    fancy buffering is used.
 705
 706    In particular: this reads blocks from the input until it believes
 707    that it's run out of input text.  Each block is terminated by a
 708    double newline ('\n' followed by '\n').  As noted below, there is a
 709    bug in this code: it assumes that all the blocks have been read if
 710    it doesn't see additional text in the buffer after the last one is
 711    parsed, which will cause it to lose blocks if the last block
 712    coincides with the end of the buffer.
 713  */
 714 bool ReadMessages(int Fd, vector<string> &List)
 715 {
 716    char Buffer[64000];
 717    char *End = Buffer;
 718    // Represents any left-over from the previous iteration of the
 719    // parse loop.  (i.e., if a message is split across the end
 720    // of the buffer, it goes here)
 721    string PartialMessage;
 722
 723    while (1)
 724    {
 725       int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
 726       if (Res < 0 && errno == EINTR)
 727          continue;
 728
 729       // Process is dead, this is kind of bad..
 730       if (Res == 0)
 731          return false;
 732
 733       // No data
 734       if (Res < 0 && errno == EAGAIN)
 735          return true;
 736       if (Res < 0)
 737          return false;
 738
 739       End += Res;
 740
 741       // Look for the end of the message
 742       for (char *I = Buffer; I + 1 < End; I++)
 743       {
 744          if (I[0] != '\n' || I[1] != '\n')
 745             continue;
 746
 747          // Pull the message out
 748          string Message(Buffer,I-Buffer);
 749          PartialMessage += Message;
 750
 751          // Fix up the buffer
 752          for (; I < End && *I == '\n'; I++);
 753          End -= I-Buffer;
 754          memmove(Buffer,I,End-Buffer);
 755          I = Buffer;
 756
 757          List.push_back(PartialMessage);
 758          PartialMessage.clear();
 759       }
 760       if (End != Buffer)
 761         {
 762           // If there's text left in the buffer, store it
 763           // in PartialMessage and throw the rest of the buffer
 764           // away.  This allows us to handle messages that
 765           // are longer than the static buffer size.
 766           PartialMessage += string(Buffer, End);
 767           End = Buffer;
 768         }
 769       else
 770         {
 771           // BUG ALERT: if a message block happens to end at a
 772           // multiple of 64000 characters, this will cause it to
 773           // terminate early, leading to a badly formed block and
 774           // probably crashing the method.  However, this is the only
 775           // way we have to find the end of the message block.  I have
 776           // an idea of how to fix this, but it will require changes
 777           // to the protocol (essentially to mark the beginning and
 778           // end of the block).
 779           //
 780           //  -- dburrows 2008-04-02
 781           return true;
 782         }
 783
 784       if (WaitFd(Fd) == false)
 785          return false;
 786    }
 787 }
 788                                                                         /*}}}*/
 789 // MonthConv - Converts a month string into a number                    /*{{{*/
 790 // ---------------------------------------------------------------------
 791 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
 792    Made it a bit more robust with a few touppers though. */
 793 static int MonthConv(char *Month)
 794 {
 795    switch (toupper(*Month))
 796    {
 797       case 'A':
 798       return toupper(Month[1]) == 'P'?3:7;
 799       case 'D':
 800       return 11;
 801       case 'F':
 802       return 1;
 803       case 'J':
 804       if (toupper(Month[1]) == 'A')
 805          return 0;
 806       return toupper(Month[2]) == 'N'?5:6;
 807       case 'M':
 808       return toupper(Month[2]) == 'R'?2:4;
 809       case 'N':
 810       return 10;
 811       case 'O':
 812       return 9;
 813       case 'S':
 814       return 8;
 815
 816       // Pretend it is January..
 817       default:
 818       return 0;
 819    }
 820 }
 821                                                                         /*}}}*/
 822 // timegm - Internal timegm function if gnu is not available            /*{{{*/
 823 // ---------------------------------------------------------------------
 824 /* Ripped this evil little function from wget - I prefer the use of
 825    GNU timegm if possible as this technique will have interesting problems
 826    with leap seconds, timezones and other.
 827
 828    Converts struct tm to time_t, assuming the data in tm is UTC rather
 829    than local timezone (mktime assumes the latter).
 830
 831    Contributed by Roger Beeman <beeman@cisco.com>, with the help of
 832    Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO. */
 833
 834 /* Turned it into an autoconf check, because GNU is not the only thing which
 835    can provide timegm. -- 2002-09-22, Joel Baker */
 836
 837 #ifndef HAVE_TIMEGM // Now with autoconf!
 838 static time_t timegm(struct tm *t)
 839 {
 840    time_t tl, tb;
 841
 842    tl = mktime (t);
 843    if (tl == -1)
 844       return -1;
 845    tb = mktime (gmtime (&tl));
 846    return (tl <= tb ? (tl + (tl - tb)) : (tl - (tb - tl)));
 847 }
 848 #endif
 849                                                                         /*}}}*/
 850 // StrToTime - Converts a string into a time_t                          /*{{{*/
 851 // ---------------------------------------------------------------------
 852 /* This handles all 3 populare time formats including RFC 1123, RFC 1036
 853    and the C library asctime format. It requires the GNU library function
 854    'timegm' to convert a struct tm in UTC to a time_t. For some bizzar
 855    reason the C library does not provide any such function :< This also
 856    handles the weird, but unambiguous FTP time format*/
 857 bool StrToTime(const string &Val,time_t &Result)
 858 {
 859    struct tm Tm;
 860    char Month[10];
 861    const char *I = Val.c_str();
 862
 863    // Skip the day of the week
 864    for (;*I != 0  && *I != ' '; I++);
 865
 866    // Handle RFC 1123 time
 867    Month[0] = 0;
 868    if (sscanf(I," %d %3s %d %d:%d:%d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
 869               &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 870    {
 871       // Handle RFC 1036 time
 872       if (sscanf(I," %d-%3s-%d %d:%d:%d GMT",&Tm.tm_mday,Month,
 873                  &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
 874          Tm.tm_year += 1900;
 875       else
 876       {
 877          // asctime format
 878          if (sscanf(I," %3s %d %d:%d:%d %d",Month,&Tm.tm_mday,
 879                     &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
 880          {
 881             // 'ftp' time
 882             if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
 883                        &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 884                return false;
 885             Tm.tm_mon--;
 886          }
 887       }
 888    }
 889
 890    Tm.tm_isdst = 0;
 891    if (Month[0] != 0)
 892       Tm.tm_mon = MonthConv(Month);
 893    Tm.tm_year -= 1900;
 894
 895    // Convert to local time and then to GMT
 896    Result = timegm(&Tm);
 897    return true;
 898 }
 899                                                                         /*}}}*/
 900 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
 901 // ---------------------------------------------------------------------
 902 /* This is used in decoding the crazy fixed length string headers in
 903    tar and ar files. */
 904 bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
 905 {
 906    char S[30];
 907    if (Len >= sizeof(S))
 908       return false;
 909    memcpy(S,Str,Len);
 910    S[Len] = 0;
 911
 912    // All spaces is a zero
 913    Res = 0;
 914    unsigned I;
 915    for (I = 0; S[I] == ' '; I++);
 916    if (S[I] == 0)
 917       return true;
 918
 919    char *End;
 920    Res = strtoul(S,&End,Base);
 921    if (End == S)
 922       return false;
 923
 924    return true;
 925 }
 926                                                                         /*}}}*/
 927 // HexDigit - Convert a hex character into an integer                   /*{{{*/
 928 // ---------------------------------------------------------------------
 929 /* Helper for Hex2Num */
 930 static int HexDigit(int c)
 931 {
 932    if (c >= '0' && c <= '9')
 933       return c - '0';
 934    if (c >= 'a' && c <= 'f')
 935       return c - 'a' + 10;
 936    if (c >= 'A' && c <= 'F')
 937       return c - 'A' + 10;
 938    return 0;
 939 }
 940                                                                         /*}}}*/
 941 // Hex2Num - Convert a long hex number into a buffer                    /*{{{*/
 942 // ---------------------------------------------------------------------
 943 /* The length of the buffer must be exactly 1/2 the length of the string. */
 944 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
 945 {
 946    if (Str.length() != Length*2)
 947       return false;
 948
 949    // Convert each digit. We store it in the same order as the string
 950    int J = 0;
 951    for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
 952    {
 953       if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
 954          return false;
 955
 956       Num[J] = HexDigit(I[0]) << 4;
 957       Num[J] += HexDigit(I[1]);
 958    }
 959
 960    return true;
 961 }
 962                                                                         /*}}}*/
 963 // TokSplitString - Split a string up by a given token                  /*{{{*/
 964 // ---------------------------------------------------------------------
 965 /* This is intended to be a faster splitter, it does not use dynamic
 966    memories. Input is changed to insert nulls at each token location. */
 967 bool TokSplitString(char Tok,char *Input,char **List,
 968                     unsigned long ListMax)
 969 {
 970    // Strip any leading spaces
 971    char *Start = Input;
 972    char *Stop = Start + strlen(Start);
 973    for (; *Start != 0 && isspace(*Start) != 0; Start++);
 974
 975    unsigned long Count = 0;
 976    char *Pos = Start;
 977    while (Pos != Stop)
 978    {
 979       // Skip to the next Token
 980       for (; Pos != Stop && *Pos != Tok; Pos++);
 981
 982       // Back remove spaces
 983       char *End = Pos;
 984       for (; End > Start && (End[-1] == Tok || isspace(End[-1]) != 0); End--);
 985       *End = 0;
 986
 987       List[Count++] = Start;
 988       if (Count >= ListMax)
 989       {
 990          List[Count-1] = 0;
 991          return false;
 992       }
 993
 994       // Advance pos
 995       for (; Pos != Stop && (*Pos == Tok || isspace(*Pos) != 0 || *Pos == 0); Pos++);
 996       Start = Pos;
 997    }
 998
 999    List[Count] = 0;
1000    return true;
1001 }
1002                                                                         /*}}}*/
1003 // ExplodeString - Split a string up into a vector                      /*{{{*/
1004 // ---------------------------------------------------------------------
/* This can be used to split a given string up into a vector, so the
   purpose is the same as in the method above. This one is a bit slower,
   but the advantage is that we get an iterable vector */
vector<string> ExplodeString(string const &haystack, char const &split)
{
   /* Returns the pieces of haystack found between occurrences of split.
      Empty pieces are kept, with one exception: a separator that is the
      very last character does not produce a trailing empty piece. An
      empty haystack therefore yields one empty string and "a," yields
      just "a". */
   vector<string> exploded;
   string::const_iterator const last = haystack.end();
   string::const_iterator start = haystack.begin();
   for (;;)
   {
      // Find the end of the current piece
      string::const_iterator stop = start;
      for (; stop != last && *stop != split; ++stop);
      exploded.push_back(string(start, stop));

      /* Only step over the separator when there is one; moving an
         iterator beyond end() is undefined behaviour. */
      if (stop == last)
         break;
      start = stop + 1;

      // A separator at the very end adds no further piece
      if (start == last)
         break;
   }
   return exploded;
}
1020                                                                         /*}}}*/
1021 // RegexChoice - Simple regex list/list matcher                         /*{{{*/
1022 // ---------------------------------------------------------------------
1023 /* */
1024 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1025                       const char **ListEnd)
1026 {
1027    for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1028       R->Hit = false;
1029
1030    unsigned long Hits = 0;
1031    for (; ListBegin != ListEnd; ListBegin++)
1032    {
1033       // Check if the name is a regex
1034       const char *I;
1035       bool Regex = true;
1036       for (I = *ListBegin; *I != 0; I++)
1037          if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1038             break;
1039       if (*I == 0)
1040          Regex = false;
1041
1042       // Compile the regex pattern
1043       regex_t Pattern;
1044       if (Regex == true)
1045          if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1046                      REG_NOSUB) != 0)
1047             Regex = false;
1048
1049       // Search the list
1050       bool Done = false;
1051       for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1052       {
1053          if (R->Str[0] == 0)
1054             continue;
1055
1056          if (strcasecmp(R->Str,*ListBegin) != 0)
1057          {
1058             if (Regex == false)
1059                continue;
1060             if (regexec(&Pattern,R->Str,0,0,0) != 0)
1061                continue;
1062          }
1063          Done = true;
1064
1065          if (R->Hit == false)
1066             Hits++;
1067
1068          R->Hit = true;
1069       }
1070
1071       if (Regex == true)
1072          regfree(&Pattern);
1073
1074       if (Done == false)
1075          _error->Warning(_("Selection %s not found"),*ListBegin);
1076    }
1077
1078    return Hits;
1079 }
1080                                                                         /*}}}*/
1081 // ioprintf - C format string outputter to C++ iostreams                /*{{{*/
1082 // ---------------------------------------------------------------------
1083 /* This is used to make the internationalization strings easier to translate
1084    and to allow reordering of parameters */
void ioprintf(ostream &out,const char *format,...)
{
   /* Formats the arguments printf style and writes the result to out.
      The text is built in a fixed 4096 byte buffer, so anything longer
      than 4095 characters is silently cut off. Nothing is written if
      the formatting itself fails. */
   char S[4096];

   va_list args;
   va_start(args,format);
   int const Did = vsnprintf(S,sizeof(S),format,args);
   // Every va_start must be matched by a va_end before returning
   va_end(args);

   // The buffer contents are not defined after an output error
   if (Did < 0)
      return;

   out << S;
}
1095                                                                         /*}}}*/
1096 // strprintf - C format string outputter to C++ strings                 /*{{{*/
1097 // ---------------------------------------------------------------------
1098 /* This is used to make the internationalization strings easier to translate
1099    and to allow reordering of parameters */
void strprintf(string &out,const char *format,...)
{
   /* Formats the arguments printf style and assigns the result to out,
      replacing its previous contents. The text is built in a fixed 4096
      byte buffer, so anything longer than 4095 characters is silently
      cut off. out becomes empty if the formatting itself fails. */
   char S[4096];

   va_list args;
   va_start(args,format);
   int const Did = vsnprintf(S,sizeof(S),format,args);
   // Every va_start must be matched by a va_end before returning
   va_end(args);

   // The buffer contents are not defined after an output error
   if (Did < 0)
      S[0] = 0;

   out = S;
}
1110                                                                         /*}}}*/
1111 // safe_snprintf - Safer snprintf                                       /*{{{*/
1112 // ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == End. This is a better alternative to using
   consecutive snprintfs. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   /* Formats into [Buffer,End) and returns a pointer to the terminating
      null of the result, so the return value can be passed straight in
      as Buffer of the next call. End is returned when there is no room
      left (End <= Buffer), when the output had to be truncated or when
      formatting failed. */
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   /* vsnprintf reports errors as a negative int, so the result has to be
      kept in a signed type for the check below to mean anything. */
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   // On truncation Did is the length the full text would have had;
   // compare lengths instead of forming a pointer beyond End
   if (Did < 0 || Did >= End - Buffer)
      return End;
   return Buffer + Did;
}
1132                                                                         /*}}}*/
1133
1134 // tolower_ascii - tolower() function that ignores the locale           /*{{{*/
1135 // ---------------------------------------------------------------------
1136 /* */
int tolower_ascii(int c)
{
   // Only the 26 ASCII capitals are mapped, everything else (including
   // values outside the char range) is handed back unchanged
   bool const IsUpper = (c >= 'A' && c <= 'Z');
   return IsUpper ? c + ('a' - 'A') : c;
}
1143                                                                         /*}}}*/
1144
1145 // CheckDomainList - See if Host is in a , seperate list                /*{{{*/
1146 // ---------------------------------------------------------------------
/* The domain list is a comma separated list of domains that are suffix
   matched against the argument */
1149 bool CheckDomainList(const string &Host,const string &List)
1150 {
1151    string::const_iterator Start = List.begin();
1152    for (string::const_iterator Cur = List.begin(); Cur <= List.end(); Cur++)
1153    {
1154       if (Cur < List.end() && *Cur != ',')
1155          continue;
1156
1157       // Match the end of the string..
1158       if ((Host.size() >= (unsigned)(Cur - Start)) &&
1159           Cur - Start != 0 &&
1160           stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1161          return true;
1162
1163       Start = Cur + 1;
1164    }
1165    return false;
1166 }
1167                                                                         /*}}}*/
1168
1169 // URI::CopyFrom - Copy from an object                                  /*{{{*/
1170 // ---------------------------------------------------------------------
1171 /* This parses the URI into all of its components */
1172 void URI::CopyFrom(const string &U)
1173 {
1174    string::const_iterator I = U.begin();
1175
1176    // Locate the first colon, this separates the scheme
1177    for (; I < U.end() && *I != ':' ; I++);
1178    string::const_iterator FirstColon = I;
1179
1180    /* Determine if this is a host type URI with a leading double //
1181       and then search for the first single / */
1182    string::const_iterator SingleSlash = I;
1183    if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1184       SingleSlash += 3;
1185
1186    /* Find the / indicating the end of the hostname, ignoring /'s in the
1187       square brackets */
1188    bool InBracket = false;
1189    for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); SingleSlash++)
1190    {
1191       if (*SingleSlash == '[')
1192          InBracket = true;
1193       if (InBracket == true && *SingleSlash == ']')
1194          InBracket = false;
1195    }
1196
1197    if (SingleSlash > U.end())
1198       SingleSlash = U.end();
1199
1200    // We can now write the access and path specifiers
1201    Access.assign(U.begin(),FirstColon);
1202    if (SingleSlash != U.end())
1203       Path.assign(SingleSlash,U.end());
1204    if (Path.empty() == true)
1205       Path = "/";
1206
1207    // Now we attempt to locate a user:pass@host fragment
1208    if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1209       FirstColon += 3;
1210    else
1211       FirstColon += 1;
1212    if (FirstColon >= U.end())
1213       return;
1214
1215    if (FirstColon > SingleSlash)
1216       FirstColon = SingleSlash;
1217
1218    // Find the colon...
1219    I = FirstColon + 1;
1220    if (I > SingleSlash)
1221       I = SingleSlash;
1222    for (; I < SingleSlash && *I != ':'; I++);
1223    string::const_iterator SecondColon = I;
1224
1225    // Search for the @ after the colon
1226    for (; I < SingleSlash && *I != '@'; I++);
1227    string::const_iterator At = I;
1228
1229    // Now write the host and user/pass
1230    if (At == SingleSlash)
1231    {
1232       if (FirstColon < SingleSlash)
1233          Host.assign(FirstColon,SingleSlash);
1234    }
1235    else
1236    {
1237       Host.assign(At+1,SingleSlash);
1238       User.assign(FirstColon,SecondColon);
1239       if (SecondColon < At)
1240          Password.assign(SecondColon+1,At);
1241    }
1242
1243    // Now we parse the RFC 2732 [] hostnames.
1244    unsigned long PortEnd = 0;
1245    InBracket = false;
1246    for (unsigned I = 0; I != Host.length();)
1247    {
1248       if (Host[I] == '[')
1249       {
1250          InBracket = true;
1251          Host.erase(I,1);
1252          continue;
1253       }
1254
1255       if (InBracket == true && Host[I] == ']')
1256       {
1257          InBracket = false;
1258          Host.erase(I,1);
1259          PortEnd = I;
1260          continue;
1261       }
1262       I++;
1263    }
1264
1265    // Tsk, weird.
1266    if (InBracket == true)
1267    {
1268       Host.clear();
1269       return;
1270    }
1271
1272    // Now we parse off a port number from the hostname
1273    Port = 0;
1274    string::size_type Pos = Host.rfind(':');
1275    if (Pos == string::npos || Pos < PortEnd)
1276       return;
1277
1278    Port = atoi(string(Host,Pos+1).c_str());
1279    Host.assign(Host,0,Pos);
1280 }
1281                                                                         /*}}}*/
1282 // URI::operator string - Convert the URI to a string                   /*{{{*/
1283 // ---------------------------------------------------------------------
1284 /* */
1285 URI::operator string()
1286 {
1287    string Res;
1288
1289    if (Access.empty() == false)
1290       Res = Access + ':';
1291
1292    if (Host.empty() == false)
1293    {
1294       if (Access.empty() == false)
1295          Res += "//";
1296
1297       if (User.empty() == false)
1298       {
1299          Res +=  User;
1300          if (Password.empty() == false)
1301             Res += ":" + Password;
1302          Res += "@";
1303       }
1304
1305       // Add RFC 2732 escaping characters
1306       if (Access.empty() == false &&
1307           (Host.find('/') != string::npos || Host.find(':') != string::npos))
1308          Res += '[' + Host + ']';
1309       else
1310          Res += Host;
1311
1312       if (Port != 0)
1313       {
1314          char S[30];
1315          sprintf(S,":%u",Port);
1316          Res += S;
1317       }
1318    }
1319
1320    if (Path.empty() == false)
1321    {
1322       if (Path[0] != '/')
1323          Res += "/" + Path;
1324       else
1325          Res += Path;
1326    }
1327
1328    return Res;
1329 }
1330                                                                         /*}}}*/
1331 // URI::SiteOnly - Return the schema and site for the URI               /*{{{*/
1332 // ---------------------------------------------------------------------
1333 /* */
1334 string URI::SiteOnly(const string &URI)
1335 {
1336    ::URI U(URI);
1337    U.User.clear();
1338    U.Password.clear();
1339    U.Path.clear();
1340    U.Port = 0;
1341    return U;
1342 }
1343                                                                         /*}}}*/
1344 // URI::NoUserPassword - Return the schema, site and path for the URI   /*{{{*/
1345 // ---------------------------------------------------------------------
1346 /* */
1347 string URI::NoUserPassword(const string &URI)
1348 {
1349    ::URI U(URI);
1350    U.User.clear();
1351    U.Password.clear();
1352    U.Port = 0;
1353    return U;
1354 }
1355                                                                         /*}}}*/