apt-pkg/contrib/strutl.cc

   1 // -*- mode: cpp; mode: fold -*-
   2 // Description                                                          /*{{{*/
   3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
   4 /* ######################################################################
   5
   6    String Util - Some useful string functions.
   7
   8    These have been collected from here and there to do all sorts of useful
   9    things to strings. They are useful in file parsers, URI handlers and
  10    especially in APT methods.
  11
  12    This source is placed in the Public Domain, do with it what you will
  13    It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
  14
  15    ##################################################################### */
  16                                                                         /*}}}*/
  17 // Includes                                                             /*{{{*/
  18 #include <apt-pkg/strutl.h>
  19 #include <apt-pkg/fileutl.h>
  20 #include <apt-pkg/error.h>
  21
  22 #include <apti18n.h>
  23
  24 #include <ctype.h>
  25 #include <string.h>
  26 #include <stdio.h>
  27 #include <algorithm>
  28 #include <unistd.h>
  29 #include <regex.h>
  30 #include <errno.h>
  31 #include <stdarg.h>
  32 #include <iconv.h>
  33
  34 #include "config.h"
  35
  36 using namespace std;
  37                                                                         /*}}}*/
  38
  39 // UTF8ToCodeset - Convert some UTF-8 string for some codeset           /*{{{*/
  40 // ---------------------------------------------------------------------
  41 /* This is handy to use before display some information for enduser  */
  42 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
  43 {
  44   iconv_t cd;
  45   const char *inbuf;
  46   char *inptr, *outbuf, *outptr;
  47   size_t insize, outsize;
  48
  49   cd = iconv_open(codeset, "UTF-8");
  50   if (cd == (iconv_t)(-1)) {
  51      // Something went wrong
  52      if (errno == EINVAL)
  53         _error->Error("conversion from 'UTF-8' to '%s' not available",
  54                codeset);
  55      else
  56         perror("iconv_open");
  57
  58      // Clean the destination string
  59      *dest = "";
  60
  61      return false;
  62   }
  63
  64   insize = outsize = orig.size();
  65   inbuf = orig.data();
  66   inptr = (char *)inbuf;
  67   outbuf = new char[insize+1];
  68   outptr = outbuf;
  69
  70   iconv(cd, &inptr, &insize, &outptr, &outsize);
  71   *outptr = '\0';
  72
  73   *dest = outbuf;
  74   delete[] outbuf;
  75
  76   iconv_close(cd);
  77
  78   return true;
  79 }
  80                                                                         /*}}}*/
  81 // strstrip - Remove white space from the front and back of a string    /*{{{*/
  82 // ---------------------------------------------------------------------
  83 /* This is handy to use when parsing a file. It also removes \n's left
  84    over from fgets and company */
  85 char *_strstrip(char *String)
  86 {
  87    for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
  88
  89    if (*String == 0)
  90       return String;
  91
  92    char *End = String + strlen(String) - 1;
  93    for (;End != String - 1 && (*End == ' ' || *End == '\t' || *End == '\n' ||
  94                                *End == '\r'); End--);
  95    End++;
  96    *End = 0;
  97    return String;
  98 };
  99                                                                         /*}}}*/
 100 // strtabexpand - Converts tabs into 8 spaces                           /*{{{*/
 101 // ---------------------------------------------------------------------
 102 /* */
 103 char *_strtabexpand(char *String,size_t Len)
 104 {
 105    for (char *I = String; I != I + Len && *I != 0; I++)
 106    {
 107       if (*I != '\t')
 108          continue;
 109       if (I + 8 > String + Len)
 110       {
 111          *I = 0;
 112          return String;
 113       }
 114
 115       /* Assume the start of the string is 0 and find the next 8 char
 116          division */
 117       int Len;
 118       if (String == I)
 119          Len = 1;
 120       else
 121          Len = 8 - ((String - I) % 8);
 122       Len -= 2;
 123       if (Len <= 0)
 124       {
 125          *I = ' ';
 126          continue;
 127       }
 128
 129       memmove(I + Len,I + 1,strlen(I) + 1);
 130       for (char *J = I; J + Len != I; *I = ' ', I++);
 131    }
 132    return String;
 133 }
 134                                                                         /*}}}*/
 135 // ParseQuoteWord - Parse a single word out of a string                 /*{{{*/
 136 // ---------------------------------------------------------------------
 137 /* This grabs a single word, converts any % escaped characters to their
 138    proper values and advances the pointer. Double quotes are understood
  139    and stripped out as well. This is for URI/URL parsing. It also can
 140    understand [] brackets.*/
 141 bool ParseQuoteWord(const char *&String,string &Res)
 142 {
 143    // Skip leading whitespace
 144    const char *C = String;
 145    for (;*C != 0 && *C == ' '; C++);
 146    if (*C == 0)
 147       return false;
 148
 149    // Jump to the next word
 150    for (;*C != 0 && isspace(*C) == 0; C++)
 151    {
 152       if (*C == '"')
 153       {
 154          for (C++; *C != 0 && *C != '"'; C++);
 155          if (*C == 0)
 156             return false;
 157       }
 158       if (*C == '[')
 159       {
 160          for (C++; *C != 0 && *C != ']'; C++);
 161          if (*C == 0)
 162             return false;
 163       }
 164    }
 165
 166    // Now de-quote characters
 167    char Buffer[1024];
 168    char Tmp[3];
 169    const char *Start = String;
 170    char *I;
 171    for (I = Buffer; I < Buffer + sizeof(Buffer) && Start != C; I++)
 172    {
 173       if (*Start == '%' && Start + 2 < C)
 174       {
 175          Tmp[0] = Start[1];
 176          Tmp[1] = Start[2];
 177          Tmp[2] = 0;
 178          *I = (char)strtol(Tmp,0,16);
 179          Start += 3;
 180          continue;
 181       }
 182       if (*Start != '"')
 183          *I = *Start;
 184       else
 185          I--;
 186       Start++;
 187    }
 188    *I = 0;
 189    Res = Buffer;
 190
 191    // Skip ending white space
 192    for (;*C != 0 && isspace(*C) != 0; C++);
 193    String = C;
 194    return true;
 195 }
 196                                                                         /*}}}*/
 197 // ParseCWord - Parses a string like a C "" expression                  /*{{{*/
 198 // ---------------------------------------------------------------------
 199 /* This expects a series of space separated strings enclosed in ""'s.
 200    It concatenates the ""'s into a single string. */
 201 bool ParseCWord(const char *&String,string &Res)
 202 {
 203    // Skip leading whitespace
 204    const char *C = String;
 205    for (;*C != 0 && *C == ' '; C++);
 206    if (*C == 0)
 207       return false;
 208
 209    char Buffer[1024];
 210    char *Buf = Buffer;
 211    if (strlen(String) >= sizeof(Buffer))
 212        return false;
 213
 214    for (; *C != 0; C++)
 215    {
 216       if (*C == '"')
 217       {
 218          for (C++; *C != 0 && *C != '"'; C++)
 219             *Buf++ = *C;
 220
 221          if (*C == 0)
 222             return false;
 223
 224          continue;
 225       }
 226
 227       if (C != String && isspace(*C) != 0 && isspace(C[-1]) != 0)
 228          continue;
 229       if (isspace(*C) == 0)
 230          return false;
 231       *Buf++ = ' ';
 232    }
 233    *Buf = 0;
 234    Res = Buffer;
 235    String = C;
 236    return true;
 237 }
 238                                                                         /*}}}*/
  239 // QuoteString - Convert a string into quoted form                      /*{{{*/
 240 // ---------------------------------------------------------------------
 241 /* */
 242 string QuoteString(const string &Str, const char *Bad)
 243 {
 244    string Res;
 245    for (string::const_iterator I = Str.begin(); I != Str.end(); I++)
 246    {
 247       if (strchr(Bad,*I) != 0 || isprint(*I) == 0 ||
 248           *I <= 0x20 || *I >= 0x7F)
 249       {
 250          char Buf[10];
 251          sprintf(Buf,"%%%02x",(int)*I);
 252          Res += Buf;
 253       }
 254       else
 255          Res += *I;
 256    }
 257    return Res;
 258 }
 259                                                                         /*}}}*/
  260 // DeQuoteString - Convert a string from quoted form                    /*{{{*/
 261 // ---------------------------------------------------------------------
 262 /* This undoes QuoteString */
 263 string DeQuoteString(const string &Str)
 264 {
 265    string Res;
 266    for (string::const_iterator I = Str.begin(); I != Str.end(); I++)
 267    {
 268       if (*I == '%' && I + 2 < Str.end())
 269       {
 270          char Tmp[3];
 271          Tmp[0] = I[1];
 272          Tmp[1] = I[2];
 273          Tmp[2] = 0;
 274          Res += (char)strtol(Tmp,0,16);
 275          I += 2;
 276          continue;
 277       }
 278       else
 279          Res += *I;
 280    }
 281    return Res;
 282 }
 283
 284                                                                         /*}}}*/
 285 // SizeToStr - Convert a long into a human readable size                /*{{{*/
 286 // ---------------------------------------------------------------------
 287 /* A max of 4 digits are shown before conversion to the next highest unit.
 288    The max length of the string will be 5 chars unless the size is > 10
 289    YottaBytes (E24) */
 290 string SizeToStr(double Size)
 291 {
 292    char S[300];
 293    double ASize;
 294    if (Size >= 0)
 295       ASize = Size;
 296    else
 297       ASize = -1*Size;
 298
 299    /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
 300       ExaBytes, ZettaBytes, YottaBytes */
 301    char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
 302    int I = 0;
 303    while (I <= 8)
 304    {
 305       if (ASize < 100 && I != 0)
 306       {
 307          sprintf(S,"%.1f%c",ASize,Ext[I]);
 308          break;
 309       }
 310
 311       if (ASize < 10000)
 312       {
 313          sprintf(S,"%.0f%c",ASize,Ext[I]);
 314          break;
 315       }
 316       ASize /= 1000.0;
 317       I++;
 318    }
 319
 320    return S;
 321 }
 322                                                                         /*}}}*/
 323 // TimeToStr - Convert the time into a string                           /*{{{*/
 324 // ---------------------------------------------------------------------
 325 /* Converts a number of seconds to a hms format */
 326 string TimeToStr(unsigned long Sec)
 327 {
 328    char S[300];
 329
 330    while (1)
 331    {
 332       if (Sec > 60*60*24)
 333       {
 334          sprintf(S,"%lid %lih%limin%lis",Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
 335          break;
 336       }
 337
 338       if (Sec > 60*60)
 339       {
 340          sprintf(S,"%lih%limin%lis",Sec/60/60,(Sec/60) % 60,Sec % 60);
 341          break;
 342       }
 343
 344       if (Sec > 60)
 345       {
 346          sprintf(S,"%limin%lis",Sec/60,Sec % 60);
 347          break;
 348       }
 349
 350       sprintf(S,"%lis",Sec);
 351       break;
 352    }
 353
 354    return S;
 355 }
 356                                                                         /*}}}*/
 357 // SubstVar - Substitute a string for another string                    /*{{{*/
 358 // ---------------------------------------------------------------------
  359 /* This replaces all occurrences of Subst with Contents in Str. */
 360 string SubstVar(const string &Str,const string &Subst,const string &Contents)
 361 {
 362    string::size_type Pos = 0;
 363    string::size_type OldPos = 0;
 364    string Temp;
 365
 366    while (OldPos < Str.length() &&
 367           (Pos = Str.find(Subst,OldPos)) != string::npos)
 368    {
 369       Temp += string(Str,OldPos,Pos) + Contents;
 370       OldPos = Pos + Subst.length();
 371    }
 372
 373    if (OldPos == 0)
 374       return Str;
 375
 376    return Temp + string(Str,OldPos);
 377 }
 378
 379 string SubstVar(string Str,const struct SubstVar *Vars)
 380 {
 381    for (; Vars->Subst != 0; Vars++)
 382       Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
 383    return Str;
 384 }
 385                                                                         /*}}}*/
 386 // URItoFileName - Convert the uri into a unique file name              /*{{{*/
 387 // ---------------------------------------------------------------------
 388 /* This converts a URI into a safe filename. It quotes all unsafe characters
 389    and converts / to _ and removes the scheme identifier. The resulting
 390    file name should be unique and never occur again for a different file */
 391 string URItoFileName(const string &URI)
 392 {
 393    // Nuke 'sensitive' items
 394    ::URI U(URI);
 395    U.User.clear();
 396    U.Password.clear();
 397    U.Access.clear();
 398
 399    // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
 400    string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
 401    replace(NewURI.begin(),NewURI.end(),'/','_');
 402    return NewURI;
 403 }
 404                                                                         /*}}}*/
 405 // Base64Encode - Base64 Encoding routine for short strings             /*{{{*/
 406 // ---------------------------------------------------------------------
 407 /* This routine performs a base64 transformation on a string. It was ripped
 408    from wget and then patched and bug fixed.
 409
 410    This spec can be found in rfc2045 */
 411 string Base64Encode(const string &S)
 412 {
 413    // Conversion table.
 414    static char tbl[64] = {'A','B','C','D','E','F','G','H',
 415                           'I','J','K','L','M','N','O','P',
 416                           'Q','R','S','T','U','V','W','X',
 417                           'Y','Z','a','b','c','d','e','f',
 418                           'g','h','i','j','k','l','m','n',
 419                           'o','p','q','r','s','t','u','v',
 420                           'w','x','y','z','0','1','2','3',
 421                           '4','5','6','7','8','9','+','/'};
 422
 423    // Pre-allocate some space
 424    string Final;
 425    Final.reserve((4*S.length() + 2)/3 + 2);
 426
 427    /* Transform the 3x8 bits to 4x6 bits, as required by
 428       base64.  */
 429    for (string::const_iterator I = S.begin(); I < S.end(); I += 3)
 430    {
 431       char Bits[3] = {0,0,0};
 432       Bits[0] = I[0];
 433       if (I + 1 < S.end())
 434          Bits[1] = I[1];
 435       if (I + 2 < S.end())
 436          Bits[2] = I[2];
 437
 438       Final += tbl[Bits[0] >> 2];
 439       Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];
 440
 441       if (I + 1 >= S.end())
 442          break;
 443
 444       Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];
 445
 446       if (I + 2 >= S.end())
 447          break;
 448
 449       Final += tbl[Bits[2] & 0x3f];
 450    }
 451
 452    /* Apply the padding elements, this tells how many bytes the remote
 453       end should discard */
 454    if (S.length() % 3 == 2)
 455       Final += '=';
 456    if (S.length() % 3 == 1)
 457       Final += "==";
 458
 459    return Final;
 460 }
 461                                                                         /*}}}*/
 462 // stringcmp - Arbitrary string compare                                 /*{{{*/
 463 // ---------------------------------------------------------------------
 464 /* This safely compares two non-null terminated strings of arbitrary
 465    length */
 466 int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 467 {
 468    for (; A != AEnd && B != BEnd; A++, B++)
 469       if (*A != *B)
 470          break;
 471
 472    if (A == AEnd && B == BEnd)
 473       return 0;
 474    if (A == AEnd)
 475       return 1;
 476    if (B == BEnd)
 477       return -1;
 478    if (*A < *B)
 479       return -1;
 480    return 1;
 481 }
 482
 483 #if __GNUC__ >= 3
 484 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 485               const char *B,const char *BEnd)
 486 {
 487    for (; A != AEnd && B != BEnd; A++, B++)
 488       if (*A != *B)
 489          break;
 490
 491    if (A == AEnd && B == BEnd)
 492       return 0;
 493    if (A == AEnd)
 494       return 1;
 495    if (B == BEnd)
 496       return -1;
 497    if (*A < *B)
 498       return -1;
 499    return 1;
 500 }
 501 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 502               string::const_iterator B,string::const_iterator BEnd)
 503 {
 504    for (; A != AEnd && B != BEnd; A++, B++)
 505       if (*A != *B)
 506          break;
 507
 508    if (A == AEnd && B == BEnd)
 509       return 0;
 510    if (A == AEnd)
 511       return 1;
 512    if (B == BEnd)
 513       return -1;
 514    if (*A < *B)
 515       return -1;
 516    return 1;
 517 }
 518 #endif
 519                                                                         /*}}}*/
 520 // stringcasecmp - Arbitrary case insensitive string compare            /*{{{*/
 521 // ---------------------------------------------------------------------
 522 /* */
 523 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 524 {
 525    for (; A != AEnd && B != BEnd; A++, B++)
 526       if (toupper(*A) != toupper(*B))
 527          break;
 528
 529    if (A == AEnd && B == BEnd)
 530       return 0;
 531    if (A == AEnd)
 532       return 1;
 533    if (B == BEnd)
 534       return -1;
 535    if (toupper(*A) < toupper(*B))
 536       return -1;
 537    return 1;
 538 }
 539 #if __GNUC__ >= 3
 540 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 541                   const char *B,const char *BEnd)
 542 {
 543    for (; A != AEnd && B != BEnd; A++, B++)
 544       if (toupper(*A) != toupper(*B))
 545          break;
 546
 547    if (A == AEnd && B == BEnd)
 548       return 0;
 549    if (A == AEnd)
 550       return 1;
 551    if (B == BEnd)
 552       return -1;
 553    if (toupper(*A) < toupper(*B))
 554       return -1;
 555    return 1;
 556 }
 557 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 558                   string::const_iterator B,string::const_iterator BEnd)
 559 {
 560    for (; A != AEnd && B != BEnd; A++, B++)
 561       if (toupper(*A) != toupper(*B))
 562          break;
 563
 564    if (A == AEnd && B == BEnd)
 565       return 0;
 566    if (A == AEnd)
 567       return 1;
 568    if (B == BEnd)
 569       return -1;
 570    if (toupper(*A) < toupper(*B))
 571       return -1;
 572    return 1;
 573 }
 574 #endif
 575                                                                         /*}}}*/
 576 // LookupTag - Lookup the value of a tag in a taged string              /*{{{*/
 577 // ---------------------------------------------------------------------
 578 /* The format is like those used in package files and the method
 579    communication system */
 580 string LookupTag(const string &Message,const char *Tag,const char *Default)
 581 {
 582    // Look for a matching tag.
 583    int Length = strlen(Tag);
 584    for (string::const_iterator I = Message.begin(); I + Length < Message.end(); I++)
 585    {
 586       // Found the tag
 587       if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
 588       {
 589          // Find the end of line and strip the leading/trailing spaces
 590          string::const_iterator J;
 591          I += Length + 1;
 592          for (; isspace(*I) != 0 && I < Message.end(); I++);
 593          for (J = I; *J != '\n' && J < Message.end(); J++);
 594          for (; J > I && isspace(J[-1]) != 0; J--);
 595
 596          return string(I,J);
 597       }
 598
 599       for (; *I != '\n' && I < Message.end(); I++);
 600    }
 601
 602    // Failed to find a match
 603    if (Default == 0)
 604       return string();
 605    return Default;
 606 }
 607                                                                         /*}}}*/
 608 // StringToBool - Converts a string into a boolean                      /*{{{*/
 609 // ---------------------------------------------------------------------
 610 /* This inspects the string to see if it is true or if it is false and
  611    then returns the result. Several variants on true/false are checked. */
 612 int StringToBool(const string &Text,int Default)
 613 {
 614    char *End;
 615    int Res = strtol(Text.c_str(),&End,0);
 616    if (End != Text.c_str() && Res >= 0 && Res <= 1)
 617       return Res;
 618
 619    // Check for positives
 620    if (strcasecmp(Text.c_str(),"no") == 0 ||
 621        strcasecmp(Text.c_str(),"false") == 0 ||
 622        strcasecmp(Text.c_str(),"without") == 0 ||
 623        strcasecmp(Text.c_str(),"off") == 0 ||
 624        strcasecmp(Text.c_str(),"disable") == 0)
 625       return 0;
 626
 627    // Check for negatives
 628    if (strcasecmp(Text.c_str(),"yes") == 0 ||
 629        strcasecmp(Text.c_str(),"true") == 0 ||
 630        strcasecmp(Text.c_str(),"with") == 0 ||
 631        strcasecmp(Text.c_str(),"on") == 0 ||
 632        strcasecmp(Text.c_str(),"enable") == 0)
 633       return 1;
 634
 635    return Default;
 636 }
 637                                                                         /*}}}*/
 638 // TimeRFC1123 - Convert a time_t into RFC1123 format                   /*{{{*/
 639 // ---------------------------------------------------------------------
 640 /* This converts a time_t into a string time representation that is
  641    year 2000 compliant and timezone neutral */
 642 string TimeRFC1123(time_t Date)
 643 {
 644    struct tm Conv = *gmtime(&Date);
 645    char Buf[300];
 646
 647    const char *Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
 648    const char *Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
 649                           "Aug","Sep","Oct","Nov","Dec"};
 650
 651    sprintf(Buf,"%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
 652            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
 653            Conv.tm_min,Conv.tm_sec);
 654    return Buf;
 655 }
 656                                                                         /*}}}*/
 657 // ReadMessages - Read messages from the FD                             /*{{{*/
 658 // ---------------------------------------------------------------------
 659 /* This pulls full messages from the input FD into the message buffer.
 660    It assumes that messages will not pause during transit so no
 661    fancy buffering is used.
 662
 663    In particular: this reads blocks from the input until it believes
 664    that it's run out of input text.  Each block is terminated by a
 665    double newline ('\n' followed by '\n').  As noted below, there is a
 666    bug in this code: it assumes that all the blocks have been read if
 667    it doesn't see additional text in the buffer after the last one is
 668    parsed, which will cause it to lose blocks if the last block
 669    coincides with the end of the buffer.
 670  */
 671 bool ReadMessages(int Fd, vector<string> &List)
 672 {
 673    char Buffer[64000];
 674    char *End = Buffer;
 675    // Represents any left-over from the previous iteration of the
 676    // parse loop.  (i.e., if a message is split across the end
 677    // of the buffer, it goes here)
 678    string PartialMessage;
 679
 680    while (1)
 681    {
 682       int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
 683       if (Res < 0 && errno == EINTR)
 684          continue;
 685
 686       // Process is dead, this is kind of bad..
 687       if (Res == 0)
 688          return false;
 689
 690       // No data
 691       if (Res < 0 && errno == EAGAIN)
 692          return true;
 693       if (Res < 0)
 694          return false;
 695
 696       End += Res;
 697
 698       // Look for the end of the message
 699       for (char *I = Buffer; I + 1 < End; I++)
 700       {
 701          if (I[0] != '\n' || I[1] != '\n')
 702             continue;
 703
 704          // Pull the message out
 705          string Message(Buffer,I-Buffer);
 706          PartialMessage += Message;
 707
 708          // Fix up the buffer
 709          for (; I < End && *I == '\n'; I++);
 710          End -= I-Buffer;
 711          memmove(Buffer,I,End-Buffer);
 712          I = Buffer;
 713
 714          List.push_back(PartialMessage);
 715          PartialMessage.clear();
 716       }
 717       if (End != Buffer)
 718         {
 719           // If there's text left in the buffer, store it
 720           // in PartialMessage and throw the rest of the buffer
 721           // away.  This allows us to handle messages that
 722           // are longer than the static buffer size.
 723           PartialMessage += string(Buffer, End);
 724           End = Buffer;
 725         }
 726       else
 727         {
 728           // BUG ALERT: if a message block happens to end at a
 729           // multiple of 64000 characters, this will cause it to
 730           // terminate early, leading to a badly formed block and
 731           // probably crashing the method.  However, this is the only
 732           // way we have to find the end of the message block.  I have
 733           // an idea of how to fix this, but it will require changes
 734           // to the protocol (essentially to mark the beginning and
 735           // end of the block).
 736           //
 737           //  -- dburrows 2008-04-02
 738           return true;
 739         }
 740
 741       if (WaitFd(Fd) == false)
 742          return false;
 743    }
 744 }
 745                                                                         /*}}}*/
 746 // MonthConv - Converts a month string into a number                    /*{{{*/
 747 // ---------------------------------------------------------------------
 748 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
 749    Made it a bit more robust with a few touppers though. */
 750 static int MonthConv(char *Month)
 751 {
 752    switch (toupper(*Month))
 753    {
 754       case 'A':
 755       return toupper(Month[1]) == 'P'?3:7;
 756       case 'D':
 757       return 11;
 758       case 'F':
 759       return 1;
 760       case 'J':
 761       if (toupper(Month[1]) == 'A')
 762          return 0;
 763       return toupper(Month[2]) == 'N'?5:6;
 764       case 'M':
 765       return toupper(Month[2]) == 'R'?2:4;
 766       case 'N':
 767       return 10;
 768       case 'O':
 769       return 9;
 770       case 'S':
 771       return 8;
 772
 773       // Pretend it is January..
 774       default:
 775       return 0;
 776    }
 777 }
 778                                                                         /*}}}*/
 779 // timegm - Internal timegm function if gnu is not available            /*{{{*/
 780 // ---------------------------------------------------------------------
 781 /* Ripped this evil little function from wget - I prefer the use of
 782    GNU timegm if possible as this technique will have interesting problems
 783    with leap seconds, timezones and other.
 784
 785    Converts struct tm to time_t, assuming the data in tm is UTC rather
 786    than local timezone (mktime assumes the latter).
 787
 788    Contributed by Roger Beeman <beeman@cisco.com>, with the help of
 789    Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO. */
 790
 791 /* Turned it into an autoconf check, because GNU is not the only thing which
 792    can provide timegm. -- 2002-09-22, Joel Baker */
 793
 794 #ifndef HAVE_TIMEGM // Now with autoconf!
 795 static time_t timegm(struct tm *t)
 796 {
 797    time_t tl, tb;
 798
 799    tl = mktime (t);
 800    if (tl == -1)
 801       return -1;
 802    tb = mktime (gmtime (&tl));
 803    return (tl <= tb ? (tl + (tl - tb)) : (tl - (tb - tl)));
 804 }
 805 #endif
 806                                                                         /*}}}*/
 807 // StrToTime - Converts a string into a time_t                          /*{{{*/
 808 // ---------------------------------------------------------------------
  809 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
  810    and the C library asctime format. It requires the GNU library function
  811    'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
 812    reason the C library does not provide any such function :< This also
 813    handles the weird, but unambiguous FTP time format*/
 814 bool StrToTime(const string &Val,time_t &Result)
 815 {
 816    struct tm Tm;
 817    char Month[10];
 818    const char *I = Val.c_str();
 819
 820    // Skip the day of the week
 821    for (;*I != 0  && *I != ' '; I++);
 822
 823    // Handle RFC 1123 time
 824    Month[0] = 0;
 825    if (sscanf(I," %d %3s %d %d:%d:%d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
 826               &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 827    {
 828       // Handle RFC 1036 time
 829       if (sscanf(I," %d-%3s-%d %d:%d:%d GMT",&Tm.tm_mday,Month,
 830                  &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
 831          Tm.tm_year += 1900;
 832       else
 833       {
 834          // asctime format
 835          if (sscanf(I," %3s %d %d:%d:%d %d",Month,&Tm.tm_mday,
 836                     &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
 837          {
 838             // 'ftp' time
 839             if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
 840                        &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 841                return false;
 842             Tm.tm_mon--;
 843          }
 844       }
 845    }
 846
 847    Tm.tm_isdst = 0;
 848    if (Month[0] != 0)
 849       Tm.tm_mon = MonthConv(Month);
 850    Tm.tm_year -= 1900;
 851
 852    // Convert to local time and then to GMT
 853    Result = timegm(&Tm);
 854    return true;
 855 }
 856                                                                         /*}}}*/
 857 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
 858 // ---------------------------------------------------------------------
 859 /* This is used in decoding the crazy fixed length string headers in
 860    tar and ar files. */
 861 bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
 862 {
 863    char S[30];
 864    if (Len >= sizeof(S))
 865       return false;
 866    memcpy(S,Str,Len);
 867    S[Len] = 0;
 868
 869    // All spaces is a zero
 870    Res = 0;
 871    unsigned I;
 872    for (I = 0; S[I] == ' '; I++);
 873    if (S[I] == 0)
 874       return true;
 875
 876    char *End;
 877    Res = strtoul(S,&End,Base);
 878    if (End == S)
 879       return false;
 880
 881    return true;
 882 }
 883                                                                         /*}}}*/
 884 // HexDigit - Convert a hex character into an integer                   /*{{{*/
 885 // ---------------------------------------------------------------------
 886 /* Helper for Hex2Num */
 887 static int HexDigit(int c)
 888 {
 889    if (c >= '0' && c <= '9')
 890       return c - '0';
 891    if (c >= 'a' && c <= 'f')
 892       return c - 'a' + 10;
 893    if (c >= 'A' && c <= 'F')
 894       return c - 'A' + 10;
 895    return 0;
 896 }
 897                                                                         /*}}}*/
 898 // Hex2Num - Convert a long hex number into a buffer                    /*{{{*/
 899 // ---------------------------------------------------------------------
 900 /* The length of the buffer must be exactly 1/2 the length of the string. */
 901 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
 902 {
 903    if (Str.length() != Length*2)
 904       return false;
 905
 906    // Convert each digit. We store it in the same order as the string
 907    int J = 0;
 908    for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
 909    {
 910       if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
 911          return false;
 912
 913       Num[J] = HexDigit(I[0]) << 4;
 914       Num[J] += HexDigit(I[1]);
 915    }
 916
 917    return true;
 918 }
 919                                                                         /*}}}*/
 920 // TokSplitString - Split a string up by a given token                  /*{{{*/
 921 // ---------------------------------------------------------------------
 922 /* This is intended to be a faster splitter, it does not use dynamic
 923    memories. Input is changed to insert nulls at each token location. */
 924 bool TokSplitString(char Tok,char *Input,char **List,
 925                     unsigned long ListMax)
 926 {
 927    // Strip any leading spaces
 928    char *Start = Input;
 929    char *Stop = Start + strlen(Start);
 930    for (; *Start != 0 && isspace(*Start) != 0; Start++);
 931
 932    unsigned long Count = 0;
 933    char *Pos = Start;
 934    while (Pos != Stop)
 935    {
 936       // Skip to the next Token
 937       for (; Pos != Stop && *Pos != Tok; Pos++);
 938
 939       // Back remove spaces
 940       char *End = Pos;
 941       for (; End > Start && (End[-1] == Tok || isspace(End[-1]) != 0); End--);
 942       *End = 0;
 943
 944       List[Count++] = Start;
 945       if (Count >= ListMax)
 946       {
 947          List[Count-1] = 0;
 948          return false;
 949       }
 950
 951       // Advance pos
 952       for (; Pos != Stop && (*Pos == Tok || isspace(*Pos) != 0 || *Pos == 0); Pos++);
 953       Start = Pos;
 954    }
 955
 956    List[Count] = 0;
 957    return true;
 958 }
 959                                                                         /*}}}*/
 960 // RegexChoice - Simple regex list/list matcher                         /*{{{*/
 961 // ---------------------------------------------------------------------
 962 /* */
 963 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
 964                       const char **ListEnd)
 965 {
 966    for (RxChoiceList *R = Rxs; R->Str != 0; R++)
 967       R->Hit = false;
 968
 969    unsigned long Hits = 0;
 970    for (; ListBegin != ListEnd; ListBegin++)
 971    {
 972       // Check if the name is a regex
 973       const char *I;
 974       bool Regex = true;
 975       for (I = *ListBegin; *I != 0; I++)
 976          if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
 977             break;
 978       if (*I == 0)
 979          Regex = false;
 980
 981       // Compile the regex pattern
 982       regex_t Pattern;
 983       if (Regex == true)
 984          if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
 985                      REG_NOSUB) != 0)
 986             Regex = false;
 987
 988       // Search the list
 989       bool Done = false;
 990       for (RxChoiceList *R = Rxs; R->Str != 0; R++)
 991       {
 992          if (R->Str[0] == 0)
 993             continue;
 994
 995          if (strcasecmp(R->Str,*ListBegin) != 0)
 996          {
 997             if (Regex == false)
 998                continue;
 999             if (regexec(&Pattern,R->Str,0,0,0) != 0)
1000                continue;
1001          }
1002          Done = true;
1003
1004          if (R->Hit == false)
1005             Hits++;
1006
1007          R->Hit = true;
1008       }
1009
1010       if (Regex == true)
1011          regfree(&Pattern);
1012
1013       if (Done == false)
1014          _error->Warning(_("Selection %s not found"),*ListBegin);
1015    }
1016
1017    return Hits;
1018 }
1019                                                                         /*}}}*/
// ioprintf - C format string outputter to C++ iostreams                /*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters. The printf style format and its
   arguments are rendered and the resulting text is written to out. Short
   results are formatted on the stack, longer ones are retried in a buffer
   of the required size so nothing is cut off. If the formatting itself
   fails nothing is written. */
void ioprintf(ostream &out,const char *format,...)
{
   va_list args;

   // sprintf the description, this size covers nearly every message
   char S[400];
   va_start(args,format);
   const int Needed = vsnprintf(S,sizeof(S),format,args);
   va_end(args);

   // The contents of S are unspecified after a failure, do not print them
   if (Needed < 0)
      return;

   if (static_cast<size_t>(Needed) < sizeof(S))
   {
      out << S;
      return;
   }

   /* The text was truncated. vsnprintf reported the full length so format
      again into a big enough buffer; the argument list was consumed by the
      first pass and has to be restarted. */
   string Big(static_cast<size_t>(Needed) + 1,'\0');
   va_start(args,format);
   vsnprintf(&Big[0],Big.size(),format,args);
   va_end(args);
   out << Big.c_str();
}
1034                                                                         /*}}}*/
// strprintf - C format string outputter to C++ strings                 /*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters. The printf style format and its
   arguments are rendered and the result replaces the contents of out.
   Short results are formatted on the stack, longer ones are retried in a
   buffer of the required size so nothing is cut off. out is only modified
   once formatting is complete, so the arguments may refer to its old
   contents. If the formatting itself fails out is left empty. */
void strprintf(string &out,const char *format,...)
{
   va_list args;

   // sprintf the description, this size covers nearly every message
   char S[1024];
   va_start(args,format);
   const int Needed = vsnprintf(S,sizeof(S),format,args);
   va_end(args);

   // The contents of S are unspecified after a failure, do not use them
   if (Needed < 0)
   {
      out.clear();
      return;
   }

   if (static_cast<size_t>(Needed) < sizeof(S))
   {
      out = S;
      return;
   }

   /* The text was truncated. vsnprintf reported the full length so format
      again into a big enough buffer; the argument list was consumed by the
      first pass and has to be restarted. */
   string Big(static_cast<size_t>(Needed) + 1,'\0');
   va_start(args,format);
   vsnprintf(&Big[0],Big.size(),format,args);
   va_end(args);
   out = Big.c_str();
}
1049                                                                         /*}}}*/
// safe_snprintf - Safer snprintf                                       /*{{{*/
// ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == end. This is a better alternative to using
   consecutive snprintfs: feed the returned pointer back in as Buffer to
   append. End is returned once the buffer is exhausted, i.e. when there
   was no room to begin with, when the text had to be truncated, or when
   the formatting failed. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   if (End <= Buffer)
      return End;

   const size_t Space = End - Buffer;

   va_list args;
   va_start(args,Format);
   const int Did = vsnprintf(Buffer,Space,Format,args);
   va_end(args);

   /* Did must stay signed so a failure (negative) is noticed, and it is
      compared as a length since forming Buffer + Did past End is not a
      valid pointer computation. */
   if (Did < 0 || static_cast<size_t>(Did) > Space)
      return End;
   return Buffer + Did;
}
1071                                                                         /*}}}*/
1072
1073 // CheckDomainList - See if Host is in a , seperate list                /*{{{*/
1074 // ---------------------------------------------------------------------
1075 /* The domain list is a comma seperate list of domains that are suffix
1076    matched against the argument */
1077 bool CheckDomainList(const string &Host,const string &List)
1078 {
1079    string::const_iterator Start = List.begin();
1080    for (string::const_iterator Cur = List.begin(); Cur <= List.end(); Cur++)
1081    {
1082       if (Cur < List.end() && *Cur != ',')
1083          continue;
1084
1085       // Match the end of the string..
1086       if ((Host.size() >= (unsigned)(Cur - Start)) &&
1087           Cur - Start != 0 &&
1088           stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1089          return true;
1090
1091       Start = Cur + 1;
1092    }
1093    return false;
1094 }
1095                                                                         /*}}}*/
1096
1097 // URI::CopyFrom - Copy from an object                                  /*{{{*/
1098 // ---------------------------------------------------------------------
1099 /* This parses the URI into all of its components */
1100 void URI::CopyFrom(const string &U)
1101 {
1102    string::const_iterator I = U.begin();
1103
1104    // Locate the first colon, this separates the scheme
1105    for (; I < U.end() && *I != ':' ; I++);
1106    string::const_iterator FirstColon = I;
1107
1108    /* Determine if this is a host type URI with a leading double //
1109       and then search for the first single / */
1110    string::const_iterator SingleSlash = I;
1111    if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1112       SingleSlash += 3;
1113
1114    /* Find the / indicating the end of the hostname, ignoring /'s in the
1115       square brackets */
1116    bool InBracket = false;
1117    for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); SingleSlash++)
1118    {
1119       if (*SingleSlash == '[')
1120          InBracket = true;
1121       if (InBracket == true && *SingleSlash == ']')
1122          InBracket = false;
1123    }
1124
1125    if (SingleSlash > U.end())
1126       SingleSlash = U.end();
1127
1128    // We can now write the access and path specifiers
1129    Access.assign(U.begin(),FirstColon);
1130    if (SingleSlash != U.end())
1131       Path.assign(SingleSlash,U.end());
1132    if (Path.empty() == true)
1133       Path = "/";
1134
1135    // Now we attempt to locate a user:pass@host fragment
1136    if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1137       FirstColon += 3;
1138    else
1139       FirstColon += 1;
1140    if (FirstColon >= U.end())
1141       return;
1142
1143    if (FirstColon > SingleSlash)
1144       FirstColon = SingleSlash;
1145
1146    // Find the colon...
1147    I = FirstColon + 1;
1148    if (I > SingleSlash)
1149       I = SingleSlash;
1150    for (; I < SingleSlash && *I != ':'; I++);
1151    string::const_iterator SecondColon = I;
1152
1153    // Search for the @ after the colon
1154    for (; I < SingleSlash && *I != '@'; I++);
1155    string::const_iterator At = I;
1156
1157    // Now write the host and user/pass
1158    if (At == SingleSlash)
1159    {
1160       if (FirstColon < SingleSlash)
1161          Host.assign(FirstColon,SingleSlash);
1162    }
1163    else
1164    {
1165       Host.assign(At+1,SingleSlash);
1166       User.assign(FirstColon,SecondColon);
1167       if (SecondColon < At)
1168          Password.assign(SecondColon+1,At);
1169    }
1170
1171    // Now we parse the RFC 2732 [] hostnames.
1172    unsigned long PortEnd = 0;
1173    InBracket = false;
1174    for (unsigned I = 0; I != Host.length();)
1175    {
1176       if (Host[I] == '[')
1177       {
1178          InBracket = true;
1179          Host.erase(I,1);
1180          continue;
1181       }
1182
1183       if (InBracket == true && Host[I] == ']')
1184       {
1185          InBracket = false;
1186          Host.erase(I,1);
1187          PortEnd = I;
1188          continue;
1189       }
1190       I++;
1191    }
1192
1193    // Tsk, weird.
1194    if (InBracket == true)
1195    {
1196       Host.clear();
1197       return;
1198    }
1199
1200    // Now we parse off a port number from the hostname
1201    Port = 0;
1202    string::size_type Pos = Host.rfind(':');
1203    if (Pos == string::npos || Pos < PortEnd)
1204       return;
1205
1206    Port = atoi(string(Host,Pos+1).c_str());
1207    Host.assign(Host,0,Pos);
1208 }
1209                                                                         /*}}}*/
1210 // URI::operator string - Convert the URI to a string                   /*{{{*/
1211 // ---------------------------------------------------------------------
1212 /* */
1213 URI::operator string()
1214 {
1215    string Res;
1216
1217    if (Access.empty() == false)
1218       Res = Access + ':';
1219
1220    if (Host.empty() == false)
1221    {
1222       if (Access.empty() == false)
1223          Res += "//";
1224
1225       if (User.empty() == false)
1226       {
1227          Res +=  User;
1228          if (Password.empty() == false)
1229             Res += ":" + Password;
1230          Res += "@";
1231       }
1232
1233       // Add RFC 2732 escaping characters
1234       if (Access.empty() == false &&
1235           (Host.find('/') != string::npos || Host.find(':') != string::npos))
1236          Res += '[' + Host + ']';
1237       else
1238          Res += Host;
1239
1240       if (Port != 0)
1241       {
1242          char S[30];
1243          sprintf(S,":%u",Port);
1244          Res += S;
1245       }
1246    }
1247
1248    if (Path.empty() == false)
1249    {
1250       if (Path[0] != '/')
1251          Res += "/" + Path;
1252       else
1253          Res += Path;
1254    }
1255
1256    return Res;
1257 }
1258                                                                         /*}}}*/
1259 // URI::SiteOnly - Return the schema and site for the URI               /*{{{*/
1260 // ---------------------------------------------------------------------
1261 /* */
1262 string URI::SiteOnly(const string &URI)
1263 {
1264    ::URI U(URI);
1265    U.User.clear();
1266    U.Password.clear();
1267    U.Path.clear();
1268    U.Port = 0;
1269    return U;
1270 }
1271                                                                         /*}}}*/