apt-pkg/contrib/strutl.cc

   1 // -*- mode: cpp; mode: fold -*-
   2 // Description                                                          /*{{{*/
   3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
   4 /* ######################################################################
   5
   6    String Util - Some useful string functions.
   7
   8    These have been collected from here and there to do all sorts of useful
   9    things to strings. They are useful in file parsers, URI handlers and
  10    especially in APT methods.
  11
  12    This source is placed in the Public Domain, do with it what you will
  13    It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
  14
  15    ##################################################################### */
  16                                                                         /*}}}*/
  17 // Includes                                                             /*{{{*/
  18 #include <config.h>
  19
  20 #include <apt-pkg/strutl.h>
  21 #include <apt-pkg/fileutl.h>
  22 #include <apt-pkg/error.h>
  23
  24 #include <stddef.h>
  25 #include <stdlib.h>
  26 #include <time.h>
  27 #include <string>
  28 #include <vector>
  29 #include <ctype.h>
  30 #include <string.h>
  31 #include <sstream>
  32 #include <stdio.h>
  33 #include <algorithm>
  34 #include <unistd.h>
  35 #include <regex.h>
  36 #include <errno.h>
  37 #include <stdarg.h>
  38 #include <iconv.h>
  39
  40 #include <apti18n.h>
  41                                                                         /*}}}*/
  42 using namespace std;
  43
  44 // Strip - Remove white space from the front and back of a string       /*{{{*/
  45 // ---------------------------------------------------------------------
  46 namespace APT {
  47    namespace String {
  48 std::string Strip(const std::string &s)
  49 {
  50    size_t start = s.find_first_not_of(" \t\n");
  51    // only whitespace
  52    if (start == string::npos)
  53       return "";
  54    size_t end = s.find_last_not_of(" \t\n");
  55    return s.substr(start, end-start+1);
  56 }
  57
  58 bool Endswith(const std::string &s, const std::string &end)
  59 {
  60    if (end.size() > s.size())
  61       return false;
  62    return (s.substr(s.size() - end.size(), s.size()) == end);
  63 }
  64
  65 }
  66 }
  67                                                                         /*}}}*/
  68 // UTF8ToCodeset - Convert some UTF-8 string for some codeset           /*{{{*/
  69 // ---------------------------------------------------------------------
  70 /* This is handy to use before display some information for enduser  */
  71 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
  72 {
  73   iconv_t cd;
  74   const char *inbuf;
  75   char *inptr, *outbuf;
  76   size_t insize, bufsize;
  77   dest->clear();
  78
  79   cd = iconv_open(codeset, "UTF-8");
  80   if (cd == (iconv_t)(-1)) {
  81      // Something went wrong
  82      if (errno == EINVAL)
  83         _error->Error("conversion from 'UTF-8' to '%s' not available",
  84                codeset);
  85      else
  86         perror("iconv_open");
  87
  88      return false;
  89   }
  90
  91   insize = bufsize = orig.size();
  92   inbuf = orig.data();
  93   inptr = (char *)inbuf;
  94   outbuf = new char[bufsize];
  95   size_t lastError = -1;
  96
  97   while (insize != 0)
  98   {
  99      char *outptr = outbuf;
 100      size_t outsize = bufsize;
 101      size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
 102      dest->append(outbuf, outptr - outbuf);
 103      if (err == (size_t)(-1))
 104      {
 105         switch (errno)
 106         {
 107         case EILSEQ:
 108            insize--;
 109            inptr++;
 110            // replace a series of unknown multibytes with a single "?"
 111            if (lastError != insize) {
 112               lastError = insize - 1;
 113               dest->append("?");
 114            }
 115            break;
 116         case EINVAL:
 117            insize = 0;
 118            break;
 119         case E2BIG:
 120            if (outptr == outbuf)
 121            {
 122               bufsize *= 2;
 123               delete[] outbuf;
 124               outbuf = new char[bufsize];
 125            }
 126            break;
 127         }
 128      }
 129   }
 130
 131   delete[] outbuf;
 132
 133   iconv_close(cd);
 134
 135   return true;
 136 }
 137                                                                         /*}}}*/
 138 // strstrip - Remove white space from the front and back of a string    /*{{{*/
 139 // ---------------------------------------------------------------------
 140 /* This is handy to use when parsing a file. It also removes \n's left
 141    over from fgets and company */
 142 char *_strstrip(char *String)
 143 {
 144    for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
 145
 146    if (*String == 0)
 147       return String;
 148    return _strrstrip(String);
 149 }
 150                                                                         /*}}}*/
 151 // strrstrip - Remove white space from the back of a string     /*{{{*/
 152 // ---------------------------------------------------------------------
 153 char *_strrstrip(char *String)
 154 {
 155    char *End = String + strlen(String) - 1;
 156    for (;End != String - 1 && (*End == ' ' || *End == '\t' || *End == '\n' ||
 157                                *End == '\r'); End--);
 158    End++;
 159    *End = 0;
 160    return String;
 161 }
 162                                                                         /*}}}*/
 163 // strtabexpand - Converts tabs into 8 spaces                           /*{{{*/
 164 // ---------------------------------------------------------------------
 165 /* */
 166 char *_strtabexpand(char *String,size_t Len)
 167 {
 168    for (char *I = String; I != I + Len && *I != 0; I++)
 169    {
 170       if (*I != '\t')
 171          continue;
 172       if (I + 8 > String + Len)
 173       {
 174          *I = 0;
 175          return String;
 176       }
 177
 178       /* Assume the start of the string is 0 and find the next 8 char
 179          division */
 180       int Len;
 181       if (String == I)
 182          Len = 1;
 183       else
 184          Len = 8 - ((String - I) % 8);
 185       Len -= 2;
 186       if (Len <= 0)
 187       {
 188          *I = ' ';
 189          continue;
 190       }
 191
 192       memmove(I + Len,I + 1,strlen(I) + 1);
 193       for (char *J = I; J + Len != I; *I = ' ', I++);
 194    }
 195    return String;
 196 }
 197                                                                         /*}}}*/
 198 // ParseQuoteWord - Parse a single word out of a string                 /*{{{*/
 199 // ---------------------------------------------------------------------
 200 /* This grabs a single word, converts any % escaped characters to their
 201    proper values and advances the pointer. Double quotes are understood
 202    and striped out as well. This is for URI/URL parsing. It also can
 203    understand [] brackets.*/
 204 bool ParseQuoteWord(const char *&String,string &Res)
 205 {
 206    // Skip leading whitespace
 207    const char *C = String;
 208    for (;*C != 0 && *C == ' '; C++);
 209    if (*C == 0)
 210       return false;
 211
 212    // Jump to the next word
 213    for (;*C != 0 && isspace(*C) == 0; C++)
 214    {
 215       if (*C == '"')
 216       {
 217          C = strchr(C + 1, '"');
 218          if (C == NULL)
 219             return false;
 220       }
 221       if (*C == '[')
 222       {
 223          C = strchr(C + 1, ']');
 224          if (C == NULL)
 225             return false;
 226       }
 227    }
 228
 229    // Now de-quote characters
 230    char Buffer[1024];
 231    char Tmp[3];
 232    const char *Start = String;
 233    char *I;
 234    for (I = Buffer; I < Buffer + sizeof(Buffer) && Start != C; I++)
 235    {
 236       if (*Start == '%' && Start + 2 < C &&
 237           isxdigit(Start[1]) && isxdigit(Start[2]))
 238       {
 239          Tmp[0] = Start[1];
 240          Tmp[1] = Start[2];
 241          Tmp[2] = 0;
 242          *I = (char)strtol(Tmp,0,16);
 243          Start += 3;
 244          continue;
 245       }
 246       if (*Start != '"')
 247          *I = *Start;
 248       else
 249          I--;
 250       Start++;
 251    }
 252    *I = 0;
 253    Res = Buffer;
 254
 255    // Skip ending white space
 256    for (;*C != 0 && isspace(*C) != 0; C++);
 257    String = C;
 258    return true;
 259 }
 260                                                                         /*}}}*/
 261 // ParseCWord - Parses a string like a C "" expression                  /*{{{*/
 262 // ---------------------------------------------------------------------
 263 /* This expects a series of space separated strings enclosed in ""'s.
 264    It concatenates the ""'s into a single string. */
 265 bool ParseCWord(const char *&String,string &Res)
 266 {
 267    // Skip leading whitespace
 268    const char *C = String;
 269    for (;*C != 0 && *C == ' '; C++);
 270    if (*C == 0)
 271       return false;
 272
 273    char Buffer[1024];
 274    char *Buf = Buffer;
 275    if (strlen(String) >= sizeof(Buffer))
 276        return false;
 277
 278    for (; *C != 0; C++)
 279    {
 280       if (*C == '"')
 281       {
 282          for (C++; *C != 0 && *C != '"'; C++)
 283             *Buf++ = *C;
 284
 285          if (*C == 0)
 286             return false;
 287
 288          continue;
 289       }
 290
 291       if (C != String && isspace(*C) != 0 && isspace(C[-1]) != 0)
 292          continue;
 293       if (isspace(*C) == 0)
 294          return false;
 295       *Buf++ = ' ';
 296    }
 297    *Buf = 0;
 298    Res = Buffer;
 299    String = C;
 300    return true;
 301 }
 302                                                                         /*}}}*/
 303 // QuoteString - Convert a string into quoted from                      /*{{{*/
 304 // ---------------------------------------------------------------------
 305 /* */
 306 string QuoteString(const string &Str, const char *Bad)
 307 {
 308    string Res;
 309    for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
 310    {
 311       if (strchr(Bad,*I) != 0 || isprint(*I) == 0 ||
 312           *I == 0x25 || // percent '%' char
 313           *I <= 0x20 || *I >= 0x7F) // control chars
 314       {
 315          char Buf[10];
 316          sprintf(Buf,"%%%02x",(int)*I);
 317          Res += Buf;
 318       }
 319       else
 320          Res += *I;
 321    }
 322    return Res;
 323 }
 324                                                                         /*}}}*/
 325 // DeQuoteString - Convert a string from quoted from                    /*{{{*/
 326 // ---------------------------------------------------------------------
 327 /* This undoes QuoteString */
 328 string DeQuoteString(const string &Str)
 329 {
 330    return DeQuoteString(Str.begin(),Str.end());
 331 }
 332 string DeQuoteString(string::const_iterator const &begin,
 333                         string::const_iterator const &end)
 334 {
 335    string Res;
 336    for (string::const_iterator I = begin; I != end; ++I)
 337    {
 338       if (*I == '%' && I + 2 < end &&
 339           isxdigit(I[1]) && isxdigit(I[2]))
 340       {
 341          char Tmp[3];
 342          Tmp[0] = I[1];
 343          Tmp[1] = I[2];
 344          Tmp[2] = 0;
 345          Res += (char)strtol(Tmp,0,16);
 346          I += 2;
 347          continue;
 348       }
 349       else
 350          Res += *I;
 351    }
 352    return Res;
 353 }
 354
 355                                                                         /*}}}*/
 356 // SizeToStr - Convert a long into a human readable size                /*{{{*/
 357 // ---------------------------------------------------------------------
 358 /* A max of 4 digits are shown before conversion to the next highest unit.
 359    The max length of the string will be 5 chars unless the size is > 10
 360    YottaBytes (E24) */
 361 string SizeToStr(double Size)
 362 {
 363    char S[300];
 364    double ASize;
 365    if (Size >= 0)
 366       ASize = Size;
 367    else
 368       ASize = -1*Size;
 369
 370    /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
 371       ExaBytes, ZettaBytes, YottaBytes */
 372    char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
 373    int I = 0;
 374    while (I <= 8)
 375    {
 376       if (ASize < 100 && I != 0)
 377       {
 378          sprintf(S,"%'.1f %c",ASize,Ext[I]);
 379          break;
 380       }
 381
 382       if (ASize < 10000)
 383       {
 384          sprintf(S,"%'.0f %c",ASize,Ext[I]);
 385          break;
 386       }
 387       ASize /= 1000.0;
 388       I++;
 389    }
 390
 391    return S;
 392 }
 393                                                                         /*}}}*/
 394 // TimeToStr - Convert the time into a string                           /*{{{*/
 395 // ---------------------------------------------------------------------
 396 /* Converts a number of seconds to a hms format */
 397 string TimeToStr(unsigned long Sec)
 398 {
 399    char S[300];
 400
 401    while (1)
 402    {
 403       if (Sec > 60*60*24)
 404       {
 405          //d means days, h means hours, min means minutes, s means seconds
 406          sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
 407          break;
 408       }
 409
 410       if (Sec > 60*60)
 411       {
 412          //h means hours, min means minutes, s means seconds
 413          sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
 414          break;
 415       }
 416
 417       if (Sec > 60)
 418       {
 419          //min means minutes, s means seconds
 420          sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
 421          break;
 422       }
 423
 424       //s means seconds
 425       sprintf(S,_("%lis"),Sec);
 426       break;
 427    }
 428
 429    return S;
 430 }
 431                                                                         /*}}}*/
 432 // SubstVar - Substitute a string for another string                    /*{{{*/
 433 // ---------------------------------------------------------------------
 434 /* This replaces all occurrences of Subst with Contents in Str. */
 435 string SubstVar(const string &Str,const string &Subst,const string &Contents)
 436 {
 437    if (Subst.empty() == true)
 438       return Str;
 439
 440    string::size_type Pos = 0;
 441    string::size_type OldPos = 0;
 442    string Temp;
 443
 444    while (OldPos < Str.length() &&
 445           (Pos = Str.find(Subst,OldPos)) != string::npos)
 446    {
 447       if (OldPos != Pos)
 448          Temp.append(Str, OldPos, Pos - OldPos);
 449       if (Contents.empty() == false)
 450          Temp.append(Contents);
 451       OldPos = Pos + Subst.length();
 452    }
 453
 454    if (OldPos == 0)
 455       return Str;
 456
 457    if (OldPos >= Str.length())
 458       return Temp;
 459    return Temp + string(Str,OldPos);
 460 }
 461 string SubstVar(string Str,const struct SubstVar *Vars)
 462 {
 463    for (; Vars->Subst != 0; Vars++)
 464       Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
 465    return Str;
 466 }
 467                                                                         /*}}}*/
 468 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
 469 // ---------------------------------------------------------------------
 470 /* Returns a string with the supplied separator depth + 1 times in it */
 471 std::string OutputInDepth(const unsigned long Depth, const char* Separator)
 472 {
 473    std::string output = "";
 474    for(unsigned long d=Depth+1; d > 0; d--)
 475       output.append(Separator);
 476    return output;
 477 }
 478                                                                         /*}}}*/
 479 // URItoFileName - Convert the uri into a unique file name              /*{{{*/
 480 // ---------------------------------------------------------------------
 481 /* This converts a URI into a safe filename. It quotes all unsafe characters
 482    and converts / to _ and removes the scheme identifier. The resulting
 483    file name should be unique and never occur again for a different file */
 484 string URItoFileName(const string &URI)
 485 {
 486    // Nuke 'sensitive' items
 487    ::URI U(URI);
 488    U.User.clear();
 489    U.Password.clear();
 490    U.Access.clear();
 491
 492    // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
 493    string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
 494    replace(NewURI.begin(),NewURI.end(),'/','_');
 495    return NewURI;
 496 }
 497                                                                         /*}}}*/
 498 // Base64Encode - Base64 Encoding routine for short strings             /*{{{*/
 499 // ---------------------------------------------------------------------
 500 /* This routine performs a base64 transformation on a string. It was ripped
 501    from wget and then patched and bug fixed.
 502
 503    This spec can be found in rfc2045 */
 504 string Base64Encode(const string &S)
 505 {
 506    // Conversion table.
 507    static char tbl[64] = {'A','B','C','D','E','F','G','H',
 508                           'I','J','K','L','M','N','O','P',
 509                           'Q','R','S','T','U','V','W','X',
 510                           'Y','Z','a','b','c','d','e','f',
 511                           'g','h','i','j','k','l','m','n',
 512                           'o','p','q','r','s','t','u','v',
 513                           'w','x','y','z','0','1','2','3',
 514                           '4','5','6','7','8','9','+','/'};
 515
 516    // Pre-allocate some space
 517    string Final;
 518    Final.reserve((4*S.length() + 2)/3 + 2);
 519
 520    /* Transform the 3x8 bits to 4x6 bits, as required by
 521       base64.  */
 522    for (string::const_iterator I = S.begin(); I < S.end(); I += 3)
 523    {
 524       char Bits[3] = {0,0,0};
 525       Bits[0] = I[0];
 526       if (I + 1 < S.end())
 527          Bits[1] = I[1];
 528       if (I + 2 < S.end())
 529          Bits[2] = I[2];
 530
 531       Final += tbl[Bits[0] >> 2];
 532       Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];
 533
 534       if (I + 1 >= S.end())
 535          break;
 536
 537       Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];
 538
 539       if (I + 2 >= S.end())
 540          break;
 541
 542       Final += tbl[Bits[2] & 0x3f];
 543    }
 544
 545    /* Apply the padding elements, this tells how many bytes the remote
 546       end should discard */
 547    if (S.length() % 3 == 2)
 548       Final += '=';
 549    if (S.length() % 3 == 1)
 550       Final += "==";
 551
 552    return Final;
 553 }
 554                                                                         /*}}}*/
 555 // stringcmp - Arbitrary string compare                                 /*{{{*/
 556 // ---------------------------------------------------------------------
 557 /* This safely compares two non-null terminated strings of arbitrary
 558    length */
 559 int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 560 {
 561    for (; A != AEnd && B != BEnd; A++, B++)
 562       if (*A != *B)
 563          break;
 564
 565    if (A == AEnd && B == BEnd)
 566       return 0;
 567    if (A == AEnd)
 568       return 1;
 569    if (B == BEnd)
 570       return -1;
 571    if (*A < *B)
 572       return -1;
 573    return 1;
 574 }
 575
 576 #if __GNUC__ >= 3
 577 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 578               const char *B,const char *BEnd)
 579 {
 580    for (; A != AEnd && B != BEnd; A++, B++)
 581       if (*A != *B)
 582          break;
 583
 584    if (A == AEnd && B == BEnd)
 585       return 0;
 586    if (A == AEnd)
 587       return 1;
 588    if (B == BEnd)
 589       return -1;
 590    if (*A < *B)
 591       return -1;
 592    return 1;
 593 }
 594 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 595               string::const_iterator B,string::const_iterator BEnd)
 596 {
 597    for (; A != AEnd && B != BEnd; A++, B++)
 598       if (*A != *B)
 599          break;
 600
 601    if (A == AEnd && B == BEnd)
 602       return 0;
 603    if (A == AEnd)
 604       return 1;
 605    if (B == BEnd)
 606       return -1;
 607    if (*A < *B)
 608       return -1;
 609    return 1;
 610 }
 611 #endif
 612                                                                         /*}}}*/
 613 // stringcasecmp - Arbitrary case insensitive string compare            /*{{{*/
 614 // ---------------------------------------------------------------------
 615 /* */
 616 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 617 {
 618    for (; A != AEnd && B != BEnd; A++, B++)
 619       if (tolower_ascii(*A) != tolower_ascii(*B))
 620          break;
 621
 622    if (A == AEnd && B == BEnd)
 623       return 0;
 624    if (A == AEnd)
 625       return 1;
 626    if (B == BEnd)
 627       return -1;
 628    if (tolower_ascii(*A) < tolower_ascii(*B))
 629       return -1;
 630    return 1;
 631 }
 632 #if __GNUC__ >= 3
 633 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 634                   const char *B,const char *BEnd)
 635 {
 636    for (; A != AEnd && B != BEnd; A++, B++)
 637       if (tolower_ascii(*A) != tolower_ascii(*B))
 638          break;
 639
 640    if (A == AEnd && B == BEnd)
 641       return 0;
 642    if (A == AEnd)
 643       return 1;
 644    if (B == BEnd)
 645       return -1;
 646    if (tolower_ascii(*A) < tolower_ascii(*B))
 647       return -1;
 648    return 1;
 649 }
 650 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 651                   string::const_iterator B,string::const_iterator BEnd)
 652 {
 653    for (; A != AEnd && B != BEnd; A++, B++)
 654       if (tolower_ascii(*A) != tolower_ascii(*B))
 655          break;
 656
 657    if (A == AEnd && B == BEnd)
 658       return 0;
 659    if (A == AEnd)
 660       return 1;
 661    if (B == BEnd)
 662       return -1;
 663    if (tolower_ascii(*A) < tolower_ascii(*B))
 664       return -1;
 665    return 1;
 666 }
 667 #endif
 668                                                                         /*}}}*/
 669 // LookupTag - Lookup the value of a tag in a taged string              /*{{{*/
 670 // ---------------------------------------------------------------------
 671 /* The format is like those used in package files and the method
 672    communication system */
 673 string LookupTag(const string &Message,const char *Tag,const char *Default)
 674 {
 675    // Look for a matching tag.
 676    int Length = strlen(Tag);
 677    for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
 678    {
 679       // Found the tag
 680       if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
 681       {
 682          // Find the end of line and strip the leading/trailing spaces
 683          string::const_iterator J;
 684          I += Length + 1;
 685          for (; isspace(*I) != 0 && I < Message.end(); ++I);
 686          for (J = I; *J != '\n' && J < Message.end(); ++J);
 687          for (; J > I && isspace(J[-1]) != 0; --J);
 688
 689          return string(I,J);
 690       }
 691
 692       for (; *I != '\n' && I < Message.end(); ++I);
 693    }
 694
 695    // Failed to find a match
 696    if (Default == 0)
 697       return string();
 698    return Default;
 699 }
 700                                                                         /*}}}*/
 701 // StringToBool - Converts a string into a boolean                      /*{{{*/
 702 // ---------------------------------------------------------------------
 703 /* This inspects the string to see if it is true or if it is false and
 704    then returns the result. Several varients on true/false are checked. */
 705 int StringToBool(const string &Text,int Default)
 706 {
 707    char *End;
 708    int Res = strtol(Text.c_str(),&End,0);
 709    if (End != Text.c_str() && Res >= 0 && Res <= 1)
 710       return Res;
 711
 712    // Check for positives
 713    if (strcasecmp(Text.c_str(),"no") == 0 ||
 714        strcasecmp(Text.c_str(),"false") == 0 ||
 715        strcasecmp(Text.c_str(),"without") == 0 ||
 716        strcasecmp(Text.c_str(),"off") == 0 ||
 717        strcasecmp(Text.c_str(),"disable") == 0)
 718       return 0;
 719
 720    // Check for negatives
 721    if (strcasecmp(Text.c_str(),"yes") == 0 ||
 722        strcasecmp(Text.c_str(),"true") == 0 ||
 723        strcasecmp(Text.c_str(),"with") == 0 ||
 724        strcasecmp(Text.c_str(),"on") == 0 ||
 725        strcasecmp(Text.c_str(),"enable") == 0)
 726       return 1;
 727
 728    return Default;
 729 }
 730                                                                         /*}}}*/
 731 // TimeRFC1123 - Convert a time_t into RFC1123 format                   /*{{{*/
 732 // ---------------------------------------------------------------------
 733 /* This converts a time_t into a string time representation that is
 734    year 2000 complient and timezone neutral */
 735 string TimeRFC1123(time_t Date)
 736 {
 737    struct tm Conv;
 738    if (gmtime_r(&Date, &Conv) == NULL)
 739       return "";
 740
 741    char Buf[300];
 742    const char *Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
 743    const char *Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
 744                           "Aug","Sep","Oct","Nov","Dec"};
 745
 746    snprintf(Buf, sizeof(Buf), "%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
 747            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
 748            Conv.tm_min,Conv.tm_sec);
 749    return Buf;
 750 }
 751                                                                         /*}}}*/
 752 // ReadMessages - Read messages from the FD                             /*{{{*/
 753 // ---------------------------------------------------------------------
 754 /* This pulls full messages from the input FD into the message buffer.
 755    It assumes that messages will not pause during transit so no
 756    fancy buffering is used.
 757
 758    In particular: this reads blocks from the input until it believes
 759    that it's run out of input text.  Each block is terminated by a
 760    double newline ('\n' followed by '\n').  As noted below, there is a
 761    bug in this code: it assumes that all the blocks have been read if
 762    it doesn't see additional text in the buffer after the last one is
 763    parsed, which will cause it to lose blocks if the last block
 764    coincides with the end of the buffer.
 765  */
 766 bool ReadMessages(int Fd, vector<string> &List)
 767 {
 768    char Buffer[64000];
 769    char *End = Buffer;
 770    // Represents any left-over from the previous iteration of the
 771    // parse loop.  (i.e., if a message is split across the end
 772    // of the buffer, it goes here)
 773    string PartialMessage;
 774
 775    while (1)
 776    {
 777       int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
 778       if (Res < 0 && errno == EINTR)
 779          continue;
 780
 781       // Process is dead, this is kind of bad..
 782       if (Res == 0)
 783          return false;
 784
 785       // No data
 786       if (Res < 0 && errno == EAGAIN)
 787          return true;
 788       if (Res < 0)
 789          return false;
 790
 791       End += Res;
 792
 793       // Look for the end of the message
 794       for (char *I = Buffer; I + 1 < End; I++)
 795       {
 796          if (I[1] != '\n' ||
 797                (I[0] != '\n' && strncmp(I, "\r\n\r\n", 4) != 0))
 798             continue;
 799
 800          // Pull the message out
 801          string Message(Buffer,I-Buffer);
 802          PartialMessage += Message;
 803
 804          // Fix up the buffer
 805          for (; I < End && (*I == '\n' || *I == '\r'); ++I);
 806          End -= I-Buffer;
 807          memmove(Buffer,I,End-Buffer);
 808          I = Buffer;
 809
 810          List.push_back(PartialMessage);
 811          PartialMessage.clear();
 812       }
 813       if (End != Buffer)
 814         {
 815           // If there's text left in the buffer, store it
 816           // in PartialMessage and throw the rest of the buffer
 817           // away.  This allows us to handle messages that
 818           // are longer than the static buffer size.
 819           PartialMessage += string(Buffer, End);
 820           End = Buffer;
 821         }
 822       else
 823         {
 824           // BUG ALERT: if a message block happens to end at a
 825           // multiple of 64000 characters, this will cause it to
 826           // terminate early, leading to a badly formed block and
 827           // probably crashing the method.  However, this is the only
 828           // way we have to find the end of the message block.  I have
 829           // an idea of how to fix this, but it will require changes
 830           // to the protocol (essentially to mark the beginning and
 831           // end of the block).
 832           //
 833           //  -- dburrows 2008-04-02
 834           return true;
 835         }
 836
 837       if (WaitFd(Fd) == false)
 838          return false;
 839    }
 840 }
 841                                                                         /*}}}*/
 842 // MonthConv - Converts a month string into a number                    /*{{{*/
 843 // ---------------------------------------------------------------------
 844 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
 845    Made it a bit more robust with a few tolower_ascii though. */
 846 static int MonthConv(char *Month)
 847 {
 848    switch (tolower_ascii(*Month))
 849    {
 850       case 'a':
 851       return tolower_ascii(Month[1]) == 'p'?3:7;
 852       case 'd':
 853       return 11;
 854       case 'f':
 855       return 1;
 856       case 'j':
 857       if (tolower_ascii(Month[1]) == 'a')
 858          return 0;
 859       return tolower_ascii(Month[2]) == 'n'?5:6;
 860       case 'm':
 861       return tolower_ascii(Month[2]) == 'r'?2:4;
 862       case 'n':
 863       return 10;
 864       case 'o':
 865       return 9;
 866       case 's':
 867       return 8;
 868
 869       // Pretend it is January..
 870       default:
 871       return 0;
 872    }
 873 }
 874                                                                         /*}}}*/
 875 // timegm - Internal timegm if the gnu version is not available         /*{{{*/
 876 // ---------------------------------------------------------------------
 877 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
 878    than local timezone (mktime assumes the latter).
 879
 880    This function is a nonstandard GNU extension that is also present on
 881    the BSDs and maybe other systems. For others we follow the advice of
 882    the manpage of timegm and use his portable replacement. */
 883 #ifndef HAVE_TIMEGM
 884 static time_t timegm(struct tm *t)
 885 {
 886    char *tz = getenv("TZ");
 887    setenv("TZ", "", 1);
 888    tzset();
 889    time_t ret = mktime(t);
 890    if (tz)
 891       setenv("TZ", tz, 1);
 892    else
 893       unsetenv("TZ");
 894    tzset();
 895    return ret;
 896 }
 897 #endif
 898                                                                         /*}}}*/
 899 // FullDateToTime - Converts a HTTP1.1 full date strings into a time_t  /*{{{*/
 900 // ---------------------------------------------------------------------
/* Tries to parse a full date as specified in RFC2616 Section 3.3.1
   with one exception: all timezones (%Z) are accepted. The protocol
   says that it MUST be GMT, but this is equal to UTC, which we will
   encounter from time to time (e.g. in Release files), so we accept all
   here and just assume it is GMT (or UTC) later on */
 906 bool RFC1123StrToTime(const char* const str,time_t &time)
 907 {
 908    struct tm Tm;
 909    setlocale (LC_ALL,"C");
 910    bool const invalid =
 911    // Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
 912       (strptime(str, "%a, %d %b %Y %H:%M:%S %Z", &Tm) == NULL &&
 913    // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
 914        strptime(str, "%A, %d-%b-%y %H:%M:%S %Z", &Tm) == NULL &&
 915    // Sun Nov  6 08:49:37 1994       ; ANSI C's asctime() format
 916        strptime(str, "%a %b %d %H:%M:%S %Y", &Tm) == NULL);
 917    setlocale (LC_ALL,"");
 918    if (invalid == true)
 919       return false;
 920
 921    time = timegm(&Tm);
 922    return true;
 923 }
 924                                                                         /*}}}*/
 925 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t    /*{{{*/
 926 // ---------------------------------------------------------------------
 927 /* */
 928 bool FTPMDTMStrToTime(const char* const str,time_t &time)
 929 {
 930    struct tm Tm;
 931    // MDTM includes no whitespaces but recommend and ignored by strptime
 932    if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
 933       return false;
 934
 935    time = timegm(&Tm);
 936    return true;
 937 }
 938                                                                         /*}}}*/
 939 // StrToTime - Converts a string into a time_t                          /*{{{*/
 940 // ---------------------------------------------------------------------
/* This handles all 3 popular time formats including RFC 1123, RFC 1036
   and the C library asctime format. It requires the GNU library function
   'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
   reason the C library does not provide any such function :< This also
   handles the weird, but unambiguous FTP time format. */
 946 bool StrToTime(const string &Val,time_t &Result)
 947 {
 948    struct tm Tm;
 949    char Month[10];
 950
 951    // Skip the day of the week
 952    const char *I = strchr(Val.c_str(), ' ');
 953
 954    // Handle RFC 1123 time
 955    Month[0] = 0;
 956    if (sscanf(I," %2d %3s %4d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
 957               &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 958    {
 959       // Handle RFC 1036 time
 960       if (sscanf(I," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,
 961                  &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
 962          Tm.tm_year += 1900;
 963       else
 964       {
 965          // asctime format
 966          if (sscanf(I," %3s %2d %2d:%2d:%2d %4d",Month,&Tm.tm_mday,
 967                     &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
 968          {
 969             // 'ftp' time
 970             if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
 971                        &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 972                return false;
 973             Tm.tm_mon--;
 974          }
 975       }
 976    }
 977
 978    Tm.tm_isdst = 0;
 979    if (Month[0] != 0)
 980       Tm.tm_mon = MonthConv(Month);
 981    else
 982       Tm.tm_mon = 0; // we don't have a month, so pick something
 983    Tm.tm_year -= 1900;
 984
 985    // Convert to local time and then to GMT
 986    Result = timegm(&Tm);
 987    return true;
 988 }
 989                                                                         /*}}}*/
 990 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
 991 // ---------------------------------------------------------------------
 992 /* This is used in decoding the crazy fixed length string headers in
 993    tar and ar files. */
 994 bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
 995 {
 996    char S[30];
 997    if (Len >= sizeof(S))
 998       return false;
 999    memcpy(S,Str,Len);
1000    S[Len] = 0;
1001
1002    // All spaces is a zero
1003    Res = 0;
1004    unsigned I;
1005    for (I = 0; S[I] == ' '; I++);
1006    if (S[I] == 0)
1007       return true;
1008
1009    char *End;
1010    Res = strtoul(S,&End,Base);
1011    if (End == S)
1012       return false;
1013
1014    return true;
1015 }
1016                                                                         /*}}}*/
1017 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
1018 // ---------------------------------------------------------------------
1019 /* This is used in decoding the crazy fixed length string headers in
1020    tar and ar files. */
/* Parses the first Len bytes of Str (which need not be null terminated)
   as an unsigned number in the given Base and stores it in Res.
   Returns false if Len does not fit the internal 30 byte buffer or if no
   digits could be converted. A field made up only of blanks yields 0. */
bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
{
   char Buffer[30];
   if (sizeof(Buffer) <= Len)
      return false;

   // Work on a terminated private copy of the fixed width field
   memcpy(Buffer,Str,Len);
   Buffer[Len] = '\0';

   // All spaces is a zero
   Res = 0;
   const char *Scan = Buffer;
   while (*Scan == ' ')
      ++Scan;
   if (*Scan == '\0')
      return true;

   char *Stop = NULL;
   Res = strtoull(Buffer,&Stop,Base);
   return Stop != Buffer;
}
1043                                                                         /*}}}*/
1044
1045 // Base256ToNum - Convert a fixed length binary to a number             /*{{{*/
1046 // ---------------------------------------------------------------------
/* This is used in decoding the base-256 encoded fixed length fields in
   tar files */
/* Decodes a big-endian binary field of Len bytes. The top bit of the
   first byte is the marker for this encoding; if it is not set (or the
   field is empty) false is returned and Res is left alone. Otherwise the
   remaining 7 bits of the first byte and all following bytes form the
   value. Bits that do not fit into unsigned long are shifted out. */
bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
{
   if (Len == 0 || (Str[0] & 0x80) == 0)
      return false;

   Res = Str[0] & 0x7F;
   // Go through unsigned char: plain char may be signed, in which case
   // bytes >= 0x80 would be sign extended and corrupt the result
   for (unsigned int i = 1; i < Len; ++i)
      Res = (Res << 8) | static_cast<unsigned char>(Str[i]);
   return true;
}
1061                                                                         /*}}}*/
1062 // HexDigit - Convert a hex character into an integer                   /*{{{*/
1063 // ---------------------------------------------------------------------
1064 /* Helper for Hex2Num */
/* Returns the numeric value (0-15) of the hexadecimal digit c, accepting
   both upper and lower case letters. Anything else maps to 0, so callers
   that care have to validate the character themselves. */
static int HexDigit(int c)
{
   int Value = 0;
   if ('A' <= c && c <= 'F')
      Value = 10 + (c - 'A');
   else if ('a' <= c && c <= 'f')
      Value = 10 + (c - 'a');
   else if ('0' <= c && c <= '9')
      Value = c - '0';
   return Value;
}
1075                                                                         /*}}}*/
1076 // Hex2Num - Convert a long hex number into a buffer                    /*{{{*/
1077 // ---------------------------------------------------------------------
1078 /* The length of the buffer must be exactly 1/2 the length of the string. */
1079 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
1080 {
1081    if (Str.length() != Length*2)
1082       return false;
1083
1084    // Convert each digit. We store it in the same order as the string
1085    int J = 0;
1086    for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
1087    {
1088       if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
1089          return false;
1090
1091       Num[J] = HexDigit(I[0]) << 4;
1092       Num[J] += HexDigit(I[1]);
1093    }
1094
1095    return true;
1096 }
1097                                                                         /*}}}*/
1098 // TokSplitString - Split a string up by a given token                  /*{{{*/
1099 // ---------------------------------------------------------------------
1100 /* This is intended to be a faster splitter, it does not use dynamic
1101    memories. Input is changed to insert nulls at each token location. */
/* Splits Input in place at every occurrence of Tok. Whitespace around the
   pieces is trimmed, null bytes are written into Input to terminate each
   piece, and pointers to the pieces are stored in List followed by a null
   pointer. List must have room for ListMax entries including that
   terminator; if the pieces do not fit, the list is terminated early and
   false is returned. */
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   // Without room for even the terminator nothing may be written to List
   if (ListMax == 0)
      return false;

   // Strip any leading spaces (isspace needs an unsigned char value)
   char *Start = Input;
   char *Stop = Start + strlen(Start);
   for (; *Start != 0 && isspace(static_cast<unsigned char>(*Start)) != 0; Start++);

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      for (; Pos != Stop && *Pos != Tok; Pos++);

      // Back remove spaces
      char *End = Pos;
      for (; End > Start && (End[-1] == Tok ||
                             isspace(static_cast<unsigned char>(End[-1])) != 0); End--);
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
         // The last slot is needed for the terminator
         List[Count-1] = 0;
         return false;
      }

      // Advance pos past the separator, surrounding blanks and the null
      // bytes written above
      for (; Pos != Stop && (*Pos == Tok ||
                             isspace(static_cast<unsigned char>(*Pos)) != 0 ||
                             *Pos == 0); Pos++);
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
1137                                                                         /*}}}*/
1138 // VectorizeString - Split a string up into a vector of strings         /*{{{*/
1139 // ---------------------------------------------------------------------
/* This can be used to split a given string up into a vector, so the
   purpose is the same as in the method above. This one is a bit slower,
   but the advantage is that we get a vector we can iterate over */
/* Splits haystack at every occurrence of split and returns the pieces in
   order. Empty pieces between adjacent separators are kept, but a
   separator at the very end does not produce a trailing empty piece.
   An empty haystack gives an empty vector. */
vector<string> VectorizeString(string const &haystack, char const &split)
{
   vector<string> exploded;
   if (haystack.empty() == true)
      return exploded;

   string::const_iterator start = haystack.begin();
   while (true)
   {
      // Find the end of the current piece
      string::const_iterator end = start;
      for (; end != haystack.end() && *end != split; ++end);
      exploded.push_back(string(start, end));

      // Only step over a separator that is really there, so that no
      // iterator beyond haystack.end() is ever formed
      if (end == haystack.end())
         break;
      start = end + 1;
      if (start == haystack.end())
         break;
   }
   return exploded;
}
1157                                                                         /*}}}*/
1158 // StringSplit - split a string into a string vector by token           /*{{{*/
1159 // ---------------------------------------------------------------------
1160 /* See header for details.
1161  */
/* Cuts s at every occurrence of the separator string sep. At most
   maxsplit pieces are produced; once that many exist the last piece holds
   the entire unsplit remainder. An empty separator is considered bogus
   and gives an empty vector. */
vector<string> StringSplit(std::string const &s, std::string const &sep,
                           unsigned int maxsplit)
{
   vector<string> pieces;

   // no separator given, this is bogus
   if (sep.empty() == true)
      return pieces;

   size_t begin = 0;
   for (;;)
   {
      size_t const hit = s.find(sep, begin);
      pieces.push_back(s.substr(begin, hit - begin));

      // if maxsplit is reached, the remaining string is the last item
      if (pieces.size() >= maxsplit)
      {
         pieces.back() = s.substr(begin);
         break;
      }

      // the piece just stored was the final one
      if (hit == string::npos)
         break;
      begin = hit + sep.size();
   }
   return pieces;
}
1188                                                                         /*}}}*/
1189 // RegexChoice - Simple regex list/list matcher                         /*{{{*/
1190 // ---------------------------------------------------------------------
1191 /* */
1192 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1193                       const char **ListEnd)
1194 {
1195    for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1196       R->Hit = false;
1197
1198    unsigned long Hits = 0;
1199    for (; ListBegin < ListEnd; ++ListBegin)
1200    {
1201       // Check if the name is a regex
1202       const char *I;
1203       bool Regex = true;
1204       for (I = *ListBegin; *I != 0; I++)
1205          if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1206             break;
1207       if (*I == 0)
1208          Regex = false;
1209
1210       // Compile the regex pattern
1211       regex_t Pattern;
1212       if (Regex == true)
1213          if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1214                      REG_NOSUB) != 0)
1215             Regex = false;
1216
1217       // Search the list
1218       bool Done = false;
1219       for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1220       {
1221          if (R->Str[0] == 0)
1222             continue;
1223
1224          if (strcasecmp(R->Str,*ListBegin) != 0)
1225          {
1226             if (Regex == false)
1227                continue;
1228             if (regexec(&Pattern,R->Str,0,0,0) != 0)
1229                continue;
1230          }
1231          Done = true;
1232
1233          if (R->Hit == false)
1234             Hits++;
1235
1236          R->Hit = true;
1237       }
1238
1239       if (Regex == true)
1240          regfree(&Pattern);
1241
1242       if (Done == false)
1243          _error->Warning(_("Selection %s not found"),*ListBegin);
1244    }
1245
1246    return Hits;
1247 }
1248                                                                         /*}}}*/
1249 // {str,io}printf - C format string outputter to C++ strings/iostreams  /*{{{*/
1250 // ---------------------------------------------------------------------
1251 /* This is used to make the internationalization strings easier to translate
1252    and to allow reordering of parameters */
/* Formats one attempt into a temporary buffer of `size` bytes. If the
   result fits it is written to out and true is returned. Otherwise size
   is raised (to the exact amount needed if vsnprintf reported it, doubled
   if not) and false is returned so the caller can retry. args is consumed
   by the attempt; the caller must restart it before calling again. */
static bool iovprintf(ostream &out, const char *format,
                      va_list &args, ssize_t &size) {
   // The vector releases the buffer on every path and reports allocation
   // failure by exception instead of handing vsnprintf a null pointer
   std::vector<char> S(size);
   ssize_t const n = vsnprintf(&S[0], size, format, args);
   if (n > -1 && n < size) {
      out << &S[0];
      return true;
   }

   if (n > -1)
      size = n + 1;
   else
      size *= 2;
   return false;
}
/* printf-style formatting appended to the stream out. */
void ioprintf(ostream &out,const char *format,...)
{
   ssize_t size = 400;
   bool done = false;
   while (done == false) {
      // every va_start needs its va_end, also on the successful attempt
      va_list args;
      va_start(args,format);
      done = iovprintf(out, format, args, size);
      va_end(args);
   }
}
/* printf-style formatting; the result replaces the content of out. */
void strprintf(string &out,const char *format,...)
{
   ssize_t size = 400;
   std::ostringstream outstr;
   bool done = false;
   while (done == false) {
      // every va_start needs its va_end, also on the successful attempt
      va_list args;
      va_start(args,format);
      done = iovprintf(outstr, format, args, size);
      va_end(args);
   }
   out = outstr.str();
}
1294                                                                         /*}}}*/
1295 // safe_snprintf - Safer snprintf                                       /*{{{*/
1296 // ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == End. This is a better alternative to using
   consecutive snprintfs. */
/* Formats into [Buffer,End). Returns a pointer to the terminating null
   byte of what was written, or End if there was no room at all, the
   output had to be truncated or formatting failed. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   // Compare lengths rather than pointers: Buffer + Did may lie far
   // outside the buffer, and such a pointer must not even be formed
   if (Did < 0 || Did > End - Buffer)
      return End;
   return Buffer + Did;
}
1316                                                                         /*}}}*/
1317 // StripEpoch - Remove the version "epoch" from a version string        /*{{{*/
1318 // ---------------------------------------------------------------------
/* Returns everything after the first colon of VerStr, or VerStr itself
   if it contains no colon. */
string StripEpoch(const string &VerStr)
{
   string::size_type const Colon = VerStr.find(':');
   return (Colon == string::npos) ? VerStr : VerStr.substr(Colon + 1);
}
1326                                                                         /*}}}*/
1327 // tolower_ascii - tolower() function that ignores the locale           /*{{{*/
1328 // ---------------------------------------------------------------------
/* This little function is the most called method we have and therefore
   tries to do the absolute minimum - it is notably faster than the
   standard tolower/toupper and as a bonus avoids problems with different
   locales - we only operate on ascii chars anyway. */
/* Maps 'A'-'Z' to 'a'-'z' and returns every other value unchanged. */
int tolower_ascii(int const c)
{
   bool const IsUpper = ('A' <= c && c <= 'Z');
   return IsUpper ? c + ('a' - 'A') : c;
}
1339                                                                         /*}}}*/
1340
1341 // CheckDomainList - See if Host is in a , separate list                /*{{{*/
1342 // ---------------------------------------------------------------------
/* The domain list is a comma separated list of domains that are suffix
   matched against the argument */
1345 bool CheckDomainList(const string &Host,const string &List)
1346 {
1347    string::const_iterator Start = List.begin();
1348    for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
1349    {
1350       if (Cur < List.end() && *Cur != ',')
1351          continue;
1352
1353       // Match the end of the string..
1354       if ((Host.size() >= (unsigned)(Cur - Start)) &&
1355           Cur - Start != 0 &&
1356           stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1357          return true;
1358
1359       Start = Cur + 1;
1360    }
1361    return false;
1362 }
1363                                                                         /*}}}*/
1364 // strv_length - Return the length of a NULL-terminated string array    /*{{{*/
1365 // ---------------------------------------------------------------------
1366 /* */
/* Counts the entries of str_array that precede its terminating NULL
   pointer. */
size_t strv_length(const char **str_array)
{
   const char **Cursor = str_array;
   while (*Cursor != NULL)
      ++Cursor;
   return static_cast<size_t>(Cursor - str_array);
}
1375
1376 // DeEscapeString - unescape (\0XX and \xXX) from a string              /*{{{*/
1377 // ---------------------------------------------------------------------
1378 /* */
/* Returns a copy of input with backslash escapes resolved:
     \\    becomes a single backslash
     \0NN  becomes the byte with octal value NN
     \xNN  becomes the byte with hexadecimal value NN
   Any other escaped character is dropped together with its backslash, as
   is a lone backslash at the very end. A \0 or \x without two following
   characters is dropped as well and whatever follows is copied normally. */
string DeEscapeString(const string &input)
{
   char tmp[3];
   string output;
   for (string::const_iterator it = input.begin(); it != input.end(); ++it)
   {
      // just copy non-escape chars
      if (*it != '\\')
      {
         output += *it;
         continue;
      }

      // ensure we have a char to read
      if (it + 1 == input.end())
         continue;

      // read it
      ++it;
      switch (*it)
      {
         // deal with double escape
         case '\\':
            output += '\\';
            break;
         case '0':
         case 'x':
            // Both it[1] and it[2] must be real characters of the string,
            // i.e. there must be more than two left counting *it itself
            if (input.end() - it > 2) {
               tmp[0] = it[1];
               tmp[1] = it[2];
               tmp[2] = 0;
               output += static_cast<char>(strtol(tmp, 0, (*it == '0') ? 8 : 16));
               it += 2;
            }
            break;
         default:
            // FIXME: raise exception here?
            break;
      }
   }
   return output;
}
1437                                                                         /*}}}*/
1438 // URI::CopyFrom - Copy from an object                                  /*{{{*/
1439 // ---------------------------------------------------------------------
1440 /* This parses the URI into all of its components */
1441 void URI::CopyFrom(const string &U)
1442 {
1443    string::const_iterator I = U.begin();
1444
1445    // Locate the first colon, this separates the scheme
1446    for (; I < U.end() && *I != ':' ; ++I);
1447    string::const_iterator FirstColon = I;
1448
1449    /* Determine if this is a host type URI with a leading double //
1450       and then search for the first single / */
1451    string::const_iterator SingleSlash = I;
1452    if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1453       SingleSlash += 3;
1454
1455    /* Find the / indicating the end of the hostname, ignoring /'s in the
1456       square brackets */
1457    bool InBracket = false;
1458    for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
1459    {
1460       if (*SingleSlash == '[')
1461          InBracket = true;
1462       if (InBracket == true && *SingleSlash == ']')
1463          InBracket = false;
1464    }
1465
1466    if (SingleSlash > U.end())
1467       SingleSlash = U.end();
1468
1469    // We can now write the access and path specifiers
1470    Access.assign(U.begin(),FirstColon);
1471    if (SingleSlash != U.end())
1472       Path.assign(SingleSlash,U.end());
1473    if (Path.empty() == true)
1474       Path = "/";
1475
1476    // Now we attempt to locate a user:pass@host fragment
1477    if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1478       FirstColon += 3;
1479    else
1480       FirstColon += 1;
1481    if (FirstColon >= U.end())
1482       return;
1483
1484    if (FirstColon > SingleSlash)
1485       FirstColon = SingleSlash;
1486
1487    // Find the colon...
1488    I = FirstColon + 1;
1489    if (I > SingleSlash)
1490       I = SingleSlash;
1491    for (; I < SingleSlash && *I != ':'; ++I);
1492    string::const_iterator SecondColon = I;
1493
1494    // Search for the @ after the colon
1495    for (; I < SingleSlash && *I != '@'; ++I);
1496    string::const_iterator At = I;
1497
1498    // Now write the host and user/pass
1499    if (At == SingleSlash)
1500    {
1501       if (FirstColon < SingleSlash)
1502          Host.assign(FirstColon,SingleSlash);
1503    }
1504    else
1505    {
1506       Host.assign(At+1,SingleSlash);
1507       // username and password must be encoded (RFC 3986)
1508       User.assign(DeQuoteString(FirstColon,SecondColon));
1509       if (SecondColon < At)
1510          Password.assign(DeQuoteString(SecondColon+1,At));
1511    }
1512
1513    // Now we parse the RFC 2732 [] hostnames.
1514    unsigned long PortEnd = 0;
1515    InBracket = false;
1516    for (unsigned I = 0; I != Host.length();)
1517    {
1518       if (Host[I] == '[')
1519       {
1520          InBracket = true;
1521          Host.erase(I,1);
1522          continue;
1523       }
1524
1525       if (InBracket == true && Host[I] == ']')
1526       {
1527          InBracket = false;
1528          Host.erase(I,1);
1529          PortEnd = I;
1530          continue;
1531       }
1532       I++;
1533    }
1534
1535    // Tsk, weird.
1536    if (InBracket == true)
1537    {
1538       Host.clear();
1539       return;
1540    }
1541
1542    // Now we parse off a port number from the hostname
1543    Port = 0;
1544    string::size_type Pos = Host.rfind(':');
1545    if (Pos == string::npos || Pos < PortEnd)
1546       return;
1547
1548    Port = atoi(string(Host,Pos+1).c_str());
1549    Host.assign(Host,0,Pos);
1550 }
1551                                                                         /*}}}*/
1552 // URI::operator string - Convert the URI to a string                   /*{{{*/
1553 // ---------------------------------------------------------------------
1554 /* */
1555 URI::operator string()
1556 {
1557    string Res;
1558
1559    if (Access.empty() == false)
1560       Res = Access + ':';
1561
1562    if (Host.empty() == false)
1563    {
1564       if (Access.empty() == false)
1565          Res += "//";
1566
1567       if (User.empty() == false)
1568       {
1569          // FIXME: Technically userinfo is permitted even less
1570          // characters than these, but this is not conveniently
1571          // expressed with a blacklist.
1572          Res += QuoteString(User, ":/?#[]@");
1573          if (Password.empty() == false)
1574             Res += ":" + QuoteString(Password, ":/?#[]@");
1575          Res += "@";
1576       }
1577
1578       // Add RFC 2732 escaping characters
1579       if (Access.empty() == false &&
1580           (Host.find('/') != string::npos || Host.find(':') != string::npos))
1581          Res += '[' + Host + ']';
1582       else
1583          Res += Host;
1584
1585       if (Port != 0)
1586       {
1587          char S[30];
1588          sprintf(S,":%u",Port);
1589          Res += S;
1590       }
1591    }
1592
1593    if (Path.empty() == false)
1594    {
1595       if (Path[0] != '/')
1596          Res += "/" + Path;
1597       else
1598          Res += Path;
1599    }
1600
1601    return Res;
1602 }
1603                                                                         /*}}}*/
1604 // URI::SiteOnly - Return the schema and site for the URI               /*{{{*/
1605 // ---------------------------------------------------------------------
1606 /* */
1607 string URI::SiteOnly(const string &URI)
1608 {
1609    ::URI U(URI);
1610    U.User.clear();
1611    U.Password.clear();
1612    U.Path.clear();
1613    return U;
1614 }
1615                                                                         /*}}}*/
1616 // URI::NoUserPassword - Return the schema, site and path for the URI   /*{{{*/
1617 // ---------------------------------------------------------------------
1618 /* */
1619 string URI::NoUserPassword(const string &URI)
1620 {
1621    ::URI U(URI);
1622    U.User.clear();
1623    U.Password.clear();
1624    return U;
1625 }
1626                                                                         /*}}}*/