apt-pkg/tagfile.cc

   1 // -*- mode: cpp; mode: fold -*-
   2 // Description                                                          /*{{{*/
   3 // $Id: tagfile.cc,v 1.37.2.2 2003/12/31 16:02:30 mdz Exp $
   4 /* ######################################################################
   5
   6    Fast scanner for RFC-822 type header information
   7
   This maps the package file into memory. The scanner runs over the
   mapping and isolates and indexes a single section.
  10
  11    ##################################################################### */
  12                                                                         /*}}}*/
  13 // Include Files                                                        /*{{{*/
  14 #ifdef __GNUG__
  15 #pragma implementation "apt-pkg/tagfile.h"
  16 #endif
  17
  18 #include <apt-pkg/tagfile.h>
  19 #include <apt-pkg/error.h>
  20 #include <apt-pkg/strutl.h>
  21
  22 #include <apti18n.h>
  23
  24 #include <string>
  25 #include <stdio.h>
  26 #include <ctype.h>
  27                                                                         /*}}}*/
  28
  29 using std::string;
  30
  31 // TagFile::pkgTagFile - Constructor                                    /*{{{*/
  32 // ---------------------------------------------------------------------
  33 /* */
  34 pkgTagFile::pkgTagFile(FileFd *pFd,unsigned long Size) :
  35      Fd(*pFd),
  36      Size(Size)
  37 {
  38    if (Fd.IsOpen() == false || Fd.Size() == 0)
  39    {
  40       Buffer = 0;
  41       Start = End = Buffer = 0;
  42       iOffset = 0;
  43       Map = NULL;
  44       return;
  45    }
  46
  47    Map = new MMap (Fd, MMap::Public | MMap::ReadOnly);
  48    Buffer = (char *) Map->Data ();
  49    Start = Buffer;
  50    End = Buffer + Map->Size ();
  51    iOffset = 0;
  52 }
  53                                                                         /*}}}*/
  54 // TagFile::~pkgTagFile - Destructor                                    /*{{{*/
  55 // ---------------------------------------------------------------------
  56 /* */
  57 pkgTagFile::~pkgTagFile()
  58 {
  59    delete Map;
  60 }
  61                                                                         /*}}}*/
  62 // TagFile::Step - Advance to the next section                          /*{{{*/
  63 // ---------------------------------------------------------------------
/* Scans the section at the current position and advances past it. A
   section that cannot be scanned is reported as a parse error. */
  65 bool pkgTagFile::Step(pkgTagSection &Tag)
  66 {
  67    if (Start == End)
  68       return false;
  69
  70    if (Tag.Scan(Start,End - Start) == false)
  71    {
  72       return _error->Error(_("Unable to parse package file %s (1)"),
  73               Fd.Name().c_str());
  74    }
  75    Start += Tag.size();
  76    iOffset += Tag.size();
  77
  78    Tag.Trim();
  79    return true;
  80 }
  81                                                                         /*}}}*/
  82 // TagFile::Jump - Jump to a pre-recorded location in the file          /*{{{*/
  83 // ---------------------------------------------------------------------
  84 /* This jumps to a pre-recorded file location and reads the record
  85    that is there */
  86 bool pkgTagFile::Jump(pkgTagSection &Tag,unsigned long Offset)
  87 {
  88    // We are within a buffer space of the next hit..
  89    if (Offset >= iOffset && iOffset + (End - Start) > Offset)
  90    {
  91       unsigned long Dist = Offset - iOffset;
  92       Start += Dist;
  93       iOffset += Dist;
  94       return Step(Tag);
  95    }
  96
  97    // Reposition and reload..
  98    iOffset = Offset;
  99    Start = Buffer + iOffset;
 100
 101    // Start != End is a special case to not fail on empty TagFiles
 102    if (Start != End && Tag.Scan(Start,End - Start) == false)
 103       return _error->Error(_("Unable to parse package file %s (2)"),Fd.Name().c_str());
 104
 105    return true;
 106 }
 107                                                                         /*}}}*/
 108 // TagSection::Scan - Scan for the end of the header information        /*{{{*/
 109 // ---------------------------------------------------------------------
/* This looks for the first double new line in the data stream. It also
   indexes the tags in the section. The very simple hash function below,
   computed over the whole tag name, gives very good performance on the
   debian package files */
 113 inline static unsigned long AlphaHash(const char *Text, const char *End = 0)
 114 {
 115    unsigned long Res = 0;
 116    for (; Text != End && *Text != ':' && *Text != 0; Text++)
 117       Res = (unsigned long)(*Text) ^ (Res << 2);
 118    return Res & 0xFF;
 119 }
 120
 121 bool pkgTagSection::Scan(const char *Start,unsigned long MaxLength)
 122 {
 123    const char *End = Start + MaxLength;
 124    Stop = Section = Start;
 125    memset(AlphaIndexes,0,sizeof(AlphaIndexes));
 126
 127    if (Stop == 0 || MaxLength == 0)
 128       return false;
 129
 130    TagCount = 0;
 131    while (TagCount+1 < sizeof(Indexes)/sizeof(Indexes[0]) && Stop < End)
 132    {
 133       // Start a new index and add it to the hash
 134       if (isspace(Stop[0]) == 0)
 135       {
 136          Indexes[TagCount++] = Stop - Section;
 137          AlphaIndexes[AlphaHash(Stop,End)] = TagCount;
 138       }
 139
 140       Stop = (const char *)memchr(Stop,'\n',End - Stop);
 141
 142       if (Stop == 0)
 143          return false;
 144
 145       for (; Stop+1 < End && Stop[1] == '\r'; Stop++);
 146
 147       // Double newline marks the end of the record
 148       if (Stop+1 < End && Stop[1] == '\n')
 149       {
 150          Indexes[TagCount] = Stop - Section;
 151          for (; Stop < End && (Stop[0] == '\n' || Stop[0] == '\r'); Stop++);
 152          return true;
 153       }
 154
 155       Stop++;
 156    }
 157
 158    if ((Stop+1 >= End) && (End[-1] == '\n' || End[-1] == '\r'))
 159    {
 160        Indexes[TagCount] = (End - 1) - Section;
 161        return true;
 162    }
 163
 164    return false;
 165 }
 166                                                                         /*}}}*/
 167 // TagSection::Trim - Trim off any trailing garbage                     /*{{{*/
 168 // ---------------------------------------------------------------------
 169 /* There should be exactly 1 newline at the end of the buffer, no more. */
 170 void pkgTagSection::Trim()
 171 {
 172    for (; Stop > Section + 2 && (Stop[-2] == '\n' || Stop[-2] == '\r'); Stop--);
 173 }
 174                                                                         /*}}}*/
 175 // TagSection::Find - Locate a tag                                      /*{{{*/
 176 // ---------------------------------------------------------------------
 177 /* This searches the section for a tag that matches the given string. */
 178 bool pkgTagSection::Find(const char *Tag,unsigned &Pos) const
 179 {
 180    unsigned int Length = strlen(Tag);
 181    unsigned int I = AlphaIndexes[AlphaHash(Tag)];
 182    if (I == 0)
 183       return false;
 184    I--;
 185
 186    for (unsigned int Counter = 0; Counter != TagCount; Counter++,
 187         I = (I+1)%TagCount)
 188    {
 189       const char *St;
 190       St = Section + Indexes[I];
 191       if (strncasecmp(Tag,St,Length) != 0)
 192          continue;
 193
 194       // Make sure the colon is in the right place
 195       const char *C = St + Length;
 196       for (; isspace(*C) != 0; C++);
 197       if (*C != ':')
 198          continue;
 199       Pos = I;
 200       return true;
 201    }
 202
 203    Pos = 0;
 204    return false;
 205 }
 206                                                                         /*}}}*/
 207 // TagSection::Find - Locate a tag                                      /*{{{*/
 208 // ---------------------------------------------------------------------
 209 /* This searches the section for a tag that matches the given string. */
 210 bool pkgTagSection::Find(const char *Tag,const char *&Start,
 211                          const char *&End) const
 212 {
 213    unsigned int Length = strlen(Tag);
 214    unsigned int I = AlphaIndexes[AlphaHash(Tag)];
 215    if (I == 0)
 216       return false;
 217    I--;
 218
 219    for (unsigned int Counter = 0; Counter != TagCount; Counter++,
 220         I = (I+1)%TagCount)
 221    {
 222       const char *St;
 223       St = Section + Indexes[I];
 224       if (strncasecmp(Tag,St,Length) != 0)
 225          continue;
 226
 227       // Make sure the colon is in the right place
 228       const char *C = St + Length;
 229       for (; isspace(*C) != 0; C++);
 230       if (*C != ':')
 231          continue;
 232
 233       // Strip off the gunk from the start end
 234       Start = C;
 235       End = Section + Indexes[I+1];
 236       if (Start >= End)
 237          return _error->Error("Internal parsing error");
 238
 239       for (; (isspace(*Start) != 0 || *Start == ':') && Start < End; Start++);
 240       for (; isspace(End[-1]) != 0 && End > Start; End--);
 241
 242       return true;
 243    }
 244
 245    Start = End = 0;
 246    return false;
 247 }
 248                                                                         /*}}}*/
 249 // TagSection::FindS - Find a string                                    /*{{{*/
 250 // ---------------------------------------------------------------------
 251 /* */
 252 string pkgTagSection::FindS(const char *Tag) const
 253 {
 254    const char *Start;
 255    const char *End;
 256    if (Find(Tag,Start,End) == false)
 257       return string();
 258    return string(Start,End);
 259 }
 260                                                                         /*}}}*/
 261 // TagSection::FindI - Find an integer                                  /*{{{*/
 262 // ---------------------------------------------------------------------
 263 /* */
 264 signed int pkgTagSection::FindI(const char *Tag,signed long Default) const
 265 {
 266    const char *Start;
 267    const char *Stop;
 268    if (Find(Tag,Start,Stop) == false)
 269       return Default;
 270
 271    // Copy it into a temp buffer so we can use strtol
 272    char S[300];
 273    if ((unsigned)(Stop - Start) >= sizeof(S))
 274       return Default;
 275    strncpy(S,Start,Stop-Start);
 276    S[Stop - Start] = 0;
 277
 278    char *End;
 279    signed long Result = strtol(S,&End,10);
 280    if (S == End)
 281       return Default;
 282    return Result;
 283 }
 284                                                                         /*}}}*/
 285 // TagSection::FindFlag - Locate a yes/no type flag                     /*{{{*/
 286 // ---------------------------------------------------------------------
 287 /* The bits marked in Flag are masked on/off in Flags */
 288 bool pkgTagSection::FindFlag(const char *Tag,unsigned long &Flags,
 289                              unsigned long Flag) const
 290 {
 291    const char *Start;
 292    const char *Stop;
 293    if (Find(Tag,Start,Stop) == false)
 294       return true;
 295
 296    switch (StringToBool(string(Start,Stop)))
 297    {
 298       case 0:
 299       Flags &= ~Flag;
 300       return true;
 301
 302       case 1:
 303       Flags |= Flag;
 304       return true;
 305
 306       default:
 307       _error->Warning("Unknown flag value: %s",string(Start,Stop).c_str());
 308       return true;
 309    }
 310    return true;
 311 }
 312                                                                         /*}}}*/
 313
 314 // TFRewrite - Rewrite a control record                                 /*{{{*/
 315 // ---------------------------------------------------------------------
/* This writes the control record to Output, rewriting it as necessary. The
   Rewrite list specifies the rewriting rules to follow. This also takes
   the time to sort the field list. */
 319
 320 /* The order of this list is taken from dpkg source lib/parse.c the fieldinfos
 321    array. */
 322 static const char *iTFRewritePackageOrder[] = {
 323                           "Package",
 324                           "Essential",
 325                           "Status",
 326                           "Priority",
 327                           "Section",
 328                           "Installed-Size",
 329                           "Maintainer",
 330                           "Architecture",
 331                           "Source",
 332                           "Version",
 333                            "Revision",         // Obsolete
 334                            "Config-Version",   // Obsolete
 335                           "Replaces",
 336                           "Provides",
 337                           "Depends",
 338                           "Pre-Depends",
 339                           "Recommends",
 340                           "Suggests",
 341                           "Conflicts",
 342                           "Conffiles",
 343                           "Filename",
 344                           "Size",
 345                           "MD5Sum",
 346                           "SHA1Sum",
 347                            "MSDOS-Filename",   // Obsolete
 348                           "Description",
 349                           0};
 350 static const char *iTFRewriteSourceOrder[] = {"Package",
 351                                       "Source",
 352                                       "Binary",
 353                                       "Version",
 354                                       "Priority",
 355                                       "Section",
 356                                       "Maintainer",
 357                                       "Build-Depends",
 358                                       "Build-Depends-Indep",
 359                                       "Build-Conflicts",
 360                                       "Build-Conflicts-Indep",
 361                                       "Architecture",
 362                                       "Standards-Version",
 363                                       "Format",
 364                                       "Directory",
 365                                       "Files",
 366                                       0};
 367
 368 /* Two levels of initialization are used because gcc will set the symbol
 369    size of an array to the length of the array, causing dynamic relinking
 370    errors. Doing this makes the symbol size constant */
 371 const char **TFRewritePackageOrder = iTFRewritePackageOrder;
 372 const char **TFRewriteSourceOrder = iTFRewriteSourceOrder;
 373
 374 bool TFRewrite(FILE *Output,pkgTagSection const &Tags,const char *Order[],
 375                TFRewriteData *Rewrite)
 376 {
 377    unsigned char Visited[256];   // Bit 1 is Order, Bit 2 is Rewrite
 378    for (unsigned I = 0; I != 256; I++)
 379       Visited[I] = 0;
 380
 381    // Set new tag up as necessary.
 382    for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
 383    {
 384       if (Rewrite[J].NewTag == 0)
 385          Rewrite[J].NewTag = Rewrite[J].Tag;
 386    }
 387
 388    // Write all all of the tags, in order.
 389    for (unsigned int I = 0; Order[I] != 0; I++)
 390    {
 391       bool Rewritten = false;
 392
 393       // See if this is a field that needs to be rewritten
 394       for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
 395       {
 396          if (strcasecmp(Rewrite[J].Tag,Order[I]) == 0)
 397          {
 398             Visited[J] |= 2;
 399             if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
 400             {
 401                if (isspace(Rewrite[J].Rewrite[0]))
 402                   fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
 403                else
 404                   fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
 405             }
 406
 407             Rewritten = true;
 408             break;
 409          }
 410       }
 411
 412       // See if it is in the fragment
 413       unsigned Pos;
 414       if (Tags.Find(Order[I],Pos) == false)
 415          continue;
 416       Visited[Pos] |= 1;
 417
 418       if (Rewritten == true)
 419          continue;
 420
 421       /* Write out this element, taking a moment to rewrite the tag
 422          in case of changes of case. */
 423       const char *Start;
 424       const char *Stop;
 425       Tags.Get(Start,Stop,Pos);
 426
 427       if (fputs(Order[I],Output) < 0)
 428          return _error->Errno("fputs","IO Error to output");
 429       Start += strlen(Order[I]);
 430       if (fwrite(Start,Stop - Start,1,Output) != 1)
 431          return _error->Errno("fwrite","IO Error to output");
 432       if (Stop[-1] != '\n')
 433          fprintf(Output,"\n");
 434    }
 435
 436    // Now write all the old tags that were missed.
 437    for (unsigned int I = 0; I != Tags.Count(); I++)
 438    {
 439       if ((Visited[I] & 1) == 1)
 440          continue;
 441
 442       const char *Start;
 443       const char *Stop;
 444       Tags.Get(Start,Stop,I);
 445       const char *End = Start;
 446       for (; End < Stop && *End != ':'; End++);
 447
 448       // See if this is a field that needs to be rewritten
 449       bool Rewritten = false;
 450       for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
 451       {
 452          if (stringcasecmp(Start,End,Rewrite[J].Tag) == 0)
 453          {
 454             Visited[J] |= 2;
 455             if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
 456             {
 457                if (isspace(Rewrite[J].Rewrite[0]))
 458                   fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
 459                else
 460                   fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
 461             }
 462
 463             Rewritten = true;
 464             break;
 465          }
 466       }
 467
 468       if (Rewritten == true)
 469          continue;
 470
 471       // Write out this element
 472       if (fwrite(Start,Stop - Start,1,Output) != 1)
 473          return _error->Errno("fwrite","IO Error to output");
 474       if (Stop[-1] != '\n')
 475          fprintf(Output,"\n");
 476    }
 477
 478    // Now write all the rewrites that were missed
 479    for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
 480    {
 481       if ((Visited[J] & 2) == 2)
 482          continue;
 483
 484       if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
 485       {
 486          if (isspace(Rewrite[J].Rewrite[0]))
 487             fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
 488          else
 489             fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
 490       }
 491    }
 492
 493    return true;
 494 }
 495                                                                         /*}}}*/