| 1 | // -*- mode: cpp; mode: fold -*- |
| 2 | // Description /*{{{*/ |
| 3 | // $Id: contents.cc,v 1.4 2003/02/10 07:34:41 doogie Exp $ |
| 4 | /* ###################################################################### |
| 5 | |
| 6 | contents - Archive contents generator |
| 7 | |
| 8 | The GenContents class is a back end for an archive contents generator. |
| 9 | It takes a list of per-deb file name and merges it into a memory |
| 10 | database of all previous output. This database is stored as a set |
| 11 | of binary trees linked across directories to form a tree of all files+dirs |
| 12 | given to it. The tree will also be sorted as it is built up thus |
| 13 | removing the massive sort time overhead. |
| 14 | |
| 15 | By breaking all the pathnames into components and storing them |
| 16 | separately a space saving is realized by not duplicating the string |
| 17 | over and over again. Ultimately this saving is sacrificed to storage of |
| 18 | the tree structure itself but the tree structure yields a speed gain |
| 19 | in the sorting and processing. Ultimately it takes about 5 seconds to |
| 20 | do 141000 nodes and about 5 meg of ram. |
| 21 | |
| 22 | The tree looks something like: |
| 23 | |
| 24 | usr/ |
| 25 | / \ / libslang |
| 26 | bin/ lib/ --> libc6 |
| 27 | / \ \ libfoo |
| 28 | games/ sbin/ |
| 29 | |
| 30 | The ---> is the DirDown link |
| 31 | |
| 32 | |
| 33 | ##################################################################### */ |
| 34 | /*}}}*/ |
| 35 | // Include Files /*{{{*/ |
| 36 | #include <config.h> |
| 37 | |
| 38 | #include <apt-pkg/debfile.h> |
| 39 | #include <apt-pkg/extracttar.h> |
| 40 | #include <apt-pkg/error.h> |
| 41 | #include <stdio.h> |
| 42 | #include <stdlib.h> |
| 43 | #include <string.h> |
| 44 | #include <malloc.h> |
| 45 | |
| 46 | #include <apti18n.h> |
| 47 | #include "contents.h" |
| 48 | /*}}}*/ |
| 49 | |
| 50 | // GenContents::~GenContents - Free allocated memory /*{{{*/ |
| 51 | // --------------------------------------------------------------------- |
| 52 | /* Since all our allocations are static big-block allocations all that is |
| 53 | needed is to free all of them. */ |
| 54 | GenContents::~GenContents() |
| 55 | { |
| 56 | while (BlockList != 0) |
| 57 | { |
| 58 | BigBlock *Old = BlockList; |
| 59 | BlockList = Old->Next; |
| 60 | free(Old->Block); |
| 61 | delete Old; |
| 62 | } |
| 63 | } |
| 64 | /*}}}*/ |
| 65 | // GenContents::Mystrdup - Custom strdup /*{{{*/ |
| 66 | // --------------------------------------------------------------------- |
| 67 | /* This strdup also uses a large block allocator to eliminate glibc |
| 68 | overhead */ |
| 69 | char *GenContents::Mystrdup(const char *From) |
| 70 | { |
| 71 | unsigned int Len = strlen(From) + 1; |
| 72 | if (StrLeft <= Len) |
| 73 | { |
| 74 | StrLeft = 4096*10; |
| 75 | StrPool = (char *)malloc(StrLeft); |
| 76 | |
| 77 | BigBlock *Block = new BigBlock; |
| 78 | Block->Block = StrPool; |
| 79 | Block->Next = BlockList; |
| 80 | BlockList = Block; |
| 81 | } |
| 82 | |
| 83 | memcpy(StrPool,From,Len); |
| 84 | StrLeft -= Len; |
| 85 | |
| 86 | char *Res = StrPool; |
| 87 | StrPool += Len; |
| 88 | return Res; |
| 89 | } |
| 90 | /*}}}*/ |
| 91 | // GenContents::Node::operator new - Big block allocator /*{{{*/ |
| 92 | // --------------------------------------------------------------------- |
| 93 | /* This eliminates glibc's malloc overhead by allocating large blocks and |
| 94 | having a continuous set of Nodes. This takes about 8 bytes off each nodes |
| 95 | space needs. Freeing is not supported. */ |
| 96 | void *GenContents::Node::operator new(size_t Amount,GenContents *Owner) |
| 97 | { |
| 98 | if (Owner->NodeLeft == 0) |
| 99 | { |
| 100 | Owner->NodeLeft = 10000; |
| 101 | Owner->NodePool = (Node *)malloc(Amount*Owner->NodeLeft); |
| 102 | BigBlock *Block = new BigBlock; |
| 103 | Block->Block = Owner->NodePool; |
| 104 | Block->Next = Owner->BlockList; |
| 105 | Owner->BlockList = Block; |
| 106 | } |
| 107 | |
| 108 | Owner->NodeLeft--; |
| 109 | return Owner->NodePool++; |
| 110 | } |
| 111 | /*}}}*/ |
| 112 | // GenContents::Grab - Grab a new node representing Name under Top /*{{{*/ |
| 113 | // --------------------------------------------------------------------- |
| 114 | /* This grabs a new node representing the pathname component Name under |
| 115 | the node Top. The node is given the name Package. It is assumed that Name |
| 116 | is inside of top. If a duplicate already entered name is found then |
| 117 | a note is made on the Dup list and the previous in-tree node is returned. */ |
| 118 | GenContents::Node *GenContents::Grab(GenContents::Node *Top,const char *Name, |
| 119 | const char *Package) |
| 120 | { |
| 121 | /* We drop down to the next dir level each call. This simplifies |
| 122 | the calling routine */ |
| 123 | if (Top->DirDown == 0) |
| 124 | { |
| 125 | Node *Item = new(this) Node; |
| 126 | Item->Path = Mystrdup(Name); |
| 127 | Item->Package = Package; |
| 128 | Top->DirDown = Item; |
| 129 | return Item; |
| 130 | } |
| 131 | Top = Top->DirDown; |
| 132 | |
| 133 | int Res; |
| 134 | while (1) |
| 135 | { |
| 136 | Res = strcmp(Name,Top->Path); |
| 137 | |
| 138 | // Collision! |
| 139 | if (Res == 0) |
| 140 | { |
| 141 | // See if this the the same package (multi-version dup) |
| 142 | if (Top->Package == Package || |
| 143 | strcasecmp(Top->Package,Package) == 0) |
| 144 | return Top; |
| 145 | |
| 146 | // Look for an already existing Dup |
| 147 | for (Node *I = Top->Dups; I != 0; I = I->Dups) |
| 148 | if (I->Package == Package || |
| 149 | strcasecmp(I->Package,Package) == 0) |
| 150 | return Top; |
| 151 | |
| 152 | // Add the dup in |
| 153 | Node *Item = new(this) Node; |
| 154 | Item->Path = Top->Path; |
| 155 | Item->Package = Package; |
| 156 | Item->Dups = Top->Dups; |
| 157 | Top->Dups = Item; |
| 158 | return Top; |
| 159 | } |
| 160 | |
| 161 | // Continue to traverse the tree |
| 162 | if (Res < 0) |
| 163 | { |
| 164 | if (Top->BTreeLeft == 0) |
| 165 | break; |
| 166 | Top = Top->BTreeLeft; |
| 167 | } |
| 168 | else |
| 169 | { |
| 170 | if (Top->BTreeRight == 0) |
| 171 | break; |
| 172 | Top = Top->BTreeRight; |
| 173 | } |
| 174 | } |
| 175 | |
| 176 | // The item was not found in the tree |
| 177 | Node *Item = new(this) Node; |
| 178 | Item->Path = Mystrdup(Name); |
| 179 | Item->Package = Package; |
| 180 | |
| 181 | // Link it into the tree |
| 182 | if (Res < 0) |
| 183 | { |
| 184 | Item->BTreeLeft = Top->BTreeLeft; |
| 185 | Top->BTreeLeft = Item; |
| 186 | } |
| 187 | else |
| 188 | { |
| 189 | Item->BTreeRight = Top->BTreeRight; |
| 190 | Top->BTreeRight = Item; |
| 191 | } |
| 192 | |
| 193 | return Item; |
| 194 | } |
| 195 | /*}}}*/ |
| 196 | // GenContents::Add - Add a path to the tree /*{{{*/ |
| 197 | // --------------------------------------------------------------------- |
| 198 | /* This takes a full pathname and adds it into the tree. We split the |
| 199 | pathname into directory fragments adding each one as we go. Technically |
| 200 | in output from tar this should result in hitting previous items. */ |
| 201 | void GenContents::Add(const char *Dir,const char *Package) |
| 202 | { |
| 203 | Node *Root = &this->Root; |
| 204 | |
| 205 | // Drop leading slashes |
| 206 | while (*Dir == '/' && *Dir != 0) |
| 207 | Dir++; |
| 208 | |
| 209 | // Run over the string and grab out each bit up to and including a / |
| 210 | const char *Start = Dir; |
| 211 | const char *I = Dir; |
| 212 | while (*I != 0) |
| 213 | { |
| 214 | if (*I != '/' || I - Start <= 1) |
| 215 | { |
| 216 | I++; |
| 217 | continue; |
| 218 | } |
| 219 | I++; |
| 220 | |
| 221 | // Copy the path fragment over |
| 222 | char Tmp[1024]; |
| 223 | strncpy(Tmp,Start,I - Start); |
| 224 | Tmp[I - Start] = 0; |
| 225 | |
| 226 | // Grab a node for it |
| 227 | Root = Grab(Root,Tmp,Package); |
| 228 | |
| 229 | Start = I; |
| 230 | } |
| 231 | |
| 232 | // The final component if it does not have a trailing / |
| 233 | if (I - Start >= 1) |
| 234 | Root = Grab(Root,Start,Package); |
| 235 | } |
| 236 | /*}}}*/ |
| 237 | // GenContents::WriteSpace - Write a given number of white space chars /*{{{*/ |
| 238 | // --------------------------------------------------------------------- |
| 239 | /* We mod 8 it and write tabs where possible. */ |
| 240 | void GenContents::WriteSpace(FILE *Out,unsigned int Current,unsigned int Target) |
| 241 | { |
| 242 | if (Target <= Current) |
| 243 | Target = Current + 1; |
| 244 | |
| 245 | /* Now we write tabs so long as the next tab stop would not pass |
| 246 | the target */ |
| 247 | for (; (Current/8 + 1)*8 < Target; Current = (Current/8 + 1)*8) |
| 248 | fputc('\t',Out); |
| 249 | |
| 250 | // Fill the last bit with spaces |
| 251 | for (; Current < Target; Current++) |
| 252 | fputc(' ',Out); |
| 253 | } |
| 254 | /*}}}*/ |
| 255 | // GenContents::Print - Display the tree /*{{{*/ |
| 256 | // --------------------------------------------------------------------- |
| 257 | /* This is the final result function. It takes the tree and recursively |
| 258 | calls itself and runs over each section of the tree printing out |
| 259 | the pathname and the hit packages. We use Buf to build the pathname |
| 260 | summed over all the directory parents of this node. */ |
| 261 | void GenContents::Print(FILE *Out) |
| 262 | { |
| 263 | char Buffer[1024]; |
| 264 | Buffer[0] = 0; |
| 265 | DoPrint(Out,&Root,Buffer); |
| 266 | } |
| 267 | void GenContents::DoPrint(FILE *Out,GenContents::Node *Top, char *Buf) |
| 268 | { |
| 269 | if (Top == 0) |
| 270 | return; |
| 271 | |
| 272 | // Go left |
| 273 | DoPrint(Out,Top->BTreeLeft,Buf); |
| 274 | |
| 275 | // Print the current dir location and then descend to lower dirs |
| 276 | char *OldEnd = Buf + strlen(Buf); |
| 277 | if (Top->Path != 0) |
| 278 | { |
| 279 | strcat(Buf,Top->Path); |
| 280 | |
| 281 | // Do not show the item if it is a directory with dups |
| 282 | if (Top->Path[strlen(Top->Path)-1] != '/' /*|| Top->Dups == 0*/) |
| 283 | { |
| 284 | fputs(Buf,Out); |
| 285 | WriteSpace(Out,strlen(Buf),60); |
| 286 | for (Node *I = Top; I != 0; I = I->Dups) |
| 287 | { |
| 288 | if (I != Top) |
| 289 | fputc(',',Out); |
| 290 | fputs(I->Package,Out); |
| 291 | } |
| 292 | fputc('\n',Out); |
| 293 | } |
| 294 | } |
| 295 | |
| 296 | // Go along the directory link |
| 297 | DoPrint(Out,Top->DirDown,Buf); |
| 298 | *OldEnd = 0; |
| 299 | |
| 300 | // Go right |
| 301 | DoPrint(Out,Top->BTreeRight,Buf); |
| 302 | } |
| 303 | /*}}}*/ |
| 304 | |
| 305 | // ContentsExtract::Read - Read the archive /*{{{*/ |
| 306 | // --------------------------------------------------------------------- |
| 307 | /* */ |
| 308 | bool ContentsExtract::Read(debDebFile &Deb) |
| 309 | { |
| 310 | Reset(); |
| 311 | return Deb.ExtractArchive(*this); |
| 312 | } |
| 313 | /*}}}*/ |
| 314 | // ContentsExtract::DoItem - Extract an item /*{{{*/ |
| 315 | // --------------------------------------------------------------------- |
| 316 | /* This just tacks the name onto the end of our memory buffer */ |
| 317 | bool ContentsExtract::DoItem(Item &Itm,int &Fd) |
| 318 | { |
| 319 | unsigned long Len = strlen(Itm.Name); |
| 320 | |
| 321 | // Strip leading ./'s |
| 322 | if (Itm.Name[0] == '.' && Itm.Name[1] == '/') |
| 323 | { |
| 324 | // == './' |
| 325 | if (Len == 2) |
| 326 | return true; |
| 327 | |
| 328 | Len -= 2; |
| 329 | Itm.Name += 2; |
| 330 | } |
| 331 | |
| 332 | // Allocate more storage for the string list |
| 333 | if (CurSize + Len + 2 >= MaxSize || Data == 0) |
| 334 | { |
| 335 | if (MaxSize == 0) |
| 336 | MaxSize = 512*1024/2; |
| 337 | char *NewData = (char *)realloc(Data,MaxSize*2); |
| 338 | if (NewData == 0) |
| 339 | return _error->Error(_("realloc - Failed to allocate memory")); |
| 340 | Data = NewData; |
| 341 | MaxSize *= 2; |
| 342 | } |
| 343 | |
| 344 | strcpy(Data+CurSize,Itm.Name); |
| 345 | CurSize += Len + 1; |
| 346 | return true; |
| 347 | } |
| 348 | /*}}}*/ |
| 349 | // ContentsExtract::TakeContents - Load the contents data /*{{{*/ |
| 350 | // --------------------------------------------------------------------- |
| 351 | /* */ |
| 352 | bool ContentsExtract::TakeContents(const void *NewData,unsigned long long Length) |
| 353 | { |
| 354 | if (Length == 0) |
| 355 | { |
| 356 | CurSize = 0; |
| 357 | return true; |
| 358 | } |
| 359 | |
| 360 | // Allocate more storage for the string list |
| 361 | if (Length + 2 >= MaxSize || Data == 0) |
| 362 | { |
| 363 | if (MaxSize == 0) |
| 364 | MaxSize = 512*1024/2; |
| 365 | while (MaxSize*2 <= Length) |
| 366 | MaxSize *= 2; |
| 367 | |
| 368 | char *NewData = (char *)realloc(Data,MaxSize*2); |
| 369 | if (NewData == 0) |
| 370 | return _error->Error(_("realloc - Failed to allocate memory")); |
| 371 | Data = NewData; |
| 372 | MaxSize *= 2; |
| 373 | } |
| 374 | memcpy(Data,NewData,Length); |
| 375 | CurSize = Length; |
| 376 | |
| 377 | return Data[CurSize-1] == 0; |
| 378 | } |
| 379 | /*}}}*/ |
| 380 | // ContentsExtract::Add - Read the contents data into the sorter /*{{{*/ |
| 381 | // --------------------------------------------------------------------- |
| 382 | /* */ |
| 383 | void ContentsExtract::Add(GenContents &Contents,string const &Package) |
| 384 | { |
| 385 | const char *Start = Data; |
| 386 | char *Pkg = Contents.Mystrdup(Package.c_str()); |
| 387 | for (const char *I = Data; I < Data + CurSize; I++) |
| 388 | { |
| 389 | if (*I == 0) |
| 390 | { |
| 391 | Contents.Add(Start,Pkg); |
| 392 | Start = ++I; |
| 393 | } |
| 394 | } |
| 395 | } |
| 396 | /*}}}*/ |