* ftparchive/contents.cc:
[ntk/apt.git] / ftparchive / contents.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: contents.cc,v 1.4 2003/02/10 07:34:41 doogie Exp $
4 /* ######################################################################
5
6 contents - Archive contents generator
7
8 The GenContents class is a back end for an archive contents generator.
9 It takes a list of per-deb file name and merges it into a memory
10 database of all previous output. This database is stored as a set
11 of binary trees linked across directories to form a tree of all files+dirs
12 given to it. The tree will also be sorted as it is built up thus
13 removing the massive sort time overhead.
14
15 By breaking all the pathnames into components and storing them
16 separately a space savings is realized by not duplicating the string
17 over and over again. Ultimately this saving is sacrificed to storage of
18 the tree structure itself but the tree structure yields a speed gain
19 in the sorting and processing. Ultimately it takes about 5 seconds to
20 do 141000 nodes and about 5 meg of ram.
21
22 The tree looks something like:
23
24 usr/
25 / \ / libslang
26 bin/ lib/ --> libc6
27 / \ \ libfoo
28 games/ sbin/
29
30 The ---> is the DirDown link
31
32
33 ##################################################################### */
34 /*}}}*/
35 // Include Files /*{{{*/
36 #include "contents.h"
37
38 #include <apti18n.h>
39 #include <apt-pkg/extracttar.h>
40 #include <apt-pkg/error.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <malloc.h>
45 /*}}}*/
46
47 // GenContents::~GenContents - Free allocated memory /*{{{*/
48 // ---------------------------------------------------------------------
49 /* Since all our allocations are static big-block allocations all that is
50 needed is to free all of them. */
51 GenContents::~GenContents()
52 {
53 while (BlockList != 0)
54 {
55 BigBlock *Old = BlockList;
56 BlockList = Old->Next;
57 free(Old->Block);
58 delete Old;
59 }
60 }
61 /*}}}*/
62 // GenContents::Mystrdup - Custom strdup /*{{{*/
63 // ---------------------------------------------------------------------
64 /* This strdup also uses a large block allocator to eliminate glibc
65 overhead */
66 char *GenContents::Mystrdup(const char *From)
67 {
68 unsigned int Len = strlen(From) + 1;
69 if (StrLeft <= Len)
70 {
71 StrLeft = 4096*10;
72 StrPool = (char *)malloc(StrLeft);
73
74 BigBlock *Block = new BigBlock;
75 Block->Block = StrPool;
76 Block->Next = BlockList;
77 BlockList = Block;
78 }
79
80 memcpy(StrPool,From,Len);
81 StrLeft -= Len;
82
83 char *Res = StrPool;
84 StrPool += Len;
85 return Res;
86 }
87 /*}}}*/
88 // GenContents::Node::operator new - Big block allocator /*{{{*/
89 // ---------------------------------------------------------------------
90 /* This eliminates glibc's malloc overhead by allocating large blocks and
91 having a continuous set of Nodes. This takes about 8 bytes off each nodes
92 space needs. Freeing is not supported. */
93 void *GenContents::Node::operator new(size_t Amount,GenContents *Owner)
94 {
95 if (Owner->NodeLeft == 0)
96 {
97 Owner->NodeLeft = 10000;
98 Owner->NodePool = (Node *)malloc(Amount*Owner->NodeLeft);
99 BigBlock *Block = new BigBlock;
100 Block->Block = Owner->NodePool;
101 Block->Next = Owner->BlockList;
102 Owner->BlockList = Block;
103 }
104
105 Owner->NodeLeft--;
106 return Owner->NodePool++;
107 }
108 /*}}}*/
109 // GenContents::Grab - Grab a new node representing Name under Top /*{{{*/
110 // ---------------------------------------------------------------------
111 /* This grabs a new node representing the pathname component Name under
112 the node Top. The node is given the name Package. It is assumed that Name
113 is inside of top. If a duplicate already entered name is found then
114 a note is made on the Dup list and the previous in-tree node is returned. */
115 GenContents::Node *GenContents::Grab(GenContents::Node *Top,const char *Name,
116 const char *Package)
117 {
118 /* We drop down to the next dir level each call. This simplifies
119 the calling routine */
120 if (Top->DirDown == 0)
121 {
122 Node *Item = new(this) Node;
123 Item->Path = Mystrdup(Name);
124 Item->Package = Package;
125 Top->DirDown = Item;
126 return Item;
127 }
128 Top = Top->DirDown;
129
130 int Res;
131 while (1)
132 {
133 Res = strcmp(Name,Top->Path);
134
135 // Collision!
136 if (Res == 0)
137 {
138 // See if this the the same package (multi-version dup)
139 if (Top->Package == Package ||
140 strcasecmp(Top->Package,Package) == 0)
141 return Top;
142
143 // Look for an already existing Dup
144 for (Node *I = Top->Dups; I != 0; I = I->Dups)
145 if (I->Package == Package ||
146 strcasecmp(I->Package,Package) == 0)
147 return Top;
148
149 // Add the dup in
150 Node *Item = new(this) Node;
151 Item->Path = Top->Path;
152 Item->Package = Package;
153 Item->Dups = Top->Dups;
154 Top->Dups = Item;
155 return Top;
156 }
157
158 // Continue to traverse the tree
159 if (Res < 0)
160 {
161 if (Top->BTreeLeft == 0)
162 break;
163 Top = Top->BTreeLeft;
164 }
165 else
166 {
167 if (Top->BTreeRight == 0)
168 break;
169 Top = Top->BTreeRight;
170 }
171 }
172
173 // The item was not found in the tree
174 Node *Item = new(this) Node;
175 Item->Path = Mystrdup(Name);
176 Item->Package = Package;
177
178 // Link it into the tree
179 if (Res < 0)
180 {
181 Item->BTreeLeft = Top->BTreeLeft;
182 Top->BTreeLeft = Item;
183 }
184 else
185 {
186 Item->BTreeRight = Top->BTreeRight;
187 Top->BTreeRight = Item;
188 }
189
190 return Item;
191 }
192 /*}}}*/
193 // GenContents::Add - Add a path to the tree /*{{{*/
194 // ---------------------------------------------------------------------
195 /* This takes a full pathname and adds it into the tree. We split the
196 pathname into directory fragments adding each one as we go. Technically
197 in output from tar this should result in hitting previous items. */
198 void GenContents::Add(const char *Dir,const char *Package)
199 {
200 Node *Root = &this->Root;
201
202 // Drop leading slashes
203 while (*Dir == '/' && *Dir != 0)
204 Dir++;
205
206 // Run over the string and grab out each bit up to and including a /
207 const char *Start = Dir;
208 const char *I = Dir;
209 while (*I != 0)
210 {
211 if (*I != '/' || I - Start <= 1)
212 {
213 I++;
214 continue;
215 }
216 I++;
217
218 // Copy the path fragment over
219 char Tmp[1024];
220 strncpy(Tmp,Start,I - Start);
221 Tmp[I - Start] = 0;
222
223 // Grab a node for it
224 Root = Grab(Root,Tmp,Package);
225
226 Start = I;
227 }
228
229 // The final component if it does not have a trailing /
230 if (I - Start >= 1)
231 Root = Grab(Root,Start,Package);
232 }
233 /*}}}*/
234 // GenContents::WriteSpace - Write a given number of white space chars /*{{{*/
235 // ---------------------------------------------------------------------
236 /* We mod 8 it and write tabs where possible. */
237 void GenContents::WriteSpace(FILE *Out,unsigned int Current,unsigned int Target)
238 {
239 if (Target <= Current)
240 Target = Current + 1;
241
242 /* Now we write tabs so long as the next tab stop would not pass
243 the target */
244 for (; (Current/8 + 1)*8 < Target; Current = (Current/8 + 1)*8)
245 fputc('\t',Out);
246
247 // Fill the last bit with spaces
248 for (; Current < Target; Current++)
249 fputc(' ',Out);
250 }
251 /*}}}*/
252 // GenContents::Print - Display the tree /*{{{*/
253 // ---------------------------------------------------------------------
254 /* This is the final result function. It takes the tree and recursively
255 calls itself and runs over each section of the tree printing out
256 the pathname and the hit packages. We use Buf to build the pathname
257 summed over all the directory parents of this node. */
258 void GenContents::Print(FILE *Out)
259 {
260 char Buffer[1024];
261 Buffer[0] = 0;
262 DoPrint(Out,&Root,Buffer);
263 }
264 void GenContents::DoPrint(FILE *Out,GenContents::Node *Top, char *Buf)
265 {
266 if (Top == 0)
267 return;
268
269 // Go left
270 DoPrint(Out,Top->BTreeLeft,Buf);
271
272 // Print the current dir location and then descend to lower dirs
273 char *OldEnd = Buf + strlen(Buf);
274 if (Top->Path != 0)
275 {
276 strcat(Buf,Top->Path);
277
278 // Do not show the item if it is a directory with dups
279 if (Top->Path[strlen(Top->Path)-1] != '/' /*|| Top->Dups == 0*/)
280 {
281 fputs(Buf,Out);
282 WriteSpace(Out,strlen(Buf),60);
283 for (Node *I = Top; I != 0; I = I->Dups)
284 {
285 if (I != Top)
286 fputc(',',Out);
287 fputs(I->Package,Out);
288 }
289 fputc('\n',Out);
290 }
291 }
292
293 // Go along the directory link
294 DoPrint(Out,Top->DirDown,Buf);
295 *OldEnd = 0;
296
297 // Go right
298 DoPrint(Out,Top->BTreeRight,Buf);
299 }
300 /*}}}*/
301
302 // ContentsExtract::Read - Read the archive /*{{{*/
303 // ---------------------------------------------------------------------
304 /* */
305 bool ContentsExtract::Read(debDebFile &Deb)
306 {
307 Reset();
308
309 // Get the archive member and positition the file
310 const ARArchive::Member *Member = Deb.GotoMember("data.tar.gz");
311 const char *Compressor = "gzip";
312 if (Member == 0) {
313 Member = Deb.GotoMember("data.tar.bz2");
314 Compressor = "bzip2";
315 }
316 if (Member == 0) {
317 Member = Deb.GotoMember("data.tar.lzma");
318 Compressor = "lzma";
319 }
320 if (Member == 0) {
321 _error->Error(_("Internal error, could not locate member %s"),
322 "data.tar.{gz,bz2,lzma}");
323 return false;
324 }
325
326 // Extract it.
327 ExtractTar Tar(Deb.GetFile(),Member->Size,Compressor);
328 if (Tar.Go(*this) == false)
329 return false;
330 return true;
331 }
332 /*}}}*/
333 // ContentsExtract::DoItem - Extract an item /*{{{*/
334 // ---------------------------------------------------------------------
335 /* This just tacks the name onto the end of our memory buffer */
336 bool ContentsExtract::DoItem(Item &Itm,int &Fd)
337 {
338 unsigned long Len = strlen(Itm.Name);
339
340 // Strip leading ./'s
341 if (Itm.Name[0] == '.' && Itm.Name[1] == '/')
342 {
343 // == './'
344 if (Len == 2)
345 return true;
346
347 Len -= 2;
348 Itm.Name += 2;
349 }
350
351 // Allocate more storage for the string list
352 if (CurSize + Len + 2 >= MaxSize || Data == 0)
353 {
354 if (MaxSize == 0)
355 MaxSize = 512*1024/2;
356 char *NewData = (char *)realloc(Data,MaxSize*2);
357 if (NewData == 0)
358 return _error->Error(_("realloc - Failed to allocate memory"));
359 Data = NewData;
360 MaxSize *= 2;
361 }
362
363 strcpy(Data+CurSize,Itm.Name);
364 CurSize += Len + 1;
365 return true;
366 }
367 /*}}}*/
368 // ContentsExtract::TakeContents - Load the contents data /*{{{*/
369 // ---------------------------------------------------------------------
370 /* */
371 bool ContentsExtract::TakeContents(const void *NewData,unsigned long Length)
372 {
373 if (Length == 0)
374 {
375 CurSize = 0;
376 return true;
377 }
378
379 // Allocate more storage for the string list
380 if (Length + 2 >= MaxSize || Data == 0)
381 {
382 if (MaxSize == 0)
383 MaxSize = 512*1024/2;
384 while (MaxSize*2 <= Length)
385 MaxSize *= 2;
386
387 char *NewData = (char *)realloc(Data,MaxSize*2);
388 if (NewData == 0)
389 return _error->Error(_("realloc - Failed to allocate memory"));
390 Data = NewData;
391 MaxSize *= 2;
392 }
393 memcpy(Data,NewData,Length);
394 CurSize = Length;
395
396 return Data[CurSize-1] == 0;
397 }
398 /*}}}*/
399 // ContentsExtract::Add - Read the contents data into the sorter /*{{{*/
400 // ---------------------------------------------------------------------
401 /* */
402 void ContentsExtract::Add(GenContents &Contents,string Package)
403 {
404 const char *Start = Data;
405 char *Pkg = Contents.Mystrdup(Package.c_str());
406 for (const char *I = Data; I < Data + CurSize; I++)
407 {
408 if (*I == 0)
409 {
410 Contents.Add(Start,Pkg);
411 Start = ++I;
412 }
413 }
414 }
415 /*}}}*/