G++3 fixes from Randolph
[ntk/apt.git] / ftparchive / multicompress.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: multicompress.cc,v 1.3 2001/05/29 03:48:27 jgg Exp $
4 /* ######################################################################
5
6 MultiCompressor
7
8 This class is very complicated in order to optimize for the common
9 case of its use, writing a large set of compressed files that are
10 different from the old set. It spawns off compressors in parallel
11 to maximize compression throughput and has a separate task managing
12 the data going into the compressors.
13
14 ##################################################################### */
15 /*}}}*/
16 // Include Files /*{{{*/
17 #ifdef __GNUG__
18 #pragma implementation "multicompress.h"
19 #endif
20
#include "multicompress.h"

#include <apt-pkg/strutl.h>
#include <apt-pkg/error.h>
#include <apt-pkg/md5.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <utime.h>
#include <unistd.h>
#include <iostream>
32 /*}}}*/
33
34 using namespace std;
35
36 const MultiCompress::CompType MultiCompress::Compressors[] =
37 {{".","",0,0,0,1},
38 {"gzip",".gz","gzip","-9n","-d",2},
39 {"bzip2",".bz2","bzip2","-9","-d",3},
40 {}};
41
42 // MultiCompress::MultiCompress - Constructor /*{{{*/
43 // ---------------------------------------------------------------------
44 /* Setup the file outputs, compression modes and fork the writer child */
45 MultiCompress::MultiCompress(string Output,string Compress,
46 mode_t Permissions,bool Write)
47 {
48 Outputs = 0;
49 Outputter = -1;
50 Input = 0;
51 UpdateMTime = 0;
52 this->Permissions = Permissions;
53
54 /* Parse the compression string, a space separated lists of compresison
55 types */
56 string::const_iterator I = Compress.begin();
57 for (; I != Compress.end();)
58 {
59 for (; I != Compress.end() && isspace(*I); I++);
60
61 // Grab a word
62 string::const_iterator Start = I;
63 for (; I != Compress.end() && !isspace(*I); I++);
64
65 // Find the matching compressor
66 const CompType *Comp = Compressors;
67 for (; Comp->Name != 0; Comp++)
68 if (stringcmp(Start,I,Comp->Name) == 0)
69 break;
70
71 // Hmm.. unknown.
72 if (Comp->Name == 0)
73 {
74 _error->Warning("Unknown Compresison Algorithm '%s'",string(Start,I).c_str());
75 continue;
76 }
77
78 // Create and link in a new output
79 Files *NewOut = new Files;
80 NewOut->Next = Outputs;
81 Outputs = NewOut;
82 NewOut->CompressProg = Comp;
83 NewOut->Output = Output+Comp->Extension;
84
85 struct stat St;
86 if (stat(NewOut->Output.c_str(),&St) == 0)
87 NewOut->OldMTime = St.st_mtime;
88 else
89 NewOut->OldMTime = 0;
90 }
91
92 if (Write == false)
93 return;
94
95 /* Open all the temp files now so we can report any errors. File is
96 made unreable to prevent people from touching it during creating. */
97 for (Files *I = Outputs; I != 0; I = I->Next)
98 I->TmpFile.Open(I->Output + ".new",FileFd::WriteEmpty,0600);
99 if (_error->PendingError() == true)
100 return;
101
102 if (Outputs == 0)
103 {
104 _error->Error("Compressed output %s needs a compression set",Output.c_str());
105 return;
106 }
107
108 Start();
109 }
110 /*}}}*/
111 // MultiCompress::~MultiCompress - Destructor /*{{{*/
112 // ---------------------------------------------------------------------
113 /* Just erase the file linked list. */
114 MultiCompress::~MultiCompress()
115 {
116 Die();
117
118 for (; Outputs != 0;)
119 {
120 Files *Tmp = Outputs->Next;
121 delete Outputs;
122 Outputs = Tmp;
123 }
124 }
125 /*}}}*/
126 // MultiCompress::GetStat - Get stat information for compressed files /*{{{*/
127 // ---------------------------------------------------------------------
128 /* This checks each compressed file to make sure it exists and returns
129 stat information for a random file from the collection. False means
130 one or more of the files is missing. */
131 bool MultiCompress::GetStat(string Output,string Compress,struct stat &St)
132 {
133 /* Parse the compression string, a space separated lists of compresison
134 types */
135 string::const_iterator I = Compress.begin();
136 bool DidStat = false;
137 for (; I != Compress.end();)
138 {
139 for (; I != Compress.end() && isspace(*I); I++);
140
141 // Grab a word
142 string::const_iterator Start = I;
143 for (; I != Compress.end() && !isspace(*I); I++);
144
145 // Find the matching compressor
146 const CompType *Comp = Compressors;
147 for (; Comp->Name != 0; Comp++)
148 if (stringcmp(Start,I,Comp->Name) == 0)
149 break;
150
151 // Hmm.. unknown.
152 if (Comp->Name == 0)
153 continue;
154
155 string Name = Output+Comp->Extension;
156 if (stat(Name.c_str(),&St) != 0)
157 return false;
158 DidStat = true;
159 }
160 return DidStat;
161 }
162 /*}}}*/
163 // MultiCompress::Start - Start up the writer child /*{{{*/
164 // ---------------------------------------------------------------------
165 /* Fork a child and setup the communication pipe. */
166 bool MultiCompress::Start()
167 {
168 // Create a data pipe
169 int Pipe[2] = {-1,-1};
170 if (pipe(Pipe) != 0)
171 return _error->Errno("pipe","Failed to create IPC pipe to subprocess");
172 for (int I = 0; I != 2; I++)
173 SetCloseExec(Pipe[I],true);
174
175 // The child..
176 Outputter = fork();
177 if (Outputter == 0)
178 {
179 close(Pipe[1]);
180 Child(Pipe[0]);
181 if (_error->PendingError() == true)
182 {
183 _error->DumpErrors();
184 _exit(100);
185 }
186 _exit(0);
187 };
188
189 /* Tidy up the temp files, we open them in the constructor so as to
190 get proper error reporting. Close them now. */
191 for (Files *I = Outputs; I != 0; I = I->Next)
192 I->TmpFile.Close();
193
194 close(Pipe[0]);
195 Input = fdopen(Pipe[1],"w");
196 if (Input == 0)
197 return _error->Errno("fdopen","Failed to create FILE*");
198
199 if (Outputter == -1)
200 return _error->Errno("fork","Failed to fork");
201 return true;
202 }
203 /*}}}*/
204 // MultiCompress::Die - Clean up the writer /*{{{*/
205 // ---------------------------------------------------------------------
206 /* */
207 bool MultiCompress::Die()
208 {
209 if (Input == 0)
210 return true;
211
212 fclose(Input);
213 Input = 0;
214 bool Res = ExecWait(Outputter,"Compress Child",false);
215 Outputter = -1;
216 return Res;
217 }
218 /*}}}*/
219 // MultiCompress::Finalize - Finish up writing /*{{{*/
220 // ---------------------------------------------------------------------
221 /* This is only necessary for statistics reporting. */
222 bool MultiCompress::Finalize(unsigned long &OutSize)
223 {
224 OutSize = 0;
225 if (Input == 0 || Die() == false)
226 return false;
227
228 time_t Now;
229 time(&Now);
230
231 // Check the mtimes to see if the files were replaced.
232 bool Changed = false;
233 for (Files *I = Outputs; I != 0; I = I->Next)
234 {
235 struct stat St;
236 if (stat(I->Output.c_str(),&St) != 0)
237 return _error->Error("Internal Error, Failed to create %s",
238 I->Output.c_str());
239
240 if (I->OldMTime != St.st_mtime)
241 Changed = true;
242 else
243 {
244 // Update the mtime if necessary
245 if (UpdateMTime > 0 &&
246 (Now - St.st_mtime > (signed)UpdateMTime || St.st_mtime > Now))
247 {
248 struct utimbuf Buf;
249 Buf.actime = Buf.modtime = Now;
250 utime(I->Output.c_str(),&Buf);
251 Changed = true;
252 }
253 }
254
255 // Force the file permissions
256 if (St.st_mode != Permissions)
257 chmod(I->Output.c_str(),Permissions);
258
259 OutSize += St.st_size;
260 }
261
262 if (Changed == false)
263 OutSize = 0;
264
265 return true;
266 }
267 /*}}}*/
268 // MultiCompress::OpenCompress - Open the compressor /*{{{*/
269 // ---------------------------------------------------------------------
270 /* This opens the compressor, either in compress mode or decompress
271 mode. FileFd is always the compressor input/output file,
272 OutFd is the created pipe, Input for Compress, Output for Decompress. */
273 bool MultiCompress::OpenCompress(const CompType *Prog,int &Pid,int FileFd,
274 int &OutFd,bool Comp)
275 {
276 Pid = -1;
277
278 // No compression
279 if (Prog->Binary == 0)
280 {
281 OutFd = dup(FileFd);
282 return true;
283 }
284
285 // Create a data pipe
286 int Pipe[2] = {-1,-1};
287 if (pipe(Pipe) != 0)
288 return _error->Errno("pipe","Failed to create subprocess IPC");
289 for (int J = 0; J != 2; J++)
290 SetCloseExec(Pipe[J],true);
291
292 if (Comp == true)
293 OutFd = Pipe[1];
294 else
295 OutFd = Pipe[0];
296
297 // The child..
298 Pid = ExecFork();
299 if (Pid == 0)
300 {
301 if (Comp == true)
302 {
303 dup2(FileFd,STDOUT_FILENO);
304 dup2(Pipe[0],STDIN_FILENO);
305 }
306 else
307 {
308 dup2(FileFd,STDIN_FILENO);
309 dup2(Pipe[1],STDOUT_FILENO);
310 }
311
312 SetCloseExec(STDOUT_FILENO,false);
313 SetCloseExec(STDIN_FILENO,false);
314
315 const char *Args[3];
316 Args[0] = Prog->Binary;
317 if (Comp == true)
318 Args[1] = Prog->CompArgs;
319 else
320 Args[1] = Prog->UnCompArgs;
321 Args[2] = 0;
322 execvp(Args[0],(char **)Args);
323 cerr << "Failed to exec compressor " << Args[0] << endl;
324 _exit(100);
325 };
326 if (Comp == true)
327 close(Pipe[0]);
328 else
329 close(Pipe[1]);
330 return true;
331 }
332 /*}}}*/
333 // MultiCompress::OpenOld - Open an old file /*{{{*/
334 // ---------------------------------------------------------------------
335 /* This opens one of the original output files, possibly decompressing it. */
336 bool MultiCompress::OpenOld(int &Fd,int &Proc)
337 {
338 Files *Best = Outputs;
339 for (Files *I = Outputs; I != 0; I = I->Next)
340 if (Best->CompressProg->Cost > I->CompressProg->Cost)
341 Best = I;
342
343 // Open the file
344 FileFd F(Best->Output,FileFd::ReadOnly);
345 if (_error->PendingError() == true)
346 return false;
347
348 // Decompress the file so we can read it
349 if (OpenCompress(Best->CompressProg,Proc,F.Fd(),Fd,false) == false)
350 return false;
351
352 return true;
353 }
354 /*}}}*/
355 // MultiCompress::CloseOld - Close the old file /*{{{*/
356 // ---------------------------------------------------------------------
357 /* */
358 bool MultiCompress::CloseOld(int Fd,int Proc)
359 {
360 close(Fd);
361 if (Proc != -1)
362 if (ExecWait(Proc,"decompressor",false) == false)
363 return false;
364 return true;
365 }
366 /*}}}*/
367 // MultiCompress::Child - The writer child /*{{{*/
368 // ---------------------------------------------------------------------
369 /* The child process forks a bunch of compression children and takes
370 input on FD and passes it to all the compressor childer. On the way it
371 computes the MD5 of the raw data. After this the raw data in the
372 original files is compared to see if this data is new. If the data
373 is new then the temp files are renamed, otherwise they are erased. */
374 bool MultiCompress::Child(int FD)
375 {
376 // Start the compression children.
377 for (Files *I = Outputs; I != 0; I = I->Next)
378 {
379 if (OpenCompress(I->CompressProg,I->CompressProc,I->TmpFile.Fd(),
380 I->Fd,true) == false)
381 return false;
382 }
383
384 /* Okay, now we just feed data from FD to all the other FDs. Also
385 stash a hash of the data to use later. */
386 SetNonBlock(FD,false);
387 unsigned char Buffer[32*1024];
388 unsigned long FileSize = 0;
389 MD5Summation MD5;
390 while (1)
391 {
392 WaitFd(FD,false);
393 int Res = read(FD,Buffer,sizeof(Buffer));
394 if (Res == 0)
395 break;
396 if (Res < 0)
397 continue;
398
399 MD5.Add(Buffer,Res);
400 FileSize += Res;
401 for (Files *I = Outputs; I != 0; I = I->Next)
402 {
403 if (write(I->Fd,Buffer,Res) != Res)
404 {
405 _error->Errno("write","IO to subprocess/file failed");
406 break;
407 }
408 }
409 }
410
411 // Close all the writers
412 for (Files *I = Outputs; I != 0; I = I->Next)
413 close(I->Fd);
414
415 // Wait for the compressors to exit
416 for (Files *I = Outputs; I != 0; I = I->Next)
417 {
418 if (I->CompressProc != -1)
419 ExecWait(I->CompressProc,I->CompressProg->Binary,false);
420 }
421
422 if (_error->PendingError() == true)
423 return false;
424
425 /* Now we have to copy the files over, or erase them if they
426 have not changed. First find the cheapest decompressor */
427 bool Missing = false;
428 for (Files *I = Outputs; I != 0; I = I->Next)
429 {
430 if (I->OldMTime == 0)
431 {
432 Missing = true;
433 break;
434 }
435 }
436
437 // Check the MD5 of the lowest cost entity.
438 while (Missing == false)
439 {
440 int CompFd = -1;
441 int Proc = -1;
442 if (OpenOld(CompFd,Proc) == false)
443 {
444 _error->Discard();
445 break;
446 }
447
448 // Compute the hash
449 MD5Summation OldMD5;
450 unsigned long NewFileSize = 0;
451 while (1)
452 {
453 int Res = read(CompFd,Buffer,sizeof(Buffer));
454 if (Res == 0)
455 break;
456 if (Res < 0)
457 return _error->Errno("read","Failed to read while computing MD5");
458 NewFileSize += Res;
459 OldMD5.Add(Buffer,Res);
460 }
461
462 // Tidy the compressor
463 if (CloseOld(CompFd,Proc) == false)
464 return false;
465
466 // Check the hash
467 if (OldMD5.Result() == MD5.Result() &&
468 FileSize == NewFileSize)
469 {
470 for (Files *I = Outputs; I != 0; I = I->Next)
471 {
472 I->TmpFile.Close();
473 if (unlink(I->TmpFile.Name().c_str()) != 0)
474 _error->Errno("unlink","Problem unlinking %s",
475 I->TmpFile.Name().c_str());
476 }
477 return !_error->PendingError();
478 }
479 break;
480 }
481
482 // Finalize
483 for (Files *I = Outputs; I != 0; I = I->Next)
484 {
485 // Set the correct file modes
486 fchmod(I->TmpFile.Fd(),Permissions);
487
488 if (rename(I->TmpFile.Name().c_str(),I->Output.c_str()) != 0)
489 _error->Errno("rename","Failed to rename %s to %s",
490 I->TmpFile.Name().c_str(),I->Output.c_str());
491 I->TmpFile.Close();
492 }
493
494 return !_error->PendingError();
495 }
496 /*}}}*/
497