Merge misc-abi-changes
[ntk/apt.git] / ftparchive / multicompress.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: multicompress.cc,v 1.4 2003/02/10 07:34:41 doogie Exp $
4 /* ######################################################################
5
6 MultiCompressor
7
8 This class is very complicated in order to optimize for the common
9 case of its use, writing a large set of compressed files that are
10 different from the old set. It spawns off compressors in parallel
11 to maximize compression throughput and has a separate task managing
12 the data going into the compressors.
13
14 ##################################################################### */
15 /*}}}*/
16 // Include Files /*{{{*/
17 #ifdef __GNUG__
18 #pragma implementation "multicompress.h"
19 #endif
20
#include "multicompress.h"

#include <apti18n.h>
#include <apt-pkg/strutl.h>
#include <apt-pkg/error.h>
#include <apt-pkg/md5.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <utime.h>
#include <unistd.h>
#include <iostream>
33 /*}}}*/
34
35 using namespace std;
36
37 const MultiCompress::CompType MultiCompress::Compressors[] =
38 {{".","",0,0,0,1},
39 {"gzip",".gz","gzip","-9n","-d",2},
40 {"bzip2",".bz2","bzip2","-9","-d",3},
41 {}};
42
43 // MultiCompress::MultiCompress - Constructor /*{{{*/
44 // ---------------------------------------------------------------------
45 /* Setup the file outputs, compression modes and fork the writer child */
46 MultiCompress::MultiCompress(string Output,string Compress,
47 mode_t Permissions,bool Write)
48 {
49 Outputs = 0;
50 Outputter = -1;
51 Input = 0;
52 UpdateMTime = 0;
53 this->Permissions = Permissions;
54
55 /* Parse the compression string, a space separated lists of compresison
56 types */
57 string::const_iterator I = Compress.begin();
58 for (; I != Compress.end();)
59 {
60 for (; I != Compress.end() && isspace(*I); I++);
61
62 // Grab a word
63 string::const_iterator Start = I;
64 for (; I != Compress.end() && !isspace(*I); I++);
65
66 // Find the matching compressor
67 const CompType *Comp = Compressors;
68 for (; Comp->Name != 0; Comp++)
69 if (stringcmp(Start,I,Comp->Name) == 0)
70 break;
71
72 // Hmm.. unknown.
73 if (Comp->Name == 0)
74 {
75 _error->Warning(_("Unknown Compresison Algorithm '%s'"),string(Start,I).c_str());
76 continue;
77 }
78
79 // Create and link in a new output
80 Files *NewOut = new Files;
81 NewOut->Next = Outputs;
82 Outputs = NewOut;
83 NewOut->CompressProg = Comp;
84 NewOut->Output = Output+Comp->Extension;
85
86 struct stat St;
87 if (stat(NewOut->Output.c_str(),&St) == 0)
88 NewOut->OldMTime = St.st_mtime;
89 else
90 NewOut->OldMTime = 0;
91 }
92
93 if (Write == false)
94 return;
95
96 /* Open all the temp files now so we can report any errors. File is
97 made unreable to prevent people from touching it during creating. */
98 for (Files *I = Outputs; I != 0; I = I->Next)
99 I->TmpFile.Open(I->Output + ".new",FileFd::WriteEmpty,0600);
100 if (_error->PendingError() == true)
101 return;
102
103 if (Outputs == 0)
104 {
105 _error->Error(_("Compressed output %s needs a compression set"),Output.c_str());
106 return;
107 }
108
109 Start();
110 }
111 /*}}}*/
112 // MultiCompress::~MultiCompress - Destructor /*{{{*/
113 // ---------------------------------------------------------------------
114 /* Just erase the file linked list. */
115 MultiCompress::~MultiCompress()
116 {
117 Die();
118
119 for (; Outputs != 0;)
120 {
121 Files *Tmp = Outputs->Next;
122 delete Outputs;
123 Outputs = Tmp;
124 }
125 }
126 /*}}}*/
127 // MultiCompress::GetStat - Get stat information for compressed files /*{{{*/
128 // ---------------------------------------------------------------------
129 /* This checks each compressed file to make sure it exists and returns
130 stat information for a random file from the collection. False means
131 one or more of the files is missing. */
132 bool MultiCompress::GetStat(string Output,string Compress,struct stat &St)
133 {
134 /* Parse the compression string, a space separated lists of compresison
135 types */
136 string::const_iterator I = Compress.begin();
137 bool DidStat = false;
138 for (; I != Compress.end();)
139 {
140 for (; I != Compress.end() && isspace(*I); I++);
141
142 // Grab a word
143 string::const_iterator Start = I;
144 for (; I != Compress.end() && !isspace(*I); I++);
145
146 // Find the matching compressor
147 const CompType *Comp = Compressors;
148 for (; Comp->Name != 0; Comp++)
149 if (stringcmp(Start,I,Comp->Name) == 0)
150 break;
151
152 // Hmm.. unknown.
153 if (Comp->Name == 0)
154 continue;
155
156 string Name = Output+Comp->Extension;
157 if (stat(Name.c_str(),&St) != 0)
158 return false;
159 DidStat = true;
160 }
161 return DidStat;
162 }
163 /*}}}*/
164 // MultiCompress::Start - Start up the writer child /*{{{*/
165 // ---------------------------------------------------------------------
166 /* Fork a child and setup the communication pipe. */
167 bool MultiCompress::Start()
168 {
169 // Create a data pipe
170 int Pipe[2] = {-1,-1};
171 if (pipe(Pipe) != 0)
172 return _error->Errno("pipe",_("Failed to create IPC pipe to subprocess"));
173 for (int I = 0; I != 2; I++)
174 SetCloseExec(Pipe[I],true);
175
176 // The child..
177 Outputter = fork();
178 if (Outputter == 0)
179 {
180 close(Pipe[1]);
181 Child(Pipe[0]);
182 if (_error->PendingError() == true)
183 {
184 _error->DumpErrors();
185 _exit(100);
186 }
187 _exit(0);
188 };
189
190 /* Tidy up the temp files, we open them in the constructor so as to
191 get proper error reporting. Close them now. */
192 for (Files *I = Outputs; I != 0; I = I->Next)
193 I->TmpFile.Close();
194
195 close(Pipe[0]);
196 Input = fdopen(Pipe[1],"w");
197 if (Input == 0)
198 return _error->Errno("fdopen",_("Failed to create FILE*"));
199
200 if (Outputter == -1)
201 return _error->Errno("fork",_("Failed to fork"));
202 return true;
203 }
204 /*}}}*/
205 // MultiCompress::Die - Clean up the writer /*{{{*/
206 // ---------------------------------------------------------------------
207 /* */
208 bool MultiCompress::Die()
209 {
210 if (Input == 0)
211 return true;
212
213 fclose(Input);
214 Input = 0;
215 bool Res = ExecWait(Outputter,_("Compress Child"),false);
216 Outputter = -1;
217 return Res;
218 }
219 /*}}}*/
220 // MultiCompress::Finalize - Finish up writing /*{{{*/
221 // ---------------------------------------------------------------------
222 /* This is only necessary for statistics reporting. */
223 bool MultiCompress::Finalize(unsigned long &OutSize)
224 {
225 OutSize = 0;
226 if (Input == 0 || Die() == false)
227 return false;
228
229 time_t Now;
230 time(&Now);
231
232 // Check the mtimes to see if the files were replaced.
233 bool Changed = false;
234 for (Files *I = Outputs; I != 0; I = I->Next)
235 {
236 struct stat St;
237 if (stat(I->Output.c_str(),&St) != 0)
238 return _error->Error(_("Internal Error, Failed to create %s"),
239 I->Output.c_str());
240
241 if (I->OldMTime != St.st_mtime)
242 Changed = true;
243 else
244 {
245 // Update the mtime if necessary
246 if (UpdateMTime > 0 &&
247 (Now - St.st_mtime > (signed)UpdateMTime || St.st_mtime > Now))
248 {
249 struct utimbuf Buf;
250 Buf.actime = Buf.modtime = Now;
251 utime(I->Output.c_str(),&Buf);
252 Changed = true;
253 }
254 }
255
256 // Force the file permissions
257 if (St.st_mode != Permissions)
258 chmod(I->Output.c_str(),Permissions);
259
260 OutSize += St.st_size;
261 }
262
263 if (Changed == false)
264 OutSize = 0;
265
266 return true;
267 }
268 /*}}}*/
269 // MultiCompress::OpenCompress - Open the compressor /*{{{*/
270 // ---------------------------------------------------------------------
271 /* This opens the compressor, either in compress mode or decompress
272 mode. FileFd is always the compressor input/output file,
273 OutFd is the created pipe, Input for Compress, Output for Decompress. */
274 bool MultiCompress::OpenCompress(const CompType *Prog,pid_t &Pid,int FileFd,
275 int &OutFd,bool Comp)
276 {
277 Pid = -1;
278
279 // No compression
280 if (Prog->Binary == 0)
281 {
282 OutFd = dup(FileFd);
283 return true;
284 }
285
286 // Create a data pipe
287 int Pipe[2] = {-1,-1};
288 if (pipe(Pipe) != 0)
289 return _error->Errno("pipe",_("Failed to create subprocess IPC"));
290 for (int J = 0; J != 2; J++)
291 SetCloseExec(Pipe[J],true);
292
293 if (Comp == true)
294 OutFd = Pipe[1];
295 else
296 OutFd = Pipe[0];
297
298 // The child..
299 Pid = ExecFork();
300 if (Pid == 0)
301 {
302 if (Comp == true)
303 {
304 dup2(FileFd,STDOUT_FILENO);
305 dup2(Pipe[0],STDIN_FILENO);
306 }
307 else
308 {
309 dup2(FileFd,STDIN_FILENO);
310 dup2(Pipe[1],STDOUT_FILENO);
311 }
312
313 SetCloseExec(STDOUT_FILENO,false);
314 SetCloseExec(STDIN_FILENO,false);
315
316 const char *Args[3];
317 Args[0] = Prog->Binary;
318 if (Comp == true)
319 Args[1] = Prog->CompArgs;
320 else
321 Args[1] = Prog->UnCompArgs;
322 Args[2] = 0;
323 execvp(Args[0],(char **)Args);
324 cerr << _("Failed to exec compressor ") << Args[0] << endl;
325 _exit(100);
326 };
327 if (Comp == true)
328 close(Pipe[0]);
329 else
330 close(Pipe[1]);
331 return true;
332 }
333 /*}}}*/
334 // MultiCompress::OpenOld - Open an old file /*{{{*/
335 // ---------------------------------------------------------------------
336 /* This opens one of the original output files, possibly decompressing it. */
337 bool MultiCompress::OpenOld(int &Fd,pid_t &Proc)
338 {
339 Files *Best = Outputs;
340 for (Files *I = Outputs; I != 0; I = I->Next)
341 if (Best->CompressProg->Cost > I->CompressProg->Cost)
342 Best = I;
343
344 // Open the file
345 FileFd F(Best->Output,FileFd::ReadOnly);
346 if (_error->PendingError() == true)
347 return false;
348
349 // Decompress the file so we can read it
350 if (OpenCompress(Best->CompressProg,Proc,F.Fd(),Fd,false) == false)
351 return false;
352
353 return true;
354 }
355 /*}}}*/
356 // MultiCompress::CloseOld - Close the old file /*{{{*/
357 // ---------------------------------------------------------------------
358 /* */
359 bool MultiCompress::CloseOld(int Fd,pid_t Proc)
360 {
361 close(Fd);
362 if (Proc != -1)
363 if (ExecWait(Proc,_("decompressor"),false) == false)
364 return false;
365 return true;
366 }
367 /*}}}*/
368 // MultiCompress::Child - The writer child /*{{{*/
369 // ---------------------------------------------------------------------
370 /* The child process forks a bunch of compression children and takes
371 input on FD and passes it to all the compressor childer. On the way it
372 computes the MD5 of the raw data. After this the raw data in the
373 original files is compared to see if this data is new. If the data
374 is new then the temp files are renamed, otherwise they are erased. */
375 bool MultiCompress::Child(int FD)
376 {
377 // Start the compression children.
378 for (Files *I = Outputs; I != 0; I = I->Next)
379 {
380 if (OpenCompress(I->CompressProg,I->CompressProc,I->TmpFile.Fd(),
381 I->Fd,true) == false)
382 return false;
383 }
384
385 /* Okay, now we just feed data from FD to all the other FDs. Also
386 stash a hash of the data to use later. */
387 SetNonBlock(FD,false);
388 unsigned char Buffer[32*1024];
389 unsigned long FileSize = 0;
390 MD5Summation MD5;
391 while (1)
392 {
393 WaitFd(FD,false);
394 int Res = read(FD,Buffer,sizeof(Buffer));
395 if (Res == 0)
396 break;
397 if (Res < 0)
398 continue;
399
400 MD5.Add(Buffer,Res);
401 FileSize += Res;
402 for (Files *I = Outputs; I != 0; I = I->Next)
403 {
404 if (write(I->Fd,Buffer,Res) != Res)
405 {
406 _error->Errno("write",_("IO to subprocess/file failed"));
407 break;
408 }
409 }
410 }
411
412 // Close all the writers
413 for (Files *I = Outputs; I != 0; I = I->Next)
414 close(I->Fd);
415
416 // Wait for the compressors to exit
417 for (Files *I = Outputs; I != 0; I = I->Next)
418 {
419 if (I->CompressProc != -1)
420 ExecWait(I->CompressProc,I->CompressProg->Binary,false);
421 }
422
423 if (_error->PendingError() == true)
424 return false;
425
426 /* Now we have to copy the files over, or erase them if they
427 have not changed. First find the cheapest decompressor */
428 bool Missing = false;
429 for (Files *I = Outputs; I != 0; I = I->Next)
430 {
431 if (I->OldMTime == 0)
432 {
433 Missing = true;
434 break;
435 }
436 }
437
438 // Check the MD5 of the lowest cost entity.
439 while (Missing == false)
440 {
441 int CompFd = -1;
442 pid_t Proc = -1;
443 if (OpenOld(CompFd,Proc) == false)
444 {
445 _error->Discard();
446 break;
447 }
448
449 // Compute the hash
450 MD5Summation OldMD5;
451 unsigned long NewFileSize = 0;
452 while (1)
453 {
454 int Res = read(CompFd,Buffer,sizeof(Buffer));
455 if (Res == 0)
456 break;
457 if (Res < 0)
458 return _error->Errno("read",_("Failed to read while computing MD5"));
459 NewFileSize += Res;
460 OldMD5.Add(Buffer,Res);
461 }
462
463 // Tidy the compressor
464 if (CloseOld(CompFd,Proc) == false)
465 return false;
466
467 // Check the hash
468 if (OldMD5.Result() == MD5.Result() &&
469 FileSize == NewFileSize)
470 {
471 for (Files *I = Outputs; I != 0; I = I->Next)
472 {
473 I->TmpFile.Close();
474 if (unlink(I->TmpFile.Name().c_str()) != 0)
475 _error->Errno("unlink",_("Problem unlinking %s"),
476 I->TmpFile.Name().c_str());
477 }
478 return !_error->PendingError();
479 }
480 break;
481 }
482
483 // Finalize
484 for (Files *I = Outputs; I != 0; I = I->Next)
485 {
486 // Set the correct file modes
487 fchmod(I->TmpFile.Fd(),Permissions);
488
489 if (rename(I->TmpFile.Name().c_str(),I->Output.c_str()) != 0)
490 _error->Errno("rename",_("Failed to rename %s to %s"),
491 I->TmpFile.Name().c_str(),I->Output.c_str());
492 I->TmpFile.Close();
493 }
494
495 return !_error->PendingError();
496 }
497 /*}}}*/
498