* ftparchive/multicompress.cc, apt-inst/deb/debfile.cc:
[ntk/apt.git] / ftparchive / multicompress.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: multicompress.cc,v 1.4 2003/02/10 07:34:41 doogie Exp $
4 /* ######################################################################
5
6 MultiCompressor
7
8 This class is very complicated in order to optimize for the common
9 case of its use, writing a large set of compressed files that are
10 different from the old set. It spawns off compressors in parallel
11 to maximize compression throughput and has a separate task managing
12 the data going into the compressors.
13
14 ##################################################################### */
15 /*}}}*/
16 // Include Files /*{{{*/
#include "multicompress.h"

#include <apti18n.h>
#include <apt-pkg/strutl.h>
#include <apt-pkg/error.h>
#include <apt-pkg/md5.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <utime.h>
#include <unistd.h>
#include <iostream>
29 /*}}}*/
30
31 using namespace std;
32
33 const MultiCompress::CompType MultiCompress::Compressors[] =
34 {{".","",0,0,0,1},
35 {"gzip",".gz","gzip","-9n","-d",2},
36 {"bzip2",".bz2","bzip2","-9","-d",3},
37 {"lzma",".lzma","lzma","-9","-d",4},
38 {"xz",".xz","xz","-6","-d",5},
39 {}};
40
41 // MultiCompress::MultiCompress - Constructor /*{{{*/
42 // ---------------------------------------------------------------------
43 /* Setup the file outputs, compression modes and fork the writer child */
44 MultiCompress::MultiCompress(string const &Output,string const &Compress,
45 mode_t const &Permissions,bool const &Write) :
46 Permissions(Permissions)
47 {
48 Outputs = 0;
49 Outputter = -1;
50 Input = 0;
51 UpdateMTime = 0;
52
53 /* Parse the compression string, a space separated lists of compresison
54 types */
55 string::const_iterator I = Compress.begin();
56 for (; I != Compress.end();)
57 {
58 for (; I != Compress.end() && isspace(*I); I++);
59
60 // Grab a word
61 string::const_iterator Start = I;
62 for (; I != Compress.end() && !isspace(*I); I++);
63
64 // Find the matching compressor
65 const CompType *Comp = Compressors;
66 for (; Comp->Name != 0; Comp++)
67 if (stringcmp(Start,I,Comp->Name) == 0)
68 break;
69
70 // Hmm.. unknown.
71 if (Comp->Name == 0)
72 {
73 _error->Warning(_("Unknown compression algorithm '%s'"),string(Start,I).c_str());
74 continue;
75 }
76
77 // Create and link in a new output
78 Files *NewOut = new Files;
79 NewOut->Next = Outputs;
80 Outputs = NewOut;
81 NewOut->CompressProg = Comp;
82 NewOut->Output = Output+Comp->Extension;
83
84 struct stat St;
85 if (stat(NewOut->Output.c_str(),&St) == 0)
86 NewOut->OldMTime = St.st_mtime;
87 else
88 NewOut->OldMTime = 0;
89 }
90
91 if (Write == false)
92 return;
93
94 /* Open all the temp files now so we can report any errors. File is
95 made unreable to prevent people from touching it during creating. */
96 for (Files *I = Outputs; I != 0; I = I->Next)
97 I->TmpFile.Open(I->Output + ".new",FileFd::WriteEmpty,0600);
98 if (_error->PendingError() == true)
99 return;
100
101 if (Outputs == 0)
102 {
103 _error->Error(_("Compressed output %s needs a compression set"),Output.c_str());
104 return;
105 }
106
107 Start();
108 }
109 /*}}}*/
110 // MultiCompress::~MultiCompress - Destructor /*{{{*/
111 // ---------------------------------------------------------------------
112 /* Just erase the file linked list. */
113 MultiCompress::~MultiCompress()
114 {
115 Die();
116
117 for (; Outputs != 0;)
118 {
119 Files *Tmp = Outputs->Next;
120 delete Outputs;
121 Outputs = Tmp;
122 }
123 }
124 /*}}}*/
125 // MultiCompress::GetStat - Get stat information for compressed files /*{{{*/
126 // ---------------------------------------------------------------------
127 /* This checks each compressed file to make sure it exists and returns
128 stat information for a random file from the collection. False means
129 one or more of the files is missing. */
130 bool MultiCompress::GetStat(string const &Output,string const &Compress,struct stat &St)
131 {
132 /* Parse the compression string, a space separated lists of compresison
133 types */
134 string::const_iterator I = Compress.begin();
135 bool DidStat = false;
136 for (; I != Compress.end();)
137 {
138 for (; I != Compress.end() && isspace(*I); I++);
139
140 // Grab a word
141 string::const_iterator Start = I;
142 for (; I != Compress.end() && !isspace(*I); I++);
143
144 // Find the matching compressor
145 const CompType *Comp = Compressors;
146 for (; Comp->Name != 0; Comp++)
147 if (stringcmp(Start,I,Comp->Name) == 0)
148 break;
149
150 // Hmm.. unknown.
151 if (Comp->Name == 0)
152 continue;
153
154 string Name = Output+Comp->Extension;
155 if (stat(Name.c_str(),&St) != 0)
156 return false;
157 DidStat = true;
158 }
159 return DidStat;
160 }
161 /*}}}*/
162 // MultiCompress::Start - Start up the writer child /*{{{*/
163 // ---------------------------------------------------------------------
164 /* Fork a child and setup the communication pipe. */
165 bool MultiCompress::Start()
166 {
167 // Create a data pipe
168 int Pipe[2] = {-1,-1};
169 if (pipe(Pipe) != 0)
170 return _error->Errno("pipe",_("Failed to create IPC pipe to subprocess"));
171 for (int I = 0; I != 2; I++)
172 SetCloseExec(Pipe[I],true);
173
174 // The child..
175 Outputter = fork();
176 if (Outputter == 0)
177 {
178 close(Pipe[1]);
179 Child(Pipe[0]);
180 if (_error->PendingError() == true)
181 {
182 _error->DumpErrors();
183 _exit(100);
184 }
185 _exit(0);
186 };
187
188 /* Tidy up the temp files, we open them in the constructor so as to
189 get proper error reporting. Close them now. */
190 for (Files *I = Outputs; I != 0; I = I->Next)
191 I->TmpFile.Close();
192
193 close(Pipe[0]);
194 Input = fdopen(Pipe[1],"w");
195 if (Input == 0)
196 return _error->Errno("fdopen",_("Failed to create FILE*"));
197
198 if (Outputter == -1)
199 return _error->Errno("fork",_("Failed to fork"));
200 return true;
201 }
202 /*}}}*/
203 // MultiCompress::Die - Clean up the writer /*{{{*/
204 // ---------------------------------------------------------------------
205 /* */
206 bool MultiCompress::Die()
207 {
208 if (Input == 0)
209 return true;
210
211 fclose(Input);
212 Input = 0;
213 bool Res = ExecWait(Outputter,_("Compress child"),false);
214 Outputter = -1;
215 return Res;
216 }
217 /*}}}*/
218 // MultiCompress::Finalize - Finish up writing /*{{{*/
219 // ---------------------------------------------------------------------
220 /* This is only necessary for statistics reporting. */
221 bool MultiCompress::Finalize(unsigned long &OutSize)
222 {
223 OutSize = 0;
224 if (Input == 0 || Die() == false)
225 return false;
226
227 time_t Now;
228 time(&Now);
229
230 // Check the mtimes to see if the files were replaced.
231 bool Changed = false;
232 for (Files *I = Outputs; I != 0; I = I->Next)
233 {
234 struct stat St;
235 if (stat(I->Output.c_str(),&St) != 0)
236 return _error->Error(_("Internal error, failed to create %s"),
237 I->Output.c_str());
238
239 if (I->OldMTime != St.st_mtime)
240 Changed = true;
241 else
242 {
243 // Update the mtime if necessary
244 if (UpdateMTime > 0 &&
245 (Now - St.st_mtime > (signed)UpdateMTime || St.st_mtime > Now))
246 {
247 struct utimbuf Buf;
248 Buf.actime = Buf.modtime = Now;
249 utime(I->Output.c_str(),&Buf);
250 Changed = true;
251 }
252 }
253
254 // Force the file permissions
255 if (St.st_mode != Permissions)
256 chmod(I->Output.c_str(),Permissions);
257
258 OutSize += St.st_size;
259 }
260
261 if (Changed == false)
262 OutSize = 0;
263
264 return true;
265 }
266 /*}}}*/
267 // MultiCompress::OpenCompress - Open the compressor /*{{{*/
268 // ---------------------------------------------------------------------
269 /* This opens the compressor, either in compress mode or decompress
270 mode. FileFd is always the compressor input/output file,
271 OutFd is the created pipe, Input for Compress, Output for Decompress. */
272 bool MultiCompress::OpenCompress(const CompType *Prog,pid_t &Pid,int const &FileFd,
273 int &OutFd,bool const &Comp)
274 {
275 Pid = -1;
276
277 // No compression
278 if (Prog->Binary == 0)
279 {
280 OutFd = dup(FileFd);
281 return true;
282 }
283
284 // Create a data pipe
285 int Pipe[2] = {-1,-1};
286 if (pipe(Pipe) != 0)
287 return _error->Errno("pipe",_("Failed to create subprocess IPC"));
288 for (int J = 0; J != 2; J++)
289 SetCloseExec(Pipe[J],true);
290
291 if (Comp == true)
292 OutFd = Pipe[1];
293 else
294 OutFd = Pipe[0];
295
296 // The child..
297 Pid = ExecFork();
298 if (Pid == 0)
299 {
300 if (Comp == true)
301 {
302 dup2(FileFd,STDOUT_FILENO);
303 dup2(Pipe[0],STDIN_FILENO);
304 }
305 else
306 {
307 dup2(FileFd,STDIN_FILENO);
308 dup2(Pipe[1],STDOUT_FILENO);
309 }
310
311 SetCloseExec(STDOUT_FILENO,false);
312 SetCloseExec(STDIN_FILENO,false);
313
314 const char *Args[3];
315 Args[0] = Prog->Binary;
316 if (Comp == true)
317 Args[1] = Prog->CompArgs;
318 else
319 Args[1] = Prog->UnCompArgs;
320 Args[2] = 0;
321 execvp(Args[0],(char **)Args);
322 cerr << _("Failed to exec compressor ") << Args[0] << endl;
323 _exit(100);
324 };
325 if (Comp == true)
326 close(Pipe[0]);
327 else
328 close(Pipe[1]);
329 return true;
330 }
331 /*}}}*/
332 // MultiCompress::OpenOld - Open an old file /*{{{*/
333 // ---------------------------------------------------------------------
334 /* This opens one of the original output files, possibly decompressing it. */
335 bool MultiCompress::OpenOld(int &Fd,pid_t &Proc)
336 {
337 Files *Best = Outputs;
338 for (Files *I = Outputs; I != 0; I = I->Next)
339 if (Best->CompressProg->Cost > I->CompressProg->Cost)
340 Best = I;
341
342 // Open the file
343 FileFd F(Best->Output,FileFd::ReadOnly);
344 if (_error->PendingError() == true)
345 return false;
346
347 // Decompress the file so we can read it
348 if (OpenCompress(Best->CompressProg,Proc,F.Fd(),Fd,false) == false)
349 return false;
350
351 return true;
352 }
353 /*}}}*/
354 // MultiCompress::CloseOld - Close the old file /*{{{*/
355 // ---------------------------------------------------------------------
356 /* */
357 bool MultiCompress::CloseOld(int Fd,pid_t Proc)
358 {
359 close(Fd);
360 if (Proc != -1)
361 if (ExecWait(Proc,_("decompressor"),false) == false)
362 return false;
363 return true;
364 }
365 /*}}}*/
366 // MultiCompress::Child - The writer child /*{{{*/
367 // ---------------------------------------------------------------------
368 /* The child process forks a bunch of compression children and takes
369 input on FD and passes it to all the compressor child. On the way it
370 computes the MD5 of the raw data. After this the raw data in the
371 original files is compared to see if this data is new. If the data
372 is new then the temp files are renamed, otherwise they are erased. */
373 bool MultiCompress::Child(int const &FD)
374 {
375 // Start the compression children.
376 for (Files *I = Outputs; I != 0; I = I->Next)
377 {
378 if (OpenCompress(I->CompressProg,I->CompressProc,I->TmpFile.Fd(),
379 I->Fd,true) == false)
380 return false;
381 }
382
383 /* Okay, now we just feed data from FD to all the other FDs. Also
384 stash a hash of the data to use later. */
385 SetNonBlock(FD,false);
386 unsigned char Buffer[32*1024];
387 unsigned long FileSize = 0;
388 MD5Summation MD5;
389 while (1)
390 {
391 WaitFd(FD,false);
392 int Res = read(FD,Buffer,sizeof(Buffer));
393 if (Res == 0)
394 break;
395 if (Res < 0)
396 continue;
397
398 MD5.Add(Buffer,Res);
399 FileSize += Res;
400 for (Files *I = Outputs; I != 0; I = I->Next)
401 {
402 if (write(I->Fd,Buffer,Res) != Res)
403 {
404 _error->Errno("write",_("IO to subprocess/file failed"));
405 break;
406 }
407 }
408 }
409
410 // Close all the writers
411 for (Files *I = Outputs; I != 0; I = I->Next)
412 close(I->Fd);
413
414 // Wait for the compressors to exit
415 for (Files *I = Outputs; I != 0; I = I->Next)
416 {
417 if (I->CompressProc != -1)
418 ExecWait(I->CompressProc,I->CompressProg->Binary,false);
419 }
420
421 if (_error->PendingError() == true)
422 return false;
423
424 /* Now we have to copy the files over, or erase them if they
425 have not changed. First find the cheapest decompressor */
426 bool Missing = false;
427 for (Files *I = Outputs; I != 0; I = I->Next)
428 {
429 if (I->OldMTime == 0)
430 {
431 Missing = true;
432 break;
433 }
434 }
435
436 // Check the MD5 of the lowest cost entity.
437 while (Missing == false)
438 {
439 int CompFd = -1;
440 pid_t Proc = -1;
441 if (OpenOld(CompFd,Proc) == false)
442 {
443 _error->Discard();
444 break;
445 }
446
447 // Compute the hash
448 MD5Summation OldMD5;
449 unsigned long NewFileSize = 0;
450 while (1)
451 {
452 int Res = read(CompFd,Buffer,sizeof(Buffer));
453 if (Res == 0)
454 break;
455 if (Res < 0)
456 return _error->Errno("read",_("Failed to read while computing MD5"));
457 NewFileSize += Res;
458 OldMD5.Add(Buffer,Res);
459 }
460
461 // Tidy the compressor
462 if (CloseOld(CompFd,Proc) == false)
463 return false;
464
465 // Check the hash
466 if (OldMD5.Result() == MD5.Result() &&
467 FileSize == NewFileSize)
468 {
469 for (Files *I = Outputs; I != 0; I = I->Next)
470 {
471 I->TmpFile.Close();
472 if (unlink(I->TmpFile.Name().c_str()) != 0)
473 _error->Errno("unlink",_("Problem unlinking %s"),
474 I->TmpFile.Name().c_str());
475 }
476 return !_error->PendingError();
477 }
478 break;
479 }
480
481 // Finalize
482 for (Files *I = Outputs; I != 0; I = I->Next)
483 {
484 // Set the correct file modes
485 fchmod(I->TmpFile.Fd(),Permissions);
486
487 if (rename(I->TmpFile.Name().c_str(),I->Output.c_str()) != 0)
488 _error->Errno("rename",_("Failed to rename %s to %s"),
489 I->TmpFile.Name().c_str(),I->Output.c_str());
490 I->TmpFile.Close();
491 }
492
493 return !_error->PendingError();
494 }
495 /*}}}*/
496