fix a few typos in strings, comments and manpage of apt-ftparchive
[ntk/apt.git] / ftparchive / multicompress.cc
CommitLineData
b2e465d6
AL
1// -*- mode: cpp; mode: fold -*-
2// Description /*{{{*/
dc738e7a 3// $Id: multicompress.cc,v 1.4 2003/02/10 07:34:41 doogie Exp $
b2e465d6
AL
4/* ######################################################################
5
6 MultiCompressor
7
8 This class is very complicated in order to optimize for the common
9 case of its use, writing a large set of compressed files that are
10 different from the old set. It spawns off compressors in parallel
11 to maximize compression throughput and has a separate task managing
12 the data going into the compressors.
13
14 ##################################################################### */
15 /*}}}*/
16// Include Files /*{{{*/
b2e465d6
AL
#include "multicompress.h"

#include <apti18n.h>
#include <apt-pkg/strutl.h>
#include <apt-pkg/error.h>
#include <apt-pkg/md5.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <utime.h>
#include <unistd.h>
#include <iostream>
b2e465d6
AL
29 /*}}}*/
30
812f4169
AL
31using namespace std;
32
b2e465d6
AL
33const MultiCompress::CompType MultiCompress::Compressors[] =
34 {{".","",0,0,0,1},
35 {"gzip",".gz","gzip","-9n","-d",2},
36 {"bzip2",".bz2","bzip2","-9","-d",3},
24e135b6 37 {"lzma",".lzma","lzma","-9","-d",4},
b2e465d6
AL
38 {}};
39
40// MultiCompress::MultiCompress - Constructor /*{{{*/
41// ---------------------------------------------------------------------
42/* Setup the file outputs, compression modes and fork the writer child */
43MultiCompress::MultiCompress(string Output,string Compress,
44 mode_t Permissions,bool Write)
45{
46 Outputs = 0;
47 Outputter = -1;
48 Input = 0;
49 UpdateMTime = 0;
50 this->Permissions = Permissions;
51
52 /* Parse the compression string, a space separated lists of compresison
53 types */
54 string::const_iterator I = Compress.begin();
55 for (; I != Compress.end();)
56 {
57 for (; I != Compress.end() && isspace(*I); I++);
58
59 // Grab a word
60 string::const_iterator Start = I;
61 for (; I != Compress.end() && !isspace(*I); I++);
62
63 // Find the matching compressor
64 const CompType *Comp = Compressors;
65 for (; Comp->Name != 0; Comp++)
66 if (stringcmp(Start,I,Comp->Name) == 0)
67 break;
68
69 // Hmm.. unknown.
70 if (Comp->Name == 0)
71 {
db0db9fe 72 _error->Warning(_("Unknown compression algorithm '%s'"),string(Start,I).c_str());
b2e465d6
AL
73 continue;
74 }
75
76 // Create and link in a new output
77 Files *NewOut = new Files;
78 NewOut->Next = Outputs;
79 Outputs = NewOut;
80 NewOut->CompressProg = Comp;
81 NewOut->Output = Output+Comp->Extension;
82
83 struct stat St;
84 if (stat(NewOut->Output.c_str(),&St) == 0)
85 NewOut->OldMTime = St.st_mtime;
86 else
87 NewOut->OldMTime = 0;
88 }
89
90 if (Write == false)
91 return;
92
93 /* Open all the temp files now so we can report any errors. File is
94 made unreable to prevent people from touching it during creating. */
95 for (Files *I = Outputs; I != 0; I = I->Next)
96 I->TmpFile.Open(I->Output + ".new",FileFd::WriteEmpty,0600);
97 if (_error->PendingError() == true)
98 return;
99
100 if (Outputs == 0)
101 {
dc738e7a 102 _error->Error(_("Compressed output %s needs a compression set"),Output.c_str());
b2e465d6
AL
103 return;
104 }
105
106 Start();
107}
108 /*}}}*/
109// MultiCompress::~MultiCompress - Destructor /*{{{*/
110// ---------------------------------------------------------------------
111/* Just erase the file linked list. */
112MultiCompress::~MultiCompress()
113{
114 Die();
115
116 for (; Outputs != 0;)
117 {
118 Files *Tmp = Outputs->Next;
119 delete Outputs;
120 Outputs = Tmp;
121 }
122}
123 /*}}}*/
124// MultiCompress::GetStat - Get stat information for compressed files /*{{{*/
125// ---------------------------------------------------------------------
126/* This checks each compressed file to make sure it exists and returns
127 stat information for a random file from the collection. False means
128 one or more of the files is missing. */
129bool MultiCompress::GetStat(string Output,string Compress,struct stat &St)
130{
131 /* Parse the compression string, a space separated lists of compresison
132 types */
133 string::const_iterator I = Compress.begin();
134 bool DidStat = false;
135 for (; I != Compress.end();)
136 {
137 for (; I != Compress.end() && isspace(*I); I++);
138
139 // Grab a word
140 string::const_iterator Start = I;
141 for (; I != Compress.end() && !isspace(*I); I++);
142
143 // Find the matching compressor
144 const CompType *Comp = Compressors;
145 for (; Comp->Name != 0; Comp++)
146 if (stringcmp(Start,I,Comp->Name) == 0)
147 break;
148
149 // Hmm.. unknown.
150 if (Comp->Name == 0)
151 continue;
152
153 string Name = Output+Comp->Extension;
154 if (stat(Name.c_str(),&St) != 0)
155 return false;
156 DidStat = true;
157 }
158 return DidStat;
159}
160 /*}}}*/
161// MultiCompress::Start - Start up the writer child /*{{{*/
162// ---------------------------------------------------------------------
163/* Fork a child and setup the communication pipe. */
164bool MultiCompress::Start()
165{
166 // Create a data pipe
167 int Pipe[2] = {-1,-1};
168 if (pipe(Pipe) != 0)
dc738e7a 169 return _error->Errno("pipe",_("Failed to create IPC pipe to subprocess"));
b2e465d6
AL
170 for (int I = 0; I != 2; I++)
171 SetCloseExec(Pipe[I],true);
172
173 // The child..
174 Outputter = fork();
175 if (Outputter == 0)
176 {
177 close(Pipe[1]);
178 Child(Pipe[0]);
179 if (_error->PendingError() == true)
180 {
181 _error->DumpErrors();
182 _exit(100);
183 }
184 _exit(0);
185 };
186
187 /* Tidy up the temp files, we open them in the constructor so as to
188 get proper error reporting. Close them now. */
189 for (Files *I = Outputs; I != 0; I = I->Next)
190 I->TmpFile.Close();
191
192 close(Pipe[0]);
193 Input = fdopen(Pipe[1],"w");
194 if (Input == 0)
dc738e7a 195 return _error->Errno("fdopen",_("Failed to create FILE*"));
b2e465d6
AL
196
197 if (Outputter == -1)
dc738e7a 198 return _error->Errno("fork",_("Failed to fork"));
b2e465d6
AL
199 return true;
200}
201 /*}}}*/
202// MultiCompress::Die - Clean up the writer /*{{{*/
203// ---------------------------------------------------------------------
204/* */
205bool MultiCompress::Die()
206{
207 if (Input == 0)
208 return true;
209
210 fclose(Input);
211 Input = 0;
db0db9fe 212 bool Res = ExecWait(Outputter,_("Compress child"),false);
b2e465d6
AL
213 Outputter = -1;
214 return Res;
215}
216 /*}}}*/
217// MultiCompress::Finalize - Finish up writing /*{{{*/
218// ---------------------------------------------------------------------
219/* This is only necessary for statistics reporting. */
220bool MultiCompress::Finalize(unsigned long &OutSize)
221{
222 OutSize = 0;
223 if (Input == 0 || Die() == false)
224 return false;
225
226 time_t Now;
227 time(&Now);
228
229 // Check the mtimes to see if the files were replaced.
230 bool Changed = false;
231 for (Files *I = Outputs; I != 0; I = I->Next)
232 {
233 struct stat St;
234 if (stat(I->Output.c_str(),&St) != 0)
db0db9fe 235 return _error->Error(_("Internal error, failed to create %s"),
b2e465d6
AL
236 I->Output.c_str());
237
238 if (I->OldMTime != St.st_mtime)
239 Changed = true;
240 else
241 {
242 // Update the mtime if necessary
243 if (UpdateMTime > 0 &&
244 (Now - St.st_mtime > (signed)UpdateMTime || St.st_mtime > Now))
245 {
246 struct utimbuf Buf;
247 Buf.actime = Buf.modtime = Now;
248 utime(I->Output.c_str(),&Buf);
249 Changed = true;
250 }
251 }
252
253 // Force the file permissions
254 if (St.st_mode != Permissions)
255 chmod(I->Output.c_str(),Permissions);
256
257 OutSize += St.st_size;
258 }
259
260 if (Changed == false)
261 OutSize = 0;
262
263 return true;
264}
265 /*}}}*/
266// MultiCompress::OpenCompress - Open the compressor /*{{{*/
267// ---------------------------------------------------------------------
268/* This opens the compressor, either in compress mode or decompress
269 mode. FileFd is always the compressor input/output file,
270 OutFd is the created pipe, Input for Compress, Output for Decompress. */
3826564e 271bool MultiCompress::OpenCompress(const CompType *Prog,pid_t &Pid,int FileFd,
b2e465d6
AL
272 int &OutFd,bool Comp)
273{
274 Pid = -1;
275
276 // No compression
277 if (Prog->Binary == 0)
278 {
279 OutFd = dup(FileFd);
280 return true;
281 }
282
283 // Create a data pipe
284 int Pipe[2] = {-1,-1};
285 if (pipe(Pipe) != 0)
dc738e7a 286 return _error->Errno("pipe",_("Failed to create subprocess IPC"));
b2e465d6
AL
287 for (int J = 0; J != 2; J++)
288 SetCloseExec(Pipe[J],true);
289
290 if (Comp == true)
291 OutFd = Pipe[1];
292 else
293 OutFd = Pipe[0];
294
295 // The child..
296 Pid = ExecFork();
297 if (Pid == 0)
298 {
299 if (Comp == true)
300 {
301 dup2(FileFd,STDOUT_FILENO);
302 dup2(Pipe[0],STDIN_FILENO);
303 }
304 else
305 {
306 dup2(FileFd,STDIN_FILENO);
307 dup2(Pipe[1],STDOUT_FILENO);
308 }
309
310 SetCloseExec(STDOUT_FILENO,false);
311 SetCloseExec(STDIN_FILENO,false);
312
313 const char *Args[3];
314 Args[0] = Prog->Binary;
315 if (Comp == true)
316 Args[1] = Prog->CompArgs;
317 else
318 Args[1] = Prog->UnCompArgs;
319 Args[2] = 0;
320 execvp(Args[0],(char **)Args);
dc738e7a 321 cerr << _("Failed to exec compressor ") << Args[0] << endl;
b2e465d6
AL
322 _exit(100);
323 };
324 if (Comp == true)
325 close(Pipe[0]);
326 else
327 close(Pipe[1]);
328 return true;
329}
330 /*}}}*/
331// MultiCompress::OpenOld - Open an old file /*{{{*/
332// ---------------------------------------------------------------------
333/* This opens one of the original output files, possibly decompressing it. */
3826564e 334bool MultiCompress::OpenOld(int &Fd,pid_t &Proc)
b2e465d6
AL
335{
336 Files *Best = Outputs;
337 for (Files *I = Outputs; I != 0; I = I->Next)
338 if (Best->CompressProg->Cost > I->CompressProg->Cost)
339 Best = I;
340
341 // Open the file
342 FileFd F(Best->Output,FileFd::ReadOnly);
343 if (_error->PendingError() == true)
344 return false;
345
346 // Decompress the file so we can read it
347 if (OpenCompress(Best->CompressProg,Proc,F.Fd(),Fd,false) == false)
348 return false;
349
350 return true;
351}
352 /*}}}*/
353// MultiCompress::CloseOld - Close the old file /*{{{*/
354// ---------------------------------------------------------------------
355/* */
3826564e 356bool MultiCompress::CloseOld(int Fd,pid_t Proc)
b2e465d6
AL
357{
358 close(Fd);
359 if (Proc != -1)
dc738e7a 360 if (ExecWait(Proc,_("decompressor"),false) == false)
b2e465d6
AL
361 return false;
362 return true;
363}
364 /*}}}*/
365// MultiCompress::Child - The writer child /*{{{*/
366// ---------------------------------------------------------------------
367/* The child process forks a bunch of compression children and takes
ca4907db 368 input on FD and passes it to all the compressor child. On the way it
b2e465d6
AL
369 computes the MD5 of the raw data. After this the raw data in the
370 original files is compared to see if this data is new. If the data
371 is new then the temp files are renamed, otherwise they are erased. */
372bool MultiCompress::Child(int FD)
373{
374 // Start the compression children.
375 for (Files *I = Outputs; I != 0; I = I->Next)
376 {
377 if (OpenCompress(I->CompressProg,I->CompressProc,I->TmpFile.Fd(),
378 I->Fd,true) == false)
379 return false;
380 }
381
382 /* Okay, now we just feed data from FD to all the other FDs. Also
383 stash a hash of the data to use later. */
384 SetNonBlock(FD,false);
385 unsigned char Buffer[32*1024];
386 unsigned long FileSize = 0;
387 MD5Summation MD5;
388 while (1)
389 {
390 WaitFd(FD,false);
391 int Res = read(FD,Buffer,sizeof(Buffer));
392 if (Res == 0)
393 break;
394 if (Res < 0)
395 continue;
396
397 MD5.Add(Buffer,Res);
398 FileSize += Res;
399 for (Files *I = Outputs; I != 0; I = I->Next)
400 {
401 if (write(I->Fd,Buffer,Res) != Res)
402 {
dc738e7a 403 _error->Errno("write",_("IO to subprocess/file failed"));
b2e465d6
AL
404 break;
405 }
406 }
407 }
408
409 // Close all the writers
410 for (Files *I = Outputs; I != 0; I = I->Next)
411 close(I->Fd);
412
413 // Wait for the compressors to exit
414 for (Files *I = Outputs; I != 0; I = I->Next)
415 {
416 if (I->CompressProc != -1)
417 ExecWait(I->CompressProc,I->CompressProg->Binary,false);
418 }
419
420 if (_error->PendingError() == true)
421 return false;
422
423 /* Now we have to copy the files over, or erase them if they
424 have not changed. First find the cheapest decompressor */
425 bool Missing = false;
426 for (Files *I = Outputs; I != 0; I = I->Next)
427 {
428 if (I->OldMTime == 0)
429 {
430 Missing = true;
431 break;
432 }
433 }
434
435 // Check the MD5 of the lowest cost entity.
436 while (Missing == false)
437 {
438 int CompFd = -1;
3826564e 439 pid_t Proc = -1;
b2e465d6
AL
440 if (OpenOld(CompFd,Proc) == false)
441 {
442 _error->Discard();
443 break;
444 }
445
446 // Compute the hash
447 MD5Summation OldMD5;
448 unsigned long NewFileSize = 0;
449 while (1)
450 {
451 int Res = read(CompFd,Buffer,sizeof(Buffer));
452 if (Res == 0)
453 break;
454 if (Res < 0)
dc738e7a 455 return _error->Errno("read",_("Failed to read while computing MD5"));
b2e465d6
AL
456 NewFileSize += Res;
457 OldMD5.Add(Buffer,Res);
458 }
459
460 // Tidy the compressor
461 if (CloseOld(CompFd,Proc) == false)
462 return false;
463
464 // Check the hash
465 if (OldMD5.Result() == MD5.Result() &&
466 FileSize == NewFileSize)
467 {
468 for (Files *I = Outputs; I != 0; I = I->Next)
469 {
470 I->TmpFile.Close();
471 if (unlink(I->TmpFile.Name().c_str()) != 0)
dc738e7a 472 _error->Errno("unlink",_("Problem unlinking %s"),
b2e465d6
AL
473 I->TmpFile.Name().c_str());
474 }
475 return !_error->PendingError();
476 }
477 break;
478 }
479
480 // Finalize
481 for (Files *I = Outputs; I != 0; I = I->Next)
482 {
483 // Set the correct file modes
484 fchmod(I->TmpFile.Fd(),Permissions);
485
486 if (rename(I->TmpFile.Name().c_str(),I->Output.c_str()) != 0)
dc738e7a 487 _error->Errno("rename",_("Failed to rename %s to %s"),
b2e465d6
AL
488 I->TmpFile.Name().c_str(),I->Output.c_str());
489 I->TmpFile.Close();
490 }
491
492 return !_error->PendingError();
493}
494 /*}}}*/
495