test/pre-upload-check.py:
[ntk/apt.git] / ftparchive / multicompress.cc
CommitLineData
b2e465d6
AL
1// -*- mode: cpp; mode: fold -*-
2// Description /*{{{*/
dc738e7a 3// $Id: multicompress.cc,v 1.4 2003/02/10 07:34:41 doogie Exp $
b2e465d6
AL
4/* ######################################################################
5
6 MultiCompressor
7
8 This class is very complicated in order to optimize for the common
9 case of its use, writing a large set of compressed files that are
10 different from the old set. It spawns off compressors in parallel
11 to maximize compression throughput and has a separate task managing
12 the data going into the compressors.
13
14 ##################################################################### */
15 /*}}}*/
16// Include Files /*{{{*/
b2e465d6
AL
#include "multicompress.h"

#include <apti18n.h>
#include <apt-pkg/strutl.h>
#include <apt-pkg/error.h>
#include <apt-pkg/md5.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <utime.h>
#include <unistd.h>
#include <iostream>
b2e465d6
AL
29 /*}}}*/
30
812f4169
AL
31using namespace std;
32
b2e465d6
AL
33const MultiCompress::CompType MultiCompress::Compressors[] =
34 {{".","",0,0,0,1},
35 {"gzip",".gz","gzip","-9n","-d",2},
36 {"bzip2",".bz2","bzip2","-9","-d",3},
37 {}};
38
39// MultiCompress::MultiCompress - Constructor /*{{{*/
40// ---------------------------------------------------------------------
41/* Setup the file outputs, compression modes and fork the writer child */
42MultiCompress::MultiCompress(string Output,string Compress,
43 mode_t Permissions,bool Write)
44{
45 Outputs = 0;
46 Outputter = -1;
47 Input = 0;
48 UpdateMTime = 0;
49 this->Permissions = Permissions;
50
51 /* Parse the compression string, a space separated lists of compresison
52 types */
53 string::const_iterator I = Compress.begin();
54 for (; I != Compress.end();)
55 {
56 for (; I != Compress.end() && isspace(*I); I++);
57
58 // Grab a word
59 string::const_iterator Start = I;
60 for (; I != Compress.end() && !isspace(*I); I++);
61
62 // Find the matching compressor
63 const CompType *Comp = Compressors;
64 for (; Comp->Name != 0; Comp++)
65 if (stringcmp(Start,I,Comp->Name) == 0)
66 break;
67
68 // Hmm.. unknown.
69 if (Comp->Name == 0)
70 {
db0db9fe 71 _error->Warning(_("Unknown compression algorithm '%s'"),string(Start,I).c_str());
b2e465d6
AL
72 continue;
73 }
74
75 // Create and link in a new output
76 Files *NewOut = new Files;
77 NewOut->Next = Outputs;
78 Outputs = NewOut;
79 NewOut->CompressProg = Comp;
80 NewOut->Output = Output+Comp->Extension;
81
82 struct stat St;
83 if (stat(NewOut->Output.c_str(),&St) == 0)
84 NewOut->OldMTime = St.st_mtime;
85 else
86 NewOut->OldMTime = 0;
87 }
88
89 if (Write == false)
90 return;
91
92 /* Open all the temp files now so we can report any errors. File is
93 made unreable to prevent people from touching it during creating. */
94 for (Files *I = Outputs; I != 0; I = I->Next)
95 I->TmpFile.Open(I->Output + ".new",FileFd::WriteEmpty,0600);
96 if (_error->PendingError() == true)
97 return;
98
99 if (Outputs == 0)
100 {
dc738e7a 101 _error->Error(_("Compressed output %s needs a compression set"),Output.c_str());
b2e465d6
AL
102 return;
103 }
104
105 Start();
106}
107 /*}}}*/
108// MultiCompress::~MultiCompress - Destructor /*{{{*/
109// ---------------------------------------------------------------------
110/* Just erase the file linked list. */
111MultiCompress::~MultiCompress()
112{
113 Die();
114
115 for (; Outputs != 0;)
116 {
117 Files *Tmp = Outputs->Next;
118 delete Outputs;
119 Outputs = Tmp;
120 }
121}
122 /*}}}*/
123// MultiCompress::GetStat - Get stat information for compressed files /*{{{*/
124// ---------------------------------------------------------------------
125/* This checks each compressed file to make sure it exists and returns
126 stat information for a random file from the collection. False means
127 one or more of the files is missing. */
128bool MultiCompress::GetStat(string Output,string Compress,struct stat &St)
129{
130 /* Parse the compression string, a space separated lists of compresison
131 types */
132 string::const_iterator I = Compress.begin();
133 bool DidStat = false;
134 for (; I != Compress.end();)
135 {
136 for (; I != Compress.end() && isspace(*I); I++);
137
138 // Grab a word
139 string::const_iterator Start = I;
140 for (; I != Compress.end() && !isspace(*I); I++);
141
142 // Find the matching compressor
143 const CompType *Comp = Compressors;
144 for (; Comp->Name != 0; Comp++)
145 if (stringcmp(Start,I,Comp->Name) == 0)
146 break;
147
148 // Hmm.. unknown.
149 if (Comp->Name == 0)
150 continue;
151
152 string Name = Output+Comp->Extension;
153 if (stat(Name.c_str(),&St) != 0)
154 return false;
155 DidStat = true;
156 }
157 return DidStat;
158}
159 /*}}}*/
160// MultiCompress::Start - Start up the writer child /*{{{*/
161// ---------------------------------------------------------------------
162/* Fork a child and setup the communication pipe. */
163bool MultiCompress::Start()
164{
165 // Create a data pipe
166 int Pipe[2] = {-1,-1};
167 if (pipe(Pipe) != 0)
dc738e7a 168 return _error->Errno("pipe",_("Failed to create IPC pipe to subprocess"));
b2e465d6
AL
169 for (int I = 0; I != 2; I++)
170 SetCloseExec(Pipe[I],true);
171
172 // The child..
173 Outputter = fork();
174 if (Outputter == 0)
175 {
176 close(Pipe[1]);
177 Child(Pipe[0]);
178 if (_error->PendingError() == true)
179 {
180 _error->DumpErrors();
181 _exit(100);
182 }
183 _exit(0);
184 };
185
186 /* Tidy up the temp files, we open them in the constructor so as to
187 get proper error reporting. Close them now. */
188 for (Files *I = Outputs; I != 0; I = I->Next)
189 I->TmpFile.Close();
190
191 close(Pipe[0]);
192 Input = fdopen(Pipe[1],"w");
193 if (Input == 0)
dc738e7a 194 return _error->Errno("fdopen",_("Failed to create FILE*"));
b2e465d6
AL
195
196 if (Outputter == -1)
dc738e7a 197 return _error->Errno("fork",_("Failed to fork"));
b2e465d6
AL
198 return true;
199}
200 /*}}}*/
201// MultiCompress::Die - Clean up the writer /*{{{*/
202// ---------------------------------------------------------------------
203/* */
204bool MultiCompress::Die()
205{
206 if (Input == 0)
207 return true;
208
209 fclose(Input);
210 Input = 0;
db0db9fe 211 bool Res = ExecWait(Outputter,_("Compress child"),false);
b2e465d6
AL
212 Outputter = -1;
213 return Res;
214}
215 /*}}}*/
216// MultiCompress::Finalize - Finish up writing /*{{{*/
217// ---------------------------------------------------------------------
218/* This is only necessary for statistics reporting. */
219bool MultiCompress::Finalize(unsigned long &OutSize)
220{
221 OutSize = 0;
222 if (Input == 0 || Die() == false)
223 return false;
224
225 time_t Now;
226 time(&Now);
227
228 // Check the mtimes to see if the files were replaced.
229 bool Changed = false;
230 for (Files *I = Outputs; I != 0; I = I->Next)
231 {
232 struct stat St;
233 if (stat(I->Output.c_str(),&St) != 0)
db0db9fe 234 return _error->Error(_("Internal error, failed to create %s"),
b2e465d6
AL
235 I->Output.c_str());
236
237 if (I->OldMTime != St.st_mtime)
238 Changed = true;
239 else
240 {
241 // Update the mtime if necessary
242 if (UpdateMTime > 0 &&
243 (Now - St.st_mtime > (signed)UpdateMTime || St.st_mtime > Now))
244 {
245 struct utimbuf Buf;
246 Buf.actime = Buf.modtime = Now;
247 utime(I->Output.c_str(),&Buf);
248 Changed = true;
249 }
250 }
251
252 // Force the file permissions
253 if (St.st_mode != Permissions)
254 chmod(I->Output.c_str(),Permissions);
255
256 OutSize += St.st_size;
257 }
258
259 if (Changed == false)
260 OutSize = 0;
261
262 return true;
263}
264 /*}}}*/
265// MultiCompress::OpenCompress - Open the compressor /*{{{*/
266// ---------------------------------------------------------------------
267/* This opens the compressor, either in compress mode or decompress
268 mode. FileFd is always the compressor input/output file,
269 OutFd is the created pipe, Input for Compress, Output for Decompress. */
3826564e 270bool MultiCompress::OpenCompress(const CompType *Prog,pid_t &Pid,int FileFd,
b2e465d6
AL
271 int &OutFd,bool Comp)
272{
273 Pid = -1;
274
275 // No compression
276 if (Prog->Binary == 0)
277 {
278 OutFd = dup(FileFd);
279 return true;
280 }
281
282 // Create a data pipe
283 int Pipe[2] = {-1,-1};
284 if (pipe(Pipe) != 0)
dc738e7a 285 return _error->Errno("pipe",_("Failed to create subprocess IPC"));
b2e465d6
AL
286 for (int J = 0; J != 2; J++)
287 SetCloseExec(Pipe[J],true);
288
289 if (Comp == true)
290 OutFd = Pipe[1];
291 else
292 OutFd = Pipe[0];
293
294 // The child..
295 Pid = ExecFork();
296 if (Pid == 0)
297 {
298 if (Comp == true)
299 {
300 dup2(FileFd,STDOUT_FILENO);
301 dup2(Pipe[0],STDIN_FILENO);
302 }
303 else
304 {
305 dup2(FileFd,STDIN_FILENO);
306 dup2(Pipe[1],STDOUT_FILENO);
307 }
308
309 SetCloseExec(STDOUT_FILENO,false);
310 SetCloseExec(STDIN_FILENO,false);
311
312 const char *Args[3];
313 Args[0] = Prog->Binary;
314 if (Comp == true)
315 Args[1] = Prog->CompArgs;
316 else
317 Args[1] = Prog->UnCompArgs;
318 Args[2] = 0;
319 execvp(Args[0],(char **)Args);
dc738e7a 320 cerr << _("Failed to exec compressor ") << Args[0] << endl;
b2e465d6
AL
321 _exit(100);
322 };
323 if (Comp == true)
324 close(Pipe[0]);
325 else
326 close(Pipe[1]);
327 return true;
328}
329 /*}}}*/
330// MultiCompress::OpenOld - Open an old file /*{{{*/
331// ---------------------------------------------------------------------
332/* This opens one of the original output files, possibly decompressing it. */
3826564e 333bool MultiCompress::OpenOld(int &Fd,pid_t &Proc)
b2e465d6
AL
334{
335 Files *Best = Outputs;
336 for (Files *I = Outputs; I != 0; I = I->Next)
337 if (Best->CompressProg->Cost > I->CompressProg->Cost)
338 Best = I;
339
340 // Open the file
341 FileFd F(Best->Output,FileFd::ReadOnly);
342 if (_error->PendingError() == true)
343 return false;
344
345 // Decompress the file so we can read it
346 if (OpenCompress(Best->CompressProg,Proc,F.Fd(),Fd,false) == false)
347 return false;
348
349 return true;
350}
351 /*}}}*/
352// MultiCompress::CloseOld - Close the old file /*{{{*/
353// ---------------------------------------------------------------------
354/* */
3826564e 355bool MultiCompress::CloseOld(int Fd,pid_t Proc)
b2e465d6
AL
356{
357 close(Fd);
358 if (Proc != -1)
dc738e7a 359 if (ExecWait(Proc,_("decompressor"),false) == false)
b2e465d6
AL
360 return false;
361 return true;
362}
363 /*}}}*/
364// MultiCompress::Child - The writer child /*{{{*/
365// ---------------------------------------------------------------------
366/* The child process forks a bunch of compression children and takes
367 input on FD and passes it to all the compressor childer. On the way it
368 computes the MD5 of the raw data. After this the raw data in the
369 original files is compared to see if this data is new. If the data
370 is new then the temp files are renamed, otherwise they are erased. */
371bool MultiCompress::Child(int FD)
372{
373 // Start the compression children.
374 for (Files *I = Outputs; I != 0; I = I->Next)
375 {
376 if (OpenCompress(I->CompressProg,I->CompressProc,I->TmpFile.Fd(),
377 I->Fd,true) == false)
378 return false;
379 }
380
381 /* Okay, now we just feed data from FD to all the other FDs. Also
382 stash a hash of the data to use later. */
383 SetNonBlock(FD,false);
384 unsigned char Buffer[32*1024];
385 unsigned long FileSize = 0;
386 MD5Summation MD5;
387 while (1)
388 {
389 WaitFd(FD,false);
390 int Res = read(FD,Buffer,sizeof(Buffer));
391 if (Res == 0)
392 break;
393 if (Res < 0)
394 continue;
395
396 MD5.Add(Buffer,Res);
397 FileSize += Res;
398 for (Files *I = Outputs; I != 0; I = I->Next)
399 {
400 if (write(I->Fd,Buffer,Res) != Res)
401 {
dc738e7a 402 _error->Errno("write",_("IO to subprocess/file failed"));
b2e465d6
AL
403 break;
404 }
405 }
406 }
407
408 // Close all the writers
409 for (Files *I = Outputs; I != 0; I = I->Next)
410 close(I->Fd);
411
412 // Wait for the compressors to exit
413 for (Files *I = Outputs; I != 0; I = I->Next)
414 {
415 if (I->CompressProc != -1)
416 ExecWait(I->CompressProc,I->CompressProg->Binary,false);
417 }
418
419 if (_error->PendingError() == true)
420 return false;
421
422 /* Now we have to copy the files over, or erase them if they
423 have not changed. First find the cheapest decompressor */
424 bool Missing = false;
425 for (Files *I = Outputs; I != 0; I = I->Next)
426 {
427 if (I->OldMTime == 0)
428 {
429 Missing = true;
430 break;
431 }
432 }
433
434 // Check the MD5 of the lowest cost entity.
435 while (Missing == false)
436 {
437 int CompFd = -1;
3826564e 438 pid_t Proc = -1;
b2e465d6
AL
439 if (OpenOld(CompFd,Proc) == false)
440 {
441 _error->Discard();
442 break;
443 }
444
445 // Compute the hash
446 MD5Summation OldMD5;
447 unsigned long NewFileSize = 0;
448 while (1)
449 {
450 int Res = read(CompFd,Buffer,sizeof(Buffer));
451 if (Res == 0)
452 break;
453 if (Res < 0)
dc738e7a 454 return _error->Errno("read",_("Failed to read while computing MD5"));
b2e465d6
AL
455 NewFileSize += Res;
456 OldMD5.Add(Buffer,Res);
457 }
458
459 // Tidy the compressor
460 if (CloseOld(CompFd,Proc) == false)
461 return false;
462
463 // Check the hash
464 if (OldMD5.Result() == MD5.Result() &&
465 FileSize == NewFileSize)
466 {
467 for (Files *I = Outputs; I != 0; I = I->Next)
468 {
469 I->TmpFile.Close();
470 if (unlink(I->TmpFile.Name().c_str()) != 0)
dc738e7a 471 _error->Errno("unlink",_("Problem unlinking %s"),
b2e465d6
AL
472 I->TmpFile.Name().c_str());
473 }
474 return !_error->PendingError();
475 }
476 break;
477 }
478
479 // Finalize
480 for (Files *I = Outputs; I != 0; I = I->Next)
481 {
482 // Set the correct file modes
483 fchmod(I->TmpFile.Fd(),Permissions);
484
485 if (rename(I->TmpFile.Name().c_str(),I->Output.c_str()) != 0)
dc738e7a 486 _error->Errno("rename",_("Failed to rename %s to %s"),
b2e465d6
AL
487 I->TmpFile.Name().c_str(),I->Output.c_str());
488 I->TmpFile.Close();
489 }
490
491 return !_error->PendingError();
492}
493 /*}}}*/
494