use FileFd instead of forking the compression childs by hand
[ntk/apt.git] / ftparchive / multicompress.cc
CommitLineData
b2e465d6
AL
1// -*- mode: cpp; mode: fold -*-
2// Description /*{{{*/
dc738e7a 3// $Id: multicompress.cc,v 1.4 2003/02/10 07:34:41 doogie Exp $
b2e465d6
AL
4/* ######################################################################
5
6 MultiCompressor
7
8 This class is very complicated in order to optimize for the common
9 case of its use, writing a large set of compressed files that are
10 different from the old set. It spawns off compressors in parallel
11 to maximize compression throughput and has a separate task managing
12 the data going into the compressors.
13
14 ##################################################################### */
15 /*}}}*/
16// Include Files /*{{{*/
ea542140
DK
17#include <config.h>
18
699b209e 19#include <apt-pkg/fileutl.h>
b2e465d6
AL
20#include <apt-pkg/strutl.h>
21#include <apt-pkg/error.h>
22#include <apt-pkg/md5.h>
ea542140 23
b2e465d6
AL
24#include <sys/types.h>
25#include <sys/stat.h>
26#include <utime.h>
27#include <unistd.h>
ea542140
DK
28#include <iostream>
29
30#include "multicompress.h"
31#include <apti18n.h>
b2e465d6
AL
32 /*}}}*/
33
812f4169
AL
34using namespace std;
35
b2e465d6
AL
36
37// MultiCompress::MultiCompress - Constructor /*{{{*/
38// ---------------------------------------------------------------------
39/* Setup the file outputs, compression modes and fork the writer child */
9209ec47
DK
40MultiCompress::MultiCompress(string const &Output,string const &Compress,
41 mode_t const &Permissions,bool const &Write) :
42 Permissions(Permissions)
b2e465d6
AL
43{
44 Outputs = 0;
45 Outputter = -1;
46 Input = 0;
47 UpdateMTime = 0;
03bef784 48
b2e465d6
AL
49 /* Parse the compression string, a space separated lists of compresison
50 types */
51 string::const_iterator I = Compress.begin();
52 for (; I != Compress.end();)
53 {
f7f0d6c7 54 for (; I != Compress.end() && isspace(*I); ++I);
b2e465d6
AL
55
56 // Grab a word
57 string::const_iterator Start = I;
f7f0d6c7 58 for (; I != Compress.end() && !isspace(*I); ++I);
b2e465d6
AL
59
60 // Find the matching compressor
03bef784
DK
61 std::vector<APT::Configuration::Compressor> Compressors = APT::Configuration::getCompressors();
62 std::vector<APT::Configuration::Compressor>::const_iterator Comp = Compressors.begin();
63 for (; Comp != Compressors.end(); ++Comp)
64 if (stringcmp(Start,I,Comp->Name.c_str()) == 0)
b2e465d6
AL
65 break;
66
67 // Hmm.. unknown.
03bef784 68 if (Comp == Compressors.end())
b2e465d6 69 {
db0db9fe 70 _error->Warning(_("Unknown compression algorithm '%s'"),string(Start,I).c_str());
b2e465d6
AL
71 continue;
72 }
73
74 // Create and link in a new output
75 Files *NewOut = new Files;
76 NewOut->Next = Outputs;
77 Outputs = NewOut;
03bef784 78 NewOut->CompressProg = *Comp;
b2e465d6
AL
79 NewOut->Output = Output+Comp->Extension;
80
81 struct stat St;
82 if (stat(NewOut->Output.c_str(),&St) == 0)
83 NewOut->OldMTime = St.st_mtime;
84 else
85 NewOut->OldMTime = 0;
86 }
87
88 if (Write == false)
89 return;
90
91 /* Open all the temp files now so we can report any errors. File is
92 made unreable to prevent people from touching it during creating. */
93 for (Files *I = Outputs; I != 0; I = I->Next)
52b47296 94 I->TmpFile.Open(I->Output + ".new", FileFd::WriteOnly | FileFd::Create | FileFd::Empty, FileFd::Extension, 0600);
b2e465d6
AL
95 if (_error->PendingError() == true)
96 return;
97
98 if (Outputs == 0)
99 {
dc738e7a 100 _error->Error(_("Compressed output %s needs a compression set"),Output.c_str());
b2e465d6
AL
101 return;
102 }
103
104 Start();
105}
106 /*}}}*/
107// MultiCompress::~MultiCompress - Destructor /*{{{*/
108// ---------------------------------------------------------------------
109/* Just erase the file linked list. */
110MultiCompress::~MultiCompress()
111{
112 Die();
113
114 for (; Outputs != 0;)
115 {
116 Files *Tmp = Outputs->Next;
117 delete Outputs;
118 Outputs = Tmp;
119 }
120}
121 /*}}}*/
122// MultiCompress::GetStat - Get stat information for compressed files /*{{{*/
123// ---------------------------------------------------------------------
124/* This checks each compressed file to make sure it exists and returns
125 stat information for a random file from the collection. False means
126 one or more of the files is missing. */
9209ec47 127bool MultiCompress::GetStat(string const &Output,string const &Compress,struct stat &St)
b2e465d6
AL
128{
129 /* Parse the compression string, a space separated lists of compresison
130 types */
131 string::const_iterator I = Compress.begin();
132 bool DidStat = false;
133 for (; I != Compress.end();)
134 {
f7f0d6c7 135 for (; I != Compress.end() && isspace(*I); ++I);
b2e465d6
AL
136
137 // Grab a word
138 string::const_iterator Start = I;
f7f0d6c7 139 for (; I != Compress.end() && !isspace(*I); ++I);
b2e465d6
AL
140
141 // Find the matching compressor
03bef784
DK
142 std::vector<APT::Configuration::Compressor> Compressors = APT::Configuration::getCompressors();
143 std::vector<APT::Configuration::Compressor>::const_iterator Comp = Compressors.begin();
144 for (; Comp != Compressors.end(); ++Comp)
145 if (stringcmp(Start,I,Comp->Name.c_str()) == 0)
b2e465d6
AL
146 break;
147
148 // Hmm.. unknown.
03bef784 149 if (Comp == Compressors.end())
b2e465d6
AL
150 continue;
151
152 string Name = Output+Comp->Extension;
153 if (stat(Name.c_str(),&St) != 0)
154 return false;
155 DidStat = true;
156 }
157 return DidStat;
158}
159 /*}}}*/
160// MultiCompress::Start - Start up the writer child /*{{{*/
161// ---------------------------------------------------------------------
162/* Fork a child and setup the communication pipe. */
163bool MultiCompress::Start()
164{
165 // Create a data pipe
166 int Pipe[2] = {-1,-1};
167 if (pipe(Pipe) != 0)
dc738e7a 168 return _error->Errno("pipe",_("Failed to create IPC pipe to subprocess"));
b2e465d6
AL
169 for (int I = 0; I != 2; I++)
170 SetCloseExec(Pipe[I],true);
171
172 // The child..
173 Outputter = fork();
174 if (Outputter == 0)
175 {
176 close(Pipe[1]);
177 Child(Pipe[0]);
178 if (_error->PendingError() == true)
179 {
180 _error->DumpErrors();
181 _exit(100);
182 }
183 _exit(0);
184 };
185
b2e465d6
AL
186 close(Pipe[0]);
187 Input = fdopen(Pipe[1],"w");
188 if (Input == 0)
dc738e7a 189 return _error->Errno("fdopen",_("Failed to create FILE*"));
b2e465d6
AL
190
191 if (Outputter == -1)
dc738e7a 192 return _error->Errno("fork",_("Failed to fork"));
b2e465d6
AL
193 return true;
194}
195 /*}}}*/
196// MultiCompress::Die - Clean up the writer /*{{{*/
197// ---------------------------------------------------------------------
198/* */
199bool MultiCompress::Die()
200{
201 if (Input == 0)
202 return true;
203
204 fclose(Input);
205 Input = 0;
db0db9fe 206 bool Res = ExecWait(Outputter,_("Compress child"),false);
b2e465d6
AL
207 Outputter = -1;
208 return Res;
209}
210 /*}}}*/
211// MultiCompress::Finalize - Finish up writing /*{{{*/
212// ---------------------------------------------------------------------
213/* This is only necessary for statistics reporting. */
650faab0 214bool MultiCompress::Finalize(unsigned long long &OutSize)
b2e465d6
AL
215{
216 OutSize = 0;
217 if (Input == 0 || Die() == false)
218 return false;
219
220 time_t Now;
221 time(&Now);
222
223 // Check the mtimes to see if the files were replaced.
224 bool Changed = false;
225 for (Files *I = Outputs; I != 0; I = I->Next)
226 {
227 struct stat St;
228 if (stat(I->Output.c_str(),&St) != 0)
db0db9fe 229 return _error->Error(_("Internal error, failed to create %s"),
b2e465d6
AL
230 I->Output.c_str());
231
232 if (I->OldMTime != St.st_mtime)
233 Changed = true;
234 else
235 {
236 // Update the mtime if necessary
237 if (UpdateMTime > 0 &&
238 (Now - St.st_mtime > (signed)UpdateMTime || St.st_mtime > Now))
239 {
240 struct utimbuf Buf;
241 Buf.actime = Buf.modtime = Now;
242 utime(I->Output.c_str(),&Buf);
243 Changed = true;
244 }
245 }
246
247 // Force the file permissions
248 if (St.st_mode != Permissions)
249 chmod(I->Output.c_str(),Permissions);
250
251 OutSize += St.st_size;
252 }
253
254 if (Changed == false)
255 OutSize = 0;
256
257 return true;
258}
259 /*}}}*/
b2e465d6
AL
260// MultiCompress::OpenOld - Open an old file /*{{{*/
261// ---------------------------------------------------------------------
262/* This opens one of the original output files, possibly decompressing it. */
3826564e 263bool MultiCompress::OpenOld(int &Fd,pid_t &Proc)
b2e465d6
AL
264{
265 Files *Best = Outputs;
266 for (Files *I = Outputs; I != 0; I = I->Next)
03bef784 267 if (Best->CompressProg.Cost > I->CompressProg.Cost)
b2e465d6
AL
268 Best = I;
269
270 // Open the file
271 FileFd F(Best->Output,FileFd::ReadOnly);
272 if (_error->PendingError() == true)
273 return false;
274
275 // Decompress the file so we can read it
699b209e 276 if (ExecCompressor(Best->CompressProg,&Proc,F.Fd(),Fd,false) == false)
b2e465d6
AL
277 return false;
278
279 return true;
280}
281 /*}}}*/
282// MultiCompress::CloseOld - Close the old file /*{{{*/
283// ---------------------------------------------------------------------
284/* */
3826564e 285bool MultiCompress::CloseOld(int Fd,pid_t Proc)
b2e465d6
AL
286{
287 close(Fd);
288 if (Proc != -1)
dc738e7a 289 if (ExecWait(Proc,_("decompressor"),false) == false)
b2e465d6
AL
290 return false;
291 return true;
292}
293 /*}}}*/
294// MultiCompress::Child - The writer child /*{{{*/
295// ---------------------------------------------------------------------
296/* The child process forks a bunch of compression children and takes
c6474fb6 297 input on FD and passes it to all the compressor child. On the way it
b2e465d6
AL
298 computes the MD5 of the raw data. After this the raw data in the
299 original files is compared to see if this data is new. If the data
300 is new then the temp files are renamed, otherwise they are erased. */
9209ec47 301bool MultiCompress::Child(int const &FD)
b2e465d6 302{
b2e465d6
AL
303 /* Okay, now we just feed data from FD to all the other FDs. Also
304 stash a hash of the data to use later. */
305 SetNonBlock(FD,false);
306 unsigned char Buffer[32*1024];
650faab0 307 unsigned long long FileSize = 0;
b2e465d6
AL
308 MD5Summation MD5;
309 while (1)
310 {
311 WaitFd(FD,false);
312 int Res = read(FD,Buffer,sizeof(Buffer));
313 if (Res == 0)
314 break;
315 if (Res < 0)
316 continue;
317
318 MD5.Add(Buffer,Res);
319 FileSize += Res;
320 for (Files *I = Outputs; I != 0; I = I->Next)
321 {
52b47296 322 if (I->TmpFile.Write(Buffer, Res) == false)
b2e465d6 323 {
dc738e7a 324 _error->Errno("write",_("IO to subprocess/file failed"));
b2e465d6
AL
325 break;
326 }
327 }
328 }
52b47296 329
b2e465d6
AL
330 if (_error->PendingError() == true)
331 return false;
332
333 /* Now we have to copy the files over, or erase them if they
334 have not changed. First find the cheapest decompressor */
335 bool Missing = false;
336 for (Files *I = Outputs; I != 0; I = I->Next)
337 {
338 if (I->OldMTime == 0)
339 {
340 Missing = true;
341 break;
342 }
343 }
344
345 // Check the MD5 of the lowest cost entity.
346 while (Missing == false)
347 {
348 int CompFd = -1;
3826564e 349 pid_t Proc = -1;
b2e465d6
AL
350 if (OpenOld(CompFd,Proc) == false)
351 {
352 _error->Discard();
353 break;
354 }
355
356 // Compute the hash
357 MD5Summation OldMD5;
650faab0 358 unsigned long long NewFileSize = 0;
b2e465d6
AL
359 while (1)
360 {
361 int Res = read(CompFd,Buffer,sizeof(Buffer));
362 if (Res == 0)
363 break;
364 if (Res < 0)
dc738e7a 365 return _error->Errno("read",_("Failed to read while computing MD5"));
b2e465d6
AL
366 NewFileSize += Res;
367 OldMD5.Add(Buffer,Res);
368 }
369
370 // Tidy the compressor
371 if (CloseOld(CompFd,Proc) == false)
372 return false;
373
374 // Check the hash
375 if (OldMD5.Result() == MD5.Result() &&
376 FileSize == NewFileSize)
377 {
378 for (Files *I = Outputs; I != 0; I = I->Next)
379 {
380 I->TmpFile.Close();
381 if (unlink(I->TmpFile.Name().c_str()) != 0)
dc738e7a 382 _error->Errno("unlink",_("Problem unlinking %s"),
b2e465d6
AL
383 I->TmpFile.Name().c_str());
384 }
385 return !_error->PendingError();
386 }
387 break;
388 }
389
390 // Finalize
391 for (Files *I = Outputs; I != 0; I = I->Next)
392 {
393 // Set the correct file modes
394 fchmod(I->TmpFile.Fd(),Permissions);
395
396 if (rename(I->TmpFile.Name().c_str(),I->Output.c_str()) != 0)
dc738e7a 397 _error->Errno("rename",_("Failed to rename %s to %s"),
b2e465d6
AL
398 I->TmpFile.Name().c_str(),I->Output.c_str());
399 I->TmpFile.Close();
400 }
401
402 return !_error->PendingError();
403}
404 /*}}}*/
405