releasing package apt version 0.9.15.2
[ntk/apt.git] / ftparchive / multicompress.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: multicompress.cc,v 1.4 2003/02/10 07:34:41 doogie Exp $
4 /* ######################################################################
5
6 MultiCompressor
7
8 This class is very complicated in order to optimize for the common
9 case of its use, writing a large set of compressed files that are
10 different from the old set. It spawns off compressors in parallel
11 to maximize compression throughput and has a separate task managing
12 the data going into the compressors.
13
14 ##################################################################### */
15 /*}}}*/
16 // Include Files /*{{{*/
#include <config.h>

#include <apt-pkg/fileutl.h>
#include <apt-pkg/strutl.h>
#include <apt-pkg/error.h>
#include <apt-pkg/md5.h>

#include <errno.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <unistd.h>
#include <iostream>

#include "multicompress.h"
#include <apti18n.h>
33 /*}}}*/
34
35 using namespace std;
36
37
38 // MultiCompress::MultiCompress - Constructor /*{{{*/
39 // ---------------------------------------------------------------------
40 /* Setup the file outputs, compression modes and fork the writer child */
41 MultiCompress::MultiCompress(string const &Output,string const &Compress,
42 mode_t const &Permissions,bool const &Write) :
43 Permissions(Permissions)
44 {
45 Outputs = 0;
46 Outputter = -1;
47 Input = 0;
48 UpdateMTime = 0;
49
50 /* Parse the compression string, a space separated lists of compresison
51 types */
52 string::const_iterator I = Compress.begin();
53 for (; I != Compress.end();)
54 {
55 for (; I != Compress.end() && isspace(*I); ++I);
56
57 // Grab a word
58 string::const_iterator Start = I;
59 for (; I != Compress.end() && !isspace(*I); ++I);
60
61 // Find the matching compressor
62 std::vector<APT::Configuration::Compressor> Compressors = APT::Configuration::getCompressors();
63 std::vector<APT::Configuration::Compressor>::const_iterator Comp = Compressors.begin();
64 for (; Comp != Compressors.end(); ++Comp)
65 if (stringcmp(Start,I,Comp->Name.c_str()) == 0)
66 break;
67
68 // Hmm.. unknown.
69 if (Comp == Compressors.end())
70 {
71 _error->Warning(_("Unknown compression algorithm '%s'"),string(Start,I).c_str());
72 continue;
73 }
74
75 // Create and link in a new output
76 Files *NewOut = new Files;
77 NewOut->Next = Outputs;
78 Outputs = NewOut;
79 NewOut->CompressProg = *Comp;
80 NewOut->Output = Output+Comp->Extension;
81
82 struct stat St;
83 if (stat(NewOut->Output.c_str(),&St) == 0)
84 NewOut->OldMTime = St.st_mtime;
85 else
86 NewOut->OldMTime = 0;
87 }
88
89 if (Write == false)
90 return;
91
92 /* Open all the temp files now so we can report any errors. File is
93 made unreable to prevent people from touching it during creating. */
94 for (Files *I = Outputs; I != 0; I = I->Next)
95 I->TmpFile.Open(I->Output + ".new", FileFd::WriteOnly | FileFd::Create | FileFd::Empty, FileFd::Extension, 0600);
96 if (_error->PendingError() == true)
97 return;
98
99 if (Outputs == 0)
100 {
101 _error->Error(_("Compressed output %s needs a compression set"),Output.c_str());
102 return;
103 }
104
105 Start();
106 }
107 /*}}}*/
108 // MultiCompress::~MultiCompress - Destructor /*{{{*/
109 // ---------------------------------------------------------------------
110 /* Just erase the file linked list. */
111 MultiCompress::~MultiCompress()
112 {
113 Die();
114
115 for (; Outputs != 0;)
116 {
117 Files *Tmp = Outputs->Next;
118 delete Outputs;
119 Outputs = Tmp;
120 }
121 }
122 /*}}}*/
123 // MultiCompress::GetStat - Get stat information for compressed files /*{{{*/
124 // ---------------------------------------------------------------------
125 /* This checks each compressed file to make sure it exists and returns
126 stat information for a random file from the collection. False means
127 one or more of the files is missing. */
128 bool MultiCompress::GetStat(string const &Output,string const &Compress,struct stat &St)
129 {
130 /* Parse the compression string, a space separated lists of compresison
131 types */
132 string::const_iterator I = Compress.begin();
133 bool DidStat = false;
134 for (; I != Compress.end();)
135 {
136 for (; I != Compress.end() && isspace(*I); ++I);
137
138 // Grab a word
139 string::const_iterator Start = I;
140 for (; I != Compress.end() && !isspace(*I); ++I);
141
142 // Find the matching compressor
143 std::vector<APT::Configuration::Compressor> Compressors = APT::Configuration::getCompressors();
144 std::vector<APT::Configuration::Compressor>::const_iterator Comp = Compressors.begin();
145 for (; Comp != Compressors.end(); ++Comp)
146 if (stringcmp(Start,I,Comp->Name.c_str()) == 0)
147 break;
148
149 // Hmm.. unknown.
150 if (Comp == Compressors.end())
151 continue;
152
153 string Name = Output+Comp->Extension;
154 if (stat(Name.c_str(),&St) != 0)
155 return false;
156 DidStat = true;
157 }
158 return DidStat;
159 }
160 /*}}}*/
161 // MultiCompress::Start - Start up the writer child /*{{{*/
162 // ---------------------------------------------------------------------
163 /* Fork a child and setup the communication pipe. */
164 bool MultiCompress::Start()
165 {
166 // Create a data pipe
167 int Pipe[2] = {-1,-1};
168 if (pipe(Pipe) != 0)
169 return _error->Errno("pipe",_("Failed to create IPC pipe to subprocess"));
170 for (int I = 0; I != 2; I++)
171 SetCloseExec(Pipe[I],true);
172
173 // The child..
174 Outputter = fork();
175 if (Outputter == 0)
176 {
177 close(Pipe[1]);
178 Child(Pipe[0]);
179 if (_error->PendingError() == true)
180 {
181 _error->DumpErrors();
182 _exit(100);
183 }
184 _exit(0);
185 };
186
187 close(Pipe[0]);
188 Input = fdopen(Pipe[1],"w");
189 if (Input == 0)
190 return _error->Errno("fdopen",_("Failed to create FILE*"));
191
192 if (Outputter == -1)
193 return _error->Errno("fork",_("Failed to fork"));
194 return true;
195 }
196 /*}}}*/
197 // MultiCompress::Die - Clean up the writer /*{{{*/
198 // ---------------------------------------------------------------------
199 /* */
200 bool MultiCompress::Die()
201 {
202 if (Input == 0)
203 return true;
204
205 fclose(Input);
206 Input = 0;
207 bool Res = ExecWait(Outputter,_("Compress child"),false);
208 Outputter = -1;
209 return Res;
210 }
211 /*}}}*/
212 // MultiCompress::Finalize - Finish up writing /*{{{*/
213 // ---------------------------------------------------------------------
214 /* This is only necessary for statistics reporting. */
215 bool MultiCompress::Finalize(unsigned long long &OutSize)
216 {
217 OutSize = 0;
218 if (Input == 0 || Die() == false)
219 return false;
220
221 time_t Now;
222 time(&Now);
223
224 // Check the mtimes to see if the files were replaced.
225 bool Changed = false;
226 for (Files *I = Outputs; I != 0; I = I->Next)
227 {
228 struct stat St;
229 if (stat(I->Output.c_str(),&St) != 0)
230 return _error->Error(_("Internal error, failed to create %s"),
231 I->Output.c_str());
232
233 if (I->OldMTime != St.st_mtime)
234 Changed = true;
235 else
236 {
237 // Update the mtime if necessary
238 if (UpdateMTime > 0 &&
239 (Now - St.st_mtime > (signed)UpdateMTime || St.st_mtime > Now))
240 {
241 utimes(I->Output.c_str(), NULL);
242 Changed = true;
243 }
244 }
245
246 // Force the file permissions
247 if (St.st_mode != Permissions)
248 chmod(I->Output.c_str(),Permissions);
249
250 OutSize += St.st_size;
251 }
252
253 if (Changed == false)
254 OutSize = 0;
255
256 return true;
257 }
258 /*}}}*/
259 // MultiCompress::OpenOld - Open an old file /*{{{*/
260 // ---------------------------------------------------------------------
261 /* This opens one of the original output files, possibly decompressing it. */
262 bool MultiCompress::OpenOld(FileFd &Fd)
263 {
264 Files *Best = Outputs;
265 for (Files *I = Outputs; I != 0; I = I->Next)
266 if (Best->CompressProg.Cost > I->CompressProg.Cost)
267 Best = I;
268
269 // Open the file
270 return Fd.Open(Best->Output, FileFd::ReadOnly, FileFd::Extension);
271 }
272 /*}}}*/
273 // MultiCompress::Child - The writer child /*{{{*/
274 // ---------------------------------------------------------------------
275 /* The child process forks a bunch of compression children and takes
276 input on FD and passes it to all the compressor child. On the way it
277 computes the MD5 of the raw data. After this the raw data in the
278 original files is compared to see if this data is new. If the data
279 is new then the temp files are renamed, otherwise they are erased. */
280 bool MultiCompress::Child(int const &FD)
281 {
282 /* Okay, now we just feed data from FD to all the other FDs. Also
283 stash a hash of the data to use later. */
284 SetNonBlock(FD,false);
285 unsigned char Buffer[32*1024];
286 unsigned long long FileSize = 0;
287 MD5Summation MD5;
288 while (1)
289 {
290 WaitFd(FD,false);
291 int Res = read(FD,Buffer,sizeof(Buffer));
292 if (Res == 0)
293 break;
294 if (Res < 0)
295 continue;
296
297 MD5.Add(Buffer,Res);
298 FileSize += Res;
299 for (Files *I = Outputs; I != 0; I = I->Next)
300 {
301 if (I->TmpFile.Write(Buffer, Res) == false)
302 {
303 _error->Errno("write",_("IO to subprocess/file failed"));
304 break;
305 }
306 }
307 }
308
309 if (_error->PendingError() == true)
310 return false;
311
312 /* Now we have to copy the files over, or erase them if they
313 have not changed. First find the cheapest decompressor */
314 bool Missing = false;
315 for (Files *I = Outputs; I != 0; I = I->Next)
316 {
317 if (I->OldMTime == 0)
318 {
319 Missing = true;
320 break;
321 }
322 }
323
324 // Check the MD5 of the lowest cost entity.
325 while (Missing == false)
326 {
327 FileFd CompFd;
328 if (OpenOld(CompFd) == false)
329 {
330 _error->Discard();
331 break;
332 }
333
334 // Compute the hash
335 MD5Summation OldMD5;
336 unsigned long long NewFileSize = 0;
337 while (1)
338 {
339 unsigned long long Res = 0;
340 if (CompFd.Read(Buffer,sizeof(Buffer), &Res) == false)
341 return _error->Errno("read",_("Failed to read while computing MD5"));
342 if (Res == 0)
343 break;
344 NewFileSize += Res;
345 OldMD5.Add(Buffer,Res);
346 }
347 CompFd.Close();
348
349 // Check the hash
350 if (OldMD5.Result() == MD5.Result() &&
351 FileSize == NewFileSize)
352 {
353 for (Files *I = Outputs; I != 0; I = I->Next)
354 {
355 I->TmpFile.Close();
356 if (unlink(I->TmpFile.Name().c_str()) != 0)
357 _error->Errno("unlink",_("Problem unlinking %s"),
358 I->TmpFile.Name().c_str());
359 }
360 return !_error->PendingError();
361 }
362 break;
363 }
364
365 // Finalize
366 for (Files *I = Outputs; I != 0; I = I->Next)
367 {
368 // Set the correct file modes
369 chmod(I->TmpFile.Name().c_str(),Permissions);
370
371 if (rename(I->TmpFile.Name().c_str(),I->Output.c_str()) != 0)
372 _error->Errno("rename",_("Failed to rename %s to %s"),
373 I->TmpFile.Name().c_str(),I->Output.c_str());
374 I->TmpFile.Close();
375 }
376
377 return !_error->PendingError();
378 }
379 /*}}}*/
380