use VersionSet in download to handle repeats
[ntk/apt.git] / ftparchive / multicompress.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: multicompress.cc,v 1.4 2003/02/10 07:34:41 doogie Exp $
4 /* ######################################################################
5
6 MultiCompressor
7
8 This class is very complicated in order to optimize for the common
9 case of its use, writing a large set of compressed files that are
10 different from the old set. It spawns off compressors in parallel
11 to maximize compression throughput and has a separate task managing
12 the data going into the compressors.
13
14 ##################################################################### */
15 /*}}}*/
16 // Include Files /*{{{*/
#include <config.h>

#include <apt-pkg/fileutl.h>
#include <apt-pkg/strutl.h>
#include <apt-pkg/error.h>
#include <apt-pkg/md5.h>

#include <errno.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <iostream>

#include "multicompress.h"
#include <apti18n.h>
32 /*}}}*/
33
34 using namespace std;
35
36
37 // MultiCompress::MultiCompress - Constructor /*{{{*/
38 // ---------------------------------------------------------------------
39 /* Setup the file outputs, compression modes and fork the writer child */
40 MultiCompress::MultiCompress(string const &Output,string const &Compress,
41 mode_t const &Permissions,bool const &Write) :
42 Permissions(Permissions)
43 {
44 Outputs = 0;
45 Outputter = -1;
46 Input = 0;
47 UpdateMTime = 0;
48
49 /* Parse the compression string, a space separated lists of compresison
50 types */
51 string::const_iterator I = Compress.begin();
52 for (; I != Compress.end();)
53 {
54 for (; I != Compress.end() && isspace(*I); ++I);
55
56 // Grab a word
57 string::const_iterator Start = I;
58 for (; I != Compress.end() && !isspace(*I); ++I);
59
60 // Find the matching compressor
61 std::vector<APT::Configuration::Compressor> Compressors = APT::Configuration::getCompressors();
62 std::vector<APT::Configuration::Compressor>::const_iterator Comp = Compressors.begin();
63 for (; Comp != Compressors.end(); ++Comp)
64 if (stringcmp(Start,I,Comp->Name.c_str()) == 0)
65 break;
66
67 // Hmm.. unknown.
68 if (Comp == Compressors.end())
69 {
70 _error->Warning(_("Unknown compression algorithm '%s'"),string(Start,I).c_str());
71 continue;
72 }
73
74 // Create and link in a new output
75 Files *NewOut = new Files;
76 NewOut->Next = Outputs;
77 Outputs = NewOut;
78 NewOut->CompressProg = *Comp;
79 NewOut->Output = Output+Comp->Extension;
80
81 struct stat St;
82 if (stat(NewOut->Output.c_str(),&St) == 0)
83 NewOut->OldMTime = St.st_mtime;
84 else
85 NewOut->OldMTime = 0;
86 }
87
88 if (Write == false)
89 return;
90
91 /* Open all the temp files now so we can report any errors. File is
92 made unreable to prevent people from touching it during creating. */
93 for (Files *I = Outputs; I != 0; I = I->Next)
94 I->TmpFile.Open(I->Output + ".new", FileFd::WriteOnly | FileFd::Create | FileFd::Empty, FileFd::Extension, 0600);
95 if (_error->PendingError() == true)
96 return;
97
98 if (Outputs == 0)
99 {
100 _error->Error(_("Compressed output %s needs a compression set"),Output.c_str());
101 return;
102 }
103
104 Start();
105 }
106 /*}}}*/
107 // MultiCompress::~MultiCompress - Destructor /*{{{*/
108 // ---------------------------------------------------------------------
109 /* Just erase the file linked list. */
110 MultiCompress::~MultiCompress()
111 {
112 Die();
113
114 for (; Outputs != 0;)
115 {
116 Files *Tmp = Outputs->Next;
117 delete Outputs;
118 Outputs = Tmp;
119 }
120 }
121 /*}}}*/
122 // MultiCompress::GetStat - Get stat information for compressed files /*{{{*/
123 // ---------------------------------------------------------------------
124 /* This checks each compressed file to make sure it exists and returns
125 stat information for a random file from the collection. False means
126 one or more of the files is missing. */
127 bool MultiCompress::GetStat(string const &Output,string const &Compress,struct stat &St)
128 {
129 /* Parse the compression string, a space separated lists of compresison
130 types */
131 string::const_iterator I = Compress.begin();
132 bool DidStat = false;
133 for (; I != Compress.end();)
134 {
135 for (; I != Compress.end() && isspace(*I); ++I);
136
137 // Grab a word
138 string::const_iterator Start = I;
139 for (; I != Compress.end() && !isspace(*I); ++I);
140
141 // Find the matching compressor
142 std::vector<APT::Configuration::Compressor> Compressors = APT::Configuration::getCompressors();
143 std::vector<APT::Configuration::Compressor>::const_iterator Comp = Compressors.begin();
144 for (; Comp != Compressors.end(); ++Comp)
145 if (stringcmp(Start,I,Comp->Name.c_str()) == 0)
146 break;
147
148 // Hmm.. unknown.
149 if (Comp == Compressors.end())
150 continue;
151
152 string Name = Output+Comp->Extension;
153 if (stat(Name.c_str(),&St) != 0)
154 return false;
155 DidStat = true;
156 }
157 return DidStat;
158 }
159 /*}}}*/
160 // MultiCompress::Start - Start up the writer child /*{{{*/
161 // ---------------------------------------------------------------------
162 /* Fork a child and setup the communication pipe. */
163 bool MultiCompress::Start()
164 {
165 // Create a data pipe
166 int Pipe[2] = {-1,-1};
167 if (pipe(Pipe) != 0)
168 return _error->Errno("pipe",_("Failed to create IPC pipe to subprocess"));
169 for (int I = 0; I != 2; I++)
170 SetCloseExec(Pipe[I],true);
171
172 // The child..
173 Outputter = fork();
174 if (Outputter == 0)
175 {
176 close(Pipe[1]);
177 Child(Pipe[0]);
178 if (_error->PendingError() == true)
179 {
180 _error->DumpErrors();
181 _exit(100);
182 }
183 _exit(0);
184 };
185
186 close(Pipe[0]);
187 Input = fdopen(Pipe[1],"w");
188 if (Input == 0)
189 return _error->Errno("fdopen",_("Failed to create FILE*"));
190
191 if (Outputter == -1)
192 return _error->Errno("fork",_("Failed to fork"));
193 return true;
194 }
195 /*}}}*/
196 // MultiCompress::Die - Clean up the writer /*{{{*/
197 // ---------------------------------------------------------------------
198 /* */
199 bool MultiCompress::Die()
200 {
201 if (Input == 0)
202 return true;
203
204 fclose(Input);
205 Input = 0;
206 bool Res = ExecWait(Outputter,_("Compress child"),false);
207 Outputter = -1;
208 return Res;
209 }
210 /*}}}*/
211 // MultiCompress::Finalize - Finish up writing /*{{{*/
212 // ---------------------------------------------------------------------
213 /* This is only necessary for statistics reporting. */
214 bool MultiCompress::Finalize(unsigned long long &OutSize)
215 {
216 OutSize = 0;
217 if (Input == 0 || Die() == false)
218 return false;
219
220 time_t Now;
221 time(&Now);
222
223 // Check the mtimes to see if the files were replaced.
224 bool Changed = false;
225 for (Files *I = Outputs; I != 0; I = I->Next)
226 {
227 struct stat St;
228 if (stat(I->Output.c_str(),&St) != 0)
229 return _error->Error(_("Internal error, failed to create %s"),
230 I->Output.c_str());
231
232 if (I->OldMTime != St.st_mtime)
233 Changed = true;
234 else
235 {
236 // Update the mtime if necessary
237 if (UpdateMTime > 0 &&
238 (Now - St.st_mtime > (signed)UpdateMTime || St.st_mtime > Now))
239 {
240 utimensat(AT_FDCWD, I->Output.c_str(), NULL, AT_SYMLINK_NOFOLLOW);
241 Changed = true;
242 }
243 }
244
245 // Force the file permissions
246 if (St.st_mode != Permissions)
247 chmod(I->Output.c_str(),Permissions);
248
249 OutSize += St.st_size;
250 }
251
252 if (Changed == false)
253 OutSize = 0;
254
255 return true;
256 }
257 /*}}}*/
258 // MultiCompress::OpenOld - Open an old file /*{{{*/
259 // ---------------------------------------------------------------------
260 /* This opens one of the original output files, possibly decompressing it. */
261 bool MultiCompress::OpenOld(FileFd &Fd)
262 {
263 Files *Best = Outputs;
264 for (Files *I = Outputs; I != 0; I = I->Next)
265 if (Best->CompressProg.Cost > I->CompressProg.Cost)
266 Best = I;
267
268 // Open the file
269 return Fd.Open(Best->Output, FileFd::ReadOnly, FileFd::Extension);
270 }
271 /*}}}*/
272 // MultiCompress::Child - The writer child /*{{{*/
273 // ---------------------------------------------------------------------
274 /* The child process forks a bunch of compression children and takes
275 input on FD and passes it to all the compressor child. On the way it
276 computes the MD5 of the raw data. After this the raw data in the
277 original files is compared to see if this data is new. If the data
278 is new then the temp files are renamed, otherwise they are erased. */
279 bool MultiCompress::Child(int const &FD)
280 {
281 /* Okay, now we just feed data from FD to all the other FDs. Also
282 stash a hash of the data to use later. */
283 SetNonBlock(FD,false);
284 unsigned char Buffer[32*1024];
285 unsigned long long FileSize = 0;
286 MD5Summation MD5;
287 while (1)
288 {
289 WaitFd(FD,false);
290 int Res = read(FD,Buffer,sizeof(Buffer));
291 if (Res == 0)
292 break;
293 if (Res < 0)
294 continue;
295
296 MD5.Add(Buffer,Res);
297 FileSize += Res;
298 for (Files *I = Outputs; I != 0; I = I->Next)
299 {
300 if (I->TmpFile.Write(Buffer, Res) == false)
301 {
302 _error->Errno("write",_("IO to subprocess/file failed"));
303 break;
304 }
305 }
306 }
307
308 if (_error->PendingError() == true)
309 return false;
310
311 /* Now we have to copy the files over, or erase them if they
312 have not changed. First find the cheapest decompressor */
313 bool Missing = false;
314 for (Files *I = Outputs; I != 0; I = I->Next)
315 {
316 if (I->OldMTime == 0)
317 {
318 Missing = true;
319 break;
320 }
321 }
322
323 // Check the MD5 of the lowest cost entity.
324 while (Missing == false)
325 {
326 FileFd CompFd;
327 if (OpenOld(CompFd) == false)
328 {
329 _error->Discard();
330 break;
331 }
332
333 // Compute the hash
334 MD5Summation OldMD5;
335 unsigned long long NewFileSize = 0;
336 while (1)
337 {
338 unsigned long long Res = 0;
339 if (CompFd.Read(Buffer,sizeof(Buffer), &Res) == false)
340 return _error->Errno("read",_("Failed to read while computing MD5"));
341 if (Res == 0)
342 break;
343 NewFileSize += Res;
344 OldMD5.Add(Buffer,Res);
345 }
346 CompFd.Close();
347
348 // Check the hash
349 if (OldMD5.Result() == MD5.Result() &&
350 FileSize == NewFileSize)
351 {
352 for (Files *I = Outputs; I != 0; I = I->Next)
353 {
354 I->TmpFile.Close();
355 if (unlink(I->TmpFile.Name().c_str()) != 0)
356 _error->Errno("unlink",_("Problem unlinking %s"),
357 I->TmpFile.Name().c_str());
358 }
359 return !_error->PendingError();
360 }
361 break;
362 }
363
364 // Finalize
365 for (Files *I = Outputs; I != 0; I = I->Next)
366 {
367 // Set the correct file modes
368 chmod(I->TmpFile.Name().c_str(),Permissions);
369
370 if (rename(I->TmpFile.Name().c_str(),I->Output.c_str()) != 0)
371 _error->Errno("rename",_("Failed to rename %s to %s"),
372 I->TmpFile.Name().c_str(),I->Output.c_str());
373 I->TmpFile.Close();
374 }
375
376 return !_error->PendingError();
377 }
378 /*}}}*/
379