enable FileFd to guess the compressor based on the filename if requested or
[ntk/apt.git] / methods / rred.cc
1 // Includes /*{{{*/
2 #include <config.h>
3
4 #include <apt-pkg/fileutl.h>
5 #include <apt-pkg/mmap.h>
6 #include <apt-pkg/error.h>
7 #include <apt-pkg/acquire-method.h>
8 #include <apt-pkg/strutl.h>
9 #include <apt-pkg/hashes.h>
10 #include <apt-pkg/configuration.h>
11
12 #include <sys/stat.h>
13 #include <sys/uio.h>
14 #include <unistd.h>
15 #include <utime.h>
16 #include <stdio.h>
17 #include <errno.h>
18 #include <zlib.h>
19 #include <apti18n.h>
20 /*}}}*/
21 /** \brief RredMethod - ed-style incremential patch method {{{
22 *
23 * This method implements a patch functionality similar to "patch --ed" that is
24 * used by the "tiffany" incremental packages download stuff. It differs from
25 * "ed" insofar that it is way more restricted (and therefore secure).
26 * The currently supported ed commands are "<em>c</em>hange", "<em>a</em>dd" and
27 * "<em>d</em>elete" (diff doesn't output any other).
28 * Additionally the records must be reverse sorted by line number and
29 * may not overlap (diff *seems* to produce this kind of output).
30 * */
31 class RredMethod : public pkgAcqMethod {
32 bool Debug;
33 // the size of this doesn't really matter (except for performance)
34 const static int BUF_SIZE = 1024;
35 // the supported ed commands
36 enum Mode {MODE_CHANGED='c', MODE_DELETED='d', MODE_ADDED='a'};
37 // return values
38 enum State {ED_OK, ED_ORDERING, ED_PARSER, ED_FAILURE, MMAP_FAILED};
39
40 State applyFile(gzFile &ed_cmds, FILE *in_file, FILE *out_file,
41 unsigned long &line, char *buffer, Hashes *hash) const;
42 void ignoreLineInFile(FILE *fin, char *buffer) const;
43 void ignoreLineInFile(gzFile &fin, char *buffer) const;
44 void copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines,
45 Hashes *hash, char *buffer) const;
46 void copyLinesFromFileToFile(gzFile &fin, FILE *fout, unsigned int lines,
47 Hashes *hash, char *buffer) const;
48
49 State patchFile(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
50 State patchMMap(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
51
52 protected:
53 // the methods main method
54 virtual bool Fetch(FetchItem *Itm);
55
56 public:
57 RredMethod() : pkgAcqMethod("1.1",SingleInstance | SendConfig), Debug(false) {};
58 };
59 /*}}}*/
60 /** \brief applyFile - in reverse order with a tail recursion {{{
61 *
62 * As it is expected that the commands are in reversed order in the patch file
63 * we check in the first half if the command is valid, but doesn't execute it
64 * and move a step deeper. After reaching the end of the file we apply the
65 * patches in the correct order: last found command first.
66 *
67 * \param ed_cmds patch file to apply
68 * \param in_file base file we want to patch
69 * \param out_file file to write the patched result to
70 * \param line of command operation
71 * \param buffer internal used read/write buffer
72 * \param hash the created file for correctness
73 * \return the success State of the ed command executor
74 */
75 RredMethod::State RredMethod::applyFile(gzFile &ed_cmds, FILE *in_file, FILE *out_file,
76 unsigned long &line, char *buffer, Hashes *hash) const {
77 // get the current command and parse it
78 if (gzgets(ed_cmds, buffer, BUF_SIZE) == NULL) {
79 if (Debug == true)
80 std::clog << "rred: encounter end of file - we can start patching now." << std::endl;
81 line = 0;
82 return ED_OK;
83 }
84
85 // parse in the effected linenumbers
86 char* idx;
87 errno=0;
88 unsigned long const startline = strtol(buffer, &idx, 10);
89 if (errno == ERANGE || errno == EINVAL) {
90 _error->Errno("rred", "startline is an invalid number");
91 return ED_PARSER;
92 }
93 if (startline > line) {
94 _error->Error("rred: The start line (%lu) of the next command is higher than the last line (%lu). This is not allowed.", startline, line);
95 return ED_ORDERING;
96 }
97 unsigned long stopline;
98 if (*idx == ',') {
99 idx++;
100 errno=0;
101 stopline = strtol(idx, &idx, 10);
102 if (errno == ERANGE || errno == EINVAL) {
103 _error->Errno("rred", "stopline is an invalid number");
104 return ED_PARSER;
105 }
106 }
107 else {
108 stopline = startline;
109 }
110 line = startline;
111
112 // which command to execute on this line(s)?
113 switch (*idx) {
114 case MODE_CHANGED:
115 if (Debug == true)
116 std::clog << "Change from line " << startline << " to " << stopline << std::endl;
117 break;
118 case MODE_ADDED:
119 if (Debug == true)
120 std::clog << "Insert after line " << startline << std::endl;
121 break;
122 case MODE_DELETED:
123 if (Debug == true)
124 std::clog << "Delete from line " << startline << " to " << stopline << std::endl;
125 break;
126 default:
127 _error->Error("rred: Unknown ed command '%c'. Abort.", *idx);
128 return ED_PARSER;
129 }
130 unsigned char mode = *idx;
131
132 // save the current position
133 unsigned const long pos = gztell(ed_cmds);
134
135 // if this is add or change then go to the next full stop
136 unsigned int data_length = 0;
137 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
138 do {
139 ignoreLineInFile(ed_cmds, buffer);
140 data_length++;
141 }
142 while (strncmp(buffer, ".", 1) != 0);
143 data_length--; // the dot should not be copied
144 }
145
146 // do the recursive call - the last command is the one we need to execute at first
147 const State child = applyFile(ed_cmds, in_file, out_file, line, buffer, hash);
148 if (child != ED_OK) {
149 return child;
150 }
151
152 // change and delete are working on "line" - add is done after "line"
153 if (mode != MODE_ADDED)
154 line++;
155
156 // first wind to the current position and copy over all unchanged lines
157 if (line < startline) {
158 copyLinesFromFileToFile(in_file, out_file, (startline - line), hash, buffer);
159 line = startline;
160 }
161
162 if (mode != MODE_ADDED)
163 line--;
164
165 // include data from ed script
166 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
167 gzseek(ed_cmds, pos, SEEK_SET);
168 copyLinesFromFileToFile(ed_cmds, out_file, data_length, hash, buffer);
169 }
170
171 // ignore the corresponding number of lines from input
172 if (mode == MODE_CHANGED || mode == MODE_DELETED) {
173 while (line < stopline) {
174 ignoreLineInFile(in_file, buffer);
175 line++;
176 }
177 }
178 return ED_OK;
179 }
180 /*}}}*/
181 void RredMethod::copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines,/*{{{*/
182 Hashes *hash, char *buffer) const {
183 while (0 < lines--) {
184 do {
185 fgets(buffer, BUF_SIZE, fin);
186 size_t const written = fwrite(buffer, 1, strlen(buffer), fout);
187 hash->Add((unsigned char*)buffer, written);
188 } while (strlen(buffer) == (BUF_SIZE - 1) &&
189 buffer[BUF_SIZE - 2] != '\n');
190 }
191 }
192 /*}}}*/
193 void RredMethod::copyLinesFromFileToFile(gzFile &fin, FILE *fout, unsigned int lines,/*{{{*/
194 Hashes *hash, char *buffer) const {
195 while (0 < lines--) {
196 do {
197 gzgets(fin, buffer, BUF_SIZE);
198 size_t const written = fwrite(buffer, 1, strlen(buffer), fout);
199 hash->Add((unsigned char*)buffer, written);
200 } while (strlen(buffer) == (BUF_SIZE - 1) &&
201 buffer[BUF_SIZE - 2] != '\n');
202 }
203 }
204 /*}}}*/
205 void RredMethod::ignoreLineInFile(FILE *fin, char *buffer) const { /*{{{*/
206 fgets(buffer, BUF_SIZE, fin);
207 while (strlen(buffer) == (BUF_SIZE - 1) &&
208 buffer[BUF_SIZE - 2] != '\n') {
209 fgets(buffer, BUF_SIZE, fin);
210 buffer[0] = ' ';
211 }
212 }
213 /*}}}*/
214 void RredMethod::ignoreLineInFile(gzFile &fin, char *buffer) const { /*{{{*/
215 gzgets(fin, buffer, BUF_SIZE);
216 while (strlen(buffer) == (BUF_SIZE - 1) &&
217 buffer[BUF_SIZE - 2] != '\n') {
218 gzgets(fin, buffer, BUF_SIZE);
219 buffer[0] = ' ';
220 }
221 }
222 /*}}}*/
223 RredMethod::State RredMethod::patchFile(FileFd &Patch, FileFd &From, /*{{{*/
224 FileFd &out_file, Hashes *hash) const {
225 char buffer[BUF_SIZE];
226 FILE* fFrom = fdopen(From.Fd(), "r");
227 gzFile fPatch = Patch.gzFd();
228 FILE* fTo = fdopen(out_file.Fd(), "w");
229
230 /* we do a tail recursion to read the commands in the right order */
231 unsigned long line = -1; // assign highest possible value
232 State const result = applyFile(fPatch, fFrom, fTo, line, buffer, hash);
233
234 /* read the rest from infile */
235 if (result == ED_OK) {
236 while (fgets(buffer, BUF_SIZE, fFrom) != NULL) {
237 size_t const written = fwrite(buffer, 1, strlen(buffer), fTo);
238 hash->Add((unsigned char*)buffer, written);
239 }
240 fflush(fTo);
241 }
242 return result;
243 }
244 /*}}}*/
245 /* struct EdCommand {{{*/
246 #ifdef _POSIX_MAPPED_FILES
247 struct EdCommand {
248 size_t data_start;
249 size_t data_end;
250 size_t data_lines;
251 size_t first_line;
252 size_t last_line;
253 char type;
254 };
255 #define IOV_COUNT 1024 /* Don't really want IOV_MAX since it can be arbitrarily large */
256 #endif
257 /*}}}*/
258 RredMethod::State RredMethod::patchMMap(FileFd &Patch, FileFd &From, /*{{{*/
259 FileFd &out_file, Hashes *hash) const {
260 #ifdef _POSIX_MAPPED_FILES
261 MMap ed_cmds(MMap::ReadOnly);
262 if (Patch.gzFd() != NULL) {
263 unsigned long long mapSize = Patch.Size();
264 DynamicMMap* dyn = new DynamicMMap(0, mapSize, 0);
265 if (dyn->validData() == false) {
266 delete dyn;
267 return MMAP_FAILED;
268 }
269 dyn->AddSize(mapSize);
270 gzread(Patch.gzFd(), dyn->Data(), mapSize);
271 ed_cmds = *dyn;
272 } else
273 ed_cmds = MMap(Patch, MMap::ReadOnly);
274
275 MMap in_file(From, MMap::ReadOnly);
276
277 if (ed_cmds.Size() == 0 || in_file.Size() == 0)
278 return MMAP_FAILED;
279
280 EdCommand* commands = 0;
281 size_t command_count = 0;
282 size_t command_alloc = 0;
283
284 const char* begin = (char*) ed_cmds.Data();
285 const char* end = begin;
286 const char* ed_end = (char*) ed_cmds.Data() + ed_cmds.Size();
287
288 const char* input = (char*) in_file.Data();
289 const char* input_end = (char*) in_file.Data() + in_file.Size();
290
291 size_t i;
292
293 /* 1. Parse entire script. It is executed in reverse order, so we cather it
294 * in the `commands' buffer first
295 */
296
297 for(;;) {
298 EdCommand cmd;
299 cmd.data_start = 0;
300 cmd.data_end = 0;
301
302 while(begin != ed_end && *begin == '\n')
303 ++begin;
304 while(end != ed_end && *end != '\n')
305 ++end;
306 if(end == ed_end && begin == end)
307 break;
308
309 /* Determine command range */
310 const char* tmp = begin;
311
312 for(;;) {
313 /* atoll is safe despite lacking NUL-termination; we know there's an
314 * alphabetic character at end[-1]
315 */
316 if(tmp == end) {
317 cmd.first_line = atol(begin);
318 cmd.last_line = cmd.first_line;
319 break;
320 }
321 if(*tmp == ',') {
322 cmd.first_line = atol(begin);
323 cmd.last_line = atol(tmp + 1);
324 break;
325 }
326 ++tmp;
327 }
328
329 // which command to execute on this line(s)?
330 switch (end[-1]) {
331 case MODE_CHANGED:
332 if (Debug == true)
333 std::clog << "Change from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
334 break;
335 case MODE_ADDED:
336 if (Debug == true)
337 std::clog << "Insert after line " << cmd.first_line << std::endl;
338 break;
339 case MODE_DELETED:
340 if (Debug == true)
341 std::clog << "Delete from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
342 break;
343 default:
344 _error->Error("rred: Unknown ed command '%c'. Abort.", end[-1]);
345 free(commands);
346 return ED_PARSER;
347 }
348 cmd.type = end[-1];
349
350 /* Determine the size of the inserted text, so we don't have to scan this
351 * text again later.
352 */
353 begin = end + 1;
354 end = begin;
355 cmd.data_lines = 0;
356
357 if(cmd.type == MODE_ADDED || cmd.type == MODE_CHANGED) {
358 cmd.data_start = begin - (char*) ed_cmds.Data();
359 while(end != ed_end) {
360 if(*end == '\n') {
361 if(end[-1] == '.' && end[-2] == '\n')
362 break;
363 ++cmd.data_lines;
364 }
365 ++end;
366 }
367 cmd.data_end = end - (char*) ed_cmds.Data() - 1;
368 begin = end + 1;
369 end = begin;
370 }
371 if(command_count == command_alloc) {
372 command_alloc = (command_alloc + 64) * 3 / 2;
373 commands = (EdCommand*) realloc(commands, command_alloc * sizeof(EdCommand));
374 }
375 commands[command_count++] = cmd;
376 }
377
378 struct iovec* iov = new struct iovec[IOV_COUNT];
379 size_t iov_size = 0;
380
381 size_t amount, remaining;
382 size_t line = 1;
383 EdCommand* cmd;
384
385 /* 2. Execute script. We gather writes in a `struct iov' array, and flush
386 * using writev to minimize the number of system calls. Data is read
387 * directly from the memory mappings of the input file and the script.
388 */
389
390 for(i = command_count; i-- > 0; ) {
391 cmd = &commands[i];
392 if(cmd->type == MODE_ADDED)
393 amount = cmd->first_line + 1;
394 else
395 amount = cmd->first_line;
396
397 if(line < amount) {
398 begin = input;
399 while(line != amount) {
400 input = (const char*) memchr(input, '\n', input_end - input);
401 if(!input)
402 break;
403 ++line;
404 ++input;
405 }
406
407 iov[iov_size].iov_base = (void*) begin;
408 iov[iov_size].iov_len = input - begin;
409 hash->Add((const unsigned char*) begin, input - begin);
410
411 if(++iov_size == IOV_COUNT) {
412 writev(out_file.Fd(), iov, IOV_COUNT);
413 iov_size = 0;
414 }
415 }
416
417 if(cmd->type == MODE_DELETED || cmd->type == MODE_CHANGED) {
418 remaining = (cmd->last_line - cmd->first_line) + 1;
419 line += remaining;
420 while(remaining) {
421 input = (const char*) memchr(input, '\n', input_end - input);
422 if(!input)
423 break;
424 --remaining;
425 ++input;
426 }
427 }
428
429 if(cmd->type == MODE_CHANGED || cmd->type == MODE_ADDED) {
430 if(cmd->data_end != cmd->data_start) {
431 iov[iov_size].iov_base = (void*) ((char*)ed_cmds.Data() + cmd->data_start);
432 iov[iov_size].iov_len = cmd->data_end - cmd->data_start;
433 hash->Add((const unsigned char*) ((char*)ed_cmds.Data() + cmd->data_start),
434 iov[iov_size].iov_len);
435
436 if(++iov_size == IOV_COUNT) {
437 writev(out_file.Fd(), iov, IOV_COUNT);
438 iov_size = 0;
439 }
440 }
441 }
442 }
443
444 if(input != input_end) {
445 iov[iov_size].iov_base = (void*) input;
446 iov[iov_size].iov_len = input_end - input;
447 hash->Add((const unsigned char*) input, input_end - input);
448 ++iov_size;
449 }
450
451 if(iov_size) {
452 writev(out_file.Fd(), iov, iov_size);
453 iov_size = 0;
454 }
455
456 for(i = 0; i < iov_size; i += IOV_COUNT) {
457 if(iov_size - i < IOV_COUNT)
458 writev(out_file.Fd(), iov + i, iov_size - i);
459 else
460 writev(out_file.Fd(), iov + i, IOV_COUNT);
461 }
462
463 delete [] iov;
464 free(commands);
465
466 return ED_OK;
467 #else
468 return MMAP_FAILED;
469 #endif
470 }
471 /*}}}*/
472 bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/
473 {
474 Debug = _config->FindB("Debug::pkgAcquire::RRed", false);
475 URI Get = Itm->Uri;
476 std::string Path = Get.Host + Get.Path; // To account for relative paths
477
478 FetchResult Res;
479 Res.Filename = Itm->DestFile;
480 if (Itm->Uri.empty() == true) {
481 Path = Itm->DestFile;
482 Itm->DestFile.append(".result");
483 } else
484 URIStart(Res);
485
486 if (Debug == true)
487 std::clog << "Patching " << Path << " with " << Path
488 << ".ed and putting result into " << Itm->DestFile << std::endl;
489 // Open the source and destination files (the d'tor of FileFd will do
490 // the cleanup/closing of the fds)
491 FileFd From(Path,FileFd::ReadOnly);
492 FileFd Patch(Path+".ed",FileFd::ReadOnly, FileFd::Gzip);
493 FileFd To(Itm->DestFile,FileFd::WriteAtomic);
494 To.EraseOnFailure();
495 if (_error->PendingError() == true)
496 return false;
497
498 Hashes Hash;
499 // now do the actual patching
500 State const result = patchMMap(Patch, From, To, &Hash);
501 if (result == MMAP_FAILED) {
502 // retry with patchFile
503 Patch.Seek(0);
504 From.Seek(0);
505 To.Open(Itm->DestFile,FileFd::WriteAtomic);
506 if (_error->PendingError() == true)
507 return false;
508 if (patchFile(Patch, From, To, &Hash) != ED_OK) {
509 return _error->WarningE("rred", _("Could not patch %s with mmap and with file operation usage - the patch seems to be corrupt."), Path.c_str());
510 } else if (Debug == true) {
511 std::clog << "rred: finished file patching of " << Path << " after mmap failed." << std::endl;
512 }
513 } else if (result != ED_OK) {
514 return _error->Errno("rred", _("Could not patch %s with mmap (but no mmap specific fail) - the patch seems to be corrupt."), Path.c_str());
515 } else if (Debug == true) {
516 std::clog << "rred: finished mmap patching of " << Path << std::endl;
517 }
518
519 // write out the result
520 From.Close();
521 Patch.Close();
522 To.Close();
523
524 /* Transfer the modification times from the patch file
525 to be able to see in which state the file should be
526 and use the access time from the "old" file */
527 struct stat BufBase, BufPatch;
528 if (stat(Path.c_str(),&BufBase) != 0 ||
529 stat(std::string(Path+".ed").c_str(),&BufPatch) != 0)
530 return _error->Errno("stat",_("Failed to stat"));
531
532 struct utimbuf TimeBuf;
533 TimeBuf.actime = BufBase.st_atime;
534 TimeBuf.modtime = BufPatch.st_mtime;
535 if (utime(Itm->DestFile.c_str(),&TimeBuf) != 0)
536 return _error->Errno("utime",_("Failed to set modification time"));
537
538 if (stat(Itm->DestFile.c_str(),&BufBase) != 0)
539 return _error->Errno("stat",_("Failed to stat"));
540
541 // return done
542 Res.LastModified = BufBase.st_mtime;
543 Res.Size = BufBase.st_size;
544 Res.TakeHashes(Hash);
545 URIDone(Res);
546
547 return true;
548 }
549 /*}}}*/
550 /** \brief Wrapper class for testing rred */ /*{{{*/
551 class TestRredMethod : public RredMethod {
552 public:
553 /** \brief Run rred in debug test mode
554 *
555 * This method can be used to run the rred method outside
556 * of the "normal" acquire environment for easier testing.
557 *
558 * \param base basename of all files involved in this rred test
559 */
560 bool Run(char const *base) {
561 _config->CndSet("Debug::pkgAcquire::RRed", "true");
562 FetchItem *test = new FetchItem;
563 test->DestFile = base;
564 return Fetch(test);
565 }
566 };
567 /*}}}*/
568 /** \brief Starter for the rred method (or its test method) {{{
569 *
570 * Used without parameters is the normal behavior for methods for
571 * the APT acquire system. While this works great for the acquire system
572 * it is very hard to test the method and therefore the method also
573 * accepts one parameter which will switch it directly to debug test mode:
574 * The test mode expects that if "Testfile" is given as parameter
575 * the file "Testfile" should be ed-style patched with "Testfile.ed"
576 * and will write the result to "Testfile.result".
577 */
578 int main(int argc, char *argv[]) {
579 if (argc <= 1) {
580 RredMethod Mth;
581 return Mth.Run();
582 } else {
583 TestRredMethod Mth;
584 bool result = Mth.Run(argv[1]);
585 _error->DumpErrors();
586 return result;
587 }
588 }
589 /*}}}*/