implement POC client-side merging of pdiffs via apt-file
[ntk/apt.git] / methods / rred.cc
1 // Includes /*{{{*/
2 #include <config.h>
3
4 #include <apt-pkg/fileutl.h>
5 #include <apt-pkg/mmap.h>
6 #include <apt-pkg/error.h>
7 #include <apt-pkg/acquire-method.h>
8 #include <apt-pkg/strutl.h>
9 #include <apt-pkg/hashes.h>
10 #include <apt-pkg/configuration.h>
11
12 #include <sys/stat.h>
13 #include <sys/uio.h>
14 #include <sys/types.h>
15 #include <fcntl.h>
16 #include <unistd.h>
17 #include <utime.h>
18 #include <stdio.h>
19 #include <errno.h>
20 #include <apti18n.h>
21 /*}}}*/
22 /** \brief RredMethod - ed-style incremential patch method {{{
23 *
24 * This method implements a patch functionality similar to "patch --ed" that is
25 * used by the "tiffany" incremental packages download stuff. It differs from
26 * "ed" insofar that it is way more restricted (and therefore secure).
27 * The currently supported ed commands are "<em>c</em>hange", "<em>a</em>dd" and
28 * "<em>d</em>elete" (diff doesn't output any other).
29 * Additionally the records must be reverse sorted by line number and
30 * may not overlap (diff *seems* to produce this kind of output).
31 * */
32 class RredMethod : public pkgAcqMethod {
33 bool Debug;
34 // the size of this doesn't really matter (except for performance)
35 const static int BUF_SIZE = 1024;
36 // the supported ed commands
37 enum Mode {MODE_CHANGED='c', MODE_DELETED='d', MODE_ADDED='a'};
38 // return values
39 enum State {ED_OK, ED_ORDERING, ED_PARSER, ED_FAILURE, MMAP_FAILED};
40
41 State applyFile(FileFd &ed_cmds, FileFd &in_file, FileFd &out_file,
42 unsigned long &line, char *buffer, Hashes *hash) const;
43 void ignoreLineInFile(FileFd &fin, char *buffer) const;
44 void copyLinesFromFileToFile(FileFd &fin, FileFd &fout, unsigned int lines,
45 Hashes *hash, char *buffer) const;
46
47 State patchFile(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
48 State patchMMap(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
49
50 protected:
51 // the methods main method
52 virtual bool Fetch(FetchItem *Itm);
53
54 public:
55 RredMethod() : pkgAcqMethod("1.1",SingleInstance | SendConfig), Debug(false) {};
56 };
57 /*}}}*/
58 /** \brief applyFile - in reverse order with a tail recursion {{{
59 *
60 * As it is expected that the commands are in reversed order in the patch file
61 * we check in the first half if the command is valid, but doesn't execute it
62 * and move a step deeper. After reaching the end of the file we apply the
63 * patches in the correct order: last found command first.
64 *
65 * \param ed_cmds patch file to apply
66 * \param in_file base file we want to patch
67 * \param out_file file to write the patched result to
68 * \param line of command operation
69 * \param buffer internal used read/write buffer
70 * \param hash the created file for correctness
71 * \return the success State of the ed command executor
72 */
73 RredMethod::State RredMethod::applyFile(FileFd &ed_cmds, FileFd &in_file, FileFd &out_file,
74 unsigned long &line, char *buffer, Hashes *hash) const {
75 // get the current command and parse it
76 if (ed_cmds.ReadLine(buffer, BUF_SIZE) == NULL) {
77 if (Debug == true)
78 std::clog << "rred: encounter end of file - we can start patching now." << std::endl;
79 line = 0;
80 return ED_OK;
81 }
82
83 // parse in the effected linenumbers
84 char* idx;
85 errno=0;
86 unsigned long const startline = strtol(buffer, &idx, 10);
87 if (errno == ERANGE || errno == EINVAL) {
88 _error->Errno("rred", "startline is an invalid number");
89 return ED_PARSER;
90 }
91 if (startline > line) {
92 _error->Error("rred: The start line (%lu) of the next command is higher than the last line (%lu). This is not allowed.", startline, line);
93 return ED_ORDERING;
94 }
95 unsigned long stopline;
96 if (*idx == ',') {
97 idx++;
98 errno=0;
99 stopline = strtol(idx, &idx, 10);
100 if (errno == ERANGE || errno == EINVAL) {
101 _error->Errno("rred", "stopline is an invalid number");
102 return ED_PARSER;
103 }
104 }
105 else {
106 stopline = startline;
107 }
108 line = startline;
109
110 // which command to execute on this line(s)?
111 switch (*idx) {
112 case MODE_CHANGED:
113 if (Debug == true)
114 std::clog << "Change from line " << startline << " to " << stopline << std::endl;
115 break;
116 case MODE_ADDED:
117 if (Debug == true)
118 std::clog << "Insert after line " << startline << std::endl;
119 break;
120 case MODE_DELETED:
121 if (Debug == true)
122 std::clog << "Delete from line " << startline << " to " << stopline << std::endl;
123 break;
124 default:
125 _error->Error("rred: Unknown ed command '%c'. Abort.", *idx);
126 return ED_PARSER;
127 }
128 unsigned char mode = *idx;
129
130 // save the current position
131 unsigned const long long pos = ed_cmds.Tell();
132
133 // if this is add or change then go to the next full stop
134 unsigned int data_length = 0;
135 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
136 do {
137 ignoreLineInFile(ed_cmds, buffer);
138 data_length++;
139 }
140 while (strncmp(buffer, ".", 1) != 0);
141 data_length--; // the dot should not be copied
142 }
143
144 // do the recursive call - the last command is the one we need to execute at first
145 const State child = applyFile(ed_cmds, in_file, out_file, line, buffer, hash);
146 if (child != ED_OK) {
147 return child;
148 }
149
150 // change and delete are working on "line" - add is done after "line"
151 if (mode != MODE_ADDED)
152 line++;
153
154 // first wind to the current position and copy over all unchanged lines
155 if (line < startline) {
156 copyLinesFromFileToFile(in_file, out_file, (startline - line), hash, buffer);
157 line = startline;
158 }
159
160 if (mode != MODE_ADDED)
161 line--;
162
163 // include data from ed script
164 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
165 ed_cmds.Seek(pos);
166 copyLinesFromFileToFile(ed_cmds, out_file, data_length, hash, buffer);
167 }
168
169 // ignore the corresponding number of lines from input
170 if (mode == MODE_CHANGED || mode == MODE_DELETED) {
171 while (line < stopline) {
172 ignoreLineInFile(in_file, buffer);
173 line++;
174 }
175 }
176 return ED_OK;
177 }
178 /*}}}*/
179 void RredMethod::copyLinesFromFileToFile(FileFd &fin, FileFd &fout, unsigned int lines,/*{{{*/
180 Hashes *hash, char *buffer) const {
181 while (0 < lines--) {
182 do {
183 fin.ReadLine(buffer, BUF_SIZE);
184 unsigned long long const towrite = strlen(buffer);
185 fout.Write(buffer, towrite);
186 hash->Add((unsigned char*)buffer, towrite);
187 } while (strlen(buffer) == (BUF_SIZE - 1) &&
188 buffer[BUF_SIZE - 2] != '\n');
189 }
190 }
191 /*}}}*/
192 void RredMethod::ignoreLineInFile(FileFd &fin, char *buffer) const { /*{{{*/
193 fin.ReadLine(buffer, BUF_SIZE);
194 while (strlen(buffer) == (BUF_SIZE - 1) &&
195 buffer[BUF_SIZE - 2] != '\n') {
196 fin.ReadLine(buffer, BUF_SIZE);
197 buffer[0] = ' ';
198 }
199 }
200 /*}}}*/
201 RredMethod::State RredMethod::patchFile(FileFd &Patch, FileFd &From, /*{{{*/
202 FileFd &out_file, Hashes *hash) const {
203 char buffer[BUF_SIZE];
204
205 /* we do a tail recursion to read the commands in the right order */
206 unsigned long line = -1; // assign highest possible value
207 State const result = applyFile(Patch, From, out_file, line, buffer, hash);
208
209 /* read the rest from infile */
210 if (result == ED_OK) {
211 while (From.ReadLine(buffer, BUF_SIZE) != NULL) {
212 unsigned long long const towrite = strlen(buffer);
213 out_file.Write(buffer, towrite);
214 hash->Add((unsigned char*)buffer, towrite);
215 }
216 }
217 return result;
218 }
219 /*}}}*/
220 /* struct EdCommand {{{*/
221 #ifdef _POSIX_MAPPED_FILES
222 struct EdCommand {
223 size_t data_start;
224 size_t data_end;
225 size_t data_lines;
226 size_t first_line;
227 size_t last_line;
228 char type;
229 };
230 #define IOV_COUNT 1024 /* Don't really want IOV_MAX since it can be arbitrarily large */
231 static ssize_t retry_writev(int fd, const struct iovec *iov, int iovcnt) {
232 ssize_t Res;
233 errno = 0;
234 ssize_t i = 0;
235 do {
236 Res = writev(fd, iov + i, iovcnt);
237 if (Res < 0 && errno == EINTR)
238 continue;
239 if (Res < 0)
240 return _error->Errno("writev",_("Write error"));
241 iovcnt -= Res;
242 i += Res;
243 } while (Res > 0 && iovcnt > 0);
244 return i;
245 }
246 #endif
247 /*}}}*/
248 RredMethod::State RredMethod::patchMMap(FileFd &Patch, FileFd &From, /*{{{*/
249 FileFd &out_file, Hashes *hash) const {
250 #ifdef _POSIX_MAPPED_FILES
251 MMap ed_cmds(Patch, MMap::ReadOnly);
252 MMap in_file(From, MMap::ReadOnly);
253
254 unsigned long long const ed_size = ed_cmds.Size();
255 unsigned long long const in_size = in_file.Size();
256 if (ed_size == 0 || in_size == 0)
257 return MMAP_FAILED;
258
259 EdCommand* commands = 0;
260 size_t command_count = 0;
261 size_t command_alloc = 0;
262
263 const char* begin = (char*) ed_cmds.Data();
264 const char* end = begin;
265 const char* ed_end = (char*) ed_cmds.Data() + ed_size;
266
267 const char* input = (char*) in_file.Data();
268 const char* input_end = (char*) in_file.Data() + in_size;
269
270 size_t i;
271
272 /* 1. Parse entire script. It is executed in reverse order, so we cather it
273 * in the `commands' buffer first
274 */
275
276 for(;;) {
277 EdCommand cmd;
278 cmd.data_start = 0;
279 cmd.data_end = 0;
280
281 while(begin != ed_end && *begin == '\n')
282 ++begin;
283 while(end != ed_end && *end != '\n')
284 ++end;
285 if(end == ed_end && begin == end)
286 break;
287
288 /* Determine command range */
289 const char* tmp = begin;
290
291 for(;;) {
292 /* atoll is safe despite lacking NUL-termination; we know there's an
293 * alphabetic character at end[-1]
294 */
295 if(tmp == end) {
296 cmd.first_line = atol(begin);
297 cmd.last_line = cmd.first_line;
298 break;
299 }
300 if(*tmp == ',') {
301 cmd.first_line = atol(begin);
302 cmd.last_line = atol(tmp + 1);
303 break;
304 }
305 ++tmp;
306 }
307
308 // which command to execute on this line(s)?
309 switch (end[-1]) {
310 case MODE_CHANGED:
311 if (Debug == true)
312 std::clog << "Change from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
313 break;
314 case MODE_ADDED:
315 if (Debug == true)
316 std::clog << "Insert after line " << cmd.first_line << std::endl;
317 break;
318 case MODE_DELETED:
319 if (Debug == true)
320 std::clog << "Delete from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
321 break;
322 default:
323 _error->Error("rred: Unknown ed command '%c'. Abort.", end[-1]);
324 free(commands);
325 return ED_PARSER;
326 }
327 cmd.type = end[-1];
328
329 /* Determine the size of the inserted text, so we don't have to scan this
330 * text again later.
331 */
332 begin = end + 1;
333 end = begin;
334 cmd.data_lines = 0;
335
336 if(cmd.type == MODE_ADDED || cmd.type == MODE_CHANGED) {
337 cmd.data_start = begin - (char*) ed_cmds.Data();
338 while(end != ed_end) {
339 if(*end == '\n') {
340 if(end[-1] == '.' && end[-2] == '\n')
341 break;
342 ++cmd.data_lines;
343 }
344 ++end;
345 }
346 cmd.data_end = end - (char*) ed_cmds.Data() - 1;
347 begin = end + 1;
348 end = begin;
349 }
350 if(command_count == command_alloc) {
351 command_alloc = (command_alloc + 64) * 3 / 2;
352 EdCommand* newCommands = (EdCommand*) realloc(commands, command_alloc * sizeof(EdCommand));
353 if (newCommands == NULL) {
354 free(commands);
355 return MMAP_FAILED;
356 }
357 commands = newCommands;
358 }
359 commands[command_count++] = cmd;
360 }
361
362 struct iovec* iov = new struct iovec[IOV_COUNT];
363 size_t iov_size = 0;
364
365 size_t amount, remaining;
366 size_t line = 1;
367 EdCommand* cmd;
368
369 /* 2. Execute script. We gather writes in a `struct iov' array, and flush
370 * using writev to minimize the number of system calls. Data is read
371 * directly from the memory mappings of the input file and the script.
372 */
373
374 for(i = command_count; i-- > 0; ) {
375 cmd = &commands[i];
376 if(cmd->type == MODE_ADDED)
377 amount = cmd->first_line + 1;
378 else
379 amount = cmd->first_line;
380
381 if(line < amount) {
382 begin = input;
383 while(line != amount) {
384 input = (const char*) memchr(input, '\n', input_end - input);
385 if(!input)
386 break;
387 ++line;
388 ++input;
389 }
390
391 iov[iov_size].iov_base = (void*) begin;
392 iov[iov_size].iov_len = input - begin;
393 hash->Add((const unsigned char*) begin, input - begin);
394
395 if(++iov_size == IOV_COUNT) {
396 retry_writev(out_file.Fd(), iov, IOV_COUNT);
397 iov_size = 0;
398 }
399 }
400
401 if(cmd->type == MODE_DELETED || cmd->type == MODE_CHANGED) {
402 remaining = (cmd->last_line - cmd->first_line) + 1;
403 line += remaining;
404 while(remaining) {
405 input = (const char*) memchr(input, '\n', input_end - input);
406 if(!input)
407 break;
408 --remaining;
409 ++input;
410 }
411 }
412
413 if(cmd->type == MODE_CHANGED || cmd->type == MODE_ADDED) {
414 if(cmd->data_end != cmd->data_start) {
415 iov[iov_size].iov_base = (void*) ((char*)ed_cmds.Data() + cmd->data_start);
416 iov[iov_size].iov_len = cmd->data_end - cmd->data_start;
417 hash->Add((const unsigned char*) ((char*)ed_cmds.Data() + cmd->data_start),
418 iov[iov_size].iov_len);
419
420 if(++iov_size == IOV_COUNT) {
421 retry_writev(out_file.Fd(), iov, IOV_COUNT);
422 iov_size = 0;
423 }
424 }
425 }
426 }
427
428 if(input != input_end) {
429 iov[iov_size].iov_base = (void*) input;
430 iov[iov_size].iov_len = input_end - input;
431 hash->Add((const unsigned char*) input, input_end - input);
432 ++iov_size;
433 }
434
435 if(iov_size) {
436 retry_writev(out_file.Fd(), iov, iov_size);
437 iov_size = 0;
438 }
439
440 for(i = 0; i < iov_size; i += IOV_COUNT) {
441 if(iov_size - i < IOV_COUNT)
442 retry_writev(out_file.Fd(), iov + i, iov_size - i);
443 else
444 retry_writev(out_file.Fd(), iov + i, IOV_COUNT);
445 }
446
447 delete [] iov;
448 free(commands);
449
450 return ED_OK;
451 #else
452 return MMAP_FAILED;
453 #endif
454 }
455 /*}}}*/
456 bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/
457 {
458 Debug = _config->FindB("Debug::pkgAcquire::RRed", false);
459 URI Get = Itm->Uri;
460 std::string Path = Get.Host + Get.Path; // To account for relative paths
461
462 FetchResult Res;
463 Res.Filename = Itm->DestFile;
464 if (Itm->Uri.empty() == true) {
465 Path = Itm->DestFile;
466 Itm->DestFile.append(".result");
467 } else
468 URIStart(Res);
469
470 std::string lastPatchName;
471 Hashes Hash;
472
473 // check for a single ed file
474 if (FileExists(Path+".ed") == true)
475 {
476 if (Debug == true)
477 std::clog << "Patching " << Path << " with " << Path
478 << ".ed and putting result into " << Itm->DestFile << std::endl;
479
480 // Open the source and destination files
481 lastPatchName = Path + ".ed";
482 FileFd From(Path,FileFd::ReadOnly);
483 FileFd To(Itm->DestFile,FileFd::WriteAtomic);
484 To.EraseOnFailure();
485 FileFd Patch(lastPatchName, FileFd::ReadOnly, FileFd::Gzip);
486 if (_error->PendingError() == true)
487 return false;
488
489 // now do the actual patching
490 State const result = patchMMap(Patch, From, To, &Hash);
491 if (result == MMAP_FAILED) {
492 // retry with patchFile
493 Patch.Seek(0);
494 From.Seek(0);
495 To.Open(Itm->DestFile,FileFd::WriteAtomic);
496 if (_error->PendingError() == true)
497 return false;
498 if (patchFile(Patch, From, To, &Hash) != ED_OK) {
499 return _error->WarningE("rred", _("Could not patch %s with mmap and with file operation usage - the patch seems to be corrupt."), Path.c_str());
500 } else if (Debug == true) {
501 std::clog << "rred: finished file patching of " << Path << " after mmap failed." << std::endl;
502 }
503 } else if (result != ED_OK) {
504 return _error->Errno("rred", _("Could not patch %s with mmap (but no mmap specific fail) - the patch seems to be corrupt."), Path.c_str());
505 } else if (Debug == true) {
506 std::clog << "rred: finished mmap patching of " << Path << std::endl;
507 }
508
509 // write out the result
510 From.Close();
511 Patch.Close();
512 To.Close();
513 }
514 else
515 {
516 if (Debug == true)
517 std::clog << "Patching " << Path << " with all " << Path << ".ed.*.gz files and "
518 << "putting result into " << Itm->DestFile << std::endl;
519
520 int From = open(Path.c_str(), O_RDONLY);
521 unlink(Itm->DestFile.c_str());
522 int To = open(Itm->DestFile.c_str(), O_WRONLY | O_CREAT | O_EXCL, 0644);
523 SetCloseExec(From, false);
524 SetCloseExec(To, false);
525
526 _error->PushToStack();
527 std::vector<std::string> patches = GetListOfFilesInDir(flNotFile(Path), "gz", true, false);
528 _error->RevertToStack();
529
530 std::string externalrred = _config->Find("Dir::Bin::rred", "/usr/bin/diffindex-rred");
531 std::vector<const char *> Args;
532 Args.reserve(22);
533 Args.push_back(externalrred.c_str());
534
535 std::string const baseName = Path + ".ed.";
536 for (std::vector<std::string>::const_iterator p = patches.begin();
537 p != patches.end(); ++p)
538 if (p->compare(0, baseName.length(), baseName) == 0)
539 Args.push_back(p->c_str());
540
541 Args.push_back(NULL);
542
543 pid_t Patcher = ExecFork();
544 if (Patcher == 0) {
545 dup2(From, STDIN_FILENO);
546 dup2(To, STDOUT_FILENO);
547
548 execvp(Args[0], (char **) &Args[0]);
549 std::cerr << "Failed to execute patcher " << Args[0] << "!" << std::endl;
550 _exit(100);
551 }
552 // last is NULL, so the one before is the last patch
553 lastPatchName = Args[Args.size() - 2];
554
555 if (ExecWait(Patcher, "rred") == false)
556 return _error->Errno("rred", "Patching via external rred failed");
557
558 close(From);
559 close(To);
560
561 struct stat Buf;
562 if (stat(Itm->DestFile.c_str(), &Buf) != 0)
563 return _error->Errno("stat",_("Failed to stat"));
564
565 To = open(Path.c_str(), O_RDONLY);
566 Hash.AddFD(To, Buf.st_size);
567 close(To);
568 }
569
570 /* Transfer the modification times from the patch file
571 to be able to see in which state the file should be
572 and use the access time from the "old" file */
573 struct stat BufBase, BufPatch;
574 if (stat(Path.c_str(),&BufBase) != 0 ||
575 stat(lastPatchName.c_str(), &BufPatch) != 0)
576 return _error->Errno("stat",_("Failed to stat"));
577
578 struct utimbuf TimeBuf;
579 TimeBuf.actime = BufBase.st_atime;
580 TimeBuf.modtime = BufPatch.st_mtime;
581 if (utime(Itm->DestFile.c_str(),&TimeBuf) != 0)
582 return _error->Errno("utime",_("Failed to set modification time"));
583
584 if (stat(Itm->DestFile.c_str(),&BufBase) != 0)
585 return _error->Errno("stat",_("Failed to stat"));
586
587 // return done
588 Res.LastModified = BufBase.st_mtime;
589 Res.Size = BufBase.st_size;
590 Res.TakeHashes(Hash);
591 URIDone(Res);
592
593 return true;
594 }
595 /*}}}*/
596 /** \brief Wrapper class for testing rred */ /*{{{*/
597 class TestRredMethod : public RredMethod {
598 public:
599 /** \brief Run rred in debug test mode
600 *
601 * This method can be used to run the rred method outside
602 * of the "normal" acquire environment for easier testing.
603 *
604 * \param base basename of all files involved in this rred test
605 */
606 bool Run(char const *base) {
607 _config->CndSet("Debug::pkgAcquire::RRed", "true");
608 FetchItem *test = new FetchItem;
609 test->DestFile = base;
610 return Fetch(test);
611 }
612 };
613 /*}}}*/
614 /** \brief Starter for the rred method (or its test method) {{{
615 *
616 * Used without parameters is the normal behavior for methods for
617 * the APT acquire system. While this works great for the acquire system
618 * it is very hard to test the method and therefore the method also
619 * accepts one parameter which will switch it directly to debug test mode:
620 * The test mode expects that if "Testfile" is given as parameter
621 * the file "Testfile" should be ed-style patched with "Testfile.ed"
622 * and will write the result to "Testfile.result".
623 */
624 int main(int argc, char *argv[]) {
625 if (argc <= 1) {
626 RredMethod Mth;
627 return Mth.Run();
628 } else {
629 TestRredMethod Mth;
630 bool result = Mth.Run(argv[1]);
631 _error->DumpErrors();
632 return result;
633 }
634 }
635 /*}}}*/