reorder includes: add <config.h> if needed and include it at first
[ntk/apt.git] / methods / rred.cc
1 // Includes /*{{{*/
2 #include <config.h>
3
4 #include <apt-pkg/fileutl.h>
5 #include <apt-pkg/mmap.h>
6 #include <apt-pkg/error.h>
7 #include <apt-pkg/acquire-method.h>
8 #include <apt-pkg/strutl.h>
9 #include <apt-pkg/hashes.h>
10
11 #include <sys/stat.h>
12 #include <sys/uio.h>
13 #include <unistd.h>
14 #include <utime.h>
15 #include <stdio.h>
16 #include <errno.h>
17 #include <zlib.h>
18 #include <apti18n.h>
19 /*}}}*/
20 /** \brief RredMethod - ed-style incremential patch method {{{
21 *
22 * This method implements a patch functionality similar to "patch --ed" that is
23 * used by the "tiffany" incremental packages download stuff. It differs from
24 * "ed" insofar that it is way more restricted (and therefore secure).
25 * The currently supported ed commands are "<em>c</em>hange", "<em>a</em>dd" and
26 * "<em>d</em>elete" (diff doesn't output any other).
27 * Additionally the records must be reverse sorted by line number and
28 * may not overlap (diff *seems* to produce this kind of output).
29 * */
30 class RredMethod : public pkgAcqMethod {
31 bool Debug;
32 // the size of this doesn't really matter (except for performance)
33 const static int BUF_SIZE = 1024;
34 // the supported ed commands
35 enum Mode {MODE_CHANGED='c', MODE_DELETED='d', MODE_ADDED='a'};
36 // return values
37 enum State {ED_OK, ED_ORDERING, ED_PARSER, ED_FAILURE, MMAP_FAILED};
38
39 State applyFile(gzFile &ed_cmds, FILE *in_file, FILE *out_file,
40 unsigned long &line, char *buffer, Hashes *hash) const;
41 void ignoreLineInFile(FILE *fin, char *buffer) const;
42 void ignoreLineInFile(gzFile &fin, char *buffer) const;
43 void copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines,
44 Hashes *hash, char *buffer) const;
45 void copyLinesFromFileToFile(gzFile &fin, FILE *fout, unsigned int lines,
46 Hashes *hash, char *buffer) const;
47
48 State patchFile(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
49 State patchMMap(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
50
51 protected:
52 // the methods main method
53 virtual bool Fetch(FetchItem *Itm);
54
55 public:
56 RredMethod() : pkgAcqMethod("1.1",SingleInstance | SendConfig) {};
57 };
58 /*}}}*/
59 /** \brief applyFile - in reverse order with a tail recursion {{{
60 *
61 * As it is expected that the commands are in reversed order in the patch file
62 * we check in the first half if the command is valid, but doesn't execute it
63 * and move a step deeper. After reaching the end of the file we apply the
64 * patches in the correct order: last found command first.
65 *
66 * \param ed_cmds patch file to apply
67 * \param in_file base file we want to patch
68 * \param out_file file to write the patched result to
69 * \param line of command operation
70 * \param buffer internal used read/write buffer
71 * \param hash the created file for correctness
72 * \return the success State of the ed command executor
73 */
74 RredMethod::State RredMethod::applyFile(gzFile &ed_cmds, FILE *in_file, FILE *out_file,
75 unsigned long &line, char *buffer, Hashes *hash) const {
76 // get the current command and parse it
77 if (gzgets(ed_cmds, buffer, BUF_SIZE) == NULL) {
78 if (Debug == true)
79 std::clog << "rred: encounter end of file - we can start patching now." << std::endl;
80 line = 0;
81 return ED_OK;
82 }
83
84 // parse in the effected linenumbers
85 char* idx;
86 errno=0;
87 unsigned long const startline = strtol(buffer, &idx, 10);
88 if (errno == ERANGE || errno == EINVAL) {
89 _error->Errno("rred", "startline is an invalid number");
90 return ED_PARSER;
91 }
92 if (startline > line) {
93 _error->Error("rred: The start line (%lu) of the next command is higher than the last line (%lu). This is not allowed.", startline, line);
94 return ED_ORDERING;
95 }
96 unsigned long stopline;
97 if (*idx == ',') {
98 idx++;
99 errno=0;
100 stopline = strtol(idx, &idx, 10);
101 if (errno == ERANGE || errno == EINVAL) {
102 _error->Errno("rred", "stopline is an invalid number");
103 return ED_PARSER;
104 }
105 }
106 else {
107 stopline = startline;
108 }
109 line = startline;
110
111 // which command to execute on this line(s)?
112 switch (*idx) {
113 case MODE_CHANGED:
114 if (Debug == true)
115 std::clog << "Change from line " << startline << " to " << stopline << std::endl;
116 break;
117 case MODE_ADDED:
118 if (Debug == true)
119 std::clog << "Insert after line " << startline << std::endl;
120 break;
121 case MODE_DELETED:
122 if (Debug == true)
123 std::clog << "Delete from line " << startline << " to " << stopline << std::endl;
124 break;
125 default:
126 _error->Error("rred: Unknown ed command '%c'. Abort.", *idx);
127 return ED_PARSER;
128 }
129 unsigned char mode = *idx;
130
131 // save the current position
132 unsigned const long pos = gztell(ed_cmds);
133
134 // if this is add or change then go to the next full stop
135 unsigned int data_length = 0;
136 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
137 do {
138 ignoreLineInFile(ed_cmds, buffer);
139 data_length++;
140 }
141 while (strncmp(buffer, ".", 1) != 0);
142 data_length--; // the dot should not be copied
143 }
144
145 // do the recursive call - the last command is the one we need to execute at first
146 const State child = applyFile(ed_cmds, in_file, out_file, line, buffer, hash);
147 if (child != ED_OK) {
148 return child;
149 }
150
151 // change and delete are working on "line" - add is done after "line"
152 if (mode != MODE_ADDED)
153 line++;
154
155 // first wind to the current position and copy over all unchanged lines
156 if (line < startline) {
157 copyLinesFromFileToFile(in_file, out_file, (startline - line), hash, buffer);
158 line = startline;
159 }
160
161 if (mode != MODE_ADDED)
162 line--;
163
164 // include data from ed script
165 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
166 gzseek(ed_cmds, pos, SEEK_SET);
167 copyLinesFromFileToFile(ed_cmds, out_file, data_length, hash, buffer);
168 }
169
170 // ignore the corresponding number of lines from input
171 if (mode == MODE_CHANGED || mode == MODE_DELETED) {
172 while (line < stopline) {
173 ignoreLineInFile(in_file, buffer);
174 line++;
175 }
176 }
177 return ED_OK;
178 }
179 /*}}}*/
180 void RredMethod::copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines,/*{{{*/
181 Hashes *hash, char *buffer) const {
182 while (0 < lines--) {
183 do {
184 fgets(buffer, BUF_SIZE, fin);
185 size_t const written = fwrite(buffer, 1, strlen(buffer), fout);
186 hash->Add((unsigned char*)buffer, written);
187 } while (strlen(buffer) == (BUF_SIZE - 1) &&
188 buffer[BUF_SIZE - 2] != '\n');
189 }
190 }
191 /*}}}*/
192 void RredMethod::copyLinesFromFileToFile(gzFile &fin, FILE *fout, unsigned int lines,/*{{{*/
193 Hashes *hash, char *buffer) const {
194 while (0 < lines--) {
195 do {
196 gzgets(fin, buffer, BUF_SIZE);
197 size_t const written = fwrite(buffer, 1, strlen(buffer), fout);
198 hash->Add((unsigned char*)buffer, written);
199 } while (strlen(buffer) == (BUF_SIZE - 1) &&
200 buffer[BUF_SIZE - 2] != '\n');
201 }
202 }
203 /*}}}*/
204 void RredMethod::ignoreLineInFile(FILE *fin, char *buffer) const { /*{{{*/
205 fgets(buffer, BUF_SIZE, fin);
206 while (strlen(buffer) == (BUF_SIZE - 1) &&
207 buffer[BUF_SIZE - 2] != '\n') {
208 fgets(buffer, BUF_SIZE, fin);
209 buffer[0] = ' ';
210 }
211 }
212 /*}}}*/
213 void RredMethod::ignoreLineInFile(gzFile &fin, char *buffer) const { /*{{{*/
214 gzgets(fin, buffer, BUF_SIZE);
215 while (strlen(buffer) == (BUF_SIZE - 1) &&
216 buffer[BUF_SIZE - 2] != '\n') {
217 gzgets(fin, buffer, BUF_SIZE);
218 buffer[0] = ' ';
219 }
220 }
221 /*}}}*/
222 RredMethod::State RredMethod::patchFile(FileFd &Patch, FileFd &From, /*{{{*/
223 FileFd &out_file, Hashes *hash) const {
224 char buffer[BUF_SIZE];
225 FILE* fFrom = fdopen(From.Fd(), "r");
226 gzFile fPatch = Patch.gzFd();
227 FILE* fTo = fdopen(out_file.Fd(), "w");
228
229 /* we do a tail recursion to read the commands in the right order */
230 unsigned long line = -1; // assign highest possible value
231 State const result = applyFile(fPatch, fFrom, fTo, line, buffer, hash);
232
233 /* read the rest from infile */
234 if (result == ED_OK) {
235 while (fgets(buffer, BUF_SIZE, fFrom) != NULL) {
236 size_t const written = fwrite(buffer, 1, strlen(buffer), fTo);
237 hash->Add((unsigned char*)buffer, written);
238 }
239 fflush(fTo);
240 }
241 return result;
242 }
243 /*}}}*/
244 struct EdCommand { /*{{{*/
245 size_t data_start;
246 size_t data_end;
247 size_t data_lines;
248 size_t first_line;
249 size_t last_line;
250 char type;
251 };
252 #define IOV_COUNT 1024 /* Don't really want IOV_MAX since it can be arbitrarily large */
253 /*}}}*/
254 RredMethod::State RredMethod::patchMMap(FileFd &Patch, FileFd &From, /*{{{*/
255 FileFd &out_file, Hashes *hash) const {
256 #ifdef _POSIX_MAPPED_FILES
257 MMap ed_cmds(MMap::ReadOnly);
258 if (Patch.gzFd() != NULL) {
259 unsigned long mapSize = Patch.Size();
260 DynamicMMap* dyn = new DynamicMMap(0, mapSize, 0);
261 if (dyn->validData() == false) {
262 delete dyn;
263 return MMAP_FAILED;
264 }
265 dyn->AddSize(mapSize);
266 gzread(Patch.gzFd(), dyn->Data(), mapSize);
267 ed_cmds = *dyn;
268 } else
269 ed_cmds = MMap(Patch, MMap::ReadOnly);
270
271 MMap in_file(From, MMap::ReadOnly);
272
273 if (ed_cmds.Size() == 0 || in_file.Size() == 0)
274 return MMAP_FAILED;
275
276 EdCommand* commands = 0;
277 size_t command_count = 0;
278 size_t command_alloc = 0;
279
280 const char* begin = (char*) ed_cmds.Data();
281 const char* end = begin;
282 const char* ed_end = (char*) ed_cmds.Data() + ed_cmds.Size();
283
284 const char* input = (char*) in_file.Data();
285 const char* input_end = (char*) in_file.Data() + in_file.Size();
286
287 size_t i;
288
289 /* 1. Parse entire script. It is executed in reverse order, so we cather it
290 * in the `commands' buffer first
291 */
292
293 for(;;) {
294 EdCommand cmd;
295 cmd.data_start = 0;
296 cmd.data_end = 0;
297
298 while(begin != ed_end && *begin == '\n')
299 ++begin;
300 while(end != ed_end && *end != '\n')
301 ++end;
302 if(end == ed_end && begin == end)
303 break;
304
305 /* Determine command range */
306 const char* tmp = begin;
307
308 for(;;) {
309 /* atoll is safe despite lacking NUL-termination; we know there's an
310 * alphabetic character at end[-1]
311 */
312 if(tmp == end) {
313 cmd.first_line = atol(begin);
314 cmd.last_line = cmd.first_line;
315 break;
316 }
317 if(*tmp == ',') {
318 cmd.first_line = atol(begin);
319 cmd.last_line = atol(tmp + 1);
320 break;
321 }
322 ++tmp;
323 }
324
325 // which command to execute on this line(s)?
326 switch (end[-1]) {
327 case MODE_CHANGED:
328 if (Debug == true)
329 std::clog << "Change from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
330 break;
331 case MODE_ADDED:
332 if (Debug == true)
333 std::clog << "Insert after line " << cmd.first_line << std::endl;
334 break;
335 case MODE_DELETED:
336 if (Debug == true)
337 std::clog << "Delete from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
338 break;
339 default:
340 _error->Error("rred: Unknown ed command '%c'. Abort.", end[-1]);
341 free(commands);
342 return ED_PARSER;
343 }
344 cmd.type = end[-1];
345
346 /* Determine the size of the inserted text, so we don't have to scan this
347 * text again later.
348 */
349 begin = end + 1;
350 end = begin;
351 cmd.data_lines = 0;
352
353 if(cmd.type == MODE_ADDED || cmd.type == MODE_CHANGED) {
354 cmd.data_start = begin - (char*) ed_cmds.Data();
355 while(end != ed_end) {
356 if(*end == '\n') {
357 if(end[-1] == '.' && end[-2] == '\n')
358 break;
359 ++cmd.data_lines;
360 }
361 ++end;
362 }
363 cmd.data_end = end - (char*) ed_cmds.Data() - 1;
364 begin = end + 1;
365 end = begin;
366 }
367 if(command_count == command_alloc) {
368 command_alloc = (command_alloc + 64) * 3 / 2;
369 commands = (EdCommand*) realloc(commands, command_alloc * sizeof(EdCommand));
370 }
371 commands[command_count++] = cmd;
372 }
373
374 struct iovec* iov = new struct iovec[IOV_COUNT];
375 size_t iov_size = 0;
376
377 size_t amount, remaining;
378 size_t line = 1;
379 EdCommand* cmd;
380
381 /* 2. Execute script. We gather writes in a `struct iov' array, and flush
382 * using writev to minimize the number of system calls. Data is read
383 * directly from the memory mappings of the input file and the script.
384 */
385
386 for(i = command_count; i-- > 0; ) {
387 cmd = &commands[i];
388 if(cmd->type == MODE_ADDED)
389 amount = cmd->first_line + 1;
390 else
391 amount = cmd->first_line;
392
393 if(line < amount) {
394 begin = input;
395 while(line != amount) {
396 input = (const char*) memchr(input, '\n', input_end - input);
397 if(!input)
398 break;
399 ++line;
400 ++input;
401 }
402
403 iov[iov_size].iov_base = (void*) begin;
404 iov[iov_size].iov_len = input - begin;
405 hash->Add((const unsigned char*) begin, input - begin);
406
407 if(++iov_size == IOV_COUNT) {
408 writev(out_file.Fd(), iov, IOV_COUNT);
409 iov_size = 0;
410 }
411 }
412
413 if(cmd->type == MODE_DELETED || cmd->type == MODE_CHANGED) {
414 remaining = (cmd->last_line - cmd->first_line) + 1;
415 line += remaining;
416 while(remaining) {
417 input = (const char*) memchr(input, '\n', input_end - input);
418 if(!input)
419 break;
420 --remaining;
421 ++input;
422 }
423 }
424
425 if(cmd->type == MODE_CHANGED || cmd->type == MODE_ADDED) {
426 if(cmd->data_end != cmd->data_start) {
427 iov[iov_size].iov_base = (void*) ((char*)ed_cmds.Data() + cmd->data_start);
428 iov[iov_size].iov_len = cmd->data_end - cmd->data_start;
429 hash->Add((const unsigned char*) ((char*)ed_cmds.Data() + cmd->data_start),
430 iov[iov_size].iov_len);
431
432 if(++iov_size == IOV_COUNT) {
433 writev(out_file.Fd(), iov, IOV_COUNT);
434 iov_size = 0;
435 }
436 }
437 }
438 }
439
440 if(input != input_end) {
441 iov[iov_size].iov_base = (void*) input;
442 iov[iov_size].iov_len = input_end - input;
443 hash->Add((const unsigned char*) input, input_end - input);
444 ++iov_size;
445 }
446
447 if(iov_size) {
448 writev(out_file.Fd(), iov, iov_size);
449 iov_size = 0;
450 }
451
452 for(i = 0; i < iov_size; i += IOV_COUNT) {
453 if(iov_size - i < IOV_COUNT)
454 writev(out_file.Fd(), iov + i, iov_size - i);
455 else
456 writev(out_file.Fd(), iov + i, IOV_COUNT);
457 }
458
459 delete [] iov;
460 free(commands);
461
462 return ED_OK;
463 #else
464 return MMAP_FAILED;
465 #endif
466 }
467 /*}}}*/
468 bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/
469 {
470 Debug = _config->FindB("Debug::pkgAcquire::RRed", false);
471 URI Get = Itm->Uri;
472 string Path = Get.Host + Get.Path; // To account for relative paths
473
474 FetchResult Res;
475 Res.Filename = Itm->DestFile;
476 if (Itm->Uri.empty() == true) {
477 Path = Itm->DestFile;
478 Itm->DestFile.append(".result");
479 } else
480 URIStart(Res);
481
482 if (Debug == true)
483 std::clog << "Patching " << Path << " with " << Path
484 << ".ed and putting result into " << Itm->DestFile << std::endl;
485 // Open the source and destination files (the d'tor of FileFd will do
486 // the cleanup/closing of the fds)
487 FileFd From(Path,FileFd::ReadOnly);
488 FileFd Patch(Path+".ed",FileFd::ReadOnlyGzip);
489 FileFd To(Itm->DestFile,FileFd::WriteAtomic);
490 To.EraseOnFailure();
491 if (_error->PendingError() == true)
492 return false;
493
494 Hashes Hash;
495 // now do the actual patching
496 State const result = patchMMap(Patch, From, To, &Hash);
497 if (result == MMAP_FAILED) {
498 // retry with patchFile
499 Patch.Seek(0);
500 From.Seek(0);
501 To.Open(Itm->DestFile,FileFd::WriteAtomic);
502 if (_error->PendingError() == true)
503 return false;
504 if (patchFile(Patch, From, To, &Hash) != ED_OK) {
505 return _error->WarningE("rred", _("Could not patch %s with mmap and with file operation usage - the patch seems to be corrupt."), Path.c_str());
506 } else if (Debug == true) {
507 std::clog << "rred: finished file patching of " << Path << " after mmap failed." << std::endl;
508 }
509 } else if (result != ED_OK) {
510 return _error->Errno("rred", _("Could not patch %s with mmap (but no mmap specific fail) - the patch seems to be corrupt."), Path.c_str());
511 } else if (Debug == true) {
512 std::clog << "rred: finished mmap patching of " << Path << std::endl;
513 }
514
515 // write out the result
516 From.Close();
517 Patch.Close();
518 To.Close();
519
520 /* Transfer the modification times from the patch file
521 to be able to see in which state the file should be
522 and use the access time from the "old" file */
523 struct stat BufBase, BufPatch;
524 if (stat(Path.c_str(),&BufBase) != 0 ||
525 stat(string(Path+".ed").c_str(),&BufPatch) != 0)
526 return _error->Errno("stat",_("Failed to stat"));
527
528 struct utimbuf TimeBuf;
529 TimeBuf.actime = BufBase.st_atime;
530 TimeBuf.modtime = BufPatch.st_mtime;
531 if (utime(Itm->DestFile.c_str(),&TimeBuf) != 0)
532 return _error->Errno("utime",_("Failed to set modification time"));
533
534 if (stat(Itm->DestFile.c_str(),&BufBase) != 0)
535 return _error->Errno("stat",_("Failed to stat"));
536
537 // return done
538 Res.LastModified = BufBase.st_mtime;
539 Res.Size = BufBase.st_size;
540 Res.TakeHashes(Hash);
541 URIDone(Res);
542
543 return true;
544 }
545 /*}}}*/
546 /** \brief Wrapper class for testing rred */ /*{{{*/
547 class TestRredMethod : public RredMethod {
548 public:
549 /** \brief Run rred in debug test mode
550 *
551 * This method can be used to run the rred method outside
552 * of the "normal" acquire environment for easier testing.
553 *
554 * \param base basename of all files involved in this rred test
555 */
556 bool Run(char const *base) {
557 _config->CndSet("Debug::pkgAcquire::RRed", "true");
558 FetchItem *test = new FetchItem;
559 test->DestFile = base;
560 return Fetch(test);
561 }
562 };
563 /*}}}*/
564 /** \brief Starter for the rred method (or its test method) {{{
565 *
566 * Used without parameters is the normal behavior for methods for
567 * the APT acquire system. While this works great for the acquire system
568 * it is very hard to test the method and therefore the method also
569 * accepts one parameter which will switch it directly to debug test mode:
570 * The test mode expects that if "Testfile" is given as parameter
571 * the file "Testfile" should be ed-style patched with "Testfile.ed"
572 * and will write the result to "Testfile.result".
573 */
574 int main(int argc, char *argv[]) {
575 if (argc <= 1) {
576 RredMethod Mth;
577 return Mth.Run();
578 } else {
579 TestRredMethod Mth;
580 bool result = Mth.Run(argv[1]);
581 _error->DumpErrors();
582 return result;
583 }
584 }
585 /*}}}*/