merged from the debian-sid branch
[ntk/apt.git] / methods / rred.cc
1 // Includes /*{{{*/
2 #include <apt-pkg/fileutl.h>
3 #include <apt-pkg/mmap.h>
4 #include <apt-pkg/error.h>
5 #include <apt-pkg/acquire-method.h>
6 #include <apt-pkg/strutl.h>
7 #include <apt-pkg/hashes.h>
8
9 #include <sys/stat.h>
10 #include <sys/uio.h>
11 #include <unistd.h>
12 #include <utime.h>
13 #include <stdio.h>
14 #include <errno.h>
15 #include <zlib.h>
16 #include <apti18n.h>
17 /*}}}*/
18 /** \brief RredMethod - ed-style incremential patch method {{{
19 *
20 * This method implements a patch functionality similar to "patch --ed" that is
21 * used by the "tiffany" incremental packages download stuff. It differs from
22 * "ed" insofar that it is way more restricted (and therefore secure).
23 * The currently supported ed commands are "<em>c</em>hange", "<em>a</em>dd" and
24 * "<em>d</em>elete" (diff doesn't output any other).
25 * Additionally the records must be reverse sorted by line number and
26 * may not overlap (diff *seems* to produce this kind of output).
27 * */
28 class RredMethod : public pkgAcqMethod {
29 bool Debug;
30 // the size of this doesn't really matter (except for performance)
31 const static int BUF_SIZE = 1024;
32 // the supported ed commands
33 enum Mode {MODE_CHANGED='c', MODE_DELETED='d', MODE_ADDED='a'};
34 // return values
35 enum State {ED_OK, ED_ORDERING, ED_PARSER, ED_FAILURE, MMAP_FAILED};
36
37 State applyFile(gzFile &ed_cmds, FILE *in_file, FILE *out_file,
38 unsigned long &line, char *buffer, Hashes *hash) const;
39 void ignoreLineInFile(FILE *fin, char *buffer) const;
40 void ignoreLineInFile(gzFile &fin, char *buffer) const;
41 void copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines,
42 Hashes *hash, char *buffer) const;
43 void copyLinesFromFileToFile(gzFile &fin, FILE *fout, unsigned int lines,
44 Hashes *hash, char *buffer) const;
45
46 State patchFile(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
47 State patchMMap(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
48
49 protected:
50 // the methods main method
51 virtual bool Fetch(FetchItem *Itm);
52
53 public:
54 RredMethod() : pkgAcqMethod("1.1",SingleInstance | SendConfig), Debug(false) {};
55 };
56 /*}}}*/
57 /** \brief applyFile - in reverse order with a tail recursion {{{
58 *
59 * As it is expected that the commands are in reversed order in the patch file
60 * we check in the first half if the command is valid, but doesn't execute it
61 * and move a step deeper. After reaching the end of the file we apply the
62 * patches in the correct order: last found command first.
63 *
64 * \param ed_cmds patch file to apply
65 * \param in_file base file we want to patch
66 * \param out_file file to write the patched result to
67 * \param line of command operation
68 * \param buffer internal used read/write buffer
69 * \param hash the created file for correctness
70 * \return the success State of the ed command executor
71 */
72 RredMethod::State RredMethod::applyFile(gzFile &ed_cmds, FILE *in_file, FILE *out_file,
73 unsigned long &line, char *buffer, Hashes *hash) const {
74 // get the current command and parse it
75 if (gzgets(ed_cmds, buffer, BUF_SIZE) == NULL) {
76 if (Debug == true)
77 std::clog << "rred: encounter end of file - we can start patching now." << std::endl;
78 line = 0;
79 return ED_OK;
80 }
81
82 // parse in the effected linenumbers
83 char* idx;
84 errno=0;
85 unsigned long const startline = strtol(buffer, &idx, 10);
86 if (errno == ERANGE || errno == EINVAL) {
87 _error->Errno("rred", "startline is an invalid number");
88 return ED_PARSER;
89 }
90 if (startline > line) {
91 _error->Error("rred: The start line (%lu) of the next command is higher than the last line (%lu). This is not allowed.", startline, line);
92 return ED_ORDERING;
93 }
94 unsigned long stopline;
95 if (*idx == ',') {
96 idx++;
97 errno=0;
98 stopline = strtol(idx, &idx, 10);
99 if (errno == ERANGE || errno == EINVAL) {
100 _error->Errno("rred", "stopline is an invalid number");
101 return ED_PARSER;
102 }
103 }
104 else {
105 stopline = startline;
106 }
107 line = startline;
108
109 // which command to execute on this line(s)?
110 switch (*idx) {
111 case MODE_CHANGED:
112 if (Debug == true)
113 std::clog << "Change from line " << startline << " to " << stopline << std::endl;
114 break;
115 case MODE_ADDED:
116 if (Debug == true)
117 std::clog << "Insert after line " << startline << std::endl;
118 break;
119 case MODE_DELETED:
120 if (Debug == true)
121 std::clog << "Delete from line " << startline << " to " << stopline << std::endl;
122 break;
123 default:
124 _error->Error("rred: Unknown ed command '%c'. Abort.", *idx);
125 return ED_PARSER;
126 }
127 unsigned char mode = *idx;
128
129 // save the current position
130 unsigned const long pos = gztell(ed_cmds);
131
132 // if this is add or change then go to the next full stop
133 unsigned int data_length = 0;
134 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
135 do {
136 ignoreLineInFile(ed_cmds, buffer);
137 data_length++;
138 }
139 while (strncmp(buffer, ".", 1) != 0);
140 data_length--; // the dot should not be copied
141 }
142
143 // do the recursive call - the last command is the one we need to execute at first
144 const State child = applyFile(ed_cmds, in_file, out_file, line, buffer, hash);
145 if (child != ED_OK) {
146 return child;
147 }
148
149 // change and delete are working on "line" - add is done after "line"
150 if (mode != MODE_ADDED)
151 line++;
152
153 // first wind to the current position and copy over all unchanged lines
154 if (line < startline) {
155 copyLinesFromFileToFile(in_file, out_file, (startline - line), hash, buffer);
156 line = startline;
157 }
158
159 if (mode != MODE_ADDED)
160 line--;
161
162 // include data from ed script
163 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
164 gzseek(ed_cmds, pos, SEEK_SET);
165 copyLinesFromFileToFile(ed_cmds, out_file, data_length, hash, buffer);
166 }
167
168 // ignore the corresponding number of lines from input
169 if (mode == MODE_CHANGED || mode == MODE_DELETED) {
170 while (line < stopline) {
171 ignoreLineInFile(in_file, buffer);
172 line++;
173 }
174 }
175 return ED_OK;
176 }
177 /*}}}*/
178 void RredMethod::copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines,/*{{{*/
179 Hashes *hash, char *buffer) const {
180 while (0 < lines--) {
181 do {
182 fgets(buffer, BUF_SIZE, fin);
183 size_t const written = fwrite(buffer, 1, strlen(buffer), fout);
184 hash->Add((unsigned char*)buffer, written);
185 } while (strlen(buffer) == (BUF_SIZE - 1) &&
186 buffer[BUF_SIZE - 2] != '\n');
187 }
188 }
189 /*}}}*/
190 void RredMethod::copyLinesFromFileToFile(gzFile &fin, FILE *fout, unsigned int lines,/*{{{*/
191 Hashes *hash, char *buffer) const {
192 while (0 < lines--) {
193 do {
194 gzgets(fin, buffer, BUF_SIZE);
195 size_t const written = fwrite(buffer, 1, strlen(buffer), fout);
196 hash->Add((unsigned char*)buffer, written);
197 } while (strlen(buffer) == (BUF_SIZE - 1) &&
198 buffer[BUF_SIZE - 2] != '\n');
199 }
200 }
201 /*}}}*/
202 void RredMethod::ignoreLineInFile(FILE *fin, char *buffer) const { /*{{{*/
203 fgets(buffer, BUF_SIZE, fin);
204 while (strlen(buffer) == (BUF_SIZE - 1) &&
205 buffer[BUF_SIZE - 2] != '\n') {
206 fgets(buffer, BUF_SIZE, fin);
207 buffer[0] = ' ';
208 }
209 }
210 /*}}}*/
211 void RredMethod::ignoreLineInFile(gzFile &fin, char *buffer) const { /*{{{*/
212 gzgets(fin, buffer, BUF_SIZE);
213 while (strlen(buffer) == (BUF_SIZE - 1) &&
214 buffer[BUF_SIZE - 2] != '\n') {
215 gzgets(fin, buffer, BUF_SIZE);
216 buffer[0] = ' ';
217 }
218 }
219 /*}}}*/
220 RredMethod::State RredMethod::patchFile(FileFd &Patch, FileFd &From, /*{{{*/
221 FileFd &out_file, Hashes *hash) const {
222 char buffer[BUF_SIZE];
223 FILE* fFrom = fdopen(From.Fd(), "r");
224 gzFile fPatch = Patch.gzFd();
225 FILE* fTo = fdopen(out_file.Fd(), "w");
226
227 /* we do a tail recursion to read the commands in the right order */
228 unsigned long line = -1; // assign highest possible value
229 State const result = applyFile(fPatch, fFrom, fTo, line, buffer, hash);
230
231 /* read the rest from infile */
232 if (result == ED_OK) {
233 while (fgets(buffer, BUF_SIZE, fFrom) != NULL) {
234 size_t const written = fwrite(buffer, 1, strlen(buffer), fTo);
235 hash->Add((unsigned char*)buffer, written);
236 }
237 fflush(fTo);
238 }
239 return result;
240 }
241 /*}}}*/
242 /* struct EdCommand {{{*/
243 #ifdef _POSIX_MAPPED_FILES
244 struct EdCommand {
245 size_t data_start;
246 size_t data_end;
247 size_t data_lines;
248 size_t first_line;
249 size_t last_line;
250 char type;
251 };
252 #define IOV_COUNT 1024 /* Don't really want IOV_MAX since it can be arbitrarily large */
253 #endif
254 /*}}}*/
255 RredMethod::State RredMethod::patchMMap(FileFd &Patch, FileFd &From, /*{{{*/
256 FileFd &out_file, Hashes *hash) const {
257 #ifdef _POSIX_MAPPED_FILES
258 MMap ed_cmds(MMap::ReadOnly);
259 if (Patch.gzFd() != NULL) {
260 unsigned long mapSize = Patch.Size();
261 DynamicMMap* dyn = new DynamicMMap(0, mapSize, 0);
262 if (dyn->validData() == false) {
263 delete dyn;
264 return MMAP_FAILED;
265 }
266 dyn->AddSize(mapSize);
267 gzread(Patch.gzFd(), dyn->Data(), mapSize);
268 ed_cmds = *dyn;
269 } else
270 ed_cmds = MMap(Patch, MMap::ReadOnly);
271
272 MMap in_file(From, MMap::ReadOnly);
273
274 if (ed_cmds.Size() == 0 || in_file.Size() == 0)
275 return MMAP_FAILED;
276
277 EdCommand* commands = 0;
278 size_t command_count = 0;
279 size_t command_alloc = 0;
280
281 const char* begin = (char*) ed_cmds.Data();
282 const char* end = begin;
283 const char* ed_end = (char*) ed_cmds.Data() + ed_cmds.Size();
284
285 const char* input = (char*) in_file.Data();
286 const char* input_end = (char*) in_file.Data() + in_file.Size();
287
288 size_t i;
289
290 /* 1. Parse entire script. It is executed in reverse order, so we cather it
291 * in the `commands' buffer first
292 */
293
294 for(;;) {
295 EdCommand cmd;
296 cmd.data_start = 0;
297 cmd.data_end = 0;
298
299 while(begin != ed_end && *begin == '\n')
300 ++begin;
301 while(end != ed_end && *end != '\n')
302 ++end;
303 if(end == ed_end && begin == end)
304 break;
305
306 /* Determine command range */
307 const char* tmp = begin;
308
309 for(;;) {
310 /* atoll is safe despite lacking NUL-termination; we know there's an
311 * alphabetic character at end[-1]
312 */
313 if(tmp == end) {
314 cmd.first_line = atol(begin);
315 cmd.last_line = cmd.first_line;
316 break;
317 }
318 if(*tmp == ',') {
319 cmd.first_line = atol(begin);
320 cmd.last_line = atol(tmp + 1);
321 break;
322 }
323 ++tmp;
324 }
325
326 // which command to execute on this line(s)?
327 switch (end[-1]) {
328 case MODE_CHANGED:
329 if (Debug == true)
330 std::clog << "Change from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
331 break;
332 case MODE_ADDED:
333 if (Debug == true)
334 std::clog << "Insert after line " << cmd.first_line << std::endl;
335 break;
336 case MODE_DELETED:
337 if (Debug == true)
338 std::clog << "Delete from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
339 break;
340 default:
341 _error->Error("rred: Unknown ed command '%c'. Abort.", end[-1]);
342 free(commands);
343 return ED_PARSER;
344 }
345 cmd.type = end[-1];
346
347 /* Determine the size of the inserted text, so we don't have to scan this
348 * text again later.
349 */
350 begin = end + 1;
351 end = begin;
352 cmd.data_lines = 0;
353
354 if(cmd.type == MODE_ADDED || cmd.type == MODE_CHANGED) {
355 cmd.data_start = begin - (char*) ed_cmds.Data();
356 while(end != ed_end) {
357 if(*end == '\n') {
358 if(end[-1] == '.' && end[-2] == '\n')
359 break;
360 ++cmd.data_lines;
361 }
362 ++end;
363 }
364 cmd.data_end = end - (char*) ed_cmds.Data() - 1;
365 begin = end + 1;
366 end = begin;
367 }
368 if(command_count == command_alloc) {
369 command_alloc = (command_alloc + 64) * 3 / 2;
370 commands = (EdCommand*) realloc(commands, command_alloc * sizeof(EdCommand));
371 }
372 commands[command_count++] = cmd;
373 }
374
375 struct iovec* iov = new struct iovec[IOV_COUNT];
376 size_t iov_size = 0;
377
378 size_t amount, remaining;
379 size_t line = 1;
380 EdCommand* cmd;
381
382 /* 2. Execute script. We gather writes in a `struct iov' array, and flush
383 * using writev to minimize the number of system calls. Data is read
384 * directly from the memory mappings of the input file and the script.
385 */
386
387 for(i = command_count; i-- > 0; ) {
388 cmd = &commands[i];
389 if(cmd->type == MODE_ADDED)
390 amount = cmd->first_line + 1;
391 else
392 amount = cmd->first_line;
393
394 if(line < amount) {
395 begin = input;
396 while(line != amount) {
397 input = (const char*) memchr(input, '\n', input_end - input);
398 if(!input)
399 break;
400 ++line;
401 ++input;
402 }
403
404 iov[iov_size].iov_base = (void*) begin;
405 iov[iov_size].iov_len = input - begin;
406 hash->Add((const unsigned char*) begin, input - begin);
407
408 if(++iov_size == IOV_COUNT) {
409 writev(out_file.Fd(), iov, IOV_COUNT);
410 iov_size = 0;
411 }
412 }
413
414 if(cmd->type == MODE_DELETED || cmd->type == MODE_CHANGED) {
415 remaining = (cmd->last_line - cmd->first_line) + 1;
416 line += remaining;
417 while(remaining) {
418 input = (const char*) memchr(input, '\n', input_end - input);
419 if(!input)
420 break;
421 --remaining;
422 ++input;
423 }
424 }
425
426 if(cmd->type == MODE_CHANGED || cmd->type == MODE_ADDED) {
427 if(cmd->data_end != cmd->data_start) {
428 iov[iov_size].iov_base = (void*) ((char*)ed_cmds.Data() + cmd->data_start);
429 iov[iov_size].iov_len = cmd->data_end - cmd->data_start;
430 hash->Add((const unsigned char*) ((char*)ed_cmds.Data() + cmd->data_start),
431 iov[iov_size].iov_len);
432
433 if(++iov_size == IOV_COUNT) {
434 writev(out_file.Fd(), iov, IOV_COUNT);
435 iov_size = 0;
436 }
437 }
438 }
439 }
440
441 if(input != input_end) {
442 iov[iov_size].iov_base = (void*) input;
443 iov[iov_size].iov_len = input_end - input;
444 hash->Add((const unsigned char*) input, input_end - input);
445 ++iov_size;
446 }
447
448 if(iov_size) {
449 writev(out_file.Fd(), iov, iov_size);
450 iov_size = 0;
451 }
452
453 for(i = 0; i < iov_size; i += IOV_COUNT) {
454 if(iov_size - i < IOV_COUNT)
455 writev(out_file.Fd(), iov + i, iov_size - i);
456 else
457 writev(out_file.Fd(), iov + i, IOV_COUNT);
458 }
459
460 delete [] iov;
461 free(commands);
462
463 return ED_OK;
464 #else
465 return MMAP_FAILED;
466 #endif
467 }
468 /*}}}*/
469 bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/
470 {
471 Debug = _config->FindB("Debug::pkgAcquire::RRed", false);
472 URI Get = Itm->Uri;
473 string Path = Get.Host + Get.Path; // To account for relative paths
474
475 FetchResult Res;
476 Res.Filename = Itm->DestFile;
477 if (Itm->Uri.empty() == true) {
478 Path = Itm->DestFile;
479 Itm->DestFile.append(".result");
480 } else
481 URIStart(Res);
482
483 if (Debug == true)
484 std::clog << "Patching " << Path << " with " << Path
485 << ".ed and putting result into " << Itm->DestFile << std::endl;
486 // Open the source and destination files (the d'tor of FileFd will do
487 // the cleanup/closing of the fds)
488 FileFd From(Path,FileFd::ReadOnly);
489 FileFd Patch(Path+".ed",FileFd::ReadOnlyGzip);
490 FileFd To(Itm->DestFile,FileFd::WriteAtomic);
491 To.EraseOnFailure();
492 if (_error->PendingError() == true)
493 return false;
494
495 Hashes Hash;
496 // now do the actual patching
497 State const result = patchMMap(Patch, From, To, &Hash);
498 if (result == MMAP_FAILED) {
499 // retry with patchFile
500 Patch.Seek(0);
501 From.Seek(0);
502 To.Open(Itm->DestFile,FileFd::WriteAtomic);
503 if (_error->PendingError() == true)
504 return false;
505 if (patchFile(Patch, From, To, &Hash) != ED_OK) {
506 return _error->WarningE("rred", _("Could not patch %s with mmap and with file operation usage - the patch seems to be corrupt."), Path.c_str());
507 } else if (Debug == true) {
508 std::clog << "rred: finished file patching of " << Path << " after mmap failed." << std::endl;
509 }
510 } else if (result != ED_OK) {
511 return _error->Errno("rred", _("Could not patch %s with mmap (but no mmap specific fail) - the patch seems to be corrupt."), Path.c_str());
512 } else if (Debug == true) {
513 std::clog << "rred: finished mmap patching of " << Path << std::endl;
514 }
515
516 // write out the result
517 From.Close();
518 Patch.Close();
519 To.Close();
520
521 /* Transfer the modification times from the patch file
522 to be able to see in which state the file should be
523 and use the access time from the "old" file */
524 struct stat BufBase, BufPatch;
525 if (stat(Path.c_str(),&BufBase) != 0 ||
526 stat(string(Path+".ed").c_str(),&BufPatch) != 0)
527 return _error->Errno("stat",_("Failed to stat"));
528
529 struct utimbuf TimeBuf;
530 TimeBuf.actime = BufBase.st_atime;
531 TimeBuf.modtime = BufPatch.st_mtime;
532 if (utime(Itm->DestFile.c_str(),&TimeBuf) != 0)
533 return _error->Errno("utime",_("Failed to set modification time"));
534
535 if (stat(Itm->DestFile.c_str(),&BufBase) != 0)
536 return _error->Errno("stat",_("Failed to stat"));
537
538 // return done
539 Res.LastModified = BufBase.st_mtime;
540 Res.Size = BufBase.st_size;
541 Res.TakeHashes(Hash);
542 URIDone(Res);
543
544 return true;
545 }
546 /*}}}*/
547 /** \brief Wrapper class for testing rred */ /*{{{*/
548 class TestRredMethod : public RredMethod {
549 public:
550 /** \brief Run rred in debug test mode
551 *
552 * This method can be used to run the rred method outside
553 * of the "normal" acquire environment for easier testing.
554 *
555 * \param base basename of all files involved in this rred test
556 */
557 bool Run(char const *base) {
558 _config->CndSet("Debug::pkgAcquire::RRed", "true");
559 FetchItem *test = new FetchItem;
560 test->DestFile = base;
561 return Fetch(test);
562 }
563 };
564 /*}}}*/
565 /** \brief Starter for the rred method (or its test method) {{{
566 *
567 * Used without parameters is the normal behavior for methods for
568 * the APT acquire system. While this works great for the acquire system
569 * it is very hard to test the method and therefore the method also
570 * accepts one parameter which will switch it directly to debug test mode:
571 * The test mode expects that if "Testfile" is given as parameter
572 * the file "Testfile" should be ed-style patched with "Testfile.ed"
573 * and will write the result to "Testfile.result".
574 */
575 int main(int argc, char *argv[]) {
576 if (argc <= 1) {
577 RredMethod Mth;
578 return Mth.Run();
579 } else {
580 TestRredMethod Mth;
581 bool result = Mth.Run(argv[1]);
582 _error->DumpErrors();
583 return result;
584 }
585 }
586 /*}}}*/