merged from lp:~mvo/apt/mvo (which is really lp:~donkult/apt/sid with some updated...
[ntk/apt.git] / methods / rred.cc
1 // Includes /*{{{*/
2 #include <apt-pkg/fileutl.h>
3 #include <apt-pkg/mmap.h>
4 #include <apt-pkg/error.h>
5 #include <apt-pkg/acquire-method.h>
6 #include <apt-pkg/strutl.h>
7 #include <apt-pkg/hashes.h>
8
9 #include <sys/stat.h>
10 #include <sys/uio.h>
11 #include <unistd.h>
12 #include <utime.h>
13 #include <stdio.h>
14 #include <errno.h>
15 #include <zlib.h>
16 #include <apti18n.h>
17 /*}}}*/
18 /** \brief RredMethod - ed-style incremential patch method {{{
19 *
20 * This method implements a patch functionality similar to "patch --ed" that is
21 * used by the "tiffany" incremental packages download stuff. It differs from
22 * "ed" insofar that it is way more restricted (and therefore secure).
23 * The currently supported ed commands are "<em>c</em>hange", "<em>a</em>dd" and
24 * "<em>d</em>elete" (diff doesn't output any other).
25 * Additionally the records must be reverse sorted by line number and
26 * may not overlap (diff *seems* to produce this kind of output).
27 * */
28 class RredMethod : public pkgAcqMethod {
29 bool Debug;
30 // the size of this doesn't really matter (except for performance)
31 const static int BUF_SIZE = 1024;
32 // the supported ed commands
33 enum Mode {MODE_CHANGED='c', MODE_DELETED='d', MODE_ADDED='a'};
34 // return values
35 enum State {ED_OK, ED_ORDERING, ED_PARSER, ED_FAILURE, MMAP_FAILED};
36
37 State applyFile(gzFile &ed_cmds, FILE *in_file, FILE *out_file,
38 unsigned long &line, char *buffer, Hashes *hash) const;
39 void ignoreLineInFile(FILE *fin, char *buffer) const;
40 void ignoreLineInFile(gzFile &fin, char *buffer) const;
41 void copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines,
42 Hashes *hash, char *buffer) const;
43 void copyLinesFromFileToFile(gzFile &fin, FILE *fout, unsigned int lines,
44 Hashes *hash, char *buffer) const;
45
46 State patchFile(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
47 State patchMMap(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
48
49 protected:
50 // the methods main method
51 virtual bool Fetch(FetchItem *Itm);
52
53 public:
54 RredMethod() : pkgAcqMethod("1.1",SingleInstance | SendConfig) {};
55 };
56 /*}}}*/
57 /** \brief applyFile - in reverse order with a tail recursion {{{
58 *
59 * As it is expected that the commands are in reversed order in the patch file
60 * we check in the first half if the command is valid, but doesn't execute it
61 * and move a step deeper. After reaching the end of the file we apply the
62 * patches in the correct order: last found command first.
63 *
64 * \param ed_cmds patch file to apply
65 * \param in_file base file we want to patch
66 * \param out_file file to write the patched result to
67 * \param line of command operation
68 * \param buffer internal used read/write buffer
69 * \param hash the created file for correctness
70 * \return the success State of the ed command executor
71 */
72 RredMethod::State RredMethod::applyFile(gzFile &ed_cmds, FILE *in_file, FILE *out_file,
73 unsigned long &line, char *buffer, Hashes *hash) const {
74 // get the current command and parse it
75 if (gzgets(ed_cmds, buffer, BUF_SIZE) == NULL) {
76 if (Debug == true)
77 std::clog << "rred: encounter end of file - we can start patching now." << std::endl;
78 line = 0;
79 return ED_OK;
80 }
81
82 // parse in the effected linenumbers
83 char* idx;
84 errno=0;
85 unsigned long const startline = strtol(buffer, &idx, 10);
86 if (errno == ERANGE || errno == EINVAL) {
87 _error->Errno("rred", "startline is an invalid number");
88 return ED_PARSER;
89 }
90 if (startline > line) {
91 _error->Error("rred: The start line (%lu) of the next command is higher than the last line (%lu). This is not allowed.", startline, line);
92 return ED_ORDERING;
93 }
94 unsigned long stopline;
95 if (*idx == ',') {
96 idx++;
97 errno=0;
98 stopline = strtol(idx, &idx, 10);
99 if (errno == ERANGE || errno == EINVAL) {
100 _error->Errno("rred", "stopline is an invalid number");
101 return ED_PARSER;
102 }
103 }
104 else {
105 stopline = startline;
106 }
107 line = startline;
108
109 // which command to execute on this line(s)?
110 switch (*idx) {
111 case MODE_CHANGED:
112 if (Debug == true)
113 std::clog << "Change from line " << startline << " to " << stopline << std::endl;
114 break;
115 case MODE_ADDED:
116 if (Debug == true)
117 std::clog << "Insert after line " << startline << std::endl;
118 break;
119 case MODE_DELETED:
120 if (Debug == true)
121 std::clog << "Delete from line " << startline << " to " << stopline << std::endl;
122 break;
123 default:
124 _error->Error("rred: Unknown ed command '%c'. Abort.", *idx);
125 return ED_PARSER;
126 }
127 unsigned char mode = *idx;
128
129 // save the current position
130 unsigned const long pos = gztell(ed_cmds);
131
132 // if this is add or change then go to the next full stop
133 unsigned int data_length = 0;
134 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
135 do {
136 ignoreLineInFile(ed_cmds, buffer);
137 data_length++;
138 }
139 while (strncmp(buffer, ".", 1) != 0);
140 data_length--; // the dot should not be copied
141 }
142
143 // do the recursive call - the last command is the one we need to execute at first
144 const State child = applyFile(ed_cmds, in_file, out_file, line, buffer, hash);
145 if (child != ED_OK) {
146 return child;
147 }
148
149 // change and delete are working on "line" - add is done after "line"
150 if (mode != MODE_ADDED)
151 line++;
152
153 // first wind to the current position and copy over all unchanged lines
154 if (line < startline) {
155 copyLinesFromFileToFile(in_file, out_file, (startline - line), hash, buffer);
156 line = startline;
157 }
158
159 if (mode != MODE_ADDED)
160 line--;
161
162 // include data from ed script
163 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
164 gzseek(ed_cmds, pos, SEEK_SET);
165 copyLinesFromFileToFile(ed_cmds, out_file, data_length, hash, buffer);
166 }
167
168 // ignore the corresponding number of lines from input
169 if (mode == MODE_CHANGED || mode == MODE_DELETED) {
170 while (line < stopline) {
171 ignoreLineInFile(in_file, buffer);
172 line++;
173 }
174 }
175 return ED_OK;
176 }
177 /*}}}*/
178 void RredMethod::copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines,/*{{{*/
179 Hashes *hash, char *buffer) const {
180 while (0 < lines--) {
181 do {
182 fgets(buffer, BUF_SIZE, fin);
183 size_t const written = fwrite(buffer, 1, strlen(buffer), fout);
184 hash->Add((unsigned char*)buffer, written);
185 } while (strlen(buffer) == (BUF_SIZE - 1) &&
186 buffer[BUF_SIZE - 2] != '\n');
187 }
188 }
189 /*}}}*/
190 void RredMethod::copyLinesFromFileToFile(gzFile &fin, FILE *fout, unsigned int lines,/*{{{*/
191 Hashes *hash, char *buffer) const {
192 while (0 < lines--) {
193 do {
194 gzgets(fin, buffer, BUF_SIZE);
195 size_t const written = fwrite(buffer, 1, strlen(buffer), fout);
196 hash->Add((unsigned char*)buffer, written);
197 } while (strlen(buffer) == (BUF_SIZE - 1) &&
198 buffer[BUF_SIZE - 2] != '\n');
199 }
200 }
201 /*}}}*/
202 void RredMethod::ignoreLineInFile(FILE *fin, char *buffer) const { /*{{{*/
203 fgets(buffer, BUF_SIZE, fin);
204 while (strlen(buffer) == (BUF_SIZE - 1) &&
205 buffer[BUF_SIZE - 2] != '\n') {
206 fgets(buffer, BUF_SIZE, fin);
207 buffer[0] = ' ';
208 }
209 }
210 /*}}}*/
211 void RredMethod::ignoreLineInFile(gzFile &fin, char *buffer) const { /*{{{*/
212 gzgets(fin, buffer, BUF_SIZE);
213 while (strlen(buffer) == (BUF_SIZE - 1) &&
214 buffer[BUF_SIZE - 2] != '\n') {
215 gzgets(fin, buffer, BUF_SIZE);
216 buffer[0] = ' ';
217 }
218 }
219 /*}}}*/
220 RredMethod::State RredMethod::patchFile(FileFd &Patch, FileFd &From, /*{{{*/
221 FileFd &out_file, Hashes *hash) const {
222 char buffer[BUF_SIZE];
223 FILE* fFrom = fdopen(From.Fd(), "r");
224 gzFile fPatch = Patch.gzFd();
225 FILE* fTo = fdopen(out_file.Fd(), "w");
226
227 /* we do a tail recursion to read the commands in the right order */
228 unsigned long line = -1; // assign highest possible value
229 State const result = applyFile(fPatch, fFrom, fTo, line, buffer, hash);
230
231 /* read the rest from infile */
232 if (result == ED_OK) {
233 while (fgets(buffer, BUF_SIZE, fFrom) != NULL) {
234 size_t const written = fwrite(buffer, 1, strlen(buffer), fTo);
235 hash->Add((unsigned char*)buffer, written);
236 }
237 fflush(fTo);
238 }
239 return result;
240 }
241 /*}}}*/
242 struct EdCommand { /*{{{*/
243 size_t data_start;
244 size_t data_end;
245 size_t data_lines;
246 size_t first_line;
247 size_t last_line;
248 char type;
249 };
250 #define IOV_COUNT 1024 /* Don't really want IOV_MAX since it can be arbitrarily large */
251 /*}}}*/
252 RredMethod::State RredMethod::patchMMap(FileFd &Patch, FileFd &From, /*{{{*/
253 FileFd &out_file, Hashes *hash) const {
254 #ifdef _POSIX_MAPPED_FILES
255 MMap ed_cmds(Patch, MMap::ReadOnly);
256 if (Patch.gzFd() != NULL) {
257 unsigned long mapSize = Patch.Size();
258 DynamicMMap dyn(0, mapSize, 0);
259 gzread(Patch.gzFd(), dyn.Data(), mapSize);
260 ed_cmds = dyn;
261 }
262 MMap in_file(From, MMap::ReadOnly);
263
264 if (ed_cmds.Size() == 0 || in_file.Size() == 0)
265 return MMAP_FAILED;
266
267 EdCommand* commands = 0;
268 size_t command_count = 0;
269 size_t command_alloc = 0;
270
271 const char* begin = (char*) ed_cmds.Data();
272 const char* end = begin;
273 const char* ed_end = (char*) ed_cmds.Data() + ed_cmds.Size();
274
275 const char* input = (char*) in_file.Data();
276 const char* input_end = (char*) in_file.Data() + in_file.Size();
277
278 size_t i;
279
280 /* 1. Parse entire script. It is executed in reverse order, so we cather it
281 * in the `commands' buffer first
282 */
283
284 for(;;) {
285 EdCommand cmd;
286 cmd.data_start = 0;
287 cmd.data_end = 0;
288
289 while(begin != ed_end && *begin == '\n')
290 ++begin;
291 while(end != ed_end && *end != '\n')
292 ++end;
293 if(end == ed_end && begin == end)
294 break;
295
296 /* Determine command range */
297 const char* tmp = begin;
298
299 for(;;) {
300 /* atoll is safe despite lacking NUL-termination; we know there's an
301 * alphabetic character at end[-1]
302 */
303 if(tmp == end) {
304 cmd.first_line = atol(begin);
305 cmd.last_line = cmd.first_line;
306 break;
307 }
308 if(*tmp == ',') {
309 cmd.first_line = atol(begin);
310 cmd.last_line = atol(tmp + 1);
311 break;
312 }
313 ++tmp;
314 }
315
316 // which command to execute on this line(s)?
317 switch (end[-1]) {
318 case MODE_CHANGED:
319 if (Debug == true)
320 std::clog << "Change from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
321 break;
322 case MODE_ADDED:
323 if (Debug == true)
324 std::clog << "Insert after line " << cmd.first_line << std::endl;
325 break;
326 case MODE_DELETED:
327 if (Debug == true)
328 std::clog << "Delete from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
329 break;
330 default:
331 _error->Error("rred: Unknown ed command '%c'. Abort.", end[-1]);
332 free(commands);
333 return ED_PARSER;
334 }
335 cmd.type = end[-1];
336
337 /* Determine the size of the inserted text, so we don't have to scan this
338 * text again later.
339 */
340 begin = end + 1;
341 end = begin;
342 cmd.data_lines = 0;
343
344 if(cmd.type == MODE_ADDED || cmd.type == MODE_CHANGED) {
345 cmd.data_start = begin - (char*) ed_cmds.Data();
346 while(end != ed_end) {
347 if(*end == '\n') {
348 if(end[-1] == '.' && end[-2] == '\n')
349 break;
350 ++cmd.data_lines;
351 }
352 ++end;
353 }
354 cmd.data_end = end - (char*) ed_cmds.Data() - 1;
355 begin = end + 1;
356 end = begin;
357 }
358 if(command_count == command_alloc) {
359 command_alloc = (command_alloc + 64) * 3 / 2;
360 commands = (EdCommand*) realloc(commands, command_alloc * sizeof(EdCommand));
361 }
362 commands[command_count++] = cmd;
363 }
364
365 struct iovec* iov = new struct iovec[IOV_COUNT];
366 size_t iov_size = 0;
367
368 size_t amount, remaining;
369 size_t line = 1;
370 EdCommand* cmd;
371
372 /* 2. Execute script. We gather writes in a `struct iov' array, and flush
373 * using writev to minimize the number of system calls. Data is read
374 * directly from the memory mappings of the input file and the script.
375 */
376
377 for(i = command_count; i-- > 0; ) {
378 cmd = &commands[i];
379 if(cmd->type == MODE_ADDED)
380 amount = cmd->first_line + 1;
381 else
382 amount = cmd->first_line;
383
384 if(line < amount) {
385 begin = input;
386 while(line != amount) {
387 input = (const char*) memchr(input, '\n', input_end - input);
388 if(!input)
389 break;
390 ++line;
391 ++input;
392 }
393
394 iov[iov_size].iov_base = (void*) begin;
395 iov[iov_size].iov_len = input - begin;
396 hash->Add((const unsigned char*) begin, input - begin);
397
398 if(++iov_size == IOV_COUNT) {
399 writev(out_file.Fd(), iov, IOV_COUNT);
400 iov_size = 0;
401 }
402 }
403
404 if(cmd->type == MODE_DELETED || cmd->type == MODE_CHANGED) {
405 remaining = (cmd->last_line - cmd->first_line) + 1;
406 line += remaining;
407 while(remaining) {
408 input = (const char*) memchr(input, '\n', input_end - input);
409 if(!input)
410 break;
411 --remaining;
412 ++input;
413 }
414 }
415
416 if(cmd->type == MODE_CHANGED || cmd->type == MODE_ADDED) {
417 if(cmd->data_end != cmd->data_start) {
418 iov[iov_size].iov_base = (void*) ((char*)ed_cmds.Data() + cmd->data_start);
419 iov[iov_size].iov_len = cmd->data_end - cmd->data_start;
420 hash->Add((const unsigned char*) ((char*)ed_cmds.Data() + cmd->data_start),
421 iov[iov_size].iov_len);
422
423 if(++iov_size == IOV_COUNT) {
424 writev(out_file.Fd(), iov, IOV_COUNT);
425 iov_size = 0;
426 }
427 }
428 }
429 }
430
431 if(input != input_end) {
432 iov[iov_size].iov_base = (void*) input;
433 iov[iov_size].iov_len = input_end - input;
434 hash->Add((const unsigned char*) input, input_end - input);
435 ++iov_size;
436 }
437
438 if(iov_size) {
439 writev(out_file.Fd(), iov, iov_size);
440 iov_size = 0;
441 }
442
443 for(i = 0; i < iov_size; i += IOV_COUNT) {
444 if(iov_size - i < IOV_COUNT)
445 writev(out_file.Fd(), iov + i, iov_size - i);
446 else
447 writev(out_file.Fd(), iov + i, IOV_COUNT);
448 }
449
450 delete [] iov;
451 free(commands);
452
453 return ED_OK;
454 #else
455 return MMAP_FAILED;
456 #endif
457 }
458 /*}}}*/
459 bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/
460 {
461 Debug = _config->FindB("Debug::pkgAcquire::RRed", false);
462 URI Get = Itm->Uri;
463 string Path = Get.Host + Get.Path; // To account for relative paths
464
465 FetchResult Res;
466 Res.Filename = Itm->DestFile;
467 if (Itm->Uri.empty() == true) {
468 Path = Itm->DestFile;
469 Itm->DestFile.append(".result");
470 } else
471 URIStart(Res);
472
473 if (Debug == true)
474 std::clog << "Patching " << Path << " with " << Path
475 << ".ed and putting result into " << Itm->DestFile << std::endl;
476 // Open the source and destination files (the d'tor of FileFd will do
477 // the cleanup/closing of the fds)
478 FileFd From(Path,FileFd::ReadOnly);
479 FileFd Patch(Path+".ed",FileFd::ReadOnlyGzip);
480 FileFd To(Itm->DestFile,FileFd::WriteAtomic);
481 To.EraseOnFailure();
482 if (_error->PendingError() == true)
483 return false;
484
485 Hashes Hash;
486 // now do the actual patching
487 State const result = patchMMap(Patch, From, To, &Hash);
488 if (result == MMAP_FAILED) {
489 // retry with patchFile
490 Patch.Seek(0);
491 From.Seek(0);
492 To.Open(Itm->DestFile,FileFd::WriteAtomic);
493 if (_error->PendingError() == true)
494 return false;
495 if (patchFile(Patch, From, To, &Hash) != ED_OK) {
496 return _error->WarningE("rred", _("Could not patch %s with mmap and with file operation usage - the patch seems to be corrupt."), Path.c_str());
497 } else if (Debug == true) {
498 std::clog << "rred: finished file patching of " << Path << " after mmap failed." << std::endl;
499 }
500 } else if (result != ED_OK) {
501 return _error->Errno("rred", _("Could not patch %s with mmap (but no mmap specific fail) - the patch seems to be corrupt."), Path.c_str());
502 } else if (Debug == true) {
503 std::clog << "rred: finished mmap patching of " << Path << std::endl;
504 }
505
506 // write out the result
507 From.Close();
508 Patch.Close();
509 To.Close();
510
511 /* Transfer the modification times from the patch file
512 to be able to see in which state the file should be
513 and use the access time from the "old" file */
514 struct stat BufBase, BufPatch;
515 if (stat(Path.c_str(),&BufBase) != 0 ||
516 stat(string(Path+".ed").c_str(),&BufPatch) != 0)
517 return _error->Errno("stat",_("Failed to stat"));
518
519 struct utimbuf TimeBuf;
520 TimeBuf.actime = BufBase.st_atime;
521 TimeBuf.modtime = BufPatch.st_mtime;
522 if (utime(Itm->DestFile.c_str(),&TimeBuf) != 0)
523 return _error->Errno("utime",_("Failed to set modification time"));
524
525 if (stat(Itm->DestFile.c_str(),&BufBase) != 0)
526 return _error->Errno("stat",_("Failed to stat"));
527
528 // return done
529 Res.LastModified = BufBase.st_mtime;
530 Res.Size = BufBase.st_size;
531 Res.TakeHashes(Hash);
532 URIDone(Res);
533
534 return true;
535 }
536 /*}}}*/
537 /** \brief Wrapper class for testing rred */ /*{{{*/
538 class TestRredMethod : public RredMethod {
539 public:
540 /** \brief Run rred in debug test mode
541 *
542 * This method can be used to run the rred method outside
543 * of the "normal" acquire environment for easier testing.
544 *
545 * \param base basename of all files involved in this rred test
546 */
547 bool Run(char const *base) {
548 _config->CndSet("Debug::pkgAcquire::RRed", "true");
549 FetchItem *test = new FetchItem;
550 test->DestFile = base;
551 return Fetch(test);
552 }
553 };
554 /*}}}*/
555 /** \brief Starter for the rred method (or its test method) {{{
556 *
557 * Used without parameters is the normal behavior for methods for
558 * the APT acquire system. While this works great for the acquire system
559 * it is very hard to test the method and therefore the method also
560 * accepts one parameter which will switch it directly to debug test mode:
561 * The test mode expects that if "Testfile" is given as parameter
562 * the file "Testfile" should be ed-style patched with "Testfile.ed"
563 * and will write the result to "Testfile.result".
564 */
565 int main(int argc, char *argv[]) {
566 if (argc <= 1) {
567 RredMethod Mth;
568 return Mth.Run();
569 } else {
570 TestRredMethod Mth;
571 bool result = Mth.Run(argv[1]);
572 _error->DumpErrors();
573 return result;
574 }
575 }
576 /*}}}*/