merge with debian-sid (some version screw going on here)
[ntk/apt.git] / methods / rred.cc
1 // Includes /*{{{*/
2 #include <config.h>
3
4 #include <apt-pkg/fileutl.h>
5 #include <apt-pkg/mmap.h>
6 #include <apt-pkg/error.h>
7 #include <apt-pkg/acquire-method.h>
8 #include <apt-pkg/strutl.h>
9 #include <apt-pkg/hashes.h>
10 #include <apt-pkg/configuration.h>
11
12 #include <sys/stat.h>
13 #include <sys/uio.h>
14 #include <unistd.h>
15 #include <utime.h>
16 #include <stdio.h>
17 #include <errno.h>
18 #include <zlib.h>
19 #include <apti18n.h>
20 /*}}}*/
21 /** \brief RredMethod - ed-style incremential patch method {{{
22 *
23 * This method implements a patch functionality similar to "patch --ed" that is
24 * used by the "tiffany" incremental packages download stuff. It differs from
25 * "ed" insofar that it is way more restricted (and therefore secure).
26 * The currently supported ed commands are "<em>c</em>hange", "<em>a</em>dd" and
27 * "<em>d</em>elete" (diff doesn't output any other).
28 * Additionally the records must be reverse sorted by line number and
29 * may not overlap (diff *seems* to produce this kind of output).
30 * */
31 class RredMethod : public pkgAcqMethod {
32 bool Debug;
33 // the size of this doesn't really matter (except for performance)
34 const static int BUF_SIZE = 1024;
35 // the supported ed commands
36 enum Mode {MODE_CHANGED='c', MODE_DELETED='d', MODE_ADDED='a'};
37 // return values
38 enum State {ED_OK, ED_ORDERING, ED_PARSER, ED_FAILURE, MMAP_FAILED};
39
40 State applyFile(FileFd &ed_cmds, FileFd &in_file, FileFd &out_file,
41 unsigned long &line, char *buffer, Hashes *hash) const;
42 void ignoreLineInFile(FileFd &fin, char *buffer) const;
43 void copyLinesFromFileToFile(FileFd &fin, FileFd &fout, unsigned int lines,
44 Hashes *hash, char *buffer) const;
45
46 State patchFile(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
47 State patchMMap(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
48
49 protected:
50 // the methods main method
51 virtual bool Fetch(FetchItem *Itm);
52
53 public:
54 RredMethod() : pkgAcqMethod("1.1",SingleInstance | SendConfig), Debug(false) {};
55 };
56 /*}}}*/
57 /** \brief applyFile - in reverse order with a tail recursion {{{
58 *
59 * As it is expected that the commands are in reversed order in the patch file
60 * we check in the first half if the command is valid, but doesn't execute it
61 * and move a step deeper. After reaching the end of the file we apply the
62 * patches in the correct order: last found command first.
63 *
64 * \param ed_cmds patch file to apply
65 * \param in_file base file we want to patch
66 * \param out_file file to write the patched result to
67 * \param line of command operation
68 * \param buffer internal used read/write buffer
69 * \param hash the created file for correctness
70 * \return the success State of the ed command executor
71 */
72 RredMethod::State RredMethod::applyFile(FileFd &ed_cmds, FileFd &in_file, FileFd &out_file,
73 unsigned long &line, char *buffer, Hashes *hash) const {
74 // get the current command and parse it
75 if (ed_cmds.ReadLine(buffer, BUF_SIZE) == NULL) {
76 if (Debug == true)
77 std::clog << "rred: encounter end of file - we can start patching now." << std::endl;
78 line = 0;
79 return ED_OK;
80 }
81
82 // parse in the effected linenumbers
83 char* idx;
84 errno=0;
85 unsigned long const startline = strtol(buffer, &idx, 10);
86 if (errno == ERANGE || errno == EINVAL) {
87 _error->Errno("rred", "startline is an invalid number");
88 return ED_PARSER;
89 }
90 if (startline > line) {
91 _error->Error("rred: The start line (%lu) of the next command is higher than the last line (%lu). This is not allowed.", startline, line);
92 return ED_ORDERING;
93 }
94 unsigned long stopline;
95 if (*idx == ',') {
96 idx++;
97 errno=0;
98 stopline = strtol(idx, &idx, 10);
99 if (errno == ERANGE || errno == EINVAL) {
100 _error->Errno("rred", "stopline is an invalid number");
101 return ED_PARSER;
102 }
103 }
104 else {
105 stopline = startline;
106 }
107 line = startline;
108
109 // which command to execute on this line(s)?
110 switch (*idx) {
111 case MODE_CHANGED:
112 if (Debug == true)
113 std::clog << "Change from line " << startline << " to " << stopline << std::endl;
114 break;
115 case MODE_ADDED:
116 if (Debug == true)
117 std::clog << "Insert after line " << startline << std::endl;
118 break;
119 case MODE_DELETED:
120 if (Debug == true)
121 std::clog << "Delete from line " << startline << " to " << stopline << std::endl;
122 break;
123 default:
124 _error->Error("rred: Unknown ed command '%c'. Abort.", *idx);
125 return ED_PARSER;
126 }
127 unsigned char mode = *idx;
128
129 // save the current position
130 unsigned const long long pos = ed_cmds.Tell();
131
132 // if this is add or change then go to the next full stop
133 unsigned int data_length = 0;
134 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
135 do {
136 ignoreLineInFile(ed_cmds, buffer);
137 data_length++;
138 }
139 while (strncmp(buffer, ".", 1) != 0);
140 data_length--; // the dot should not be copied
141 }
142
143 // do the recursive call - the last command is the one we need to execute at first
144 const State child = applyFile(ed_cmds, in_file, out_file, line, buffer, hash);
145 if (child != ED_OK) {
146 return child;
147 }
148
149 // change and delete are working on "line" - add is done after "line"
150 if (mode != MODE_ADDED)
151 line++;
152
153 // first wind to the current position and copy over all unchanged lines
154 if (line < startline) {
155 copyLinesFromFileToFile(in_file, out_file, (startline - line), hash, buffer);
156 line = startline;
157 }
158
159 if (mode != MODE_ADDED)
160 line--;
161
162 // include data from ed script
163 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
164 ed_cmds.Seek(pos);
165 copyLinesFromFileToFile(ed_cmds, out_file, data_length, hash, buffer);
166 }
167
168 // ignore the corresponding number of lines from input
169 if (mode == MODE_CHANGED || mode == MODE_DELETED) {
170 while (line < stopline) {
171 ignoreLineInFile(in_file, buffer);
172 line++;
173 }
174 }
175 return ED_OK;
176 }
177 /*}}}*/
178 void RredMethod::copyLinesFromFileToFile(FileFd &fin, FileFd &fout, unsigned int lines,/*{{{*/
179 Hashes *hash, char *buffer) const {
180 while (0 < lines--) {
181 do {
182 fin.ReadLine(buffer, BUF_SIZE);
183 unsigned long long const towrite = strlen(buffer);
184 fout.Write(buffer, towrite);
185 hash->Add((unsigned char*)buffer, towrite);
186 } while (strlen(buffer) == (BUF_SIZE - 1) &&
187 buffer[BUF_SIZE - 2] != '\n');
188 }
189 }
190 /*}}}*/
191 void RredMethod::ignoreLineInFile(FileFd &fin, char *buffer) const { /*{{{*/
192 fin.ReadLine(buffer, BUF_SIZE);
193 while (strlen(buffer) == (BUF_SIZE - 1) &&
194 buffer[BUF_SIZE - 2] != '\n') {
195 fin.ReadLine(buffer, BUF_SIZE);
196 buffer[0] = ' ';
197 }
198 }
199 /*}}}*/
200 RredMethod::State RredMethod::patchFile(FileFd &Patch, FileFd &From, /*{{{*/
201 FileFd &out_file, Hashes *hash) const {
202 char buffer[BUF_SIZE];
203
204 /* we do a tail recursion to read the commands in the right order */
205 unsigned long line = -1; // assign highest possible value
206 State const result = applyFile(Patch, From, out_file, line, buffer, hash);
207
208 /* read the rest from infile */
209 if (result == ED_OK) {
210 while (From.ReadLine(buffer, BUF_SIZE) != NULL) {
211 unsigned long long const towrite = strlen(buffer);
212 out_file.Write(buffer, towrite);
213 hash->Add((unsigned char*)buffer, towrite);
214 }
215 }
216 return result;
217 }
218 /*}}}*/
219 /* struct EdCommand {{{*/
220 #ifdef _POSIX_MAPPED_FILES
221 struct EdCommand {
222 size_t data_start;
223 size_t data_end;
224 size_t data_lines;
225 size_t first_line;
226 size_t last_line;
227 char type;
228 };
229 #define IOV_COUNT 1024 /* Don't really want IOV_MAX since it can be arbitrarily large */
230 ssize_t retry_writev(int fd, const struct iovec *iov, int iovcnt) {
231 ssize_t Res;
232 errno = 0;
233 ssize_t i = 0;
234 do {
235 Res = writev(fd, iov + i, iovcnt);
236 if (Res < 0 && errno == EINTR)
237 continue;
238 if (Res < 0)
239 return _error->Errno("writev",_("Write error"));
240 iovcnt -= Res;
241 i += Res;
242 } while (Res > 0 && iovcnt > 0);
243 return i;
244 }
245 #endif
246 /*}}}*/
247 RredMethod::State RredMethod::patchMMap(FileFd &Patch, FileFd &From, /*{{{*/
248 FileFd &out_file, Hashes *hash) const {
249 #ifdef _POSIX_MAPPED_FILES
250 MMap ed_cmds(Patch, MMap::ReadOnly);
251 MMap in_file(From, MMap::ReadOnly);
252
253 unsigned long long const ed_size = ed_cmds.Size();
254 unsigned long long const in_size = in_file.Size();
255 if (ed_size == 0 || in_size == 0)
256 return MMAP_FAILED;
257
258 EdCommand* commands = 0;
259 size_t command_count = 0;
260 size_t command_alloc = 0;
261
262 const char* begin = (char*) ed_cmds.Data();
263 const char* end = begin;
264 const char* ed_end = (char*) ed_cmds.Data() + ed_size;
265
266 const char* input = (char*) in_file.Data();
267 const char* input_end = (char*) in_file.Data() + in_size;
268
269 size_t i;
270
271 /* 1. Parse entire script. It is executed in reverse order, so we cather it
272 * in the `commands' buffer first
273 */
274
275 for(;;) {
276 EdCommand cmd;
277 cmd.data_start = 0;
278 cmd.data_end = 0;
279
280 while(begin != ed_end && *begin == '\n')
281 ++begin;
282 while(end != ed_end && *end != '\n')
283 ++end;
284 if(end == ed_end && begin == end)
285 break;
286
287 /* Determine command range */
288 const char* tmp = begin;
289
290 for(;;) {
291 /* atoll is safe despite lacking NUL-termination; we know there's an
292 * alphabetic character at end[-1]
293 */
294 if(tmp == end) {
295 cmd.first_line = atol(begin);
296 cmd.last_line = cmd.first_line;
297 break;
298 }
299 if(*tmp == ',') {
300 cmd.first_line = atol(begin);
301 cmd.last_line = atol(tmp + 1);
302 break;
303 }
304 ++tmp;
305 }
306
307 // which command to execute on this line(s)?
308 switch (end[-1]) {
309 case MODE_CHANGED:
310 if (Debug == true)
311 std::clog << "Change from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
312 break;
313 case MODE_ADDED:
314 if (Debug == true)
315 std::clog << "Insert after line " << cmd.first_line << std::endl;
316 break;
317 case MODE_DELETED:
318 if (Debug == true)
319 std::clog << "Delete from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
320 break;
321 default:
322 _error->Error("rred: Unknown ed command '%c'. Abort.", end[-1]);
323 free(commands);
324 return ED_PARSER;
325 }
326 cmd.type = end[-1];
327
328 /* Determine the size of the inserted text, so we don't have to scan this
329 * text again later.
330 */
331 begin = end + 1;
332 end = begin;
333 cmd.data_lines = 0;
334
335 if(cmd.type == MODE_ADDED || cmd.type == MODE_CHANGED) {
336 cmd.data_start = begin - (char*) ed_cmds.Data();
337 while(end != ed_end) {
338 if(*end == '\n') {
339 if(end[-1] == '.' && end[-2] == '\n')
340 break;
341 ++cmd.data_lines;
342 }
343 ++end;
344 }
345 cmd.data_end = end - (char*) ed_cmds.Data() - 1;
346 begin = end + 1;
347 end = begin;
348 }
349 if(command_count == command_alloc) {
350 command_alloc = (command_alloc + 64) * 3 / 2;
351 EdCommand* newCommands = (EdCommand*) realloc(commands, command_alloc * sizeof(EdCommand));
352 if (newCommands == NULL) {
353 free(commands);
354 return MMAP_FAILED;
355 }
356 commands = newCommands;
357 }
358 commands[command_count++] = cmd;
359 }
360
361 struct iovec* iov = new struct iovec[IOV_COUNT];
362 size_t iov_size = 0;
363
364 size_t amount, remaining;
365 size_t line = 1;
366 EdCommand* cmd;
367
368 /* 2. Execute script. We gather writes in a `struct iov' array, and flush
369 * using writev to minimize the number of system calls. Data is read
370 * directly from the memory mappings of the input file and the script.
371 */
372
373 for(i = command_count; i-- > 0; ) {
374 cmd = &commands[i];
375 if(cmd->type == MODE_ADDED)
376 amount = cmd->first_line + 1;
377 else
378 amount = cmd->first_line;
379
380 if(line < amount) {
381 begin = input;
382 while(line != amount) {
383 input = (const char*) memchr(input, '\n', input_end - input);
384 if(!input)
385 break;
386 ++line;
387 ++input;
388 }
389
390 iov[iov_size].iov_base = (void*) begin;
391 iov[iov_size].iov_len = input - begin;
392 hash->Add((const unsigned char*) begin, input - begin);
393
394 if(++iov_size == IOV_COUNT) {
395 retry_writev(out_file.Fd(), iov, IOV_COUNT);
396 iov_size = 0;
397 }
398 }
399
400 if(cmd->type == MODE_DELETED || cmd->type == MODE_CHANGED) {
401 remaining = (cmd->last_line - cmd->first_line) + 1;
402 line += remaining;
403 while(remaining) {
404 input = (const char*) memchr(input, '\n', input_end - input);
405 if(!input)
406 break;
407 --remaining;
408 ++input;
409 }
410 }
411
412 if(cmd->type == MODE_CHANGED || cmd->type == MODE_ADDED) {
413 if(cmd->data_end != cmd->data_start) {
414 iov[iov_size].iov_base = (void*) ((char*)ed_cmds.Data() + cmd->data_start);
415 iov[iov_size].iov_len = cmd->data_end - cmd->data_start;
416 hash->Add((const unsigned char*) ((char*)ed_cmds.Data() + cmd->data_start),
417 iov[iov_size].iov_len);
418
419 if(++iov_size == IOV_COUNT) {
420 retry_writev(out_file.Fd(), iov, IOV_COUNT);
421 iov_size = 0;
422 }
423 }
424 }
425 }
426
427 if(input != input_end) {
428 iov[iov_size].iov_base = (void*) input;
429 iov[iov_size].iov_len = input_end - input;
430 hash->Add((const unsigned char*) input, input_end - input);
431 ++iov_size;
432 }
433
434 if(iov_size) {
435 retry_writev(out_file.Fd(), iov, iov_size);
436 iov_size = 0;
437 }
438
439 for(i = 0; i < iov_size; i += IOV_COUNT) {
440 if(iov_size - i < IOV_COUNT)
441 retry_writev(out_file.Fd(), iov + i, iov_size - i);
442 else
443 retry_writev(out_file.Fd(), iov + i, IOV_COUNT);
444 }
445
446 delete [] iov;
447 free(commands);
448
449 return ED_OK;
450 #else
451 return MMAP_FAILED;
452 #endif
453 }
454 /*}}}*/
455 bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/
456 {
457 Debug = _config->FindB("Debug::pkgAcquire::RRed", false);
458 URI Get = Itm->Uri;
459 std::string Path = Get.Host + Get.Path; // To account for relative paths
460
461 FetchResult Res;
462 Res.Filename = Itm->DestFile;
463 if (Itm->Uri.empty() == true) {
464 Path = Itm->DestFile;
465 Itm->DestFile.append(".result");
466 } else
467 URIStart(Res);
468
469 if (Debug == true)
470 std::clog << "Patching " << Path << " with " << Path
471 << ".ed and putting result into " << Itm->DestFile << std::endl;
472 // Open the source and destination files (the d'tor of FileFd will do
473 // the cleanup/closing of the fds)
474 FileFd From(Path,FileFd::ReadOnly);
475 FileFd Patch(Path+".ed",FileFd::ReadOnly, FileFd::Gzip);
476 FileFd To(Itm->DestFile,FileFd::WriteAtomic);
477 To.EraseOnFailure();
478 if (_error->PendingError() == true)
479 return false;
480
481 Hashes Hash;
482 // now do the actual patching
483 State const result = patchMMap(Patch, From, To, &Hash);
484 if (result == MMAP_FAILED) {
485 // retry with patchFile
486 Patch.Seek(0);
487 From.Seek(0);
488 To.Open(Itm->DestFile,FileFd::WriteAtomic);
489 if (_error->PendingError() == true)
490 return false;
491 if (patchFile(Patch, From, To, &Hash) != ED_OK) {
492 return _error->WarningE("rred", _("Could not patch %s with mmap and with file operation usage - the patch seems to be corrupt."), Path.c_str());
493 } else if (Debug == true) {
494 std::clog << "rred: finished file patching of " << Path << " after mmap failed." << std::endl;
495 }
496 } else if (result != ED_OK) {
497 return _error->Errno("rred", _("Could not patch %s with mmap (but no mmap specific fail) - the patch seems to be corrupt."), Path.c_str());
498 } else if (Debug == true) {
499 std::clog << "rred: finished mmap patching of " << Path << std::endl;
500 }
501
502 // write out the result
503 From.Close();
504 Patch.Close();
505 To.Close();
506
507 /* Transfer the modification times from the patch file
508 to be able to see in which state the file should be
509 and use the access time from the "old" file */
510 struct stat BufBase, BufPatch;
511 if (stat(Path.c_str(),&BufBase) != 0 ||
512 stat(std::string(Path+".ed").c_str(),&BufPatch) != 0)
513 return _error->Errno("stat",_("Failed to stat"));
514
515 struct utimbuf TimeBuf;
516 TimeBuf.actime = BufBase.st_atime;
517 TimeBuf.modtime = BufPatch.st_mtime;
518 if (utime(Itm->DestFile.c_str(),&TimeBuf) != 0)
519 return _error->Errno("utime",_("Failed to set modification time"));
520
521 if (stat(Itm->DestFile.c_str(),&BufBase) != 0)
522 return _error->Errno("stat",_("Failed to stat"));
523
524 // return done
525 Res.LastModified = BufBase.st_mtime;
526 Res.Size = BufBase.st_size;
527 Res.TakeHashes(Hash);
528 URIDone(Res);
529
530 return true;
531 }
532 /*}}}*/
533 /** \brief Wrapper class for testing rred */ /*{{{*/
534 class TestRredMethod : public RredMethod {
535 public:
536 /** \brief Run rred in debug test mode
537 *
538 * This method can be used to run the rred method outside
539 * of the "normal" acquire environment for easier testing.
540 *
541 * \param base basename of all files involved in this rred test
542 */
543 bool Run(char const *base) {
544 _config->CndSet("Debug::pkgAcquire::RRed", "true");
545 FetchItem *test = new FetchItem;
546 test->DestFile = base;
547 return Fetch(test);
548 }
549 };
550 /*}}}*/
551 /** \brief Starter for the rred method (or its test method) {{{
552 *
553 * Used without parameters is the normal behavior for methods for
554 * the APT acquire system. While this works great for the acquire system
555 * it is very hard to test the method and therefore the method also
556 * accepts one parameter which will switch it directly to debug test mode:
557 * The test mode expects that if "Testfile" is given as parameter
558 * the file "Testfile" should be ed-style patched with "Testfile.ed"
559 * and will write the result to "Testfile.result".
560 */
561 int main(int argc, char *argv[]) {
562 if (argc <= 1) {
563 RredMethod Mth;
564 return Mth.Run();
565 } else {
566 TestRredMethod Mth;
567 bool result = Mth.Run(argv[1]);
568 _error->DumpErrors();
569 return result;
570 }
571 }
572 /*}}}*/