merge from the mvo branch
[ntk/apt.git] / methods / rred.cc
1 // Includes /*{{{*/
2 #include <apt-pkg/fileutl.h>
3 #include <apt-pkg/mmap.h>
4 #include <apt-pkg/error.h>
5 #include <apt-pkg/acquire-method.h>
6 #include <apt-pkg/strutl.h>
7 #include <apt-pkg/hashes.h>
8
9 #include <sys/stat.h>
10 #include <sys/uio.h>
11 #include <unistd.h>
12 #include <utime.h>
13 #include <stdio.h>
14 #include <errno.h>
15 #include <apti18n.h>
16 /*}}}*/
17 /** \brief RredMethod - ed-style incremential patch method {{{
18 *
19 * This method implements a patch functionality similar to "patch --ed" that is
20 * used by the "tiffany" incremental packages download stuff. It differs from
21 * "ed" insofar that it is way more restricted (and therefore secure).
22 * The currently supported ed commands are "<em>c</em>hange", "<em>a</em>dd" and
23 * "<em>d</em>elete" (diff doesn't output any other).
24 * Additionally the records must be reverse sorted by line number and
25 * may not overlap (diff *seems* to produce this kind of output).
26 * */
27 class RredMethod : public pkgAcqMethod {
28 bool Debug;
29 // the size of this doesn't really matter (except for performance)
30 const static int BUF_SIZE = 1024;
31 // the supported ed commands
32 enum Mode {MODE_CHANGED='c', MODE_DELETED='d', MODE_ADDED='a'};
33 // return values
34 enum State {ED_OK, ED_ORDERING, ED_PARSER, ED_FAILURE, MMAP_FAILED};
35
36 State applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_file,
37 unsigned long &line, char *buffer, Hashes *hash) const;
38 void ignoreLineInFile(FILE *fin, char *buffer) const;
39 void copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines,
40 Hashes *hash, char *buffer) const;
41
42 State patchFile(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
43 State patchMMap(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
44
45 protected:
46 // the methods main method
47 virtual bool Fetch(FetchItem *Itm);
48
49 public:
50 RredMethod() : pkgAcqMethod("1.1",SingleInstance | SendConfig) {};
51 };
52 /*}}}*/
53 /** \brief applyFile - in reverse order with a tail recursion {{{
54 *
55 * As it is expected that the commands are in reversed order in the patch file
56 * we check in the first half if the command is valid, but doesn't execute it
57 * and move a step deeper. After reaching the end of the file we apply the
58 * patches in the correct order: last found command first.
59 *
60 * \param ed_cmds patch file to apply
61 * \param in_file base file we want to patch
62 * \param out_file file to write the patched result to
63 * \param line of command operation
64 * \param buffer internal used read/write buffer
65 * \param hash the created file for correctness
66 * \return the success State of the ed command executor
67 */
68 RredMethod::State RredMethod::applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_file,
69 unsigned long &line, char *buffer, Hashes *hash) const {
70 // get the current command and parse it
71 if (fgets(buffer, BUF_SIZE, ed_cmds) == NULL) {
72 if (Debug == true)
73 std::clog << "rred: encounter end of file - we can start patching now." << std::endl;
74 line = 0;
75 return ED_OK;
76 }
77
78 // parse in the effected linenumbers
79 char* idx;
80 errno=0;
81 unsigned long const startline = strtol(buffer, &idx, 10);
82 if (errno == ERANGE || errno == EINVAL) {
83 _error->Errno("rred", "startline is an invalid number");
84 return ED_PARSER;
85 }
86 if (startline > line) {
87 _error->Error("rred: The start line (%lu) of the next command is higher than the last line (%lu). This is not allowed.", startline, line);
88 return ED_ORDERING;
89 }
90 unsigned long stopline;
91 if (*idx == ',') {
92 idx++;
93 errno=0;
94 stopline = strtol(idx, &idx, 10);
95 if (errno == ERANGE || errno == EINVAL) {
96 _error->Errno("rred", "stopline is an invalid number");
97 return ED_PARSER;
98 }
99 }
100 else {
101 stopline = startline;
102 }
103 line = startline;
104
105 // which command to execute on this line(s)?
106 switch (*idx) {
107 case MODE_CHANGED:
108 if (Debug == true)
109 std::clog << "Change from line " << startline << " to " << stopline << std::endl;
110 break;
111 case MODE_ADDED:
112 if (Debug == true)
113 std::clog << "Insert after line " << startline << std::endl;
114 break;
115 case MODE_DELETED:
116 if (Debug == true)
117 std::clog << "Delete from line " << startline << " to " << stopline << std::endl;
118 break;
119 default:
120 _error->Error("rred: Unknown ed command '%c'. Abort.", *idx);
121 return ED_PARSER;
122 }
123 unsigned char mode = *idx;
124
125 // save the current position
126 unsigned const long pos = ftell(ed_cmds);
127
128 // if this is add or change then go to the next full stop
129 unsigned int data_length = 0;
130 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
131 do {
132 ignoreLineInFile(ed_cmds, buffer);
133 data_length++;
134 }
135 while (strncmp(buffer, ".", 1) != 0);
136 data_length--; // the dot should not be copied
137 }
138
139 // do the recursive call - the last command is the one we need to execute at first
140 const State child = applyFile(ed_cmds, in_file, out_file, line, buffer, hash);
141 if (child != ED_OK) {
142 return child;
143 }
144
145 // change and delete are working on "line" - add is done after "line"
146 if (mode != MODE_ADDED)
147 line++;
148
149 // first wind to the current position and copy over all unchanged lines
150 if (line < startline) {
151 copyLinesFromFileToFile(in_file, out_file, (startline - line), hash, buffer);
152 line = startline;
153 }
154
155 if (mode != MODE_ADDED)
156 line--;
157
158 // include data from ed script
159 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
160 fseek(ed_cmds, pos, SEEK_SET);
161 copyLinesFromFileToFile(ed_cmds, out_file, data_length, hash, buffer);
162 }
163
164 // ignore the corresponding number of lines from input
165 if (mode == MODE_CHANGED || mode == MODE_DELETED) {
166 while (line < stopline) {
167 ignoreLineInFile(in_file, buffer);
168 line++;
169 }
170 }
171 return ED_OK;
172 }
173 /*}}}*/
174 void RredMethod::copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines,/*{{{*/
175 Hashes *hash, char *buffer) const {
176 while (0 < lines--) {
177 do {
178 fgets(buffer, BUF_SIZE, fin);
179 size_t const written = fwrite(buffer, 1, strlen(buffer), fout);
180 hash->Add((unsigned char*)buffer, written);
181 } while (strlen(buffer) == (BUF_SIZE - 1) &&
182 buffer[BUF_SIZE - 2] != '\n');
183 }
184 }
185 /*}}}*/
186 void RredMethod::ignoreLineInFile(FILE *fin, char *buffer) const { /*{{{*/
187 fgets(buffer, BUF_SIZE, fin);
188 while (strlen(buffer) == (BUF_SIZE - 1) &&
189 buffer[BUF_SIZE - 2] != '\n') {
190 fgets(buffer, BUF_SIZE, fin);
191 buffer[0] = ' ';
192 }
193 }
194 /*}}}*/
195 RredMethod::State RredMethod::patchFile(FileFd &Patch, FileFd &From, /*{{{*/
196 FileFd &out_file, Hashes *hash) const {
197 char buffer[BUF_SIZE];
198 FILE* fFrom = fdopen(From.Fd(), "r");
199 FILE* fPatch = fdopen(Patch.Fd(), "r");
200 FILE* fTo = fdopen(out_file.Fd(), "w");
201
202 /* we do a tail recursion to read the commands in the right order */
203 unsigned long line = -1; // assign highest possible value
204 State const result = applyFile(fPatch, fFrom, fTo, line, buffer, hash);
205
206 /* read the rest from infile */
207 if (result == ED_OK) {
208 while (fgets(buffer, BUF_SIZE, fFrom) != NULL) {
209 size_t const written = fwrite(buffer, 1, strlen(buffer), fTo);
210 hash->Add((unsigned char*)buffer, written);
211 }
212 fflush(fTo);
213 }
214 return result;
215 }
216 /*}}}*/
217 struct EdCommand { /*{{{*/
218 size_t data_start;
219 size_t data_end;
220 size_t data_lines;
221 size_t first_line;
222 size_t last_line;
223 char type;
224 };
225 #define IOV_COUNT 1024 /* Don't really want IOV_MAX since it can be arbitrarily large */
226 /*}}}*/
227 RredMethod::State RredMethod::patchMMap(FileFd &Patch, FileFd &From, /*{{{*/
228 FileFd &out_file, Hashes *hash) const {
229 #ifdef _POSIX_MAPPED_FILES
230 MMap ed_cmds(Patch, MMap::ReadOnly);
231 MMap in_file(From, MMap::ReadOnly);
232
233 if (ed_cmds.Size() == 0 || in_file.Size() == 0)
234 return MMAP_FAILED;
235
236 EdCommand* commands = 0;
237 size_t command_count = 0;
238 size_t command_alloc = 0;
239
240 const char* begin = (char*) ed_cmds.Data();
241 const char* end = begin;
242 const char* ed_end = (char*) ed_cmds.Data() + ed_cmds.Size();
243
244 const char* input = (char*) in_file.Data();
245 const char* input_end = (char*) in_file.Data() + in_file.Size();
246
247 size_t i;
248
249 /* 1. Parse entire script. It is executed in reverse order, so we cather it
250 * in the `commands' buffer first
251 */
252
253 for(;;) {
254 EdCommand cmd;
255 cmd.data_start = 0;
256 cmd.data_end = 0;
257
258 while(begin != ed_end && *begin == '\n')
259 ++begin;
260 while(end != ed_end && *end != '\n')
261 ++end;
262 if(end == ed_end && begin == end)
263 break;
264
265 /* Determine command range */
266 const char* tmp = begin;
267
268 for(;;) {
269 /* atoll is safe despite lacking NUL-termination; we know there's an
270 * alphabetic character at end[-1]
271 */
272 if(tmp == end) {
273 cmd.first_line = atol(begin);
274 cmd.last_line = cmd.first_line;
275 break;
276 }
277 if(*tmp == ',') {
278 cmd.first_line = atol(begin);
279 cmd.last_line = atol(tmp + 1);
280 break;
281 }
282 ++tmp;
283 }
284
285 // which command to execute on this line(s)?
286 switch (end[-1]) {
287 case MODE_CHANGED:
288 if (Debug == true)
289 std::clog << "Change from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
290 break;
291 case MODE_ADDED:
292 if (Debug == true)
293 std::clog << "Insert after line " << cmd.first_line << std::endl;
294 break;
295 case MODE_DELETED:
296 if (Debug == true)
297 std::clog << "Delete from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
298 break;
299 default:
300 _error->Error("rred: Unknown ed command '%c'. Abort.", end[-1]);
301 free(commands);
302 return ED_PARSER;
303 }
304 cmd.type = end[-1];
305
306 /* Determine the size of the inserted text, so we don't have to scan this
307 * text again later.
308 */
309 begin = end + 1;
310 end = begin;
311 cmd.data_lines = 0;
312
313 if(cmd.type == MODE_ADDED || cmd.type == MODE_CHANGED) {
314 cmd.data_start = begin - (char*) ed_cmds.Data();
315 while(end != ed_end) {
316 if(*end == '\n') {
317 if(end[-1] == '.' && end[-2] == '\n')
318 break;
319 ++cmd.data_lines;
320 }
321 ++end;
322 }
323 cmd.data_end = end - (char*) ed_cmds.Data() - 1;
324 begin = end + 1;
325 end = begin;
326 }
327 if(command_count == command_alloc) {
328 command_alloc = (command_alloc + 64) * 3 / 2;
329 commands = (EdCommand*) realloc(commands, command_alloc * sizeof(EdCommand));
330 }
331 commands[command_count++] = cmd;
332 }
333
334 struct iovec* iov = new struct iovec[IOV_COUNT];
335 size_t iov_size = 0;
336
337 size_t amount, remaining;
338 size_t line = 1;
339 EdCommand* cmd;
340
341 /* 2. Execute script. We gather writes in a `struct iov' array, and flush
342 * using writev to minimize the number of system calls. Data is read
343 * directly from the memory mappings of the input file and the script.
344 */
345
346 for(i = command_count; i-- > 0; ) {
347 cmd = &commands[i];
348 if(cmd->type == MODE_ADDED)
349 amount = cmd->first_line + 1;
350 else
351 amount = cmd->first_line;
352
353 if(line < amount) {
354 begin = input;
355 while(line != amount) {
356 input = (const char*) memchr(input, '\n', input_end - input);
357 if(!input)
358 break;
359 ++line;
360 ++input;
361 }
362
363 iov[iov_size].iov_base = (void*) begin;
364 iov[iov_size].iov_len = input - begin;
365 hash->Add((const unsigned char*) begin, input - begin);
366
367 if(++iov_size == IOV_COUNT) {
368 writev(out_file.Fd(), iov, IOV_COUNT);
369 iov_size = 0;
370 }
371 }
372
373 if(cmd->type == MODE_DELETED || cmd->type == MODE_CHANGED) {
374 remaining = (cmd->last_line - cmd->first_line) + 1;
375 line += remaining;
376 while(remaining) {
377 input = (const char*) memchr(input, '\n', input_end - input);
378 if(!input)
379 break;
380 --remaining;
381 ++input;
382 }
383 }
384
385 if(cmd->type == MODE_CHANGED || cmd->type == MODE_ADDED) {
386 if(cmd->data_end != cmd->data_start) {
387 iov[iov_size].iov_base = (void*) ((char*)ed_cmds.Data() + cmd->data_start);
388 iov[iov_size].iov_len = cmd->data_end - cmd->data_start;
389 hash->Add((const unsigned char*) ((char*)ed_cmds.Data() + cmd->data_start),
390 iov[iov_size].iov_len);
391
392 if(++iov_size == IOV_COUNT) {
393 writev(out_file.Fd(), iov, IOV_COUNT);
394 iov_size = 0;
395 }
396 }
397 }
398 }
399
400 if(input != input_end) {
401 iov[iov_size].iov_base = (void*) input;
402 iov[iov_size].iov_len = input_end - input;
403 hash->Add((const unsigned char*) input, input_end - input);
404 ++iov_size;
405 }
406
407 if(iov_size) {
408 writev(out_file.Fd(), iov, iov_size);
409 iov_size = 0;
410 }
411
412 for(i = 0; i < iov_size; i += IOV_COUNT) {
413 if(iov_size - i < IOV_COUNT)
414 writev(out_file.Fd(), iov + i, iov_size - i);
415 else
416 writev(out_file.Fd(), iov + i, IOV_COUNT);
417 }
418
419 delete [] iov;
420 free(commands);
421
422 return ED_OK;
423 #else
424 return MMAP_FAILED;
425 #endif
426 }
427 /*}}}*/
428 bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/
429 {
430 Debug = _config->FindB("Debug::pkgAcquire::RRed", false);
431 URI Get = Itm->Uri;
432 string Path = Get.Host + Get.Path; // To account for relative paths
433
434 FetchResult Res;
435 Res.Filename = Itm->DestFile;
436 if (Itm->Uri.empty() == true) {
437 Path = Itm->DestFile;
438 Itm->DestFile.append(".result");
439 } else
440 URIStart(Res);
441
442 if (Debug == true)
443 std::clog << "Patching " << Path << " with " << Path
444 << ".ed and putting result into " << Itm->DestFile << std::endl;
445 // Open the source and destination files (the d'tor of FileFd will do
446 // the cleanup/closing of the fds)
447 FileFd From(Path,FileFd::ReadOnly);
448 FileFd Patch(Path+".ed",FileFd::ReadOnly);
449 FileFd To(Itm->DestFile,FileFd::WriteEmpty);
450 To.EraseOnFailure();
451 if (_error->PendingError() == true)
452 return false;
453
454 Hashes Hash;
455 // now do the actual patching
456 State const result = patchMMap(Patch, From, To, &Hash);
457 if (result == MMAP_FAILED) {
458 // retry with patchFile
459 lseek(Patch.Fd(), 0, SEEK_SET);
460 lseek(From.Fd(), 0, SEEK_SET);
461 To.Open(Itm->DestFile,FileFd::WriteEmpty);
462 if (_error->PendingError() == true)
463 return false;
464 if (patchFile(Patch, From, To, &Hash) != ED_OK) {
465 return _error->WarningE("rred", _("Could not patch %s with mmap and with file operation usage - the patch seems to be corrupt."), Path.c_str());
466 } else if (Debug == true) {
467 std::clog << "rred: finished file patching of " << Path << " after mmap failed." << std::endl;
468 }
469 } else if (result != ED_OK) {
470 return _error->Errno("rred", _("Could not patch %s with mmap (but no mmap specific fail) - the patch seems to be corrupt."), Path.c_str());
471 } else if (Debug == true) {
472 std::clog << "rred: finished mmap patching of " << Path << std::endl;
473 }
474
475 // write out the result
476 From.Close();
477 Patch.Close();
478 To.Close();
479
480 // Transfer the modification times
481 struct stat Buf;
482 if (stat(Path.c_str(),&Buf) != 0)
483 return _error->Errno("stat",_("Failed to stat"));
484
485 struct utimbuf TimeBuf;
486 TimeBuf.actime = Buf.st_atime;
487 TimeBuf.modtime = Buf.st_mtime;
488 if (utime(Itm->DestFile.c_str(),&TimeBuf) != 0)
489 return _error->Errno("utime",_("Failed to set modification time"));
490
491 if (stat(Itm->DestFile.c_str(),&Buf) != 0)
492 return _error->Errno("stat",_("Failed to stat"));
493
494 // return done
495 Res.LastModified = Buf.st_mtime;
496 Res.Size = Buf.st_size;
497 Res.TakeHashes(Hash);
498 URIDone(Res);
499
500 return true;
501 }
502 /*}}}*/
503 /** \brief Wrapper class for testing rred */ /*{{{*/
504 class TestRredMethod : public RredMethod {
505 public:
506 /** \brief Run rred in debug test mode
507 *
508 * This method can be used to run the rred method outside
509 * of the "normal" acquire environment for easier testing.
510 *
511 * \param base basename of all files involved in this rred test
512 */
513 bool Run(char const *base) {
514 _config->CndSet("Debug::pkgAcquire::RRed", "true");
515 FetchItem *test = new FetchItem;
516 test->DestFile = base;
517 return Fetch(test);
518 }
519 };
520 /*}}}*/
521 /** \brief Starter for the rred method (or its test method) {{{
522 *
523 * Used without parameters is the normal behavior for methods for
524 * the APT acquire system. While this works great for the acquire system
525 * it is very hard to test the method and therefore the method also
526 * accepts one parameter which will switch it directly to debug test mode:
527 * The test mode expects that if "Testfile" is given as parameter
528 * the file "Testfile" should be ed-style patched with "Testfile.ed"
529 * and will write the result to "Testfile.result".
530 */
531 int main(int argc, char *argv[]) {
532 if (argc <= 1) {
533 RredMethod Mth;
534 return Mth.Run();
535 } else {
536 TestRredMethod Mth;
537 bool result = Mth.Run(argv[1]);
538 _error->DumpErrors();
539 return result;
540 }
541 }
542 /*}}}*/