* apt-pkg/aptconfiguration.cc:
[ntk/apt.git] / methods / rred.cc
1 // Includes /*{{{*/
2 #include <config.h>
3
4 #include <apt-pkg/fileutl.h>
5 #include <apt-pkg/mmap.h>
6 #include <apt-pkg/error.h>
7 #include <apt-pkg/acquire-method.h>
8 #include <apt-pkg/strutl.h>
9 #include <apt-pkg/hashes.h>
10 #include <apt-pkg/configuration.h>
11
12 #include <sys/stat.h>
13 #include <sys/uio.h>
14 #include <unistd.h>
15 #include <utime.h>
16 #include <stdio.h>
17 #include <errno.h>
18 #include <zlib.h>
19 #include <apti18n.h>
20 /*}}}*/
21 /** \brief RredMethod - ed-style incremential patch method {{{
22 *
23 * This method implements a patch functionality similar to "patch --ed" that is
24 * used by the "tiffany" incremental packages download stuff. It differs from
25 * "ed" insofar that it is way more restricted (and therefore secure).
26 * The currently supported ed commands are "<em>c</em>hange", "<em>a</em>dd" and
27 * "<em>d</em>elete" (diff doesn't output any other).
28 * Additionally the records must be reverse sorted by line number and
29 * may not overlap (diff *seems* to produce this kind of output).
30 * */
31 class RredMethod : public pkgAcqMethod {
32 bool Debug;
33 // the size of this doesn't really matter (except for performance)
34 const static int BUF_SIZE = 1024;
35 // the supported ed commands
36 enum Mode {MODE_CHANGED='c', MODE_DELETED='d', MODE_ADDED='a'};
37 // return values
38 enum State {ED_OK, ED_ORDERING, ED_PARSER, ED_FAILURE, MMAP_FAILED};
39
40 State applyFile(FileFd &ed_cmds, FileFd &in_file, FileFd &out_file,
41 unsigned long &line, char *buffer, Hashes *hash) const;
42 void ignoreLineInFile(FileFd &fin, char *buffer) const;
43 void copyLinesFromFileToFile(FileFd &fin, FileFd &fout, unsigned int lines,
44 Hashes *hash, char *buffer) const;
45
46 State patchFile(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
47 State patchMMap(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
48
49 protected:
50 // the methods main method
51 virtual bool Fetch(FetchItem *Itm);
52
53 public:
54 RredMethod() : pkgAcqMethod("1.1",SingleInstance | SendConfig), Debug(false) {};
55 };
56 /*}}}*/
57 /** \brief applyFile - in reverse order with a tail recursion {{{
58 *
59 * As it is expected that the commands are in reversed order in the patch file
60 * we check in the first half if the command is valid, but doesn't execute it
61 * and move a step deeper. After reaching the end of the file we apply the
62 * patches in the correct order: last found command first.
63 *
64 * \param ed_cmds patch file to apply
65 * \param in_file base file we want to patch
66 * \param out_file file to write the patched result to
67 * \param line of command operation
68 * \param buffer internal used read/write buffer
69 * \param hash the created file for correctness
70 * \return the success State of the ed command executor
71 */
72 RredMethod::State RredMethod::applyFile(FileFd &ed_cmds, FileFd &in_file, FileFd &out_file,
73 unsigned long &line, char *buffer, Hashes *hash) const {
74 // get the current command and parse it
75 if (ed_cmds.ReadLine(buffer, BUF_SIZE) == NULL) {
76 if (Debug == true)
77 std::clog << "rred: encounter end of file - we can start patching now." << std::endl;
78 line = 0;
79 return ED_OK;
80 }
81
82 // parse in the effected linenumbers
83 char* idx;
84 errno=0;
85 unsigned long const startline = strtol(buffer, &idx, 10);
86 if (errno == ERANGE || errno == EINVAL) {
87 _error->Errno("rred", "startline is an invalid number");
88 return ED_PARSER;
89 }
90 if (startline > line) {
91 _error->Error("rred: The start line (%lu) of the next command is higher than the last line (%lu). This is not allowed.", startline, line);
92 return ED_ORDERING;
93 }
94 unsigned long stopline;
95 if (*idx == ',') {
96 idx++;
97 errno=0;
98 stopline = strtol(idx, &idx, 10);
99 if (errno == ERANGE || errno == EINVAL) {
100 _error->Errno("rred", "stopline is an invalid number");
101 return ED_PARSER;
102 }
103 }
104 else {
105 stopline = startline;
106 }
107 line = startline;
108
109 // which command to execute on this line(s)?
110 switch (*idx) {
111 case MODE_CHANGED:
112 if (Debug == true)
113 std::clog << "Change from line " << startline << " to " << stopline << std::endl;
114 break;
115 case MODE_ADDED:
116 if (Debug == true)
117 std::clog << "Insert after line " << startline << std::endl;
118 break;
119 case MODE_DELETED:
120 if (Debug == true)
121 std::clog << "Delete from line " << startline << " to " << stopline << std::endl;
122 break;
123 default:
124 _error->Error("rred: Unknown ed command '%c'. Abort.", *idx);
125 return ED_PARSER;
126 }
127 unsigned char mode = *idx;
128
129 // save the current position
130 unsigned const long long pos = ed_cmds.Tell();
131
132 // if this is add or change then go to the next full stop
133 unsigned int data_length = 0;
134 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
135 do {
136 ignoreLineInFile(ed_cmds, buffer);
137 data_length++;
138 }
139 while (strncmp(buffer, ".", 1) != 0);
140 data_length--; // the dot should not be copied
141 }
142
143 // do the recursive call - the last command is the one we need to execute at first
144 const State child = applyFile(ed_cmds, in_file, out_file, line, buffer, hash);
145 if (child != ED_OK) {
146 return child;
147 }
148
149 // change and delete are working on "line" - add is done after "line"
150 if (mode != MODE_ADDED)
151 line++;
152
153 // first wind to the current position and copy over all unchanged lines
154 if (line < startline) {
155 copyLinesFromFileToFile(in_file, out_file, (startline - line), hash, buffer);
156 line = startline;
157 }
158
159 if (mode != MODE_ADDED)
160 line--;
161
162 // include data from ed script
163 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
164 ed_cmds.Seek(pos);
165 copyLinesFromFileToFile(ed_cmds, out_file, data_length, hash, buffer);
166 }
167
168 // ignore the corresponding number of lines from input
169 if (mode == MODE_CHANGED || mode == MODE_DELETED) {
170 while (line < stopline) {
171 ignoreLineInFile(in_file, buffer);
172 line++;
173 }
174 }
175 return ED_OK;
176 }
177 /*}}}*/
178 void RredMethod::copyLinesFromFileToFile(FileFd &fin, FileFd &fout, unsigned int lines,/*{{{*/
179 Hashes *hash, char *buffer) const {
180 while (0 < lines--) {
181 do {
182 fin.ReadLine(buffer, BUF_SIZE);
183 unsigned long long const towrite = strlen(buffer);
184 fout.Write(buffer, towrite);
185 hash->Add((unsigned char*)buffer, towrite);
186 } while (strlen(buffer) == (BUF_SIZE - 1) &&
187 buffer[BUF_SIZE - 2] != '\n');
188 }
189 }
190 /*}}}*/
191 void RredMethod::ignoreLineInFile(FileFd &fin, char *buffer) const { /*{{{*/
192 fin.ReadLine(buffer, BUF_SIZE);
193 while (strlen(buffer) == (BUF_SIZE - 1) &&
194 buffer[BUF_SIZE - 2] != '\n') {
195 fin.ReadLine(buffer, BUF_SIZE);
196 buffer[0] = ' ';
197 }
198 }
199 /*}}}*/
200 RredMethod::State RredMethod::patchFile(FileFd &Patch, FileFd &From, /*{{{*/
201 FileFd &out_file, Hashes *hash) const {
202 char buffer[BUF_SIZE];
203
204 /* we do a tail recursion to read the commands in the right order */
205 unsigned long line = -1; // assign highest possible value
206 State const result = applyFile(Patch, From, out_file, line, buffer, hash);
207
208 /* read the rest from infile */
209 if (result == ED_OK) {
210 while (From.ReadLine(buffer, BUF_SIZE) != NULL) {
211 unsigned long long const towrite = strlen(buffer);
212 out_file.Write(buffer, towrite);
213 hash->Add((unsigned char*)buffer, towrite);
214 }
215 }
216 return result;
217 }
218 /*}}}*/
219 /* struct EdCommand {{{*/
#ifdef _POSIX_MAPPED_FILES
/* A single command of the ed script as gathered by patchMMap.
   The data offsets are relative to the start of the mapped script. */
struct EdCommand {
	size_t data_start;	// offset of the first byte of the text to insert (add/change)
	size_t data_end;	// offset of the "." terminating the text, i.e. one past the text
	size_t data_lines;	// number of lines the text consists of
	size_t first_line;	// first line of the base file the command refers to
	size_t last_line;	// last line of the range, same as first_line if no range was given
	char type;		// the command letter, see RredMethod::Mode
};

/* Number of iovec entries gathered before they are handed over to writev.
   Don't really want IOV_MAX since it can be arbitrarily large */
#define IOV_COUNT 1024
#endif
231 /*}}}*/
232 RredMethod::State RredMethod::patchMMap(FileFd &Patch, FileFd &From, /*{{{*/
233 FileFd &out_file, Hashes *hash) const {
234 #ifdef _POSIX_MAPPED_FILES
235 MMap ed_cmds(Patch, MMap::ReadOnly);
236 MMap in_file(From, MMap::ReadOnly);
237
238 unsigned long long const ed_size = ed_cmds.Size();
239 unsigned long long const in_size = in_file.Size();
240 if (ed_size == 0 || in_size == 0)
241 return MMAP_FAILED;
242
243 EdCommand* commands = 0;
244 size_t command_count = 0;
245 size_t command_alloc = 0;
246
247 const char* begin = (char*) ed_cmds.Data();
248 const char* end = begin;
249 const char* ed_end = (char*) ed_cmds.Data() + ed_size;
250
251 const char* input = (char*) in_file.Data();
252 const char* input_end = (char*) in_file.Data() + in_size;
253
254 size_t i;
255
256 /* 1. Parse entire script. It is executed in reverse order, so we cather it
257 * in the `commands' buffer first
258 */
259
260 for(;;) {
261 EdCommand cmd;
262 cmd.data_start = 0;
263 cmd.data_end = 0;
264
265 while(begin != ed_end && *begin == '\n')
266 ++begin;
267 while(end != ed_end && *end != '\n')
268 ++end;
269 if(end == ed_end && begin == end)
270 break;
271
272 /* Determine command range */
273 const char* tmp = begin;
274
275 for(;;) {
276 /* atoll is safe despite lacking NUL-termination; we know there's an
277 * alphabetic character at end[-1]
278 */
279 if(tmp == end) {
280 cmd.first_line = atol(begin);
281 cmd.last_line = cmd.first_line;
282 break;
283 }
284 if(*tmp == ',') {
285 cmd.first_line = atol(begin);
286 cmd.last_line = atol(tmp + 1);
287 break;
288 }
289 ++tmp;
290 }
291
292 // which command to execute on this line(s)?
293 switch (end[-1]) {
294 case MODE_CHANGED:
295 if (Debug == true)
296 std::clog << "Change from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
297 break;
298 case MODE_ADDED:
299 if (Debug == true)
300 std::clog << "Insert after line " << cmd.first_line << std::endl;
301 break;
302 case MODE_DELETED:
303 if (Debug == true)
304 std::clog << "Delete from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
305 break;
306 default:
307 _error->Error("rred: Unknown ed command '%c'. Abort.", end[-1]);
308 free(commands);
309 return ED_PARSER;
310 }
311 cmd.type = end[-1];
312
313 /* Determine the size of the inserted text, so we don't have to scan this
314 * text again later.
315 */
316 begin = end + 1;
317 end = begin;
318 cmd.data_lines = 0;
319
320 if(cmd.type == MODE_ADDED || cmd.type == MODE_CHANGED) {
321 cmd.data_start = begin - (char*) ed_cmds.Data();
322 while(end != ed_end) {
323 if(*end == '\n') {
324 if(end[-1] == '.' && end[-2] == '\n')
325 break;
326 ++cmd.data_lines;
327 }
328 ++end;
329 }
330 cmd.data_end = end - (char*) ed_cmds.Data() - 1;
331 begin = end + 1;
332 end = begin;
333 }
334 if(command_count == command_alloc) {
335 command_alloc = (command_alloc + 64) * 3 / 2;
336 commands = (EdCommand*) realloc(commands, command_alloc * sizeof(EdCommand));
337 }
338 commands[command_count++] = cmd;
339 }
340
341 struct iovec* iov = new struct iovec[IOV_COUNT];
342 size_t iov_size = 0;
343
344 size_t amount, remaining;
345 size_t line = 1;
346 EdCommand* cmd;
347
348 /* 2. Execute script. We gather writes in a `struct iov' array, and flush
349 * using writev to minimize the number of system calls. Data is read
350 * directly from the memory mappings of the input file and the script.
351 */
352
353 for(i = command_count; i-- > 0; ) {
354 cmd = &commands[i];
355 if(cmd->type == MODE_ADDED)
356 amount = cmd->first_line + 1;
357 else
358 amount = cmd->first_line;
359
360 if(line < amount) {
361 begin = input;
362 while(line != amount) {
363 input = (const char*) memchr(input, '\n', input_end - input);
364 if(!input)
365 break;
366 ++line;
367 ++input;
368 }
369
370 iov[iov_size].iov_base = (void*) begin;
371 iov[iov_size].iov_len = input - begin;
372 hash->Add((const unsigned char*) begin, input - begin);
373
374 if(++iov_size == IOV_COUNT) {
375 writev(out_file.Fd(), iov, IOV_COUNT);
376 iov_size = 0;
377 }
378 }
379
380 if(cmd->type == MODE_DELETED || cmd->type == MODE_CHANGED) {
381 remaining = (cmd->last_line - cmd->first_line) + 1;
382 line += remaining;
383 while(remaining) {
384 input = (const char*) memchr(input, '\n', input_end - input);
385 if(!input)
386 break;
387 --remaining;
388 ++input;
389 }
390 }
391
392 if(cmd->type == MODE_CHANGED || cmd->type == MODE_ADDED) {
393 if(cmd->data_end != cmd->data_start) {
394 iov[iov_size].iov_base = (void*) ((char*)ed_cmds.Data() + cmd->data_start);
395 iov[iov_size].iov_len = cmd->data_end - cmd->data_start;
396 hash->Add((const unsigned char*) ((char*)ed_cmds.Data() + cmd->data_start),
397 iov[iov_size].iov_len);
398
399 if(++iov_size == IOV_COUNT) {
400 writev(out_file.Fd(), iov, IOV_COUNT);
401 iov_size = 0;
402 }
403 }
404 }
405 }
406
407 if(input != input_end) {
408 iov[iov_size].iov_base = (void*) input;
409 iov[iov_size].iov_len = input_end - input;
410 hash->Add((const unsigned char*) input, input_end - input);
411 ++iov_size;
412 }
413
414 if(iov_size) {
415 writev(out_file.Fd(), iov, iov_size);
416 iov_size = 0;
417 }
418
419 for(i = 0; i < iov_size; i += IOV_COUNT) {
420 if(iov_size - i < IOV_COUNT)
421 writev(out_file.Fd(), iov + i, iov_size - i);
422 else
423 writev(out_file.Fd(), iov + i, IOV_COUNT);
424 }
425
426 delete [] iov;
427 free(commands);
428
429 return ED_OK;
430 #else
431 return MMAP_FAILED;
432 #endif
433 }
434 /*}}}*/
435 bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/
436 {
437 Debug = _config->FindB("Debug::pkgAcquire::RRed", false);
438 URI Get = Itm->Uri;
439 std::string Path = Get.Host + Get.Path; // To account for relative paths
440
441 FetchResult Res;
442 Res.Filename = Itm->DestFile;
443 if (Itm->Uri.empty() == true) {
444 Path = Itm->DestFile;
445 Itm->DestFile.append(".result");
446 } else
447 URIStart(Res);
448
449 if (Debug == true)
450 std::clog << "Patching " << Path << " with " << Path
451 << ".ed and putting result into " << Itm->DestFile << std::endl;
452 // Open the source and destination files (the d'tor of FileFd will do
453 // the cleanup/closing of the fds)
454 FileFd From(Path,FileFd::ReadOnly);
455 FileFd Patch(Path+".ed",FileFd::ReadOnly, FileFd::Gzip);
456 FileFd To(Itm->DestFile,FileFd::WriteAtomic);
457 To.EraseOnFailure();
458 if (_error->PendingError() == true)
459 return false;
460
461 Hashes Hash;
462 // now do the actual patching
463 State const result = patchMMap(Patch, From, To, &Hash);
464 if (result == MMAP_FAILED) {
465 // retry with patchFile
466 Patch.Seek(0);
467 From.Seek(0);
468 To.Open(Itm->DestFile,FileFd::WriteAtomic);
469 if (_error->PendingError() == true)
470 return false;
471 if (patchFile(Patch, From, To, &Hash) != ED_OK) {
472 return _error->WarningE("rred", _("Could not patch %s with mmap and with file operation usage - the patch seems to be corrupt."), Path.c_str());
473 } else if (Debug == true) {
474 std::clog << "rred: finished file patching of " << Path << " after mmap failed." << std::endl;
475 }
476 } else if (result != ED_OK) {
477 return _error->Errno("rred", _("Could not patch %s with mmap (but no mmap specific fail) - the patch seems to be corrupt."), Path.c_str());
478 } else if (Debug == true) {
479 std::clog << "rred: finished mmap patching of " << Path << std::endl;
480 }
481
482 // write out the result
483 From.Close();
484 Patch.Close();
485 To.Close();
486
487 /* Transfer the modification times from the patch file
488 to be able to see in which state the file should be
489 and use the access time from the "old" file */
490 struct stat BufBase, BufPatch;
491 if (stat(Path.c_str(),&BufBase) != 0 ||
492 stat(std::string(Path+".ed").c_str(),&BufPatch) != 0)
493 return _error->Errno("stat",_("Failed to stat"));
494
495 struct utimbuf TimeBuf;
496 TimeBuf.actime = BufBase.st_atime;
497 TimeBuf.modtime = BufPatch.st_mtime;
498 if (utime(Itm->DestFile.c_str(),&TimeBuf) != 0)
499 return _error->Errno("utime",_("Failed to set modification time"));
500
501 if (stat(Itm->DestFile.c_str(),&BufBase) != 0)
502 return _error->Errno("stat",_("Failed to stat"));
503
504 // return done
505 Res.LastModified = BufBase.st_mtime;
506 Res.Size = BufBase.st_size;
507 Res.TakeHashes(Hash);
508 URIDone(Res);
509
510 return true;
511 }
512 /*}}}*/
513 /** \brief Wrapper class for testing rred */ /*{{{*/
514 class TestRredMethod : public RredMethod {
515 public:
516 /** \brief Run rred in debug test mode
517 *
518 * This method can be used to run the rred method outside
519 * of the "normal" acquire environment for easier testing.
520 *
521 * \param base basename of all files involved in this rred test
522 */
523 bool Run(char const *base) {
524 _config->CndSet("Debug::pkgAcquire::RRed", "true");
525 FetchItem *test = new FetchItem;
526 test->DestFile = base;
527 return Fetch(test);
528 }
529 };
530 /*}}}*/
531 /** \brief Starter for the rred method (or its test method) {{{
532 *
533 * Used without parameters is the normal behavior for methods for
534 * the APT acquire system. While this works great for the acquire system
535 * it is very hard to test the method and therefore the method also
536 * accepts one parameter which will switch it directly to debug test mode:
537 * The test mode expects that if "Testfile" is given as parameter
538 * the file "Testfile" should be ed-style patched with "Testfile.ed"
539 * and will write the result to "Testfile.result".
540 */
541 int main(int argc, char *argv[]) {
542 if (argc <= 1) {
543 RredMethod Mth;
544 return Mth.Run();
545 } else {
546 TestRredMethod Mth;
547 bool result = Mth.Run(argv[1]);
548 _error->DumpErrors();
549 return result;
550 }
551 }
552 /*}}}*/