* debian/rules:
[ntk/apt.git] / methods / rred.cc
CommitLineData
bb1293d9 1// Includes /*{{{*/
ea542140
DK
2#include <config.h>
3
2e178d1c 4#include <apt-pkg/fileutl.h>
bb1293d9 5#include <apt-pkg/mmap.h>
2e178d1c
MV
6#include <apt-pkg/error.h>
7#include <apt-pkg/acquire-method.h>
8#include <apt-pkg/strutl.h>
9#include <apt-pkg/hashes.h>
472ff00e 10#include <apt-pkg/configuration.h>
2e178d1c
MV
11
12#include <sys/stat.h>
bb1293d9 13#include <sys/uio.h>
2e178d1c
MV
14#include <unistd.h>
15#include <utime.h>
16#include <stdio.h>
17#include <errno.h>
caffd480 18#include <zlib.h>
2e178d1c 19#include <apti18n.h>
bb1293d9
DK
20 /*}}}*/
21/** \brief RredMethod - ed-style incremential patch method {{{
22 *
23 * This method implements a patch functionality similar to "patch --ed" that is
24 * used by the "tiffany" incremental packages download stuff. It differs from
25 * "ed" insofar that it is way more restricted (and therefore secure).
26 * The currently supported ed commands are "<em>c</em>hange", "<em>a</em>dd" and
27 * "<em>d</em>elete" (diff doesn't output any other).
28 * Additionally the records must be reverse sorted by line number and
29 * may not overlap (diff *seems* to produce this kind of output).
d84cd865 30 * */
bb1293d9
DK
31class RredMethod : public pkgAcqMethod {
32 bool Debug;
33 // the size of this doesn't really matter (except for performance)
34 const static int BUF_SIZE = 1024;
35 // the supported ed commands
36 enum Mode {MODE_CHANGED='c', MODE_DELETED='d', MODE_ADDED='a'};
37 // return values
38 enum State {ED_OK, ED_ORDERING, ED_PARSER, ED_FAILURE, MMAP_FAILED};
d84cd865 39
29966fd1 40 State applyFile(FileFd &ed_cmds, FileFd &in_file, FileFd &out_file,
bb1293d9 41 unsigned long &line, char *buffer, Hashes *hash) const;
032bd56f 42 void ignoreLineInFile(FileFd &fin, char *buffer) const;
29966fd1 43 void copyLinesFromFileToFile(FileFd &fin, FileFd &fout, unsigned int lines,
caffd480 44 Hashes *hash, char *buffer) const;
2e178d1c 45
bb1293d9
DK
46 State patchFile(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
47 State patchMMap(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
48
49protected:
50 // the methods main method
51 virtual bool Fetch(FetchItem *Itm);
52
53public:
f5a34606 54 RredMethod() : pkgAcqMethod("1.1",SingleInstance | SendConfig), Debug(false) {};
2e178d1c 55};
bb1293d9
DK
56 /*}}}*/
57/** \brief applyFile - in reverse order with a tail recursion {{{
58 *
59 * As it is expected that the commands are in reversed order in the patch file
60 * we check in the first half if the command is valid, but doesn't execute it
61 * and move a step deeper. After reaching the end of the file we apply the
62 * patches in the correct order: last found command first.
63 *
64 * \param ed_cmds patch file to apply
65 * \param in_file base file we want to patch
66 * \param out_file file to write the patched result to
67 * \param line of command operation
68 * \param buffer internal used read/write buffer
69 * \param hash the created file for correctness
70 * \return the success State of the ed command executor
71 */
29966fd1 72RredMethod::State RredMethod::applyFile(FileFd &ed_cmds, FileFd &in_file, FileFd &out_file,
bb1293d9
DK
73 unsigned long &line, char *buffer, Hashes *hash) const {
74 // get the current command and parse it
032bd56f 75 if (ed_cmds.ReadLine(buffer, BUF_SIZE) == NULL) {
bb1293d9
DK
76 if (Debug == true)
77 std::clog << "rred: encounter end of file - we can start patching now." << std::endl;
78 line = 0;
79 return ED_OK;
80 }
2e178d1c 81
bb1293d9
DK
82 // parse in the effected linenumbers
83 char* idx;
84 errno=0;
85 unsigned long const startline = strtol(buffer, &idx, 10);
86 if (errno == ERANGE || errno == EINVAL) {
87 _error->Errno("rred", "startline is an invalid number");
88 return ED_PARSER;
89 }
90 if (startline > line) {
91 _error->Error("rred: The start line (%lu) of the next command is higher than the last line (%lu). This is not allowed.", startline, line);
92 return ED_ORDERING;
93 }
94 unsigned long stopline;
95 if (*idx == ',') {
96 idx++;
97 errno=0;
98 stopline = strtol(idx, &idx, 10);
99 if (errno == ERANGE || errno == EINVAL) {
100 _error->Errno("rred", "stopline is an invalid number");
101 return ED_PARSER;
102 }
103 }
104 else {
105 stopline = startline;
106 }
107 line = startline;
108
109 // which command to execute on this line(s)?
110 switch (*idx) {
111 case MODE_CHANGED:
112 if (Debug == true)
113 std::clog << "Change from line " << startline << " to " << stopline << std::endl;
114 break;
115 case MODE_ADDED:
116 if (Debug == true)
117 std::clog << "Insert after line " << startline << std::endl;
118 break;
119 case MODE_DELETED:
120 if (Debug == true)
121 std::clog << "Delete from line " << startline << " to " << stopline << std::endl;
122 break;
123 default:
124 _error->Error("rred: Unknown ed command '%c'. Abort.", *idx);
125 return ED_PARSER;
126 }
127 unsigned char mode = *idx;
128
129 // save the current position
032bd56f 130 unsigned const long long pos = ed_cmds.Tell();
bb1293d9
DK
131
132 // if this is add or change then go to the next full stop
133 unsigned int data_length = 0;
134 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
135 do {
136 ignoreLineInFile(ed_cmds, buffer);
137 data_length++;
138 }
139 while (strncmp(buffer, ".", 1) != 0);
140 data_length--; // the dot should not be copied
141 }
142
143 // do the recursive call - the last command is the one we need to execute at first
144 const State child = applyFile(ed_cmds, in_file, out_file, line, buffer, hash);
145 if (child != ED_OK) {
146 return child;
147 }
148
149 // change and delete are working on "line" - add is done after "line"
150 if (mode != MODE_ADDED)
151 line++;
152
153 // first wind to the current position and copy over all unchanged lines
154 if (line < startline) {
155 copyLinesFromFileToFile(in_file, out_file, (startline - line), hash, buffer);
156 line = startline;
157 }
2e178d1c 158
bb1293d9
DK
159 if (mode != MODE_ADDED)
160 line--;
161
162 // include data from ed script
163 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
032bd56f 164 ed_cmds.Seek(pos);
bb1293d9
DK
165 copyLinesFromFileToFile(ed_cmds, out_file, data_length, hash, buffer);
166 }
167
168 // ignore the corresponding number of lines from input
169 if (mode == MODE_CHANGED || mode == MODE_DELETED) {
170 while (line < stopline) {
171 ignoreLineInFile(in_file, buffer);
172 line++;
173 }
174 }
175 return ED_OK;
176}
177 /*}}}*/
29966fd1 178void RredMethod::copyLinesFromFileToFile(FileFd &fin, FileFd &fout, unsigned int lines,/*{{{*/
caffd480
DK
179 Hashes *hash, char *buffer) const {
180 while (0 < lines--) {
181 do {
032bd56f 182 fin.ReadLine(buffer, BUF_SIZE);
29966fd1
DK
183 unsigned long long const towrite = strlen(buffer);
184 fout.Write(buffer, towrite);
185 hash->Add((unsigned char*)buffer, towrite);
caffd480
DK
186 } while (strlen(buffer) == (BUF_SIZE - 1) &&
187 buffer[BUF_SIZE - 2] != '\n');
188 }
189}
190 /*}}}*/
032bd56f
DK
191void RredMethod::ignoreLineInFile(FileFd &fin, char *buffer) const { /*{{{*/
192 fin.ReadLine(buffer, BUF_SIZE);
caffd480
DK
193 while (strlen(buffer) == (BUF_SIZE - 1) &&
194 buffer[BUF_SIZE - 2] != '\n') {
032bd56f 195 fin.ReadLine(buffer, BUF_SIZE);
caffd480
DK
196 buffer[0] = ' ';
197 }
198}
199 /*}}}*/
bb1293d9
DK
200RredMethod::State RredMethod::patchFile(FileFd &Patch, FileFd &From, /*{{{*/
201 FileFd &out_file, Hashes *hash) const {
d84cd865 202 char buffer[BUF_SIZE];
bb1293d9 203
d84cd865 204 /* we do a tail recursion to read the commands in the right order */
bb1293d9 205 unsigned long line = -1; // assign highest possible value
29966fd1 206 State const result = applyFile(Patch, From, out_file, line, buffer, hash);
d84cd865
MV
207
208 /* read the rest from infile */
bb1293d9 209 if (result == ED_OK) {
29966fd1
DK
210 while (From.ReadLine(buffer, BUF_SIZE) != NULL) {
211 unsigned long long const towrite = strlen(buffer);
212 out_file.Write(buffer, towrite);
213 hash->Add((unsigned char*)buffer, towrite);
d84cd865
MV
214 }
215 }
bb1293d9 216 return result;
2e178d1c 217}
bb1293d9 218 /*}}}*/
f5a34606
DK
219/* struct EdCommand {{{*/
220#ifdef _POSIX_MAPPED_FILES
221struct EdCommand {
bb1293d9
DK
222 size_t data_start;
223 size_t data_end;
224 size_t data_lines;
225 size_t first_line;
226 size_t last_line;
227 char type;
228};
229#define IOV_COUNT 1024 /* Don't really want IOV_MAX since it can be arbitrarily large */
319790f4
DK
230ssize_t retry_writev(int fd, const struct iovec *iov, int iovcnt) {
231 ssize_t Res;
232 errno = 0;
233 ssize_t i = 0;
234 do {
235 Res = writev(fd, iov + i, iovcnt);
236 if (Res < 0 && errno == EINTR)
237 continue;
238 if (Res < 0)
239 return _error->Errno("writev",_("Write error"));
240 iovcnt -= Res;
241 i += Res;
242 } while (Res > 0 && iovcnt > 0);
243 return i;
244}
f5a34606 245#endif
bb1293d9
DK
246 /*}}}*/
247RredMethod::State RredMethod::patchMMap(FileFd &Patch, FileFd &From, /*{{{*/
248 FileFd &out_file, Hashes *hash) const {
249#ifdef _POSIX_MAPPED_FILES
032bd56f 250 MMap ed_cmds(Patch, MMap::ReadOnly);
bb1293d9
DK
251 MMap in_file(From, MMap::ReadOnly);
252
f23a94d5
DK
253 unsigned long long const ed_size = ed_cmds.Size();
254 unsigned long long const in_size = in_file.Size();
255 if (ed_size == 0 || in_size == 0)
bb1293d9
DK
256 return MMAP_FAILED;
257
258 EdCommand* commands = 0;
259 size_t command_count = 0;
260 size_t command_alloc = 0;
261
262 const char* begin = (char*) ed_cmds.Data();
263 const char* end = begin;
f23a94d5 264 const char* ed_end = (char*) ed_cmds.Data() + ed_size;
bb1293d9
DK
265
266 const char* input = (char*) in_file.Data();
f23a94d5 267 const char* input_end = (char*) in_file.Data() + in_size;
bb1293d9
DK
268
269 size_t i;
270
271 /* 1. Parse entire script. It is executed in reverse order, so we cather it
272 * in the `commands' buffer first
273 */
274
275 for(;;) {
276 EdCommand cmd;
277 cmd.data_start = 0;
278 cmd.data_end = 0;
279
280 while(begin != ed_end && *begin == '\n')
281 ++begin;
282 while(end != ed_end && *end != '\n')
283 ++end;
284 if(end == ed_end && begin == end)
285 break;
286
287 /* Determine command range */
288 const char* tmp = begin;
289
290 for(;;) {
291 /* atoll is safe despite lacking NUL-termination; we know there's an
292 * alphabetic character at end[-1]
293 */
294 if(tmp == end) {
295 cmd.first_line = atol(begin);
296 cmd.last_line = cmd.first_line;
297 break;
298 }
299 if(*tmp == ',') {
300 cmd.first_line = atol(begin);
301 cmd.last_line = atol(tmp + 1);
302 break;
303 }
304 ++tmp;
305 }
306
307 // which command to execute on this line(s)?
308 switch (end[-1]) {
309 case MODE_CHANGED:
310 if (Debug == true)
311 std::clog << "Change from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
312 break;
313 case MODE_ADDED:
314 if (Debug == true)
315 std::clog << "Insert after line " << cmd.first_line << std::endl;
316 break;
317 case MODE_DELETED:
318 if (Debug == true)
319 std::clog << "Delete from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
320 break;
321 default:
322 _error->Error("rred: Unknown ed command '%c'. Abort.", end[-1]);
323 free(commands);
324 return ED_PARSER;
325 }
326 cmd.type = end[-1];
327
328 /* Determine the size of the inserted text, so we don't have to scan this
329 * text again later.
330 */
331 begin = end + 1;
332 end = begin;
333 cmd.data_lines = 0;
334
335 if(cmd.type == MODE_ADDED || cmd.type == MODE_CHANGED) {
336 cmd.data_start = begin - (char*) ed_cmds.Data();
337 while(end != ed_end) {
338 if(*end == '\n') {
339 if(end[-1] == '.' && end[-2] == '\n')
340 break;
341 ++cmd.data_lines;
342 }
343 ++end;
344 }
345 cmd.data_end = end - (char*) ed_cmds.Data() - 1;
346 begin = end + 1;
347 end = begin;
348 }
349 if(command_count == command_alloc) {
350 command_alloc = (command_alloc + 64) * 3 / 2;
aaab1007
DK
351 EdCommand* newCommands = (EdCommand*) realloc(commands, command_alloc * sizeof(EdCommand));
352 if (newCommands == NULL) {
353 free(commands);
354 return MMAP_FAILED;
355 }
356 commands = newCommands;
bb1293d9
DK
357 }
358 commands[command_count++] = cmd;
359 }
360
361 struct iovec* iov = new struct iovec[IOV_COUNT];
362 size_t iov_size = 0;
363
364 size_t amount, remaining;
365 size_t line = 1;
366 EdCommand* cmd;
367
368 /* 2. Execute script. We gather writes in a `struct iov' array, and flush
369 * using writev to minimize the number of system calls. Data is read
370 * directly from the memory mappings of the input file and the script.
371 */
372
373 for(i = command_count; i-- > 0; ) {
374 cmd = &commands[i];
375 if(cmd->type == MODE_ADDED)
376 amount = cmd->first_line + 1;
377 else
378 amount = cmd->first_line;
379
380 if(line < amount) {
381 begin = input;
382 while(line != amount) {
383 input = (const char*) memchr(input, '\n', input_end - input);
384 if(!input)
385 break;
386 ++line;
387 ++input;
388 }
2e178d1c 389
bb1293d9
DK
390 iov[iov_size].iov_base = (void*) begin;
391 iov[iov_size].iov_len = input - begin;
392 hash->Add((const unsigned char*) begin, input - begin);
2e178d1c 393
bb1293d9 394 if(++iov_size == IOV_COUNT) {
319790f4 395 retry_writev(out_file.Fd(), iov, IOV_COUNT);
bb1293d9
DK
396 iov_size = 0;
397 }
398 }
399
400 if(cmd->type == MODE_DELETED || cmd->type == MODE_CHANGED) {
401 remaining = (cmd->last_line - cmd->first_line) + 1;
402 line += remaining;
403 while(remaining) {
404 input = (const char*) memchr(input, '\n', input_end - input);
405 if(!input)
406 break;
407 --remaining;
408 ++input;
409 }
410 }
411
412 if(cmd->type == MODE_CHANGED || cmd->type == MODE_ADDED) {
413 if(cmd->data_end != cmd->data_start) {
414 iov[iov_size].iov_base = (void*) ((char*)ed_cmds.Data() + cmd->data_start);
415 iov[iov_size].iov_len = cmd->data_end - cmd->data_start;
416 hash->Add((const unsigned char*) ((char*)ed_cmds.Data() + cmd->data_start),
417 iov[iov_size].iov_len);
418
419 if(++iov_size == IOV_COUNT) {
319790f4 420 retry_writev(out_file.Fd(), iov, IOV_COUNT);
bb1293d9
DK
421 iov_size = 0;
422 }
423 }
424 }
425 }
426
427 if(input != input_end) {
428 iov[iov_size].iov_base = (void*) input;
429 iov[iov_size].iov_len = input_end - input;
430 hash->Add((const unsigned char*) input, input_end - input);
431 ++iov_size;
432 }
433
434 if(iov_size) {
319790f4 435 retry_writev(out_file.Fd(), iov, iov_size);
bb1293d9
DK
436 iov_size = 0;
437 }
438
439 for(i = 0; i < iov_size; i += IOV_COUNT) {
440 if(iov_size - i < IOV_COUNT)
319790f4 441 retry_writev(out_file.Fd(), iov + i, iov_size - i);
bb1293d9 442 else
319790f4 443 retry_writev(out_file.Fd(), iov + i, IOV_COUNT);
bb1293d9
DK
444 }
445
446 delete [] iov;
447 free(commands);
448
449 return ED_OK;
450#else
451 return MMAP_FAILED;
452#endif
453}
454 /*}}}*/
455bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/
2e178d1c 456{
bb1293d9 457 Debug = _config->FindB("Debug::pkgAcquire::RRed", false);
2e178d1c 458 URI Get = Itm->Uri;
8f3ba4e8 459 std::string Path = Get.Host + Get.Path; // To account for relative paths
bb1293d9 460
2e178d1c
MV
461 FetchResult Res;
462 Res.Filename = Itm->DestFile;
bb1293d9
DK
463 if (Itm->Uri.empty() == true) {
464 Path = Itm->DestFile;
465 Itm->DestFile.append(".result");
466 } else
467 URIStart(Res);
4a0a786f 468
6040f589
MV
469 if (Debug == true)
470 std::clog << "Patching " << Path << " with " << Path
471 << ".ed and putting result into " << Itm->DestFile << std::endl;
59a704f0
MV
472 // Open the source and destination files (the d'tor of FileFd will do
473 // the cleanup/closing of the fds)
2e178d1c 474 FileFd From(Path,FileFd::ReadOnly);
468720c5 475 FileFd Patch(Path+".ed",FileFd::ReadOnly, FileFd::Gzip);
22041bd2 476 FileFd To(Itm->DestFile,FileFd::WriteAtomic);
2e178d1c
MV
477 To.EraseOnFailure();
478 if (_error->PendingError() == true)
479 return false;
480
481 Hashes Hash;
2e178d1c 482 // now do the actual patching
bb1293d9
DK
483 State const result = patchMMap(Patch, From, To, &Hash);
484 if (result == MMAP_FAILED) {
485 // retry with patchFile
caffd480
DK
486 Patch.Seek(0);
487 From.Seek(0);
22041bd2 488 To.Open(Itm->DestFile,FileFd::WriteAtomic);
bb1293d9
DK
489 if (_error->PendingError() == true)
490 return false;
491 if (patchFile(Patch, From, To, &Hash) != ED_OK) {
492 return _error->WarningE("rred", _("Could not patch %s with mmap and with file operation usage - the patch seems to be corrupt."), Path.c_str());
493 } else if (Debug == true) {
494 std::clog << "rred: finished file patching of " << Path << " after mmap failed." << std::endl;
495 }
496 } else if (result != ED_OK) {
497 return _error->Errno("rred", _("Could not patch %s with mmap (but no mmap specific fail) - the patch seems to be corrupt."), Path.c_str());
498 } else if (Debug == true) {
499 std::clog << "rred: finished mmap patching of " << Path << std::endl;
3de9ff77
MV
500 }
501
502 // write out the result
3de9ff77
MV
503 From.Close();
504 Patch.Close();
505 To.Close();
506
1082d4c7
DK
507 /* Transfer the modification times from the patch file
508 to be able to see in which state the file should be
509 and use the access time from the "old" file */
510 struct stat BufBase, BufPatch;
511 if (stat(Path.c_str(),&BufBase) != 0 ||
8f3ba4e8 512 stat(std::string(Path+".ed").c_str(),&BufPatch) != 0)
3de9ff77
MV
513 return _error->Errno("stat",_("Failed to stat"));
514
515 struct utimbuf TimeBuf;
1082d4c7
DK
516 TimeBuf.actime = BufBase.st_atime;
517 TimeBuf.modtime = BufPatch.st_mtime;
3de9ff77
MV
518 if (utime(Itm->DestFile.c_str(),&TimeBuf) != 0)
519 return _error->Errno("utime",_("Failed to set modification time"));
520
1082d4c7 521 if (stat(Itm->DestFile.c_str(),&BufBase) != 0)
3de9ff77
MV
522 return _error->Errno("stat",_("Failed to stat"));
523
524 // return done
1082d4c7
DK
525 Res.LastModified = BufBase.st_mtime;
526 Res.Size = BufBase.st_size;
2e178d1c
MV
527 Res.TakeHashes(Hash);
528 URIDone(Res);
3de9ff77 529
2e178d1c
MV
530 return true;
531}
bb1293d9
DK
532 /*}}}*/
533/** \brief Wrapper class for testing rred */ /*{{{*/
534class TestRredMethod : public RredMethod {
535public:
536 /** \brief Run rred in debug test mode
537 *
538 * This method can be used to run the rred method outside
539 * of the "normal" acquire environment for easier testing.
540 *
541 * \param base basename of all files involved in this rred test
542 */
543 bool Run(char const *base) {
544 _config->CndSet("Debug::pkgAcquire::RRed", "true");
545 FetchItem *test = new FetchItem;
546 test->DestFile = base;
547 return Fetch(test);
548 }
549};
550 /*}}}*/
551/** \brief Starter for the rred method (or its test method) {{{
552 *
553 * Used without parameters is the normal behavior for methods for
554 * the APT acquire system. While this works great for the acquire system
555 * it is very hard to test the method and therefore the method also
556 * accepts one parameter which will switch it directly to debug test mode:
557 * The test mode expects that if "Testfile" is given as parameter
558 * the file "Testfile" should be ed-style patched with "Testfile.ed"
559 * and will write the result to "Testfile.result".
560 */
561int main(int argc, char *argv[]) {
562 if (argc <= 1) {
563 RredMethod Mth;
564 return Mth.Run();
565 } else {
566 TestRredMethod Mth;
567 bool result = Mth.Run(argv[1]);
568 _error->DumpErrors();
569 return result;
570 }
2e178d1c 571}
bb1293d9 572 /*}}}*/