* debian/control:
[ntk/apt.git] / methods / rred.cc
CommitLineData
bb1293d9 1// Includes /*{{{*/
2e178d1c 2#include <apt-pkg/fileutl.h>
bb1293d9 3#include <apt-pkg/mmap.h>
2e178d1c
MV
4#include <apt-pkg/error.h>
5#include <apt-pkg/acquire-method.h>
6#include <apt-pkg/strutl.h>
7#include <apt-pkg/hashes.h>
8
9#include <sys/stat.h>
bb1293d9 10#include <sys/uio.h>
2e178d1c
MV
11#include <unistd.h>
12#include <utime.h>
13#include <stdio.h>
14#include <errno.h>
15#include <apti18n.h>
bb1293d9
DK
16 /*}}}*/
17/** \brief RredMethod - ed-style incremential patch method {{{
18 *
19 * This method implements a patch functionality similar to "patch --ed" that is
20 * used by the "tiffany" incremental packages download stuff. It differs from
21 * "ed" insofar that it is way more restricted (and therefore secure).
22 * The currently supported ed commands are "<em>c</em>hange", "<em>a</em>dd" and
23 * "<em>d</em>elete" (diff doesn't output any other).
24 * Additionally the records must be reverse sorted by line number and
25 * may not overlap (diff *seems* to produce this kind of output).
d84cd865 26 * */
bb1293d9
DK
27class RredMethod : public pkgAcqMethod {
28 bool Debug;
29 // the size of this doesn't really matter (except for performance)
30 const static int BUF_SIZE = 1024;
31 // the supported ed commands
32 enum Mode {MODE_CHANGED='c', MODE_DELETED='d', MODE_ADDED='a'};
33 // return values
34 enum State {ED_OK, ED_ORDERING, ED_PARSER, ED_FAILURE, MMAP_FAILED};
d84cd865 35
bb1293d9
DK
36 State applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_file,
37 unsigned long &line, char *buffer, Hashes *hash) const;
38 void ignoreLineInFile(FILE *fin, char *buffer) const;
39 void copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines,
40 Hashes *hash, char *buffer) const;
2e178d1c 41
bb1293d9
DK
42 State patchFile(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
43 State patchMMap(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
44
45protected:
46 // the methods main method
47 virtual bool Fetch(FetchItem *Itm);
48
49public:
50 RredMethod() : pkgAcqMethod("1.1",SingleInstance | SendConfig) {};
2e178d1c 51};
bb1293d9
DK
52 /*}}}*/
53/** \brief applyFile - in reverse order with a tail recursion {{{
54 *
55 * As it is expected that the commands are in reversed order in the patch file
56 * we check in the first half if the command is valid, but doesn't execute it
57 * and move a step deeper. After reaching the end of the file we apply the
58 * patches in the correct order: last found command first.
59 *
60 * \param ed_cmds patch file to apply
61 * \param in_file base file we want to patch
62 * \param out_file file to write the patched result to
63 * \param line of command operation
64 * \param buffer internal used read/write buffer
65 * \param hash the created file for correctness
66 * \return the success State of the ed command executor
67 */
68RredMethod::State RredMethod::applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_file,
69 unsigned long &line, char *buffer, Hashes *hash) const {
70 // get the current command and parse it
71 if (fgets(buffer, BUF_SIZE, ed_cmds) == NULL) {
72 if (Debug == true)
73 std::clog << "rred: encounter end of file - we can start patching now." << std::endl;
74 line = 0;
75 return ED_OK;
76 }
2e178d1c 77
bb1293d9
DK
78 // parse in the effected linenumbers
79 char* idx;
80 errno=0;
81 unsigned long const startline = strtol(buffer, &idx, 10);
82 if (errno == ERANGE || errno == EINVAL) {
83 _error->Errno("rred", "startline is an invalid number");
84 return ED_PARSER;
85 }
86 if (startline > line) {
87 _error->Error("rred: The start line (%lu) of the next command is higher than the last line (%lu). This is not allowed.", startline, line);
88 return ED_ORDERING;
89 }
90 unsigned long stopline;
91 if (*idx == ',') {
92 idx++;
93 errno=0;
94 stopline = strtol(idx, &idx, 10);
95 if (errno == ERANGE || errno == EINVAL) {
96 _error->Errno("rred", "stopline is an invalid number");
97 return ED_PARSER;
98 }
99 }
100 else {
101 stopline = startline;
102 }
103 line = startline;
104
105 // which command to execute on this line(s)?
106 switch (*idx) {
107 case MODE_CHANGED:
108 if (Debug == true)
109 std::clog << "Change from line " << startline << " to " << stopline << std::endl;
110 break;
111 case MODE_ADDED:
112 if (Debug == true)
113 std::clog << "Insert after line " << startline << std::endl;
114 break;
115 case MODE_DELETED:
116 if (Debug == true)
117 std::clog << "Delete from line " << startline << " to " << stopline << std::endl;
118 break;
119 default:
120 _error->Error("rred: Unknown ed command '%c'. Abort.", *idx);
121 return ED_PARSER;
122 }
123 unsigned char mode = *idx;
124
125 // save the current position
126 unsigned const long pos = ftell(ed_cmds);
127
128 // if this is add or change then go to the next full stop
129 unsigned int data_length = 0;
130 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
131 do {
132 ignoreLineInFile(ed_cmds, buffer);
133 data_length++;
134 }
135 while (strncmp(buffer, ".", 1) != 0);
136 data_length--; // the dot should not be copied
137 }
138
139 // do the recursive call - the last command is the one we need to execute at first
140 const State child = applyFile(ed_cmds, in_file, out_file, line, buffer, hash);
141 if (child != ED_OK) {
142 return child;
143 }
144
145 // change and delete are working on "line" - add is done after "line"
146 if (mode != MODE_ADDED)
147 line++;
148
149 // first wind to the current position and copy over all unchanged lines
150 if (line < startline) {
151 copyLinesFromFileToFile(in_file, out_file, (startline - line), hash, buffer);
152 line = startline;
153 }
2e178d1c 154
bb1293d9
DK
155 if (mode != MODE_ADDED)
156 line--;
157
158 // include data from ed script
159 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
160 fseek(ed_cmds, pos, SEEK_SET);
161 copyLinesFromFileToFile(ed_cmds, out_file, data_length, hash, buffer);
162 }
163
164 // ignore the corresponding number of lines from input
165 if (mode == MODE_CHANGED || mode == MODE_DELETED) {
166 while (line < stopline) {
167 ignoreLineInFile(in_file, buffer);
168 line++;
169 }
170 }
171 return ED_OK;
172}
173 /*}}}*/
174void RredMethod::copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines,/*{{{*/
175 Hashes *hash, char *buffer) const {
176 while (0 < lines--) {
177 do {
178 fgets(buffer, BUF_SIZE, fin);
179 size_t const written = fwrite(buffer, 1, strlen(buffer), fout);
180 hash->Add((unsigned char*)buffer, written);
181 } while (strlen(buffer) == (BUF_SIZE - 1) &&
182 buffer[BUF_SIZE - 2] != '\n');
183 }
184}
185 /*}}}*/
186void RredMethod::ignoreLineInFile(FILE *fin, char *buffer) const { /*{{{*/
187 fgets(buffer, BUF_SIZE, fin);
188 while (strlen(buffer) == (BUF_SIZE - 1) &&
189 buffer[BUF_SIZE - 2] != '\n') {
190 fgets(buffer, BUF_SIZE, fin);
191 buffer[0] = ' ';
192 }
193}
194 /*}}}*/
195RredMethod::State RredMethod::patchFile(FileFd &Patch, FileFd &From, /*{{{*/
196 FileFd &out_file, Hashes *hash) const {
d84cd865 197 char buffer[BUF_SIZE];
bb1293d9
DK
198 FILE* fFrom = fdopen(From.Fd(), "r");
199 FILE* fPatch = fdopen(Patch.Fd(), "r");
200 FILE* fTo = fdopen(out_file.Fd(), "w");
201
d84cd865 202 /* we do a tail recursion to read the commands in the right order */
bb1293d9
DK
203 unsigned long line = -1; // assign highest possible value
204 State const result = applyFile(fPatch, fFrom, fTo, line, buffer, hash);
d84cd865
MV
205
206 /* read the rest from infile */
bb1293d9
DK
207 if (result == ED_OK) {
208 while (fgets(buffer, BUF_SIZE, fFrom) != NULL) {
209 size_t const written = fwrite(buffer, 1, strlen(buffer), fTo);
d84cd865
MV
210 hash->Add((unsigned char*)buffer, written);
211 }
bb1293d9 212 fflush(fTo);
d84cd865 213 }
bb1293d9 214 return result;
2e178d1c 215}
bb1293d9
DK
216 /*}}}*/
217struct EdCommand { /*{{{*/
218 size_t data_start;
219 size_t data_end;
220 size_t data_lines;
221 size_t first_line;
222 size_t last_line;
223 char type;
224};
225#define IOV_COUNT 1024 /* Don't really want IOV_MAX since it can be arbitrarily large */
226 /*}}}*/
227RredMethod::State RredMethod::patchMMap(FileFd &Patch, FileFd &From, /*{{{*/
228 FileFd &out_file, Hashes *hash) const {
229#ifdef _POSIX_MAPPED_FILES
230 MMap ed_cmds(Patch, MMap::ReadOnly);
231 MMap in_file(From, MMap::ReadOnly);
232
233 if (ed_cmds.Size() == 0 || in_file.Size() == 0)
234 return MMAP_FAILED;
235
236 EdCommand* commands = 0;
237 size_t command_count = 0;
238 size_t command_alloc = 0;
239
240 const char* begin = (char*) ed_cmds.Data();
241 const char* end = begin;
242 const char* ed_end = (char*) ed_cmds.Data() + ed_cmds.Size();
243
244 const char* input = (char*) in_file.Data();
245 const char* input_end = (char*) in_file.Data() + in_file.Size();
246
247 size_t i;
248
249 /* 1. Parse entire script. It is executed in reverse order, so we cather it
250 * in the `commands' buffer first
251 */
252
253 for(;;) {
254 EdCommand cmd;
255 cmd.data_start = 0;
256 cmd.data_end = 0;
257
258 while(begin != ed_end && *begin == '\n')
259 ++begin;
260 while(end != ed_end && *end != '\n')
261 ++end;
262 if(end == ed_end && begin == end)
263 break;
264
265 /* Determine command range */
266 const char* tmp = begin;
267
268 for(;;) {
269 /* atoll is safe despite lacking NUL-termination; we know there's an
270 * alphabetic character at end[-1]
271 */
272 if(tmp == end) {
273 cmd.first_line = atol(begin);
274 cmd.last_line = cmd.first_line;
275 break;
276 }
277 if(*tmp == ',') {
278 cmd.first_line = atol(begin);
279 cmd.last_line = atol(tmp + 1);
280 break;
281 }
282 ++tmp;
283 }
284
285 // which command to execute on this line(s)?
286 switch (end[-1]) {
287 case MODE_CHANGED:
288 if (Debug == true)
289 std::clog << "Change from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
290 break;
291 case MODE_ADDED:
292 if (Debug == true)
293 std::clog << "Insert after line " << cmd.first_line << std::endl;
294 break;
295 case MODE_DELETED:
296 if (Debug == true)
297 std::clog << "Delete from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
298 break;
299 default:
300 _error->Error("rred: Unknown ed command '%c'. Abort.", end[-1]);
301 free(commands);
302 return ED_PARSER;
303 }
304 cmd.type = end[-1];
305
306 /* Determine the size of the inserted text, so we don't have to scan this
307 * text again later.
308 */
309 begin = end + 1;
310 end = begin;
311 cmd.data_lines = 0;
312
313 if(cmd.type == MODE_ADDED || cmd.type == MODE_CHANGED) {
314 cmd.data_start = begin - (char*) ed_cmds.Data();
315 while(end != ed_end) {
316 if(*end == '\n') {
317 if(end[-1] == '.' && end[-2] == '\n')
318 break;
319 ++cmd.data_lines;
320 }
321 ++end;
322 }
323 cmd.data_end = end - (char*) ed_cmds.Data() - 1;
324 begin = end + 1;
325 end = begin;
326 }
327 if(command_count == command_alloc) {
328 command_alloc = (command_alloc + 64) * 3 / 2;
329 commands = (EdCommand*) realloc(commands, command_alloc * sizeof(EdCommand));
330 }
331 commands[command_count++] = cmd;
332 }
333
334 struct iovec* iov = new struct iovec[IOV_COUNT];
335 size_t iov_size = 0;
336
337 size_t amount, remaining;
338 size_t line = 1;
339 EdCommand* cmd;
340
341 /* 2. Execute script. We gather writes in a `struct iov' array, and flush
342 * using writev to minimize the number of system calls. Data is read
343 * directly from the memory mappings of the input file and the script.
344 */
345
346 for(i = command_count; i-- > 0; ) {
347 cmd = &commands[i];
348 if(cmd->type == MODE_ADDED)
349 amount = cmd->first_line + 1;
350 else
351 amount = cmd->first_line;
352
353 if(line < amount) {
354 begin = input;
355 while(line != amount) {
356 input = (const char*) memchr(input, '\n', input_end - input);
357 if(!input)
358 break;
359 ++line;
360 ++input;
361 }
2e178d1c 362
bb1293d9
DK
363 iov[iov_size].iov_base = (void*) begin;
364 iov[iov_size].iov_len = input - begin;
365 hash->Add((const unsigned char*) begin, input - begin);
2e178d1c 366
bb1293d9
DK
367 if(++iov_size == IOV_COUNT) {
368 writev(out_file.Fd(), iov, IOV_COUNT);
369 iov_size = 0;
370 }
371 }
372
373 if(cmd->type == MODE_DELETED || cmd->type == MODE_CHANGED) {
374 remaining = (cmd->last_line - cmd->first_line) + 1;
375 line += remaining;
376 while(remaining) {
377 input = (const char*) memchr(input, '\n', input_end - input);
378 if(!input)
379 break;
380 --remaining;
381 ++input;
382 }
383 }
384
385 if(cmd->type == MODE_CHANGED || cmd->type == MODE_ADDED) {
386 if(cmd->data_end != cmd->data_start) {
387 iov[iov_size].iov_base = (void*) ((char*)ed_cmds.Data() + cmd->data_start);
388 iov[iov_size].iov_len = cmd->data_end - cmd->data_start;
389 hash->Add((const unsigned char*) ((char*)ed_cmds.Data() + cmd->data_start),
390 iov[iov_size].iov_len);
391
392 if(++iov_size == IOV_COUNT) {
393 writev(out_file.Fd(), iov, IOV_COUNT);
394 iov_size = 0;
395 }
396 }
397 }
398 }
399
400 if(input != input_end) {
401 iov[iov_size].iov_base = (void*) input;
402 iov[iov_size].iov_len = input_end - input;
403 hash->Add((const unsigned char*) input, input_end - input);
404 ++iov_size;
405 }
406
407 if(iov_size) {
408 writev(out_file.Fd(), iov, iov_size);
409 iov_size = 0;
410 }
411
412 for(i = 0; i < iov_size; i += IOV_COUNT) {
413 if(iov_size - i < IOV_COUNT)
414 writev(out_file.Fd(), iov + i, iov_size - i);
415 else
416 writev(out_file.Fd(), iov + i, IOV_COUNT);
417 }
418
419 delete [] iov;
420 free(commands);
421
422 return ED_OK;
423#else
424 return MMAP_FAILED;
425#endif
426}
427 /*}}}*/
428bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/
2e178d1c 429{
bb1293d9 430 Debug = _config->FindB("Debug::pkgAcquire::RRed", false);
2e178d1c
MV
431 URI Get = Itm->Uri;
432 string Path = Get.Host + Get.Path; // To account for relative paths
bb1293d9 433
2e178d1c
MV
434 FetchResult Res;
435 Res.Filename = Itm->DestFile;
bb1293d9
DK
436 if (Itm->Uri.empty() == true) {
437 Path = Itm->DestFile;
438 Itm->DestFile.append(".result");
439 } else
440 URIStart(Res);
4a0a786f 441
6040f589
MV
442 if (Debug == true)
443 std::clog << "Patching " << Path << " with " << Path
444 << ".ed and putting result into " << Itm->DestFile << std::endl;
59a704f0
MV
445 // Open the source and destination files (the d'tor of FileFd will do
446 // the cleanup/closing of the fds)
2e178d1c
MV
447 FileFd From(Path,FileFd::ReadOnly);
448 FileFd Patch(Path+".ed",FileFd::ReadOnly);
22041bd2 449 FileFd To(Itm->DestFile,FileFd::WriteAtomic);
2e178d1c
MV
450 To.EraseOnFailure();
451 if (_error->PendingError() == true)
452 return false;
453
454 Hashes Hash;
2e178d1c 455 // now do the actual patching
bb1293d9
DK
456 State const result = patchMMap(Patch, From, To, &Hash);
457 if (result == MMAP_FAILED) {
458 // retry with patchFile
459 lseek(Patch.Fd(), 0, SEEK_SET);
460 lseek(From.Fd(), 0, SEEK_SET);
22041bd2 461 To.Open(Itm->DestFile,FileFd::WriteAtomic);
bb1293d9
DK
462 if (_error->PendingError() == true)
463 return false;
464 if (patchFile(Patch, From, To, &Hash) != ED_OK) {
465 return _error->WarningE("rred", _("Could not patch %s with mmap and with file operation usage - the patch seems to be corrupt."), Path.c_str());
466 } else if (Debug == true) {
467 std::clog << "rred: finished file patching of " << Path << " after mmap failed." << std::endl;
468 }
469 } else if (result != ED_OK) {
470 return _error->Errno("rred", _("Could not patch %s with mmap (but no mmap specific fail) - the patch seems to be corrupt."), Path.c_str());
471 } else if (Debug == true) {
472 std::clog << "rred: finished mmap patching of " << Path << std::endl;
3de9ff77
MV
473 }
474
475 // write out the result
3de9ff77
MV
476 From.Close();
477 Patch.Close();
478 To.Close();
479
1082d4c7
DK
480 /* Transfer the modification times from the patch file
481 to be able to see in which state the file should be
482 and use the access time from the "old" file */
483 struct stat BufBase, BufPatch;
484 if (stat(Path.c_str(),&BufBase) != 0 ||
485 stat(string(Path+".ed").c_str(),&BufPatch) != 0)
3de9ff77
MV
486 return _error->Errno("stat",_("Failed to stat"));
487
488 struct utimbuf TimeBuf;
1082d4c7
DK
489 TimeBuf.actime = BufBase.st_atime;
490 TimeBuf.modtime = BufPatch.st_mtime;
3de9ff77
MV
491 if (utime(Itm->DestFile.c_str(),&TimeBuf) != 0)
492 return _error->Errno("utime",_("Failed to set modification time"));
493
1082d4c7 494 if (stat(Itm->DestFile.c_str(),&BufBase) != 0)
3de9ff77
MV
495 return _error->Errno("stat",_("Failed to stat"));
496
497 // return done
1082d4c7
DK
498 Res.LastModified = BufBase.st_mtime;
499 Res.Size = BufBase.st_size;
2e178d1c
MV
500 Res.TakeHashes(Hash);
501 URIDone(Res);
3de9ff77 502
2e178d1c
MV
503 return true;
504}
bb1293d9
DK
505 /*}}}*/
506/** \brief Wrapper class for testing rred */ /*{{{*/
507class TestRredMethod : public RredMethod {
508public:
509 /** \brief Run rred in debug test mode
510 *
511 * This method can be used to run the rred method outside
512 * of the "normal" acquire environment for easier testing.
513 *
514 * \param base basename of all files involved in this rred test
515 */
516 bool Run(char const *base) {
517 _config->CndSet("Debug::pkgAcquire::RRed", "true");
518 FetchItem *test = new FetchItem;
519 test->DestFile = base;
520 return Fetch(test);
521 }
522};
523 /*}}}*/
524/** \brief Starter for the rred method (or its test method) {{{
525 *
526 * Used without parameters is the normal behavior for methods for
527 * the APT acquire system. While this works great for the acquire system
528 * it is very hard to test the method and therefore the method also
529 * accepts one parameter which will switch it directly to debug test mode:
530 * The test mode expects that if "Testfile" is given as parameter
531 * the file "Testfile" should be ed-style patched with "Testfile.ed"
532 * and will write the result to "Testfile.result".
533 */
534int main(int argc, char *argv[]) {
535 if (argc <= 1) {
536 RredMethod Mth;
537 return Mth.Run();
538 } else {
539 TestRredMethod Mth;
540 bool result = Mth.Run(argv[1]);
541 _error->DumpErrors();
542 return result;
543 }
2e178d1c 544}
bb1293d9 545 /*}}}*/