do not pollute namespace in the headers with using (Closes: #500198)
[ntk/apt.git] / methods / rred.cc
CommitLineData
bb1293d9 1// Includes /*{{{*/
ea542140
DK
2#include <config.h>
3
2e178d1c 4#include <apt-pkg/fileutl.h>
bb1293d9 5#include <apt-pkg/mmap.h>
2e178d1c
MV
6#include <apt-pkg/error.h>
7#include <apt-pkg/acquire-method.h>
8#include <apt-pkg/strutl.h>
9#include <apt-pkg/hashes.h>
10
11#include <sys/stat.h>
bb1293d9 12#include <sys/uio.h>
2e178d1c
MV
13#include <unistd.h>
14#include <utime.h>
15#include <stdio.h>
16#include <errno.h>
caffd480 17#include <zlib.h>
2e178d1c 18#include <apti18n.h>
bb1293d9
DK
19 /*}}}*/
20/** \brief RredMethod - ed-style incremential patch method {{{
21 *
22 * This method implements a patch functionality similar to "patch --ed" that is
23 * used by the "tiffany" incremental packages download stuff. It differs from
24 * "ed" insofar that it is way more restricted (and therefore secure).
25 * The currently supported ed commands are "<em>c</em>hange", "<em>a</em>dd" and
26 * "<em>d</em>elete" (diff doesn't output any other).
27 * Additionally the records must be reverse sorted by line number and
28 * may not overlap (diff *seems* to produce this kind of output).
d84cd865 29 * */
bb1293d9
DK
30class RredMethod : public pkgAcqMethod {
31 bool Debug;
32 // the size of this doesn't really matter (except for performance)
33 const static int BUF_SIZE = 1024;
34 // the supported ed commands
35 enum Mode {MODE_CHANGED='c', MODE_DELETED='d', MODE_ADDED='a'};
36 // return values
37 enum State {ED_OK, ED_ORDERING, ED_PARSER, ED_FAILURE, MMAP_FAILED};
d84cd865 38
caffd480 39 State applyFile(gzFile &ed_cmds, FILE *in_file, FILE *out_file,
bb1293d9
DK
40 unsigned long &line, char *buffer, Hashes *hash) const;
41 void ignoreLineInFile(FILE *fin, char *buffer) const;
caffd480 42 void ignoreLineInFile(gzFile &fin, char *buffer) const;
bb1293d9
DK
43 void copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines,
44 Hashes *hash, char *buffer) const;
caffd480
DK
45 void copyLinesFromFileToFile(gzFile &fin, FILE *fout, unsigned int lines,
46 Hashes *hash, char *buffer) const;
2e178d1c 47
bb1293d9
DK
48 State patchFile(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
49 State patchMMap(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
50
51protected:
52 // the methods main method
53 virtual bool Fetch(FetchItem *Itm);
54
55public:
f5a34606 56 RredMethod() : pkgAcqMethod("1.1",SingleInstance | SendConfig), Debug(false) {};
2e178d1c 57};
bb1293d9
DK
58 /*}}}*/
59/** \brief applyFile - in reverse order with a tail recursion {{{
60 *
61 * As it is expected that the commands are in reversed order in the patch file
62 * we check in the first half if the command is valid, but doesn't execute it
63 * and move a step deeper. After reaching the end of the file we apply the
64 * patches in the correct order: last found command first.
65 *
66 * \param ed_cmds patch file to apply
67 * \param in_file base file we want to patch
68 * \param out_file file to write the patched result to
69 * \param line of command operation
70 * \param buffer internal used read/write buffer
71 * \param hash the created file for correctness
72 * \return the success State of the ed command executor
73 */
caffd480 74RredMethod::State RredMethod::applyFile(gzFile &ed_cmds, FILE *in_file, FILE *out_file,
bb1293d9
DK
75 unsigned long &line, char *buffer, Hashes *hash) const {
76 // get the current command and parse it
caffd480 77 if (gzgets(ed_cmds, buffer, BUF_SIZE) == NULL) {
bb1293d9
DK
78 if (Debug == true)
79 std::clog << "rred: encounter end of file - we can start patching now." << std::endl;
80 line = 0;
81 return ED_OK;
82 }
2e178d1c 83
bb1293d9
DK
84 // parse in the effected linenumbers
85 char* idx;
86 errno=0;
87 unsigned long const startline = strtol(buffer, &idx, 10);
88 if (errno == ERANGE || errno == EINVAL) {
89 _error->Errno("rred", "startline is an invalid number");
90 return ED_PARSER;
91 }
92 if (startline > line) {
93 _error->Error("rred: The start line (%lu) of the next command is higher than the last line (%lu). This is not allowed.", startline, line);
94 return ED_ORDERING;
95 }
96 unsigned long stopline;
97 if (*idx == ',') {
98 idx++;
99 errno=0;
100 stopline = strtol(idx, &idx, 10);
101 if (errno == ERANGE || errno == EINVAL) {
102 _error->Errno("rred", "stopline is an invalid number");
103 return ED_PARSER;
104 }
105 }
106 else {
107 stopline = startline;
108 }
109 line = startline;
110
111 // which command to execute on this line(s)?
112 switch (*idx) {
113 case MODE_CHANGED:
114 if (Debug == true)
115 std::clog << "Change from line " << startline << " to " << stopline << std::endl;
116 break;
117 case MODE_ADDED:
118 if (Debug == true)
119 std::clog << "Insert after line " << startline << std::endl;
120 break;
121 case MODE_DELETED:
122 if (Debug == true)
123 std::clog << "Delete from line " << startline << " to " << stopline << std::endl;
124 break;
125 default:
126 _error->Error("rred: Unknown ed command '%c'. Abort.", *idx);
127 return ED_PARSER;
128 }
129 unsigned char mode = *idx;
130
131 // save the current position
caffd480 132 unsigned const long pos = gztell(ed_cmds);
bb1293d9
DK
133
134 // if this is add or change then go to the next full stop
135 unsigned int data_length = 0;
136 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
137 do {
138 ignoreLineInFile(ed_cmds, buffer);
139 data_length++;
140 }
141 while (strncmp(buffer, ".", 1) != 0);
142 data_length--; // the dot should not be copied
143 }
144
145 // do the recursive call - the last command is the one we need to execute at first
146 const State child = applyFile(ed_cmds, in_file, out_file, line, buffer, hash);
147 if (child != ED_OK) {
148 return child;
149 }
150
151 // change and delete are working on "line" - add is done after "line"
152 if (mode != MODE_ADDED)
153 line++;
154
155 // first wind to the current position and copy over all unchanged lines
156 if (line < startline) {
157 copyLinesFromFileToFile(in_file, out_file, (startline - line), hash, buffer);
158 line = startline;
159 }
2e178d1c 160
bb1293d9
DK
161 if (mode != MODE_ADDED)
162 line--;
163
164 // include data from ed script
165 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
caffd480 166 gzseek(ed_cmds, pos, SEEK_SET);
bb1293d9
DK
167 copyLinesFromFileToFile(ed_cmds, out_file, data_length, hash, buffer);
168 }
169
170 // ignore the corresponding number of lines from input
171 if (mode == MODE_CHANGED || mode == MODE_DELETED) {
172 while (line < stopline) {
173 ignoreLineInFile(in_file, buffer);
174 line++;
175 }
176 }
177 return ED_OK;
178}
179 /*}}}*/
180void RredMethod::copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines,/*{{{*/
181 Hashes *hash, char *buffer) const {
182 while (0 < lines--) {
183 do {
184 fgets(buffer, BUF_SIZE, fin);
185 size_t const written = fwrite(buffer, 1, strlen(buffer), fout);
186 hash->Add((unsigned char*)buffer, written);
187 } while (strlen(buffer) == (BUF_SIZE - 1) &&
188 buffer[BUF_SIZE - 2] != '\n');
189 }
190}
191 /*}}}*/
caffd480
DK
192void RredMethod::copyLinesFromFileToFile(gzFile &fin, FILE *fout, unsigned int lines,/*{{{*/
193 Hashes *hash, char *buffer) const {
194 while (0 < lines--) {
195 do {
196 gzgets(fin, buffer, BUF_SIZE);
197 size_t const written = fwrite(buffer, 1, strlen(buffer), fout);
198 hash->Add((unsigned char*)buffer, written);
199 } while (strlen(buffer) == (BUF_SIZE - 1) &&
200 buffer[BUF_SIZE - 2] != '\n');
201 }
202}
203 /*}}}*/
bb1293d9
DK
204void RredMethod::ignoreLineInFile(FILE *fin, char *buffer) const { /*{{{*/
205 fgets(buffer, BUF_SIZE, fin);
206 while (strlen(buffer) == (BUF_SIZE - 1) &&
207 buffer[BUF_SIZE - 2] != '\n') {
208 fgets(buffer, BUF_SIZE, fin);
209 buffer[0] = ' ';
210 }
211}
212 /*}}}*/
caffd480
DK
213void RredMethod::ignoreLineInFile(gzFile &fin, char *buffer) const { /*{{{*/
214 gzgets(fin, buffer, BUF_SIZE);
215 while (strlen(buffer) == (BUF_SIZE - 1) &&
216 buffer[BUF_SIZE - 2] != '\n') {
217 gzgets(fin, buffer, BUF_SIZE);
218 buffer[0] = ' ';
219 }
220}
221 /*}}}*/
bb1293d9
DK
222RredMethod::State RredMethod::patchFile(FileFd &Patch, FileFd &From, /*{{{*/
223 FileFd &out_file, Hashes *hash) const {
d84cd865 224 char buffer[BUF_SIZE];
bb1293d9 225 FILE* fFrom = fdopen(From.Fd(), "r");
caffd480 226 gzFile fPatch = Patch.gzFd();
bb1293d9
DK
227 FILE* fTo = fdopen(out_file.Fd(), "w");
228
d84cd865 229 /* we do a tail recursion to read the commands in the right order */
bb1293d9
DK
230 unsigned long line = -1; // assign highest possible value
231 State const result = applyFile(fPatch, fFrom, fTo, line, buffer, hash);
d84cd865
MV
232
233 /* read the rest from infile */
bb1293d9
DK
234 if (result == ED_OK) {
235 while (fgets(buffer, BUF_SIZE, fFrom) != NULL) {
236 size_t const written = fwrite(buffer, 1, strlen(buffer), fTo);
d84cd865
MV
237 hash->Add((unsigned char*)buffer, written);
238 }
bb1293d9 239 fflush(fTo);
d84cd865 240 }
bb1293d9 241 return result;
2e178d1c 242}
bb1293d9 243 /*}}}*/
f5a34606
DK
/* struct EdCommand - one parsed command of an ed script			{{{*/
#ifdef _POSIX_MAPPED_FILES
struct EdCommand {
	size_t data_start;	// offset into the script at which the data to insert begins
	size_t data_end;	// offset into the script at which that data ends
	size_t data_lines;	// number of lines of that data
	size_t first_line;	// first line of the base file the command works on
	size_t last_line;	// last line of the base file the command works on
	char type;		// command letter as found in the script
};

/* number of iovec entries gathered before they are written out; we
 * don't really want IOV_MAX since it can be arbitrarily large */
#define IOV_COUNT 1024
#endif
bb1293d9
DK
256 /*}}}*/
257RredMethod::State RredMethod::patchMMap(FileFd &Patch, FileFd &From, /*{{{*/
258 FileFd &out_file, Hashes *hash) const {
259#ifdef _POSIX_MAPPED_FILES
37254316 260 MMap ed_cmds(MMap::ReadOnly);
caffd480 261 if (Patch.gzFd() != NULL) {
650faab0 262 unsigned long long mapSize = Patch.Size();
00542838 263 DynamicMMap* dyn = new DynamicMMap(0, mapSize, 0);
2a79d5b5 264 if (dyn->validData() == false) {
00542838
DK
265 delete dyn;
266 return MMAP_FAILED;
267 }
268 dyn->AddSize(mapSize);
269 gzread(Patch.gzFd(), dyn->Data(), mapSize);
270 ed_cmds = *dyn;
37254316
DK
271 } else
272 ed_cmds = MMap(Patch, MMap::ReadOnly);
273
bb1293d9
DK
274 MMap in_file(From, MMap::ReadOnly);
275
276 if (ed_cmds.Size() == 0 || in_file.Size() == 0)
277 return MMAP_FAILED;
278
279 EdCommand* commands = 0;
280 size_t command_count = 0;
281 size_t command_alloc = 0;
282
283 const char* begin = (char*) ed_cmds.Data();
284 const char* end = begin;
285 const char* ed_end = (char*) ed_cmds.Data() + ed_cmds.Size();
286
287 const char* input = (char*) in_file.Data();
288 const char* input_end = (char*) in_file.Data() + in_file.Size();
289
290 size_t i;
291
292 /* 1. Parse entire script. It is executed in reverse order, so we cather it
293 * in the `commands' buffer first
294 */
295
296 for(;;) {
297 EdCommand cmd;
298 cmd.data_start = 0;
299 cmd.data_end = 0;
300
301 while(begin != ed_end && *begin == '\n')
302 ++begin;
303 while(end != ed_end && *end != '\n')
304 ++end;
305 if(end == ed_end && begin == end)
306 break;
307
308 /* Determine command range */
309 const char* tmp = begin;
310
311 for(;;) {
312 /* atoll is safe despite lacking NUL-termination; we know there's an
313 * alphabetic character at end[-1]
314 */
315 if(tmp == end) {
316 cmd.first_line = atol(begin);
317 cmd.last_line = cmd.first_line;
318 break;
319 }
320 if(*tmp == ',') {
321 cmd.first_line = atol(begin);
322 cmd.last_line = atol(tmp + 1);
323 break;
324 }
325 ++tmp;
326 }
327
328 // which command to execute on this line(s)?
329 switch (end[-1]) {
330 case MODE_CHANGED:
331 if (Debug == true)
332 std::clog << "Change from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
333 break;
334 case MODE_ADDED:
335 if (Debug == true)
336 std::clog << "Insert after line " << cmd.first_line << std::endl;
337 break;
338 case MODE_DELETED:
339 if (Debug == true)
340 std::clog << "Delete from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
341 break;
342 default:
343 _error->Error("rred: Unknown ed command '%c'. Abort.", end[-1]);
344 free(commands);
345 return ED_PARSER;
346 }
347 cmd.type = end[-1];
348
349 /* Determine the size of the inserted text, so we don't have to scan this
350 * text again later.
351 */
352 begin = end + 1;
353 end = begin;
354 cmd.data_lines = 0;
355
356 if(cmd.type == MODE_ADDED || cmd.type == MODE_CHANGED) {
357 cmd.data_start = begin - (char*) ed_cmds.Data();
358 while(end != ed_end) {
359 if(*end == '\n') {
360 if(end[-1] == '.' && end[-2] == '\n')
361 break;
362 ++cmd.data_lines;
363 }
364 ++end;
365 }
366 cmd.data_end = end - (char*) ed_cmds.Data() - 1;
367 begin = end + 1;
368 end = begin;
369 }
370 if(command_count == command_alloc) {
371 command_alloc = (command_alloc + 64) * 3 / 2;
372 commands = (EdCommand*) realloc(commands, command_alloc * sizeof(EdCommand));
373 }
374 commands[command_count++] = cmd;
375 }
376
377 struct iovec* iov = new struct iovec[IOV_COUNT];
378 size_t iov_size = 0;
379
380 size_t amount, remaining;
381 size_t line = 1;
382 EdCommand* cmd;
383
384 /* 2. Execute script. We gather writes in a `struct iov' array, and flush
385 * using writev to minimize the number of system calls. Data is read
386 * directly from the memory mappings of the input file and the script.
387 */
388
389 for(i = command_count; i-- > 0; ) {
390 cmd = &commands[i];
391 if(cmd->type == MODE_ADDED)
392 amount = cmd->first_line + 1;
393 else
394 amount = cmd->first_line;
395
396 if(line < amount) {
397 begin = input;
398 while(line != amount) {
399 input = (const char*) memchr(input, '\n', input_end - input);
400 if(!input)
401 break;
402 ++line;
403 ++input;
404 }
2e178d1c 405
bb1293d9
DK
406 iov[iov_size].iov_base = (void*) begin;
407 iov[iov_size].iov_len = input - begin;
408 hash->Add((const unsigned char*) begin, input - begin);
2e178d1c 409
bb1293d9
DK
410 if(++iov_size == IOV_COUNT) {
411 writev(out_file.Fd(), iov, IOV_COUNT);
412 iov_size = 0;
413 }
414 }
415
416 if(cmd->type == MODE_DELETED || cmd->type == MODE_CHANGED) {
417 remaining = (cmd->last_line - cmd->first_line) + 1;
418 line += remaining;
419 while(remaining) {
420 input = (const char*) memchr(input, '\n', input_end - input);
421 if(!input)
422 break;
423 --remaining;
424 ++input;
425 }
426 }
427
428 if(cmd->type == MODE_CHANGED || cmd->type == MODE_ADDED) {
429 if(cmd->data_end != cmd->data_start) {
430 iov[iov_size].iov_base = (void*) ((char*)ed_cmds.Data() + cmd->data_start);
431 iov[iov_size].iov_len = cmd->data_end - cmd->data_start;
432 hash->Add((const unsigned char*) ((char*)ed_cmds.Data() + cmd->data_start),
433 iov[iov_size].iov_len);
434
435 if(++iov_size == IOV_COUNT) {
436 writev(out_file.Fd(), iov, IOV_COUNT);
437 iov_size = 0;
438 }
439 }
440 }
441 }
442
443 if(input != input_end) {
444 iov[iov_size].iov_base = (void*) input;
445 iov[iov_size].iov_len = input_end - input;
446 hash->Add((const unsigned char*) input, input_end - input);
447 ++iov_size;
448 }
449
450 if(iov_size) {
451 writev(out_file.Fd(), iov, iov_size);
452 iov_size = 0;
453 }
454
455 for(i = 0; i < iov_size; i += IOV_COUNT) {
456 if(iov_size - i < IOV_COUNT)
457 writev(out_file.Fd(), iov + i, iov_size - i);
458 else
459 writev(out_file.Fd(), iov + i, IOV_COUNT);
460 }
461
462 delete [] iov;
463 free(commands);
464
465 return ED_OK;
466#else
467 return MMAP_FAILED;
468#endif
469}
470 /*}}}*/
471bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/
2e178d1c 472{
bb1293d9 473 Debug = _config->FindB("Debug::pkgAcquire::RRed", false);
2e178d1c 474 URI Get = Itm->Uri;
8f3ba4e8 475 std::string Path = Get.Host + Get.Path; // To account for relative paths
bb1293d9 476
2e178d1c
MV
477 FetchResult Res;
478 Res.Filename = Itm->DestFile;
bb1293d9
DK
479 if (Itm->Uri.empty() == true) {
480 Path = Itm->DestFile;
481 Itm->DestFile.append(".result");
482 } else
483 URIStart(Res);
4a0a786f 484
6040f589
MV
485 if (Debug == true)
486 std::clog << "Patching " << Path << " with " << Path
487 << ".ed and putting result into " << Itm->DestFile << std::endl;
59a704f0
MV
488 // Open the source and destination files (the d'tor of FileFd will do
489 // the cleanup/closing of the fds)
2e178d1c 490 FileFd From(Path,FileFd::ReadOnly);
caffd480 491 FileFd Patch(Path+".ed",FileFd::ReadOnlyGzip);
22041bd2 492 FileFd To(Itm->DestFile,FileFd::WriteAtomic);
2e178d1c
MV
493 To.EraseOnFailure();
494 if (_error->PendingError() == true)
495 return false;
496
497 Hashes Hash;
2e178d1c 498 // now do the actual patching
bb1293d9
DK
499 State const result = patchMMap(Patch, From, To, &Hash);
500 if (result == MMAP_FAILED) {
501 // retry with patchFile
caffd480
DK
502 Patch.Seek(0);
503 From.Seek(0);
22041bd2 504 To.Open(Itm->DestFile,FileFd::WriteAtomic);
bb1293d9
DK
505 if (_error->PendingError() == true)
506 return false;
507 if (patchFile(Patch, From, To, &Hash) != ED_OK) {
508 return _error->WarningE("rred", _("Could not patch %s with mmap and with file operation usage - the patch seems to be corrupt."), Path.c_str());
509 } else if (Debug == true) {
510 std::clog << "rred: finished file patching of " << Path << " after mmap failed." << std::endl;
511 }
512 } else if (result != ED_OK) {
513 return _error->Errno("rred", _("Could not patch %s with mmap (but no mmap specific fail) - the patch seems to be corrupt."), Path.c_str());
514 } else if (Debug == true) {
515 std::clog << "rred: finished mmap patching of " << Path << std::endl;
3de9ff77
MV
516 }
517
518 // write out the result
3de9ff77
MV
519 From.Close();
520 Patch.Close();
521 To.Close();
522
1082d4c7
DK
523 /* Transfer the modification times from the patch file
524 to be able to see in which state the file should be
525 and use the access time from the "old" file */
526 struct stat BufBase, BufPatch;
527 if (stat(Path.c_str(),&BufBase) != 0 ||
8f3ba4e8 528 stat(std::string(Path+".ed").c_str(),&BufPatch) != 0)
3de9ff77
MV
529 return _error->Errno("stat",_("Failed to stat"));
530
531 struct utimbuf TimeBuf;
1082d4c7
DK
532 TimeBuf.actime = BufBase.st_atime;
533 TimeBuf.modtime = BufPatch.st_mtime;
3de9ff77
MV
534 if (utime(Itm->DestFile.c_str(),&TimeBuf) != 0)
535 return _error->Errno("utime",_("Failed to set modification time"));
536
1082d4c7 537 if (stat(Itm->DestFile.c_str(),&BufBase) != 0)
3de9ff77
MV
538 return _error->Errno("stat",_("Failed to stat"));
539
540 // return done
1082d4c7
DK
541 Res.LastModified = BufBase.st_mtime;
542 Res.Size = BufBase.st_size;
2e178d1c
MV
543 Res.TakeHashes(Hash);
544 URIDone(Res);
3de9ff77 545
2e178d1c
MV
546 return true;
547}
bb1293d9
DK
548 /*}}}*/
549/** \brief Wrapper class for testing rred */ /*{{{*/
550class TestRredMethod : public RredMethod {
551public:
552 /** \brief Run rred in debug test mode
553 *
554 * This method can be used to run the rred method outside
555 * of the "normal" acquire environment for easier testing.
556 *
557 * \param base basename of all files involved in this rred test
558 */
559 bool Run(char const *base) {
560 _config->CndSet("Debug::pkgAcquire::RRed", "true");
561 FetchItem *test = new FetchItem;
562 test->DestFile = base;
563 return Fetch(test);
564 }
565};
566 /*}}}*/
567/** \brief Starter for the rred method (or its test method) {{{
568 *
569 * Used without parameters is the normal behavior for methods for
570 * the APT acquire system. While this works great for the acquire system
571 * it is very hard to test the method and therefore the method also
572 * accepts one parameter which will switch it directly to debug test mode:
573 * The test mode expects that if "Testfile" is given as parameter
574 * the file "Testfile" should be ed-style patched with "Testfile.ed"
575 * and will write the result to "Testfile.result".
576 */
577int main(int argc, char *argv[]) {
578 if (argc <= 1) {
579 RredMethod Mth;
580 return Mth.Run();
581 } else {
582 TestRredMethod Mth;
583 bool result = Mth.Run(argv[1]);
584 _error->DumpErrors();
585 return result;
586 }
2e178d1c 587}
bb1293d9 588 /*}}}*/