retry without partial data after a 416 response
[ntk/apt.git] / methods / rred.cc
1 // Includes /*{{{*/
2 #include <config.h>
3
4 #include <apt-pkg/fileutl.h>
5 #include <apt-pkg/mmap.h>
6 #include <apt-pkg/error.h>
7 #include <apt-pkg/acquire-method.h>
8 #include <apt-pkg/strutl.h>
9 #include <apt-pkg/hashes.h>
10 #include <apt-pkg/configuration.h>
11
12 #include <sys/stat.h>
13 #include <sys/uio.h>
14 #include <unistd.h>
15 #include <utime.h>
16 #include <stdio.h>
17 #include <errno.h>
18 #include <apti18n.h>
19 /*}}}*/
20 /** \brief RredMethod - ed-style incremential patch method {{{
21 *
22 * This method implements a patch functionality similar to "patch --ed" that is
23 * used by the "tiffany" incremental packages download stuff. It differs from
24 * "ed" insofar that it is way more restricted (and therefore secure).
25 * The currently supported ed commands are "<em>c</em>hange", "<em>a</em>dd" and
26 * "<em>d</em>elete" (diff doesn't output any other).
27 * Additionally the records must be reverse sorted by line number and
28 * may not overlap (diff *seems* to produce this kind of output).
29 * */
30 class RredMethod : public pkgAcqMethod {
31 bool Debug;
32 // the size of this doesn't really matter (except for performance)
33 const static int BUF_SIZE = 1024;
34 // the supported ed commands
35 enum Mode {MODE_CHANGED='c', MODE_DELETED='d', MODE_ADDED='a'};
36 // return values
37 enum State {ED_OK, ED_ORDERING, ED_PARSER, ED_FAILURE, MMAP_FAILED};
38
39 State applyFile(FileFd &ed_cmds, FileFd &in_file, FileFd &out_file,
40 unsigned long &line, char *buffer, Hashes *hash) const;
41 void ignoreLineInFile(FileFd &fin, char *buffer) const;
42 void copyLinesFromFileToFile(FileFd &fin, FileFd &fout, unsigned int lines,
43 Hashes *hash, char *buffer) const;
44
45 State patchFile(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
46 State patchMMap(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
47
48 protected:
49 // the methods main method
50 virtual bool Fetch(FetchItem *Itm);
51
52 public:
53 RredMethod() : pkgAcqMethod("1.1",SingleInstance | SendConfig), Debug(false) {};
54 };
55 /*}}}*/
56 /** \brief applyFile - in reverse order with a tail recursion {{{
57 *
58 * As it is expected that the commands are in reversed order in the patch file
59 * we check in the first half if the command is valid, but doesn't execute it
60 * and move a step deeper. After reaching the end of the file we apply the
61 * patches in the correct order: last found command first.
62 *
63 * \param ed_cmds patch file to apply
64 * \param in_file base file we want to patch
65 * \param out_file file to write the patched result to
66 * \param line of command operation
67 * \param buffer internal used read/write buffer
68 * \param hash the created file for correctness
69 * \return the success State of the ed command executor
70 */
71 RredMethod::State RredMethod::applyFile(FileFd &ed_cmds, FileFd &in_file, FileFd &out_file,
72 unsigned long &line, char *buffer, Hashes *hash) const {
73 // get the current command and parse it
74 if (ed_cmds.ReadLine(buffer, BUF_SIZE) == NULL) {
75 if (Debug == true)
76 std::clog << "rred: encounter end of file - we can start patching now." << std::endl;
77 line = 0;
78 return ED_OK;
79 }
80
81 // parse in the effected linenumbers
82 char* idx;
83 errno=0;
84 unsigned long const startline = strtol(buffer, &idx, 10);
85 if (errno == ERANGE || errno == EINVAL) {
86 _error->Errno("rred", "startline is an invalid number");
87 return ED_PARSER;
88 }
89 if (startline > line) {
90 _error->Error("rred: The start line (%lu) of the next command is higher than the last line (%lu). This is not allowed.", startline, line);
91 return ED_ORDERING;
92 }
93 unsigned long stopline;
94 if (*idx == ',') {
95 idx++;
96 errno=0;
97 stopline = strtol(idx, &idx, 10);
98 if (errno == ERANGE || errno == EINVAL) {
99 _error->Errno("rred", "stopline is an invalid number");
100 return ED_PARSER;
101 }
102 }
103 else {
104 stopline = startline;
105 }
106 line = startline;
107
108 // which command to execute on this line(s)?
109 switch (*idx) {
110 case MODE_CHANGED:
111 if (Debug == true)
112 std::clog << "Change from line " << startline << " to " << stopline << std::endl;
113 break;
114 case MODE_ADDED:
115 if (Debug == true)
116 std::clog << "Insert after line " << startline << std::endl;
117 break;
118 case MODE_DELETED:
119 if (Debug == true)
120 std::clog << "Delete from line " << startline << " to " << stopline << std::endl;
121 break;
122 default:
123 _error->Error("rred: Unknown ed command '%c'. Abort.", *idx);
124 return ED_PARSER;
125 }
126 unsigned char mode = *idx;
127
128 // save the current position
129 unsigned const long long pos = ed_cmds.Tell();
130
131 // if this is add or change then go to the next full stop
132 unsigned int data_length = 0;
133 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
134 do {
135 ignoreLineInFile(ed_cmds, buffer);
136 data_length++;
137 }
138 while (strncmp(buffer, ".", 1) != 0);
139 data_length--; // the dot should not be copied
140 }
141
142 // do the recursive call - the last command is the one we need to execute at first
143 const State child = applyFile(ed_cmds, in_file, out_file, line, buffer, hash);
144 if (child != ED_OK) {
145 return child;
146 }
147
148 // change and delete are working on "line" - add is done after "line"
149 if (mode != MODE_ADDED)
150 line++;
151
152 // first wind to the current position and copy over all unchanged lines
153 if (line < startline) {
154 copyLinesFromFileToFile(in_file, out_file, (startline - line), hash, buffer);
155 line = startline;
156 }
157
158 if (mode != MODE_ADDED)
159 line--;
160
161 // include data from ed script
162 if (mode == MODE_CHANGED || mode == MODE_ADDED) {
163 ed_cmds.Seek(pos);
164 copyLinesFromFileToFile(ed_cmds, out_file, data_length, hash, buffer);
165 }
166
167 // ignore the corresponding number of lines from input
168 if (mode == MODE_CHANGED || mode == MODE_DELETED) {
169 while (line < stopline) {
170 ignoreLineInFile(in_file, buffer);
171 line++;
172 }
173 }
174 return ED_OK;
175 }
176 /*}}}*/
177 void RredMethod::copyLinesFromFileToFile(FileFd &fin, FileFd &fout, unsigned int lines,/*{{{*/
178 Hashes *hash, char *buffer) const {
179 while (0 < lines--) {
180 do {
181 fin.ReadLine(buffer, BUF_SIZE);
182 unsigned long long const towrite = strlen(buffer);
183 fout.Write(buffer, towrite);
184 hash->Add((unsigned char*)buffer, towrite);
185 } while (strlen(buffer) == (BUF_SIZE - 1) &&
186 buffer[BUF_SIZE - 2] != '\n');
187 }
188 }
189 /*}}}*/
190 void RredMethod::ignoreLineInFile(FileFd &fin, char *buffer) const { /*{{{*/
191 fin.ReadLine(buffer, BUF_SIZE);
192 while (strlen(buffer) == (BUF_SIZE - 1) &&
193 buffer[BUF_SIZE - 2] != '\n') {
194 fin.ReadLine(buffer, BUF_SIZE);
195 buffer[0] = ' ';
196 }
197 }
198 /*}}}*/
199 RredMethod::State RredMethod::patchFile(FileFd &Patch, FileFd &From, /*{{{*/
200 FileFd &out_file, Hashes *hash) const {
201 char buffer[BUF_SIZE];
202
203 /* we do a tail recursion to read the commands in the right order */
204 unsigned long line = -1; // assign highest possible value
205 State const result = applyFile(Patch, From, out_file, line, buffer, hash);
206
207 /* read the rest from infile */
208 if (result == ED_OK) {
209 while (From.ReadLine(buffer, BUF_SIZE) != NULL) {
210 unsigned long long const towrite = strlen(buffer);
211 out_file.Write(buffer, towrite);
212 hash->Add((unsigned char*)buffer, towrite);
213 }
214 }
215 return result;
216 }
217 /*}}}*/
218 /* struct EdCommand {{{*/
219 #ifdef _POSIX_MAPPED_FILES
/** \brief a single ed command as gathered by the mmap based patcher */
struct EdCommand {
   /// offset of the first data byte inside the mapped patch (0 without data)
   size_t data_start;
   /// offset just behind the last data byte inside the mapped patch
   size_t data_end;
   /// number of lines the data consists of
   size_t data_lines;
   /// first line of the input the command refers to
   size_t first_line;
   /// last line of the input the command refers to
   size_t last_line;
   /// command character: 'a', 'c' or 'd'
   char type;
};
/* number of vectors gathered before they are handed to writev - we don't
   really want IOV_MAX since it can be arbitrarily large */
#define IOV_COUNT 1024
229 static ssize_t retry_writev(int fd, const struct iovec *iov, int iovcnt) {
230 ssize_t Res;
231 errno = 0;
232 ssize_t i = 0;
233 do {
234 Res = writev(fd, iov + i, iovcnt);
235 if (Res < 0 && errno == EINTR)
236 continue;
237 if (Res < 0)
238 return _error->Errno("writev",_("Write error"));
239 iovcnt -= Res;
240 i += Res;
241 } while (Res > 0 && iovcnt > 0);
242 return i;
243 }
244 #endif
245 /*}}}*/
246 RredMethod::State RredMethod::patchMMap(FileFd &Patch, FileFd &From, /*{{{*/
247 FileFd &out_file, Hashes *hash) const {
248 #ifdef _POSIX_MAPPED_FILES
249 MMap ed_cmds(Patch, MMap::ReadOnly);
250 MMap in_file(From, MMap::ReadOnly);
251
252 unsigned long long const ed_size = ed_cmds.Size();
253 unsigned long long const in_size = in_file.Size();
254 if (ed_size == 0 || in_size == 0)
255 return MMAP_FAILED;
256
257 EdCommand* commands = 0;
258 size_t command_count = 0;
259 size_t command_alloc = 0;
260
261 const char* begin = (char*) ed_cmds.Data();
262 const char* end = begin;
263 const char* ed_end = (char*) ed_cmds.Data() + ed_size;
264
265 const char* input = (char*) in_file.Data();
266 const char* input_end = (char*) in_file.Data() + in_size;
267
268 size_t i;
269
270 /* 1. Parse entire script. It is executed in reverse order, so we cather it
271 * in the `commands' buffer first
272 */
273
274 for(;;) {
275 EdCommand cmd;
276 cmd.data_start = 0;
277 cmd.data_end = 0;
278
279 while(begin != ed_end && *begin == '\n')
280 ++begin;
281 while(end != ed_end && *end != '\n')
282 ++end;
283 if(end == ed_end && begin == end)
284 break;
285
286 /* Determine command range */
287 const char* tmp = begin;
288
289 for(;;) {
290 /* atoll is safe despite lacking NUL-termination; we know there's an
291 * alphabetic character at end[-1]
292 */
293 if(tmp == end) {
294 cmd.first_line = atol(begin);
295 cmd.last_line = cmd.first_line;
296 break;
297 }
298 if(*tmp == ',') {
299 cmd.first_line = atol(begin);
300 cmd.last_line = atol(tmp + 1);
301 break;
302 }
303 ++tmp;
304 }
305
306 // which command to execute on this line(s)?
307 switch (end[-1]) {
308 case MODE_CHANGED:
309 if (Debug == true)
310 std::clog << "Change from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
311 break;
312 case MODE_ADDED:
313 if (Debug == true)
314 std::clog << "Insert after line " << cmd.first_line << std::endl;
315 break;
316 case MODE_DELETED:
317 if (Debug == true)
318 std::clog << "Delete from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
319 break;
320 default:
321 _error->Error("rred: Unknown ed command '%c'. Abort.", end[-1]);
322 free(commands);
323 return ED_PARSER;
324 }
325 cmd.type = end[-1];
326
327 /* Determine the size of the inserted text, so we don't have to scan this
328 * text again later.
329 */
330 begin = end + 1;
331 end = begin;
332 cmd.data_lines = 0;
333
334 if(cmd.type == MODE_ADDED || cmd.type == MODE_CHANGED) {
335 cmd.data_start = begin - (char*) ed_cmds.Data();
336 while(end != ed_end) {
337 if(*end == '\n') {
338 if(end[-1] == '.' && end[-2] == '\n')
339 break;
340 ++cmd.data_lines;
341 }
342 ++end;
343 }
344 cmd.data_end = end - (char*) ed_cmds.Data() - 1;
345 begin = end + 1;
346 end = begin;
347 }
348 if(command_count == command_alloc) {
349 command_alloc = (command_alloc + 64) * 3 / 2;
350 EdCommand* newCommands = (EdCommand*) realloc(commands, command_alloc * sizeof(EdCommand));
351 if (newCommands == NULL) {
352 free(commands);
353 return MMAP_FAILED;
354 }
355 commands = newCommands;
356 }
357 commands[command_count++] = cmd;
358 }
359
360 struct iovec* iov = new struct iovec[IOV_COUNT];
361 size_t iov_size = 0;
362
363 size_t amount, remaining;
364 size_t line = 1;
365 EdCommand* cmd;
366
367 /* 2. Execute script. We gather writes in a `struct iov' array, and flush
368 * using writev to minimize the number of system calls. Data is read
369 * directly from the memory mappings of the input file and the script.
370 */
371
372 for(i = command_count; i-- > 0; ) {
373 cmd = &commands[i];
374 if(cmd->type == MODE_ADDED)
375 amount = cmd->first_line + 1;
376 else
377 amount = cmd->first_line;
378
379 if(line < amount) {
380 begin = input;
381 while(line != amount) {
382 input = (const char*) memchr(input, '\n', input_end - input);
383 if(!input)
384 break;
385 ++line;
386 ++input;
387 }
388
389 iov[iov_size].iov_base = (void*) begin;
390 iov[iov_size].iov_len = input - begin;
391 hash->Add((const unsigned char*) begin, input - begin);
392
393 if(++iov_size == IOV_COUNT) {
394 retry_writev(out_file.Fd(), iov, IOV_COUNT);
395 iov_size = 0;
396 }
397 }
398
399 if(cmd->type == MODE_DELETED || cmd->type == MODE_CHANGED) {
400 remaining = (cmd->last_line - cmd->first_line) + 1;
401 line += remaining;
402 while(remaining) {
403 input = (const char*) memchr(input, '\n', input_end - input);
404 if(!input)
405 break;
406 --remaining;
407 ++input;
408 }
409 }
410
411 if(cmd->type == MODE_CHANGED || cmd->type == MODE_ADDED) {
412 if(cmd->data_end != cmd->data_start) {
413 iov[iov_size].iov_base = (void*) ((char*)ed_cmds.Data() + cmd->data_start);
414 iov[iov_size].iov_len = cmd->data_end - cmd->data_start;
415 hash->Add((const unsigned char*) ((char*)ed_cmds.Data() + cmd->data_start),
416 iov[iov_size].iov_len);
417
418 if(++iov_size == IOV_COUNT) {
419 retry_writev(out_file.Fd(), iov, IOV_COUNT);
420 iov_size = 0;
421 }
422 }
423 }
424 }
425
426 if(input != input_end) {
427 iov[iov_size].iov_base = (void*) input;
428 iov[iov_size].iov_len = input_end - input;
429 hash->Add((const unsigned char*) input, input_end - input);
430 ++iov_size;
431 }
432
433 if(iov_size) {
434 retry_writev(out_file.Fd(), iov, iov_size);
435 iov_size = 0;
436 }
437
438 for(i = 0; i < iov_size; i += IOV_COUNT) {
439 if(iov_size - i < IOV_COUNT)
440 retry_writev(out_file.Fd(), iov + i, iov_size - i);
441 else
442 retry_writev(out_file.Fd(), iov + i, IOV_COUNT);
443 }
444
445 delete [] iov;
446 free(commands);
447
448 return ED_OK;
449 #else
450 return MMAP_FAILED;
451 #endif
452 }
453 /*}}}*/
454 bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/
455 {
456 Debug = _config->FindB("Debug::pkgAcquire::RRed", false);
457 URI Get = Itm->Uri;
458 std::string Path = Get.Host + Get.Path; // To account for relative paths
459
460 FetchResult Res;
461 Res.Filename = Itm->DestFile;
462 if (Itm->Uri.empty() == true) {
463 Path = Itm->DestFile;
464 Itm->DestFile.append(".result");
465 } else
466 URIStart(Res);
467
468 if (Debug == true)
469 std::clog << "Patching " << Path << " with " << Path
470 << ".ed and putting result into " << Itm->DestFile << std::endl;
471 // Open the source and destination files (the d'tor of FileFd will do
472 // the cleanup/closing of the fds)
473 FileFd From(Path,FileFd::ReadOnly);
474 FileFd Patch(Path+".ed",FileFd::ReadOnly, FileFd::Gzip);
475 FileFd To(Itm->DestFile,FileFd::WriteAtomic);
476 To.EraseOnFailure();
477 if (_error->PendingError() == true)
478 return false;
479
480 Hashes Hash;
481 // now do the actual patching
482 State const result = patchMMap(Patch, From, To, &Hash);
483 if (result == MMAP_FAILED) {
484 // retry with patchFile
485 Patch.Seek(0);
486 From.Seek(0);
487 To.Open(Itm->DestFile,FileFd::WriteAtomic);
488 if (_error->PendingError() == true)
489 return false;
490 if (patchFile(Patch, From, To, &Hash) != ED_OK) {
491 return _error->WarningE("rred", _("Could not patch %s with mmap and with file operation usage - the patch seems to be corrupt."), Path.c_str());
492 } else if (Debug == true) {
493 std::clog << "rred: finished file patching of " << Path << " after mmap failed." << std::endl;
494 }
495 } else if (result != ED_OK) {
496 return _error->Errno("rred", _("Could not patch %s with mmap (but no mmap specific fail) - the patch seems to be corrupt."), Path.c_str());
497 } else if (Debug == true) {
498 std::clog << "rred: finished mmap patching of " << Path << std::endl;
499 }
500
501 // write out the result
502 From.Close();
503 Patch.Close();
504 To.Close();
505
506 /* Transfer the modification times from the patch file
507 to be able to see in which state the file should be
508 and use the access time from the "old" file */
509 struct stat BufBase, BufPatch;
510 if (stat(Path.c_str(),&BufBase) != 0 ||
511 stat(std::string(Path+".ed").c_str(),&BufPatch) != 0)
512 return _error->Errno("stat",_("Failed to stat"));
513
514 struct utimbuf TimeBuf;
515 TimeBuf.actime = BufBase.st_atime;
516 TimeBuf.modtime = BufPatch.st_mtime;
517 if (utime(Itm->DestFile.c_str(),&TimeBuf) != 0)
518 return _error->Errno("utime",_("Failed to set modification time"));
519
520 if (stat(Itm->DestFile.c_str(),&BufBase) != 0)
521 return _error->Errno("stat",_("Failed to stat"));
522
523 // return done
524 Res.LastModified = BufBase.st_mtime;
525 Res.Size = BufBase.st_size;
526 Res.TakeHashes(Hash);
527 URIDone(Res);
528
529 return true;
530 }
531 /*}}}*/
532 /** \brief Wrapper class for testing rred */ /*{{{*/
533 class TestRredMethod : public RredMethod {
534 public:
535 /** \brief Run rred in debug test mode
536 *
537 * This method can be used to run the rred method outside
538 * of the "normal" acquire environment for easier testing.
539 *
540 * \param base basename of all files involved in this rred test
541 */
542 bool Run(char const *base) {
543 _config->CndSet("Debug::pkgAcquire::RRed", "true");
544 FetchItem *test = new FetchItem;
545 test->DestFile = base;
546 return Fetch(test);
547 }
548 };
549 /*}}}*/
550 /** \brief Starter for the rred method (or its test method) {{{
551 *
552 * Used without parameters is the normal behavior for methods for
553 * the APT acquire system. While this works great for the acquire system
554 * it is very hard to test the method and therefore the method also
555 * accepts one parameter which will switch it directly to debug test mode:
556 * The test mode expects that if "Testfile" is given as parameter
557 * the file "Testfile" should be ed-style patched with "Testfile.ed"
558 * and will write the result to "Testfile.result".
559 */
560 int main(int argc, char *argv[]) {
561 if (argc <= 1) {
562 RredMethod Mth;
563 return Mth.Run();
564 } else {
565 TestRredMethod Mth;
566 bool result = Mth.Run(argv[1]);
567 _error->DumpErrors();
568 return result;
569 }
570 }
571 /*}}}*/