Merge branch 'debian/sid' into bugfix/bts731738-fancy-progess
[ntk/apt.git] / apt-pkg / tagfile.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: tagfile.cc,v 1.37.2.2 2003/12/31 16:02:30 mdz Exp $
4 /* ######################################################################
5
6 Fast scanner for RFC-822 type header information
7
8 This uses a rotating buffer to load the package information into.
9 The scanner runs over it and isolates and indexes a single section.
10
11 ##################################################################### */
12 /*}}}*/
13 // Include Files /*{{{*/
14 #include<config.h>
15
16 #include <apt-pkg/tagfile.h>
17 #include <apt-pkg/error.h>
18 #include <apt-pkg/strutl.h>
19 #include <apt-pkg/fileutl.h>
20
21 #include <string>
22 #include <stdio.h>
23 #include <ctype.h>
24
25 #include <apti18n.h>
26 /*}}}*/
27
28 using std::string;
29
30 class pkgTagFilePrivate
31 {
32 public:
33 pkgTagFilePrivate(FileFd *pFd, unsigned long long Size) : Fd(*pFd), Buffer(NULL),
34 Start(NULL), End(NULL),
35 Done(false), iOffset(0),
36 Size(Size)
37 {
38 }
39 FileFd &Fd;
40 char *Buffer;
41 char *Start;
42 char *End;
43 bool Done;
44 unsigned long long iOffset;
45 unsigned long long Size;
46 };
47
48 // TagFile::pkgTagFile - Constructor /*{{{*/
49 // ---------------------------------------------------------------------
50 /* */
51 pkgTagFile::pkgTagFile(FileFd *pFd,unsigned long long Size)
52 {
53 /* The size is increased by 4 because if we start with the Size of the
54 filename we need to try to read 1 char more to see an EOF faster, 1
55 char the end-pointer can be on and maybe 2 newlines need to be added
56 to the end of the file -> 4 extra chars */
57 Size += 4;
58 d = new pkgTagFilePrivate(pFd, Size);
59
60 if (d->Fd.IsOpen() == false)
61 d->Start = d->End = d->Buffer = 0;
62 else
63 d->Buffer = (char*)malloc(sizeof(char) * Size);
64
65 if (d->Buffer == NULL)
66 d->Done = true;
67 else
68 d->Done = false;
69
70 d->Start = d->End = d->Buffer;
71 d->iOffset = 0;
72 if (d->Done == false)
73 Fill();
74 }
75 /*}}}*/
76 // TagFile::~pkgTagFile - Destructor /*{{{*/
77 // ---------------------------------------------------------------------
78 /* */
79 pkgTagFile::~pkgTagFile()
80 {
81 free(d->Buffer);
82 delete d;
83 }
84 /*}}}*/
85 // TagFile::Offset - Return the current offset in the buffer /*{{{*/
86 unsigned long pkgTagFile::Offset()
87 {
88 return d->iOffset;
89 }
90 /*}}}*/
91 // TagFile::Resize - Resize the internal buffer /*{{{*/
92 // ---------------------------------------------------------------------
/* Resize the internal buffer (double it in size). Fail if the maximum
 * size is reached.
 */
96 bool pkgTagFile::Resize()
97 {
98 // fail is the buffer grows too big
99 if(d->Size > 1024*1024+1)
100 return false;
101
102 return Resize(d->Size * 2);
103 }
104 bool pkgTagFile::Resize(unsigned long long const newSize)
105 {
106 unsigned long long const EndSize = d->End - d->Start;
107
108 // get new buffer and use it
109 char* newBuffer = (char*)realloc(d->Buffer, sizeof(char) * newSize);
110 if (newBuffer == NULL)
111 return false;
112 d->Buffer = newBuffer;
113 d->Size = newSize;
114
115 // update the start/end pointers to the new buffer
116 d->Start = d->Buffer;
117 d->End = d->Start + EndSize;
118 return true;
119 }
120 /*}}}*/
121 // TagFile::Step - Advance to the next section /*{{{*/
122 // ---------------------------------------------------------------------
123 /* If the Section Scanner fails we refill the buffer and try again.
124 * If that fails too, double the buffer size and try again until a
125 * maximum buffer is reached.
126 */
127 bool pkgTagFile::Step(pkgTagSection &Tag)
128 {
129 while (Tag.Scan(d->Start,d->End - d->Start) == false)
130 {
131 if (Fill() == false)
132 return false;
133
134 if(Tag.Scan(d->Start,d->End - d->Start))
135 break;
136
137 if (Resize() == false)
138 return _error->Error(_("Unable to parse package file %s (1)"),
139 d->Fd.Name().c_str());
140 }
141 d->Start += Tag.size();
142 d->iOffset += Tag.size();
143
144 Tag.Trim();
145 return true;
146 }
147 /*}}}*/
148 // TagFile::Fill - Top up the buffer /*{{{*/
149 // ---------------------------------------------------------------------
150 /* This takes the bit at the end of the buffer and puts it at the start
151 then fills the rest from the file */
152 bool pkgTagFile::Fill()
153 {
154 unsigned long long EndSize = d->End - d->Start;
155 unsigned long long Actual = 0;
156
157 memmove(d->Buffer,d->Start,EndSize);
158 d->Start = d->Buffer;
159 d->End = d->Buffer + EndSize;
160
161 if (d->Done == false)
162 {
163 // See if only a bit of the file is left
164 unsigned long long const dataSize = d->Size - ((d->End - d->Buffer) + 1);
165 if (d->Fd.Read(d->End, dataSize, &Actual) == false)
166 return false;
167 if (Actual != dataSize)
168 d->Done = true;
169 d->End += Actual;
170 }
171
172 if (d->Done == true)
173 {
174 if (EndSize <= 3 && Actual == 0)
175 return false;
176 if (d->Size - (d->End - d->Buffer) < 4)
177 return true;
178
179 // Append a double new line if one does not exist
180 unsigned int LineCount = 0;
181 for (const char *E = d->End - 1; E - d->End < 6 && (*E == '\n' || *E == '\r'); E--)
182 if (*E == '\n')
183 LineCount++;
184 if (LineCount < 2)
185 {
186 if ((unsigned)(d->End - d->Buffer) >= d->Size)
187 Resize(d->Size + 3);
188 for (; LineCount < 2; LineCount++)
189 *d->End++ = '\n';
190 }
191
192 return true;
193 }
194
195 return true;
196 }
197 /*}}}*/
198 // TagFile::Jump - Jump to a pre-recorded location in the file /*{{{*/
199 // ---------------------------------------------------------------------
200 /* This jumps to a pre-recorded file location and reads the record
201 that is there */
202 bool pkgTagFile::Jump(pkgTagSection &Tag,unsigned long long Offset)
203 {
204 // We are within a buffer space of the next hit..
205 if (Offset >= d->iOffset && d->iOffset + (d->End - d->Start) > Offset)
206 {
207 unsigned long long Dist = Offset - d->iOffset;
208 d->Start += Dist;
209 d->iOffset += Dist;
210 return Step(Tag);
211 }
212
213 // Reposition and reload..
214 d->iOffset = Offset;
215 d->Done = false;
216 if (d->Fd.Seek(Offset) == false)
217 return false;
218 d->End = d->Start = d->Buffer;
219
220 if (Fill() == false)
221 return false;
222
223 if (Tag.Scan(d->Start, d->End - d->Start) == true)
224 return true;
225
226 // This appends a double new line (for the real eof handling)
227 if (Fill() == false)
228 return false;
229
230 if (Tag.Scan(d->Start, d->End - d->Start) == false)
231 return _error->Error(_("Unable to parse package file %s (2)"),d->Fd.Name().c_str());
232
233 return true;
234 }
235 /*}}}*/
236 // pkgTagSection::pkgTagSection - Constructor /*{{{*/
237 // ---------------------------------------------------------------------
238 /* */
239 pkgTagSection::pkgTagSection()
240 : Section(0), TagCount(0), d(NULL), Stop(0)
241 {
242 memset(&Indexes, 0, sizeof(Indexes));
243 memset(&AlphaIndexes, 0, sizeof(AlphaIndexes));
244 }
245 /*}}}*/
246 // TagSection::Scan - Scan for the end of the header information /*{{{*/
247 // ---------------------------------------------------------------------
248 /* This looks for the first double new line in the data stream.
249 It also indexes the tags in the section. */
250 bool pkgTagSection::Scan(const char *Start,unsigned long MaxLength)
251 {
252 const char *End = Start + MaxLength;
253 Stop = Section = Start;
254 memset(AlphaIndexes,0,sizeof(AlphaIndexes));
255
256 if (Stop == 0)
257 return false;
258
259 TagCount = 0;
260 while (TagCount+1 < sizeof(Indexes)/sizeof(Indexes[0]) && Stop < End)
261 {
262 TrimRecord(true,End);
263
264 // this can happen when TrimRecord trims away the entire Record
265 // (e.g. because it just contains comments)
266 if(Stop == End)
267 return true;
268
269 // Start a new index and add it to the hash
270 if (isspace(Stop[0]) == 0)
271 {
272 Indexes[TagCount++] = Stop - Section;
273 AlphaIndexes[AlphaHash(Stop,End)] = TagCount;
274 }
275
276 Stop = (const char *)memchr(Stop,'\n',End - Stop);
277
278 if (Stop == 0)
279 return false;
280
281 for (; Stop+1 < End && Stop[1] == '\r'; Stop++)
282 /* nothing */
283 ;
284
285 // Double newline marks the end of the record
286 if (Stop+1 < End && Stop[1] == '\n')
287 {
288 Indexes[TagCount] = Stop - Section;
289 TrimRecord(false,End);
290 return true;
291 }
292
293 Stop++;
294 }
295
296 return false;
297 }
298 /*}}}*/
299 // TagSection::TrimRecord - Trim off any garbage before/after a record /*{{{*/
300 // ---------------------------------------------------------------------
/* There should be exactly 2 newlines at the end of the record, no more. */
302 void pkgTagSection::TrimRecord(bool BeforeRecord, const char*& End)
303 {
304 if (BeforeRecord == true)
305 return;
306 for (; Stop < End && (Stop[0] == '\n' || Stop[0] == '\r'); Stop++);
307 }
308 /*}}}*/
309 // TagSection::Trim - Trim off any trailing garbage /*{{{*/
310 // ---------------------------------------------------------------------
311 /* There should be exactly 1 newline at the end of the buffer, no more. */
312 void pkgTagSection::Trim()
313 {
314 for (; Stop > Section + 2 && (Stop[-2] == '\n' || Stop[-2] == '\r'); Stop--);
315 }
316 /*}}}*/
317 // TagSection::Exists - return True if a tag exists /*{{{*/
318 bool pkgTagSection::Exists(const char* const Tag)
319 {
320 unsigned int tmp;
321 return Find(Tag, tmp);
322 }
323 /*}}}*/
324 // TagSection::Find - Locate a tag /*{{{*/
325 // ---------------------------------------------------------------------
326 /* This searches the section for a tag that matches the given string. */
327 bool pkgTagSection::Find(const char *Tag,unsigned int &Pos) const
328 {
329 unsigned int Length = strlen(Tag);
330 unsigned int I = AlphaIndexes[AlphaHash(Tag)];
331 if (I == 0)
332 return false;
333 I--;
334
335 for (unsigned int Counter = 0; Counter != TagCount; Counter++,
336 I = (I+1)%TagCount)
337 {
338 const char *St;
339 St = Section + Indexes[I];
340 if (strncasecmp(Tag,St,Length) != 0)
341 continue;
342
343 // Make sure the colon is in the right place
344 const char *C = St + Length;
345 for (; isspace(*C) != 0; C++);
346 if (*C != ':')
347 continue;
348 Pos = I;
349 return true;
350 }
351
352 Pos = 0;
353 return false;
354 }
355 /*}}}*/
356 // TagSection::Find - Locate a tag /*{{{*/
357 // ---------------------------------------------------------------------
358 /* This searches the section for a tag that matches the given string. */
359 bool pkgTagSection::Find(const char *Tag,const char *&Start,
360 const char *&End) const
361 {
362 unsigned int Length = strlen(Tag);
363 unsigned int I = AlphaIndexes[AlphaHash(Tag)];
364 if (I == 0)
365 return false;
366 I--;
367
368 for (unsigned int Counter = 0; Counter != TagCount; Counter++,
369 I = (I+1)%TagCount)
370 {
371 const char *St;
372 St = Section + Indexes[I];
373 if (strncasecmp(Tag,St,Length) != 0)
374 continue;
375
376 // Make sure the colon is in the right place
377 const char *C = St + Length;
378 for (; isspace(*C) != 0; C++);
379 if (*C != ':')
380 continue;
381
382 // Strip off the gunk from the start end
383 Start = C;
384 End = Section + Indexes[I+1];
385 if (Start >= End)
386 return _error->Error("Internal parsing error");
387
388 for (; (isspace(*Start) != 0 || *Start == ':') && Start < End; Start++);
389 for (; isspace(End[-1]) != 0 && End > Start; End--);
390
391 return true;
392 }
393
394 Start = End = 0;
395 return false;
396 }
397 /*}}}*/
398 // TagSection::FindS - Find a string /*{{{*/
399 // ---------------------------------------------------------------------
400 /* */
401 string pkgTagSection::FindS(const char *Tag) const
402 {
403 const char *Start;
404 const char *End;
405 if (Find(Tag,Start,End) == false)
406 return string();
407 return string(Start,End);
408 }
409 /*}}}*/
410 // TagSection::FindI - Find an integer /*{{{*/
411 // ---------------------------------------------------------------------
412 /* */
413 signed int pkgTagSection::FindI(const char *Tag,signed long Default) const
414 {
415 const char *Start;
416 const char *Stop;
417 if (Find(Tag,Start,Stop) == false)
418 return Default;
419
420 // Copy it into a temp buffer so we can use strtol
421 char S[300];
422 if ((unsigned)(Stop - Start) >= sizeof(S))
423 return Default;
424 strncpy(S,Start,Stop-Start);
425 S[Stop - Start] = 0;
426
427 char *End;
428 signed long Result = strtol(S,&End,10);
429 if (S == End)
430 return Default;
431 return Result;
432 }
433 /*}}}*/
434 // TagSection::FindULL - Find an unsigned long long integer /*{{{*/
435 // ---------------------------------------------------------------------
436 /* */
437 unsigned long long pkgTagSection::FindULL(const char *Tag, unsigned long long const &Default) const
438 {
439 const char *Start;
440 const char *Stop;
441 if (Find(Tag,Start,Stop) == false)
442 return Default;
443
444 // Copy it into a temp buffer so we can use strtoull
445 char S[100];
446 if ((unsigned)(Stop - Start) >= sizeof(S))
447 return Default;
448 strncpy(S,Start,Stop-Start);
449 S[Stop - Start] = 0;
450
451 char *End;
452 unsigned long long Result = strtoull(S,&End,10);
453 if (S == End)
454 return Default;
455 return Result;
456 }
457 /*}}}*/
458 // TagSection::FindFlag - Locate a yes/no type flag /*{{{*/
459 // ---------------------------------------------------------------------
460 /* The bits marked in Flag are masked on/off in Flags */
461 bool pkgTagSection::FindFlag(const char *Tag,unsigned long &Flags,
462 unsigned long Flag) const
463 {
464 const char *Start;
465 const char *Stop;
466 if (Find(Tag,Start,Stop) == false)
467 return true;
468 return FindFlag(Flags, Flag, Start, Stop);
469 }
470 bool const pkgTagSection::FindFlag(unsigned long &Flags, unsigned long Flag,
471 char const* Start, char const* Stop)
472 {
473 switch (StringToBool(string(Start, Stop)))
474 {
475 case 0:
476 Flags &= ~Flag;
477 return true;
478
479 case 1:
480 Flags |= Flag;
481 return true;
482
483 default:
484 _error->Warning("Unknown flag value: %s",string(Start,Stop).c_str());
485 return true;
486 }
487 return true;
488 }
489 /*}}}*/
490 // TFRewrite - Rewrite a control record /*{{{*/
491 // ---------------------------------------------------------------------
/* This writes the control record to Output, rewriting it as necessary. The
   override map item specifies the rewriting rules to follow. This also
   takes the time to sort the field list. */
495
496 /* The order of this list is taken from dpkg source lib/parse.c the fieldinfos
497 array. */
// Field order for binary package records; the list ends with a 0 entry
static const char *iTFRewritePackageOrder[] = {
   "Package", "Essential", "Status", "Priority", "Section",
   "Installed-Size", "Maintainer", "Original-Maintainer",
   "Architecture", "Source", "Version",
   "Revision",         // Obsolete
   "Config-Version",   // Obsolete
   "Replaces", "Provides", "Depends", "Pre-Depends", "Recommends",
   "Suggests", "Conflicts", "Breaks", "Conffiles", "Filename", "Size",
   "MD5Sum", "SHA1", "SHA256", "SHA512",
   "MSDOS-Filename",   // Obsolete
   "Description",
   0
};
// Field order for source package records; the list ends with a 0 entry
static const char *iTFRewriteSourceOrder[] = {
   "Package", "Source", "Binary", "Version", "Priority", "Section",
   "Maintainer", "Original-Maintainer",
   "Build-Depends", "Build-Depends-Indep",
   "Build-Conflicts", "Build-Conflicts-Indep",
   "Architecture", "Standards-Version", "Format", "Directory", "Files",
   0
};
548
549 /* Two levels of initialization are used because gcc will set the symbol
550 size of an array to the length of the array, causing dynamic relinking
551 errors. Doing this makes the symbol size constant */
552 const char **TFRewritePackageOrder = iTFRewritePackageOrder;
553 const char **TFRewriteSourceOrder = iTFRewriteSourceOrder;
554
555 bool TFRewrite(FILE *Output,pkgTagSection const &Tags,const char *Order[],
556 TFRewriteData *Rewrite)
557 {
558 unsigned char Visited[256]; // Bit 1 is Order, Bit 2 is Rewrite
559 for (unsigned I = 0; I != 256; I++)
560 Visited[I] = 0;
561
562 // Set new tag up as necessary.
563 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
564 {
565 if (Rewrite[J].NewTag == 0)
566 Rewrite[J].NewTag = Rewrite[J].Tag;
567 }
568
569 // Write all all of the tags, in order.
570 for (unsigned int I = 0; Order[I] != 0; I++)
571 {
572 bool Rewritten = false;
573
574 // See if this is a field that needs to be rewritten
575 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
576 {
577 if (strcasecmp(Rewrite[J].Tag,Order[I]) == 0)
578 {
579 Visited[J] |= 2;
580 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
581 {
582 if (isspace(Rewrite[J].Rewrite[0]))
583 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
584 else
585 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
586 }
587
588 Rewritten = true;
589 break;
590 }
591 }
592
593 // See if it is in the fragment
594 unsigned Pos;
595 if (Tags.Find(Order[I],Pos) == false)
596 continue;
597 Visited[Pos] |= 1;
598
599 if (Rewritten == true)
600 continue;
601
602 /* Write out this element, taking a moment to rewrite the tag
603 in case of changes of case. */
604 const char *Start;
605 const char *Stop;
606 Tags.Get(Start,Stop,Pos);
607
608 if (fputs(Order[I],Output) < 0)
609 return _error->Errno("fputs","IO Error to output");
610 Start += strlen(Order[I]);
611 if (fwrite(Start,Stop - Start,1,Output) != 1)
612 return _error->Errno("fwrite","IO Error to output");
613 if (Stop[-1] != '\n')
614 fprintf(Output,"\n");
615 }
616
617 // Now write all the old tags that were missed.
618 for (unsigned int I = 0; I != Tags.Count(); I++)
619 {
620 if ((Visited[I] & 1) == 1)
621 continue;
622
623 const char *Start;
624 const char *Stop;
625 Tags.Get(Start,Stop,I);
626 const char *End = Start;
627 for (; End < Stop && *End != ':'; End++);
628
629 // See if this is a field that needs to be rewritten
630 bool Rewritten = false;
631 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
632 {
633 if (stringcasecmp(Start,End,Rewrite[J].Tag) == 0)
634 {
635 Visited[J] |= 2;
636 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
637 {
638 if (isspace(Rewrite[J].Rewrite[0]))
639 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
640 else
641 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
642 }
643
644 Rewritten = true;
645 break;
646 }
647 }
648
649 if (Rewritten == true)
650 continue;
651
652 // Write out this element
653 if (fwrite(Start,Stop - Start,1,Output) != 1)
654 return _error->Errno("fwrite","IO Error to output");
655 if (Stop[-1] != '\n')
656 fprintf(Output,"\n");
657 }
658
659 // Now write all the rewrites that were missed
660 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
661 {
662 if ((Visited[J] & 2) == 2)
663 continue;
664
665 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
666 {
667 if (isspace(Rewrite[J].Rewrite[0]))
668 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
669 else
670 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
671 }
672 }
673
674 return true;
675 }
676 /*}}}*/