pkgTagFile: if we have seen the end, do not try to see more
[ntk/apt.git] / apt-pkg / tagfile.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: tagfile.cc,v 1.37.2.2 2003/12/31 16:02:30 mdz Exp $
4 /* ######################################################################
5
6 Fast scanner for RFC-822 type header information
7
8 This uses a rotating buffer to load the package information into.
9 The scanner runs over it and isolates and indexes a single section.
10
11 ##################################################################### */
12 /*}}}*/
13 // Include Files /*{{{*/
14 #include<config.h>
15
16 #include <apt-pkg/tagfile.h>
17 #include <apt-pkg/error.h>
18 #include <apt-pkg/strutl.h>
19 #include <apt-pkg/fileutl.h>
20
21 #include <string>
22 #include <stdio.h>
23 #include <ctype.h>
24
25 #include <apti18n.h>
26 /*}}}*/
27
28 using std::string;
29
30 class pkgTagFilePrivate
31 {
32 public:
33 pkgTagFilePrivate(FileFd *pFd, unsigned long long Size) : Fd(*pFd), Buffer(NULL),
34 Start(NULL), End(NULL),
35 Done(false), iOffset(0),
36 Size(Size)
37 {
38 }
39 FileFd &Fd;
40 char *Buffer;
41 char *Start;
42 char *End;
43 bool Done;
44 unsigned long long iOffset;
45 unsigned long long Size;
46 };
47
48 // TagFile::pkgTagFile - Constructor /*{{{*/
49 // ---------------------------------------------------------------------
50 /* */
51 pkgTagFile::pkgTagFile(FileFd *pFd,unsigned long long Size)
52 {
53 /* The size is increased by 4 because if we start with the Size of the
54 filename we need to try to read 1 char more to see an EOF faster, 1
55 char the end-pointer can be on and maybe 2 newlines need to be added
56 to the end of the file -> 4 extra chars */
57 Size += 4;
58 d = new pkgTagFilePrivate(pFd, Size);
59
60 if (d->Fd.IsOpen() == false)
61 d->Start = d->End = d->Buffer = 0;
62 else
63 d->Buffer = (char*)malloc(sizeof(char) * Size);
64
65 if (d->Buffer == NULL)
66 d->Done = true;
67 else
68 d->Done = false;
69
70 d->Start = d->End = d->Buffer;
71 d->iOffset = 0;
72 if (d->Done == false)
73 Fill();
74 }
75 /*}}}*/
76 // TagFile::~pkgTagFile - Destructor /*{{{*/
77 // ---------------------------------------------------------------------
78 /* */
79 pkgTagFile::~pkgTagFile()
80 {
81 free(d->Buffer);
82 delete d;
83 }
84 /*}}}*/
85 // TagFile::Offset - Return the current offset in the buffer /*{{{*/
86 unsigned long pkgTagFile::Offset()
87 {
88 return d->iOffset;
89 }
90 /*}}}*/
91 // TagFile::Resize - Resize the internal buffer /*{{{*/
92 // ---------------------------------------------------------------------
93 /* Resize the internal buffer (double it in size). Fail if a maximum
94 * size is reached.
95 */
96 bool pkgTagFile::Resize()
97 {
98 // fail is the buffer grows too big
99 if(d->Size > 1024*1024+1)
100 return false;
101
102 return Resize(d->Size * 2);
103 }
104 bool pkgTagFile::Resize(unsigned long long const newSize)
105 {
106 unsigned long long const EndSize = d->End - d->Start;
107
108 // get new buffer and use it
109 char* newBuffer = (char*)realloc(d->Buffer, sizeof(char) * newSize);
110 if (newBuffer == NULL)
111 return false;
112 d->Buffer = newBuffer;
113 d->Size = newSize;
114
115 // update the start/end pointers to the new buffer
116 d->Start = d->Buffer;
117 d->End = d->Start + EndSize;
118 return true;
119 }
120 /*}}}*/
121 // TagFile::Step - Advance to the next section /*{{{*/
122 // ---------------------------------------------------------------------
123 /* If the Section Scanner fails we refill the buffer and try again.
124 * If that fails too, double the buffer size and try again until a
125 * maximum buffer is reached.
126 */
127 bool pkgTagFile::Step(pkgTagSection &Tag)
128 {
129 while (Tag.Scan(d->Start,d->End - d->Start) == false)
130 {
131 if (Fill() == false)
132 return false;
133
134 if(Tag.Scan(d->Start,d->End - d->Start))
135 break;
136
137 if (Resize() == false)
138 return _error->Error(_("Unable to parse package file %s (1)"),
139 d->Fd.Name().c_str());
140 }
141 d->Start += Tag.size();
142 d->iOffset += Tag.size();
143
144 Tag.Trim();
145 return true;
146 }
147 /*}}}*/
148 // TagFile::Fill - Top up the buffer /*{{{*/
149 // ---------------------------------------------------------------------
150 /* This takes the bit at the end of the buffer and puts it at the start
151 then fills the rest from the file */
152 bool pkgTagFile::Fill()
153 {
154 unsigned long long EndSize = d->End - d->Start;
155 unsigned long long Actual = 0;
156
157 memmove(d->Buffer,d->Start,EndSize);
158 d->Start = d->Buffer;
159 d->End = d->Buffer + EndSize;
160
161 if (d->Done == false)
162 {
163 // See if only a bit of the file is left
164 unsigned long long const dataSize = d->Size - ((d->End - d->Buffer) + 1);
165 if (d->Fd.Read(d->End, dataSize, &Actual) == false)
166 return false;
167 if (Actual != dataSize)
168 d->Done = true;
169 d->End += Actual;
170 }
171
172 if (d->Done == true)
173 {
174 if (EndSize <= 3 && Actual == 0)
175 return false;
176 if (d->Size - (d->End - d->Buffer) < 4)
177 return true;
178
179 // Append a double new line if one does not exist
180 unsigned int LineCount = 0;
181 for (const char *E = d->End - 1; E - d->End < 6 && (*E == '\n' || *E == '\r'); E--)
182 if (*E == '\n')
183 LineCount++;
184 if (LineCount < 2)
185 {
186 if ((unsigned)(d->End - d->Buffer) >= d->Size)
187 Resize(d->Size + 3);
188 for (; LineCount < 2; LineCount++)
189 *d->End++ = '\n';
190 }
191
192 return true;
193 }
194
195 return true;
196 }
197 /*}}}*/
198 // TagFile::Jump - Jump to a pre-recorded location in the file /*{{{*/
199 // ---------------------------------------------------------------------
200 /* This jumps to a pre-recorded file location and reads the record
201 that is there */
202 bool pkgTagFile::Jump(pkgTagSection &Tag,unsigned long long Offset)
203 {
204 // We are within a buffer space of the next hit..
205 if (Offset >= d->iOffset && d->iOffset + (d->End - d->Start) > Offset)
206 {
207 unsigned long long Dist = Offset - d->iOffset;
208 d->Start += Dist;
209 d->iOffset += Dist;
210 // if we have seen the end, don't ask for more
211 if (d->Done == true)
212 return Tag.Scan(d->Start, d->End - d->Start);
213 else
214 return Step(Tag);
215 }
216
217 // Reposition and reload..
218 d->iOffset = Offset;
219 d->Done = false;
220 if (d->Fd.Seek(Offset) == false)
221 return false;
222 d->End = d->Start = d->Buffer;
223
224 if (Fill() == false)
225 return false;
226
227 if (Tag.Scan(d->Start, d->End - d->Start) == true)
228 return true;
229
230 // This appends a double new line (for the real eof handling)
231 if (Fill() == false)
232 return false;
233
234 if (Tag.Scan(d->Start, d->End - d->Start) == false)
235 return _error->Error(_("Unable to parse package file %s (2)"),d->Fd.Name().c_str());
236
237 return true;
238 }
239 /*}}}*/
240 // pkgTagSection::pkgTagSection - Constructor /*{{{*/
241 // ---------------------------------------------------------------------
242 /* */
243 pkgTagSection::pkgTagSection()
244 : Section(0), TagCount(0), d(NULL), Stop(0)
245 {
246 memset(&Indexes, 0, sizeof(Indexes));
247 memset(&AlphaIndexes, 0, sizeof(AlphaIndexes));
248 }
249 /*}}}*/
250 // TagSection::Scan - Scan for the end of the header information /*{{{*/
251 // ---------------------------------------------------------------------
252 /* This looks for the first double new line in the data stream.
253 It also indexes the tags in the section. */
254 bool pkgTagSection::Scan(const char *Start,unsigned long MaxLength)
255 {
256 const char *End = Start + MaxLength;
257 Stop = Section = Start;
258 memset(AlphaIndexes,0,sizeof(AlphaIndexes));
259
260 if (Stop == 0)
261 return false;
262
263 TagCount = 0;
264 while (TagCount+1 < sizeof(Indexes)/sizeof(Indexes[0]) && Stop < End)
265 {
266 TrimRecord(true,End);
267
268 // this can happen when TrimRecord trims away the entire Record
269 // (e.g. because it just contains comments)
270 if(Stop == End)
271 return true;
272
273 // Start a new index and add it to the hash
274 if (isspace(Stop[0]) == 0)
275 {
276 Indexes[TagCount++] = Stop - Section;
277 AlphaIndexes[AlphaHash(Stop,End)] = TagCount;
278 }
279
280 Stop = (const char *)memchr(Stop,'\n',End - Stop);
281
282 if (Stop == 0)
283 return false;
284
285 for (; Stop+1 < End && Stop[1] == '\r'; Stop++)
286 /* nothing */
287 ;
288
289 // Double newline marks the end of the record
290 if (Stop+1 < End && Stop[1] == '\n')
291 {
292 Indexes[TagCount] = Stop - Section;
293 TrimRecord(false,End);
294 return true;
295 }
296
297 Stop++;
298 }
299
300 return false;
301 }
302 /*}}}*/
303 // TagSection::TrimRecord - Trim off any garbage before/after a record /*{{{*/
304 // ---------------------------------------------------------------------
305 /* There should be exactly 2 newline at the end of the record, no more. */
306 void pkgTagSection::TrimRecord(bool BeforeRecord, const char*& End)
307 {
308 if (BeforeRecord == true)
309 return;
310 for (; Stop < End && (Stop[0] == '\n' || Stop[0] == '\r'); Stop++);
311 }
312 /*}}}*/
313 // TagSection::Trim - Trim off any trailing garbage /*{{{*/
314 // ---------------------------------------------------------------------
315 /* There should be exactly 1 newline at the end of the buffer, no more. */
316 void pkgTagSection::Trim()
317 {
318 for (; Stop > Section + 2 && (Stop[-2] == '\n' || Stop[-2] == '\r'); Stop--);
319 }
320 /*}}}*/
321 // TagSection::Exists - return True if a tag exists /*{{{*/
322 bool pkgTagSection::Exists(const char* const Tag)
323 {
324 unsigned int tmp;
325 return Find(Tag, tmp);
326 }
327 /*}}}*/
328 // TagSection::Find - Locate a tag /*{{{*/
329 // ---------------------------------------------------------------------
330 /* This searches the section for a tag that matches the given string. */
331 bool pkgTagSection::Find(const char *Tag,unsigned int &Pos) const
332 {
333 unsigned int Length = strlen(Tag);
334 unsigned int I = AlphaIndexes[AlphaHash(Tag)];
335 if (I == 0)
336 return false;
337 I--;
338
339 for (unsigned int Counter = 0; Counter != TagCount; Counter++,
340 I = (I+1)%TagCount)
341 {
342 const char *St;
343 St = Section + Indexes[I];
344 if (strncasecmp(Tag,St,Length) != 0)
345 continue;
346
347 // Make sure the colon is in the right place
348 const char *C = St + Length;
349 for (; isspace(*C) != 0; C++);
350 if (*C != ':')
351 continue;
352 Pos = I;
353 return true;
354 }
355
356 Pos = 0;
357 return false;
358 }
359 /*}}}*/
360 // TagSection::Find - Locate a tag /*{{{*/
361 // ---------------------------------------------------------------------
362 /* This searches the section for a tag that matches the given string. */
363 bool pkgTagSection::Find(const char *Tag,const char *&Start,
364 const char *&End) const
365 {
366 unsigned int Length = strlen(Tag);
367 unsigned int I = AlphaIndexes[AlphaHash(Tag)];
368 if (I == 0)
369 return false;
370 I--;
371
372 for (unsigned int Counter = 0; Counter != TagCount; Counter++,
373 I = (I+1)%TagCount)
374 {
375 const char *St;
376 St = Section + Indexes[I];
377 if (strncasecmp(Tag,St,Length) != 0)
378 continue;
379
380 // Make sure the colon is in the right place
381 const char *C = St + Length;
382 for (; isspace(*C) != 0; C++);
383 if (*C != ':')
384 continue;
385
386 // Strip off the gunk from the start end
387 Start = C;
388 End = Section + Indexes[I+1];
389 if (Start >= End)
390 return _error->Error("Internal parsing error");
391
392 for (; (isspace(*Start) != 0 || *Start == ':') && Start < End; Start++);
393 for (; isspace(End[-1]) != 0 && End > Start; End--);
394
395 return true;
396 }
397
398 Start = End = 0;
399 return false;
400 }
401 /*}}}*/
402 // TagSection::FindS - Find a string /*{{{*/
403 // ---------------------------------------------------------------------
404 /* */
405 string pkgTagSection::FindS(const char *Tag) const
406 {
407 const char *Start;
408 const char *End;
409 if (Find(Tag,Start,End) == false)
410 return string();
411 return string(Start,End);
412 }
413 /*}}}*/
414 // TagSection::FindI - Find an integer /*{{{*/
415 // ---------------------------------------------------------------------
416 /* */
417 signed int pkgTagSection::FindI(const char *Tag,signed long Default) const
418 {
419 const char *Start;
420 const char *Stop;
421 if (Find(Tag,Start,Stop) == false)
422 return Default;
423
424 // Copy it into a temp buffer so we can use strtol
425 char S[300];
426 if ((unsigned)(Stop - Start) >= sizeof(S))
427 return Default;
428 strncpy(S,Start,Stop-Start);
429 S[Stop - Start] = 0;
430
431 char *End;
432 signed long Result = strtol(S,&End,10);
433 if (S == End)
434 return Default;
435 return Result;
436 }
437 /*}}}*/
438 // TagSection::FindULL - Find an unsigned long long integer /*{{{*/
439 // ---------------------------------------------------------------------
440 /* */
441 unsigned long long pkgTagSection::FindULL(const char *Tag, unsigned long long const &Default) const
442 {
443 const char *Start;
444 const char *Stop;
445 if (Find(Tag,Start,Stop) == false)
446 return Default;
447
448 // Copy it into a temp buffer so we can use strtoull
449 char S[100];
450 if ((unsigned)(Stop - Start) >= sizeof(S))
451 return Default;
452 strncpy(S,Start,Stop-Start);
453 S[Stop - Start] = 0;
454
455 char *End;
456 unsigned long long Result = strtoull(S,&End,10);
457 if (S == End)
458 return Default;
459 return Result;
460 }
461 /*}}}*/
462 // TagSection::FindFlag - Locate a yes/no type flag /*{{{*/
463 // ---------------------------------------------------------------------
464 /* The bits marked in Flag are masked on/off in Flags */
465 bool pkgTagSection::FindFlag(const char *Tag,unsigned long &Flags,
466 unsigned long Flag) const
467 {
468 const char *Start;
469 const char *Stop;
470 if (Find(Tag,Start,Stop) == false)
471 return true;
472 return FindFlag(Flags, Flag, Start, Stop);
473 }
474 bool const pkgTagSection::FindFlag(unsigned long &Flags, unsigned long Flag,
475 char const* Start, char const* Stop)
476 {
477 switch (StringToBool(string(Start, Stop)))
478 {
479 case 0:
480 Flags &= ~Flag;
481 return true;
482
483 case 1:
484 Flags |= Flag;
485 return true;
486
487 default:
488 _error->Warning("Unknown flag value: %s",string(Start,Stop).c_str());
489 return true;
490 }
491 return true;
492 }
493 /*}}}*/
494 // TFRewrite - Rewrite a control record /*{{{*/
495 // ---------------------------------------------------------------------
496 /* This writes the control record to the Output stream, rewriting it as
497 necessary. The override map item specifies the rewriting rules to follow.
498 This also takes the time to sort the field list. */
499
500 /* The order of this list is taken from dpkg source lib/parse.c the fieldinfos
501 array. */
// Output order for the fields of a binary package record (0-terminated)
static const char *iTFRewritePackageOrder[] =
{
   "Package",
   "Essential",
   "Status",
   "Priority",
   "Section",
   "Installed-Size",
   "Maintainer",
   "Original-Maintainer",
   "Architecture",
   "Source",
   "Version",
   "Revision",         // Obsolete
   "Config-Version",   // Obsolete
   "Replaces",
   "Provides",
   "Depends",
   "Pre-Depends",
   "Recommends",
   "Suggests",
   "Conflicts",
   "Breaks",
   "Conffiles",
   "Filename",
   "Size",
   "MD5Sum",
   "SHA1",
   "SHA256",
   "SHA512",
   "MSDOS-Filename",   // Obsolete
   "Description",
   0
};

// Output order for the fields of a source package record (0-terminated)
static const char *iTFRewriteSourceOrder[] =
{
   "Package",
   "Source",
   "Binary",
   "Version",
   "Priority",
   "Section",
   "Maintainer",
   "Original-Maintainer",
   "Build-Depends",
   "Build-Depends-Indep",
   "Build-Conflicts",
   "Build-Conflicts-Indep",
   "Architecture",
   "Standards-Version",
   "Format",
   "Directory",
   "Files",
   0
};

/* The arrays are exported through pointers rather than directly: gcc sets
   the symbol size of an array to its length, which causes dynamic
   relinking errors. With the extra level the symbol size stays constant. */
const char **TFRewritePackageOrder = iTFRewritePackageOrder;
const char **TFRewriteSourceOrder = iTFRewriteSourceOrder;
558
559 bool TFRewrite(FILE *Output,pkgTagSection const &Tags,const char *Order[],
560 TFRewriteData *Rewrite)
561 {
562 unsigned char Visited[256]; // Bit 1 is Order, Bit 2 is Rewrite
563 for (unsigned I = 0; I != 256; I++)
564 Visited[I] = 0;
565
566 // Set new tag up as necessary.
567 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
568 {
569 if (Rewrite[J].NewTag == 0)
570 Rewrite[J].NewTag = Rewrite[J].Tag;
571 }
572
573 // Write all all of the tags, in order.
574 if (Order != NULL)
575 {
576 for (unsigned int I = 0; Order[I] != 0; I++)
577 {
578 bool Rewritten = false;
579
580 // See if this is a field that needs to be rewritten
581 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
582 {
583 if (strcasecmp(Rewrite[J].Tag,Order[I]) == 0)
584 {
585 Visited[J] |= 2;
586 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
587 {
588 if (isspace(Rewrite[J].Rewrite[0]))
589 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
590 else
591 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
592 }
593 Rewritten = true;
594 break;
595 }
596 }
597
598 // See if it is in the fragment
599 unsigned Pos;
600 if (Tags.Find(Order[I],Pos) == false)
601 continue;
602 Visited[Pos] |= 1;
603
604 if (Rewritten == true)
605 continue;
606
607 /* Write out this element, taking a moment to rewrite the tag
608 in case of changes of case. */
609 const char *Start;
610 const char *Stop;
611 Tags.Get(Start,Stop,Pos);
612
613 if (fputs(Order[I],Output) < 0)
614 return _error->Errno("fputs","IO Error to output");
615 Start += strlen(Order[I]);
616 if (fwrite(Start,Stop - Start,1,Output) != 1)
617 return _error->Errno("fwrite","IO Error to output");
618 if (Stop[-1] != '\n')
619 fprintf(Output,"\n");
620 }
621 }
622
623 // Now write all the old tags that were missed.
624 for (unsigned int I = 0; I != Tags.Count(); I++)
625 {
626 if ((Visited[I] & 1) == 1)
627 continue;
628
629 const char *Start;
630 const char *Stop;
631 Tags.Get(Start,Stop,I);
632 const char *End = Start;
633 for (; End < Stop && *End != ':'; End++);
634
635 // See if this is a field that needs to be rewritten
636 bool Rewritten = false;
637 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
638 {
639 if (stringcasecmp(Start,End,Rewrite[J].Tag) == 0)
640 {
641 Visited[J] |= 2;
642 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
643 {
644 if (isspace(Rewrite[J].Rewrite[0]))
645 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
646 else
647 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
648 }
649
650 Rewritten = true;
651 break;
652 }
653 }
654
655 if (Rewritten == true)
656 continue;
657
658 // Write out this element
659 if (fwrite(Start,Stop - Start,1,Output) != 1)
660 return _error->Errno("fwrite","IO Error to output");
661 if (Stop[-1] != '\n')
662 fprintf(Output,"\n");
663 }
664
665 // Now write all the rewrites that were missed
666 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
667 {
668 if ((Visited[J] & 2) == 2)
669 continue;
670
671 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
672 {
673 if (isspace(Rewrite[J].Rewrite[0]))
674 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
675 else
676 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
677 }
678 }
679
680 return true;
681 }
682 /*}}}*/