Merge branch 'debian/sid' of ssh://git.debian.org/git/apt/apt into debian/sid
[ntk/apt.git] / apt-pkg / tagfile.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: tagfile.cc,v 1.37.2.2 2003/12/31 16:02:30 mdz Exp $
4 /* ######################################################################
5
6 Fast scanner for RFC-822 type header information
7
8 This uses a rotating buffer to load the package information into.
9 The scanner runs over it and isolates and indexes a single section.
10
11 ##################################################################### */
12 /*}}}*/
13 // Include Files /*{{{*/
14 #include<config.h>
15
16 #include <apt-pkg/tagfile.h>
17 #include <apt-pkg/error.h>
18 #include <apt-pkg/strutl.h>
19 #include <apt-pkg/fileutl.h>
20
21 #include <string>
22 #include <stdio.h>
23 #include <ctype.h>
24
25 #include <apti18n.h>
26 /*}}}*/
27
28 using std::string;
29
30 class pkgTagFilePrivate
31 {
32 public:
33 pkgTagFilePrivate(FileFd *pFd, unsigned long long Size) : Fd(*pFd), Buffer(NULL),
34 Start(NULL), End(NULL),
35 Done(false), iOffset(0),
36 Size(Size)
37 {
38 }
39 FileFd &Fd;
40 char *Buffer;
41 char *Start;
42 char *End;
43 bool Done;
44 unsigned long long iOffset;
45 unsigned long long Size;
46 };
47
48 // TagFile::pkgTagFile - Constructor /*{{{*/
49 // ---------------------------------------------------------------------
50 /* */
51 pkgTagFile::pkgTagFile(FileFd *pFd,unsigned long long Size)
52 {
53 /* The size is increased by 4 because if we start with the Size of the
54 filename we need to try to read 1 char more to see an EOF faster, 1
55 char the end-pointer can be on and maybe 2 newlines need to be added
56 to the end of the file -> 4 extra chars */
57 Size += 4;
58 d = new pkgTagFilePrivate(pFd, Size);
59
60 if (d->Fd.IsOpen() == false)
61 d->Start = d->End = d->Buffer = 0;
62 else
63 d->Buffer = (char*)malloc(sizeof(char) * Size);
64
65 if (d->Buffer == NULL)
66 d->Done = true;
67 else
68 d->Done = false;
69
70 d->Start = d->End = d->Buffer;
71 d->iOffset = 0;
72 if (d->Done == false)
73 Fill();
74 }
75 /*}}}*/
76 // TagFile::~pkgTagFile - Destructor /*{{{*/
77 // ---------------------------------------------------------------------
78 /* */
79 pkgTagFile::~pkgTagFile()
80 {
81 free(d->Buffer);
82 delete d;
83 }
84 /*}}}*/
85 // TagFile::Offset - Return the current offset in the buffer /*{{{*/
86 unsigned long pkgTagFile::Offset()
87 {
88 return d->iOffset;
89 }
90 /*}}}*/
91 // TagFile::Resize - Resize the internal buffer /*{{{*/
92 // ---------------------------------------------------------------------
/* Resize the internal buffer (double it in size). Fail if the maximum
 * size is reached.
 */
96 bool pkgTagFile::Resize()
97 {
98 // fail is the buffer grows too big
99 if(d->Size > 1024*1024+1)
100 return false;
101
102 return Resize(d->Size * 2);
103 }
104 bool pkgTagFile::Resize(unsigned long long const newSize)
105 {
106 unsigned long long const EndSize = d->End - d->Start;
107
108 // get new buffer and use it
109 char* newBuffer = (char*)realloc(d->Buffer, sizeof(char) * newSize);
110 if (newBuffer == NULL)
111 return false;
112 d->Buffer = newBuffer;
113 d->Size = newSize;
114
115 // update the start/end pointers to the new buffer
116 d->Start = d->Buffer;
117 d->End = d->Start + EndSize;
118 return true;
119 }
120 /*}}}*/
121 // TagFile::Step - Advance to the next section /*{{{*/
122 // ---------------------------------------------------------------------
123 /* If the Section Scanner fails we refill the buffer and try again.
124 * If that fails too, double the buffer size and try again until a
125 * maximum buffer is reached.
126 */
127 bool pkgTagFile::Step(pkgTagSection &Tag)
128 {
129 while (Tag.Scan(d->Start,d->End - d->Start) == false)
130 {
131 if (Fill() == false)
132 return false;
133
134 if(Tag.Scan(d->Start,d->End - d->Start))
135 break;
136
137 if (Resize() == false)
138 return _error->Error(_("Unable to parse package file %s (1)"),
139 d->Fd.Name().c_str());
140 }
141 d->Start += Tag.size();
142 d->iOffset += Tag.size();
143
144 Tag.Trim();
145 return true;
146 }
147 /*}}}*/
148 // TagFile::Fill - Top up the buffer /*{{{*/
149 // ---------------------------------------------------------------------
150 /* This takes the bit at the end of the buffer and puts it at the start
151 then fills the rest from the file */
152 bool pkgTagFile::Fill()
153 {
154 unsigned long long EndSize = d->End - d->Start;
155 unsigned long long Actual = 0;
156
157 memmove(d->Buffer,d->Start,EndSize);
158 d->Start = d->Buffer;
159 d->End = d->Buffer + EndSize;
160
161 if (d->Done == false)
162 {
163 // See if only a bit of the file is left
164 unsigned long long const dataSize = d->Size - ((d->End - d->Buffer) + 1);
165 if (d->Fd.Read(d->End, dataSize, &Actual) == false)
166 return false;
167 if (Actual != dataSize || d->Fd.Eof() == true)
168 d->Done = true;
169 d->End += Actual;
170 }
171
172 if (d->Done == true)
173 {
174 if (EndSize <= 3 && Actual == 0)
175 return false;
176 if (d->Size - (d->End - d->Buffer) < 4)
177 return true;
178
179 // Append a double new line if one does not exist
180 unsigned int LineCount = 0;
181 for (const char *E = d->End - 1; E - d->End < 6 && (*E == '\n' || *E == '\r'); E--)
182 if (*E == '\n')
183 LineCount++;
184 if (LineCount < 2)
185 {
186 if ((unsigned)(d->End - d->Buffer) >= d->Size)
187 Resize(d->Size + 3);
188 for (; LineCount < 2; LineCount++)
189 *d->End++ = '\n';
190 }
191
192 return true;
193 }
194
195 return true;
196 }
197 /*}}}*/
198 // TagFile::Jump - Jump to a pre-recorded location in the file /*{{{*/
199 // ---------------------------------------------------------------------
200 /* This jumps to a pre-recorded file location and reads the record
201 that is there */
202 bool pkgTagFile::Jump(pkgTagSection &Tag,unsigned long long Offset)
203 {
204 // We are within a buffer space of the next hit..
205 if (Offset >= d->iOffset && d->iOffset + (d->End - d->Start) > Offset)
206 {
207 unsigned long long Dist = Offset - d->iOffset;
208 d->Start += Dist;
209 d->iOffset += Dist;
210 return Step(Tag);
211 }
212
213 // Reposition and reload..
214 d->iOffset = Offset;
215 d->Done = false;
216 if (d->Fd.Seek(Offset) == false)
217 return false;
218 d->End = d->Start = d->Buffer;
219
220 if (Fill() == false)
221 return false;
222
223 if (Tag.Scan(d->Start, d->End - d->Start) == true)
224 return true;
225
226 // This appends a double new line (for the real eof handling)
227 if (Fill() == false)
228 return false;
229
230 if (Tag.Scan(d->Start, d->End - d->Start) == false)
231 return _error->Error(_("Unable to parse package file %s (2)"),d->Fd.Name().c_str());
232
233 return true;
234 }
235 /*}}}*/
236 // pkgTagSection::pkgTagSection - Constructor /*{{{*/
237 // ---------------------------------------------------------------------
238 /* */
239 pkgTagSection::pkgTagSection()
240 : Section(0), TagCount(0), d(NULL), Stop(0)
241 {
242 memset(&Indexes, 0, sizeof(Indexes));
243 memset(&AlphaIndexes, 0, sizeof(AlphaIndexes));
244 }
245 /*}}}*/
246 // TagSection::Scan - Scan for the end of the header information /*{{{*/
247 // ---------------------------------------------------------------------
248 /* This looks for the first double new line in the data stream.
249 It also indexes the tags in the section. */
250 bool pkgTagSection::Scan(const char *Start,unsigned long MaxLength)
251 {
252 const char *End = Start + MaxLength;
253 Stop = Section = Start;
254 memset(AlphaIndexes,0,sizeof(AlphaIndexes));
255
256 if (Stop == 0)
257 return false;
258
259 TagCount = 0;
260 while (TagCount+1 < sizeof(Indexes)/sizeof(Indexes[0]) && Stop < End)
261 {
262 TrimRecord(true,End);
263
264 // Start a new index and add it to the hash
265 if (isspace(Stop[0]) == 0)
266 {
267 Indexes[TagCount++] = Stop - Section;
268 AlphaIndexes[AlphaHash(Stop,End)] = TagCount;
269 }
270
271 Stop = (const char *)memchr(Stop,'\n',End - Stop);
272
273 if (Stop == 0)
274 return false;
275
276 for (; Stop+1 < End && Stop[1] == '\r'; Stop++);
277
278 // Double newline marks the end of the record
279 if (Stop+1 < End && Stop[1] == '\n')
280 {
281 Indexes[TagCount] = Stop - Section;
282 TrimRecord(false,End);
283 return true;
284 }
285
286 Stop++;
287 }
288
289 return false;
290 }
291 /*}}}*/
292 // TagSection::TrimRecord - Trim off any garbage before/after a record /*{{{*/
293 // ---------------------------------------------------------------------
294 /* There should be exactly 2 newline at the end of the record, no more. */
295 void pkgTagSection::TrimRecord(bool BeforeRecord, const char*& End)
296 {
297 if (BeforeRecord == true)
298 return;
299 for (; Stop < End && (Stop[0] == '\n' || Stop[0] == '\r'); Stop++);
300 }
301 /*}}}*/
302 // TagSection::Trim - Trim off any trailing garbage /*{{{*/
303 // ---------------------------------------------------------------------
304 /* There should be exactly 1 newline at the end of the buffer, no more. */
305 void pkgTagSection::Trim()
306 {
307 for (; Stop > Section + 2 && (Stop[-2] == '\n' || Stop[-2] == '\r'); Stop--);
308 }
309 /*}}}*/
310 // TagSection::Exists - return True if a tag exists /*{{{*/
311 bool pkgTagSection::Exists(const char* const Tag)
312 {
313 unsigned int tmp;
314 return Find(Tag, tmp);
315 }
316 /*}}}*/
317 // TagSection::Find - Locate a tag /*{{{*/
318 // ---------------------------------------------------------------------
319 /* This searches the section for a tag that matches the given string. */
320 bool pkgTagSection::Find(const char *Tag,unsigned int &Pos) const
321 {
322 unsigned int Length = strlen(Tag);
323 unsigned int I = AlphaIndexes[AlphaHash(Tag)];
324 if (I == 0)
325 return false;
326 I--;
327
328 for (unsigned int Counter = 0; Counter != TagCount; Counter++,
329 I = (I+1)%TagCount)
330 {
331 const char *St;
332 St = Section + Indexes[I];
333 if (strncasecmp(Tag,St,Length) != 0)
334 continue;
335
336 // Make sure the colon is in the right place
337 const char *C = St + Length;
338 for (; isspace(*C) != 0; C++);
339 if (*C != ':')
340 continue;
341 Pos = I;
342 return true;
343 }
344
345 Pos = 0;
346 return false;
347 }
348 /*}}}*/
349 // TagSection::Find - Locate a tag /*{{{*/
350 // ---------------------------------------------------------------------
351 /* This searches the section for a tag that matches the given string. */
352 bool pkgTagSection::Find(const char *Tag,const char *&Start,
353 const char *&End) const
354 {
355 unsigned int Length = strlen(Tag);
356 unsigned int I = AlphaIndexes[AlphaHash(Tag)];
357 if (I == 0)
358 return false;
359 I--;
360
361 for (unsigned int Counter = 0; Counter != TagCount; Counter++,
362 I = (I+1)%TagCount)
363 {
364 const char *St;
365 St = Section + Indexes[I];
366 if (strncasecmp(Tag,St,Length) != 0)
367 continue;
368
369 // Make sure the colon is in the right place
370 const char *C = St + Length;
371 for (; isspace(*C) != 0; C++);
372 if (*C != ':')
373 continue;
374
375 // Strip off the gunk from the start end
376 Start = C;
377 End = Section + Indexes[I+1];
378 if (Start >= End)
379 return _error->Error("Internal parsing error");
380
381 for (; (isspace(*Start) != 0 || *Start == ':') && Start < End; Start++);
382 for (; isspace(End[-1]) != 0 && End > Start; End--);
383
384 return true;
385 }
386
387 Start = End = 0;
388 return false;
389 }
390 /*}}}*/
391 // TagSection::FindS - Find a string /*{{{*/
392 // ---------------------------------------------------------------------
393 /* */
394 string pkgTagSection::FindS(const char *Tag) const
395 {
396 const char *Start;
397 const char *End;
398 if (Find(Tag,Start,End) == false)
399 return string();
400 return string(Start,End);
401 }
402 /*}}}*/
403 // TagSection::FindI - Find an integer /*{{{*/
404 // ---------------------------------------------------------------------
405 /* */
406 signed int pkgTagSection::FindI(const char *Tag,signed long Default) const
407 {
408 const char *Start;
409 const char *Stop;
410 if (Find(Tag,Start,Stop) == false)
411 return Default;
412
413 // Copy it into a temp buffer so we can use strtol
414 char S[300];
415 if ((unsigned)(Stop - Start) >= sizeof(S))
416 return Default;
417 strncpy(S,Start,Stop-Start);
418 S[Stop - Start] = 0;
419
420 char *End;
421 signed long Result = strtol(S,&End,10);
422 if (S == End)
423 return Default;
424 return Result;
425 }
426 /*}}}*/
427 // TagSection::FindULL - Find an unsigned long long integer /*{{{*/
428 // ---------------------------------------------------------------------
429 /* */
430 unsigned long long pkgTagSection::FindULL(const char *Tag, unsigned long long const &Default) const
431 {
432 const char *Start;
433 const char *Stop;
434 if (Find(Tag,Start,Stop) == false)
435 return Default;
436
437 // Copy it into a temp buffer so we can use strtoull
438 char S[100];
439 if ((unsigned)(Stop - Start) >= sizeof(S))
440 return Default;
441 strncpy(S,Start,Stop-Start);
442 S[Stop - Start] = 0;
443
444 char *End;
445 unsigned long long Result = strtoull(S,&End,10);
446 if (S == End)
447 return Default;
448 return Result;
449 }
450 /*}}}*/
451 // TagSection::FindFlag - Locate a yes/no type flag /*{{{*/
452 // ---------------------------------------------------------------------
453 /* The bits marked in Flag are masked on/off in Flags */
454 bool pkgTagSection::FindFlag(const char *Tag,unsigned long &Flags,
455 unsigned long Flag) const
456 {
457 const char *Start;
458 const char *Stop;
459 if (Find(Tag,Start,Stop) == false)
460 return true;
461 return FindFlag(Flags, Flag, Start, Stop);
462 }
463 bool const pkgTagSection::FindFlag(unsigned long &Flags, unsigned long Flag,
464 char const* Start, char const* Stop)
465 {
466 switch (StringToBool(string(Start, Stop)))
467 {
468 case 0:
469 Flags &= ~Flag;
470 return true;
471
472 case 1:
473 Flags |= Flag;
474 return true;
475
476 default:
477 _error->Warning("Unknown flag value: %s",string(Start,Stop).c_str());
478 return true;
479 }
480 return true;
481 }
482 /*}}}*/
483 // TFRewrite - Rewrite a control record /*{{{*/
484 // ---------------------------------------------------------------------
/* This writes the control record to the given output stream, rewriting it
   as necessary. The override map item specifies the rewriting rules to
   follow. This also takes the time to sort the field list. */
488
/* Field order used for binary package records. The order of this list
   is taken from the fieldinfos array in the dpkg source lib/parse.c.
   The list is terminated by a 0 entry. */
static const char *iTFRewritePackageOrder[] = {
   "Package", "Essential", "Status", "Priority", "Section",
   "Installed-Size", "Maintainer", "Original-Maintainer",
   "Architecture", "Source", "Version",
   "Revision",         // Obsolete
   "Config-Version",   // Obsolete
   "Replaces", "Provides", "Depends", "Pre-Depends", "Recommends",
   "Suggests", "Conflicts", "Breaks", "Conffiles", "Filename", "Size",
   "MD5Sum", "SHA1", "SHA256", "SHA512",
   "MSDOS-Filename",   // Obsolete
   "Description",
   0};

/* Field order used for source package records, terminated by a 0
   entry as well. */
static const char *iTFRewriteSourceOrder[] = {
   "Package", "Source", "Binary", "Version", "Priority", "Section",
   "Maintainer", "Original-Maintainer",
   "Build-Depends", "Build-Depends-Indep",
   "Build-Conflicts", "Build-Conflicts-Indep",
   "Architecture", "Standards-Version", "Format", "Directory", "Files",
   0};

/* The exported symbols are plain pointers to the tables above rather
   than the arrays themselves: gcc sets the symbol size of an array to
   the length of the array, causing dynamic relinking errors whenever a
   list changes. Going through a pointer keeps the symbol size
   constant. */
const char **TFRewritePackageOrder = iTFRewritePackageOrder;
const char **TFRewriteSourceOrder = iTFRewriteSourceOrder;
547
548 bool TFRewrite(FILE *Output,pkgTagSection const &Tags,const char *Order[],
549 TFRewriteData *Rewrite)
550 {
551 unsigned char Visited[256]; // Bit 1 is Order, Bit 2 is Rewrite
552 for (unsigned I = 0; I != 256; I++)
553 Visited[I] = 0;
554
555 // Set new tag up as necessary.
556 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
557 {
558 if (Rewrite[J].NewTag == 0)
559 Rewrite[J].NewTag = Rewrite[J].Tag;
560 }
561
562 // Write all all of the tags, in order.
563 for (unsigned int I = 0; Order[I] != 0; I++)
564 {
565 bool Rewritten = false;
566
567 // See if this is a field that needs to be rewritten
568 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
569 {
570 if (strcasecmp(Rewrite[J].Tag,Order[I]) == 0)
571 {
572 Visited[J] |= 2;
573 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
574 {
575 if (isspace(Rewrite[J].Rewrite[0]))
576 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
577 else
578 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
579 }
580
581 Rewritten = true;
582 break;
583 }
584 }
585
586 // See if it is in the fragment
587 unsigned Pos;
588 if (Tags.Find(Order[I],Pos) == false)
589 continue;
590 Visited[Pos] |= 1;
591
592 if (Rewritten == true)
593 continue;
594
595 /* Write out this element, taking a moment to rewrite the tag
596 in case of changes of case. */
597 const char *Start;
598 const char *Stop;
599 Tags.Get(Start,Stop,Pos);
600
601 if (fputs(Order[I],Output) < 0)
602 return _error->Errno("fputs","IO Error to output");
603 Start += strlen(Order[I]);
604 if (fwrite(Start,Stop - Start,1,Output) != 1)
605 return _error->Errno("fwrite","IO Error to output");
606 if (Stop[-1] != '\n')
607 fprintf(Output,"\n");
608 }
609
610 // Now write all the old tags that were missed.
611 for (unsigned int I = 0; I != Tags.Count(); I++)
612 {
613 if ((Visited[I] & 1) == 1)
614 continue;
615
616 const char *Start;
617 const char *Stop;
618 Tags.Get(Start,Stop,I);
619 const char *End = Start;
620 for (; End < Stop && *End != ':'; End++);
621
622 // See if this is a field that needs to be rewritten
623 bool Rewritten = false;
624 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
625 {
626 if (stringcasecmp(Start,End,Rewrite[J].Tag) == 0)
627 {
628 Visited[J] |= 2;
629 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
630 {
631 if (isspace(Rewrite[J].Rewrite[0]))
632 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
633 else
634 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
635 }
636
637 Rewritten = true;
638 break;
639 }
640 }
641
642 if (Rewritten == true)
643 continue;
644
645 // Write out this element
646 if (fwrite(Start,Stop - Start,1,Output) != 1)
647 return _error->Errno("fwrite","IO Error to output");
648 if (Stop[-1] != '\n')
649 fprintf(Output,"\n");
650 }
651
652 // Now write all the rewrites that were missed
653 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
654 {
655 if ((Visited[J] & 2) == 2)
656 continue;
657
658 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
659 {
660 if (isspace(Rewrite[J].Rewrite[0]))
661 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
662 else
663 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
664 }
665 }
666
667 return true;
668 }
669 /*}}}*/