Fix some unlikely segfaults
[ntk/apt.git] / apt-pkg / tagfile.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: tagfile.cc,v 1.35 2002/11/06 06:43:14 jgg Exp $
4 /* ######################################################################
5
6 Fast scanner for RFC-822 type header information
7
8 This uses a rotating buffer to load the package information into.
9 The scanner runs over it and isolates and indexes a single section.
10
11 ##################################################################### */
12 /*}}}*/
13 // Include Files /*{{{*/
14 #ifdef __GNUG__
15 #pragma implementation "apt-pkg/tagfile.h"
16 #endif
17
18 #include <apt-pkg/tagfile.h>
19 #include <apt-pkg/error.h>
20 #include <apt-pkg/strutl.h>
21
22 #include <apti18n.h>
23
24 #include <string>
25 #include <stdio.h>
26 #include <ctype.h>
27 /*}}}*/
28
29 using std::string;
30
31 // TagFile::pkgTagFile - Constructor /*{{{*/
32 // ---------------------------------------------------------------------
33 /* */
34 pkgTagFile::pkgTagFile(FileFd *pFd,unsigned long Size) : Fd(*pFd), Size(Size)
35 {
36 if (Fd.IsOpen() == false)
37 {
38 Buffer = 0;
39 Start = End = Buffer = 0;
40 Done = true;
41 iOffset = 0;
42 return;
43 }
44
45 Buffer = new char[Size];
46 Start = End = Buffer;
47 Done = false;
48 iOffset = 0;
49 Fill();
50 }
51 /*}}}*/
52 // TagFile::~pkgTagFile - Destructor /*{{{*/
53 // ---------------------------------------------------------------------
54 /* */
55 pkgTagFile::~pkgTagFile()
56 {
57 delete [] Buffer;
58 }
59 /*}}}*/
60 // TagFile::Step - Advance to the next section /*{{{*/
61 // ---------------------------------------------------------------------
62 /* If the Section Scanner fails we refill the buffer and try again. */
63 bool pkgTagFile::Step(pkgTagSection &Tag)
64 {
65 if (Tag.Scan(Start,End - Start) == false)
66 {
67 if (Fill() == false)
68 return false;
69
70 if (Tag.Scan(Start,End - Start) == false)
71 return _error->Error(_("Unable to parse package file %s (1)"),
72 Fd.Name().c_str());
73 }
74 Start += Tag.size();
75 iOffset += Tag.size();
76
77 Tag.Trim();
78 return true;
79 }
80 /*}}}*/
81 // TagFile::Fill - Top up the buffer /*{{{*/
82 // ---------------------------------------------------------------------
83 /* This takes the bit at the end of the buffer and puts it at the start
84 then fills the rest from the file */
85 bool pkgTagFile::Fill()
86 {
87 unsigned long EndSize = End - Start;
88 unsigned long Actual = 0;
89
90 memmove(Buffer,Start,EndSize);
91 Start = Buffer;
92 End = Buffer + EndSize;
93
94 if (Done == false)
95 {
96 // See if only a bit of the file is left
97 if (Fd.Read(End,Size - (End - Buffer),&Actual) == false)
98 return false;
99 if (Actual != Size - (End - Buffer))
100 Done = true;
101 End += Actual;
102 }
103
104 if (Done == true)
105 {
106 if (EndSize <= 3 && Actual == 0)
107 return false;
108 if (Size - (End - Buffer) < 4)
109 return true;
110
111 // Append a double new line if one does not exist
112 unsigned int LineCount = 0;
113 for (const char *E = End - 1; E - End < 6 && (*E == '\n' || *E == '\r'); E--)
114 if (*E == '\n')
115 LineCount++;
116 for (; LineCount < 2; LineCount++)
117 *End++ = '\n';
118
119 return true;
120 }
121
122 return true;
123 }
124 /*}}}*/
125 // TagFile::Jump - Jump to a pre-recorded location in the file /*{{{*/
126 // ---------------------------------------------------------------------
127 /* This jumps to a pre-recorded file location and reads the record
128 that is there */
129 bool pkgTagFile::Jump(pkgTagSection &Tag,unsigned long Offset)
130 {
131 // We are within a buffer space of the next hit..
132 if (Offset >= iOffset && iOffset + (End - Start) > Offset)
133 {
134 unsigned long Dist = Offset - iOffset;
135 Start += Dist;
136 iOffset += Dist;
137 return Step(Tag);
138 }
139
140 // Reposition and reload..
141 iOffset = Offset;
142 Done = false;
143 if (Fd.Seek(Offset) == false)
144 return false;
145 End = Start = Buffer;
146
147 if (Fill() == false)
148 return false;
149
150 if (Tag.Scan(Start,End - Start) == true)
151 return true;
152
153 // This appends a double new line (for the real eof handling)
154 if (Fill() == false)
155 return false;
156
157 if (Tag.Scan(Start,End - Start) == false)
158 return _error->Error(_("Unable to parse package file %s (2)"),Fd.Name().c_str());
159
160 return true;
161 }
162 /*}}}*/
163 // TagSection::Scan - Scan for the end of the header information /*{{{*/
164 // ---------------------------------------------------------------------
165 /* This looks for the first double new line in the data stream. It also
166 indexes the tags in the section. The very simple hash function below runs
167 over the tag name (up to the ':') and keeps the low 8 bits of the result,
167 which gives very good performance on the debian package files */
/* Hash a tag name into the range 0..255. Hashing stops at End, at the first
   ':' or at a terminating 0, whichever comes first. Each character is
   folded to upper case first: tag lookup compares names with strncasecmp,
   so two spellings that differ only in case must land in the same bucket. */
inline static unsigned long AlphaHash(const char *Text, const char *End = 0)
{
   unsigned long Res = 0;
   for (; Text != End && *Text != ':' && *Text != 0; Text++)
      Res = (unsigned long)toupper((unsigned char)*Text) ^ (Res << 2);
   return Res & 0xFF;
}
175
176 bool pkgTagSection::Scan(const char *Start,unsigned long MaxLength)
177 {
178 const char *End = Start + MaxLength;
179 Stop = Section = Start;
180 memset(AlphaIndexes,0,sizeof(AlphaIndexes));
181
182 if (Stop == 0)
183 return false;
184
185 TagCount = 0;
186 while (TagCount+1 < sizeof(Indexes)/sizeof(Indexes[0]) && Stop < End)
187 {
188 // Start a new index and add it to the hash
189 if (isspace(Stop[0]) == 0)
190 {
191 Indexes[TagCount++] = Stop - Section;
192 AlphaIndexes[AlphaHash(Stop,End)] = TagCount;
193 }
194
195 Stop = (const char *)memchr(Stop,'\n',End - Stop);
196
197 if (Stop == 0)
198 return false;
199
200 for (; Stop+1 < End && Stop[1] == '\r'; Stop++);
201
202 // Double newline marks the end of the record
203 if (Stop+1 < End && Stop[1] == '\n')
204 {
205 Indexes[TagCount] = Stop - Section;
206 for (; Stop < End && (Stop[0] == '\n' || Stop[0] == '\r'); Stop++);
207 return true;
208 }
209
210 Stop++;
211 }
212
213 return false;
214 }
215 /*}}}*/
216 // TagSection::Trim - Trim off any trailing garbage /*{{{*/
217 // ---------------------------------------------------------------------
218 /* There should be exactly 1 newline at the end of the buffer, no more. */
219 void pkgTagSection::Trim()
220 {
221 for (; Stop > Section + 2 && (Stop[-2] == '\n' || Stop[-2] == '\r'); Stop--);
222 }
223 /*}}}*/
224 // TagSection::Find - Locate a tag /*{{{*/
225 // ---------------------------------------------------------------------
226 /* This searches the section for a tag that matches the given string. */
227 bool pkgTagSection::Find(const char *Tag,unsigned &Pos) const
228 {
229 unsigned int Length = strlen(Tag);
230 unsigned int I = AlphaIndexes[AlphaHash(Tag)];
231 if (I == 0)
232 return false;
233 I--;
234
235 for (unsigned int Counter = 0; Counter != TagCount; Counter++,
236 I = (I+1)%TagCount)
237 {
238 const char *St;
239 St = Section + Indexes[I];
240 if (strncasecmp(Tag,St,Length) != 0)
241 continue;
242
243 // Make sure the colon is in the right place
244 const char *C = St + Length;
245 for (; isspace(*C) != 0; C++);
246 if (*C != ':')
247 continue;
248 Pos = I;
249 return true;
250 }
251
252 Pos = 0;
253 return false;
254 }
255 /*}}}*/
256 // TagSection::Find - Locate a tag /*{{{*/
257 // ---------------------------------------------------------------------
258 /* This searches the section for a tag that matches the given string. */
259 bool pkgTagSection::Find(const char *Tag,const char *&Start,
260 const char *&End) const
261 {
262 unsigned int Length = strlen(Tag);
263 unsigned int I = AlphaIndexes[AlphaHash(Tag)];
264 if (I == 0)
265 return false;
266 I--;
267
268 for (unsigned int Counter = 0; Counter != TagCount; Counter++,
269 I = (I+1)%TagCount)
270 {
271 const char *St;
272 St = Section + Indexes[I];
273 if (strncasecmp(Tag,St,Length) != 0)
274 continue;
275
276 // Make sure the colon is in the right place
277 const char *C = St + Length;
278 for (; isspace(*C) != 0; C++);
279 if (*C != ':')
280 continue;
281
282 // Strip off the gunk from the start end
283 Start = C;
284 End = Section + Indexes[I+1];
285 if (Start >= End)
286 return _error->Error("Internal parsing error");
287
288 for (; (isspace(*Start) != 0 || *Start == ':') && Start < End; Start++);
289 for (; isspace(End[-1]) != 0 && End > Start; End--);
290
291 return true;
292 }
293
294 Start = End = 0;
295 return false;
296 }
297 /*}}}*/
298 // TagSection::FindS - Find a string /*{{{*/
299 // ---------------------------------------------------------------------
300 /* */
301 string pkgTagSection::FindS(const char *Tag) const
302 {
303 const char *Start;
304 const char *End;
305 if (Find(Tag,Start,End) == false)
306 return string();
307 return string(Start,End);
308 }
309 /*}}}*/
310 // TagSection::FindI - Find an integer /*{{{*/
311 // ---------------------------------------------------------------------
312 /* */
313 signed int pkgTagSection::FindI(const char *Tag,signed long Default) const
314 {
315 const char *Start;
316 const char *Stop;
317 if (Find(Tag,Start,Stop) == false)
318 return Default;
319
320 // Copy it into a temp buffer so we can use strtol
321 char S[300];
322 if ((unsigned)(Stop - Start) >= sizeof(S))
323 return Default;
324 strncpy(S,Start,Stop-Start);
325 S[Stop - Start] = 0;
326
327 char *End;
328 signed long Result = strtol(S,&End,10);
329 if (S == End)
330 return Default;
331 return Result;
332 }
333 /*}}}*/
334 // TagSection::FindFlag - Locate a yes/no type flag /*{{{*/
335 // ---------------------------------------------------------------------
336 /* The bits marked in Flag are masked on/off in Flags */
337 bool pkgTagSection::FindFlag(const char *Tag,unsigned long &Flags,
338 unsigned long Flag) const
339 {
340 const char *Start;
341 const char *Stop;
342 if (Find(Tag,Start,Stop) == false)
343 return true;
344
345 switch (StringToBool(string(Start,Stop)))
346 {
347 case 0:
348 Flags &= ~Flag;
349 return true;
350
351 case 1:
352 Flags |= Flag;
353 return true;
354
355 default:
356 _error->Warning("Unknown flag value: %s",string(Start,Stop).c_str());
357 return true;
358 }
359 return true;
360 }
361 /*}}}*/
362
363 // TFRewrite - Rewrite a control record /*{{{*/
364 // ---------------------------------------------------------------------
365 /* This writes the control record to Output, rewriting it as necessary. The
366 override map item specifies the rewriting rules to follow. This also
367 takes the time to sort the field list. */
368
369 /* The order of this list is taken from dpkg source lib/parse.c the fieldinfos
370 array. */
// Output order for the fields of a binary package record, 0 terminated
static const char *iTFRewritePackageOrder[] = {
   "Package", "Essential", "Status", "Priority", "Section",
   "Installed-Size", "Maintainer", "Architecture", "Source", "Version",
   "Revision",          // Obsolete
   "Config-Version",    // Obsolete
   "Replaces", "Provides", "Depends", "Pre-Depends", "Recommends",
   "Suggests", "Conflicts", "Conffiles", "Filename", "Size",
   "MD5Sum", "SHA1Sum",
   "MSDOS-Filename",    // Obsolete
   "Description",
   0
};

// Output order for the fields of a source package record, 0 terminated
static const char *iTFRewriteSourceOrder[] = {
   "Package", "Source", "Binary", "Version", "Priority", "Section",
   "Maintainer", "Build-Depends", "Build-Depends-Indep",
   "Build-Conflicts", "Build-Conflicts-Indep", "Architecture",
   "Standards-Version", "Format", "Directory", "Files",
   0
};

/* The tables are exported through plain pointers rather than as arrays:
   gcc sets the symbol size of an array to the length of the array, which
   causes dynamic relinking errors. A pointer keeps the symbol size
   constant. */
const char **TFRewritePackageOrder = iTFRewritePackageOrder;
const char **TFRewriteSourceOrder = iTFRewriteSourceOrder;
422
423 bool TFRewrite(FILE *Output,pkgTagSection const &Tags,const char *Order[],
424 TFRewriteData *Rewrite)
425 {
426 unsigned char Visited[256]; // Bit 1 is Order, Bit 2 is Rewrite
427 for (unsigned I = 0; I != 256; I++)
428 Visited[I] = 0;
429
430 // Set new tag up as necessary.
431 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
432 {
433 if (Rewrite[J].NewTag == 0)
434 Rewrite[J].NewTag = Rewrite[J].Tag;
435 }
436
437 // Write all all of the tags, in order.
438 for (unsigned int I = 0; Order[I] != 0; I++)
439 {
440 bool Rewritten = false;
441
442 // See if this is a field that needs to be rewritten
443 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
444 {
445 if (strcasecmp(Rewrite[J].Tag,Order[I]) == 0)
446 {
447 Visited[J] |= 2;
448 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
449 {
450 if (isspace(Rewrite[J].Rewrite[0]))
451 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
452 else
453 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
454 }
455
456 Rewritten = true;
457 break;
458 }
459 }
460
461 // See if it is in the fragment
462 unsigned Pos;
463 if (Tags.Find(Order[I],Pos) == false)
464 continue;
465 Visited[Pos] |= 1;
466
467 if (Rewritten == true)
468 continue;
469
470 /* Write out this element, taking a moment to rewrite the tag
471 in case of changes of case. */
472 const char *Start;
473 const char *Stop;
474 Tags.Get(Start,Stop,Pos);
475
476 if (fputs(Order[I],Output) < 0)
477 return _error->Errno("fputs","IO Error to output");
478 Start += strlen(Order[I]);
479 if (fwrite(Start,Stop - Start,1,Output) != 1)
480 return _error->Errno("fwrite","IO Error to output");
481 if (Stop[-1] != '\n')
482 fprintf(Output,"\n");
483 }
484
485 // Now write all the old tags that were missed.
486 for (unsigned int I = 0; I != Tags.Count(); I++)
487 {
488 if ((Visited[I] & 1) == 1)
489 continue;
490
491 const char *Start;
492 const char *Stop;
493 Tags.Get(Start,Stop,I);
494 const char *End = Start;
495 for (; End < Stop && *End != ':'; End++);
496
497 // See if this is a field that needs to be rewritten
498 bool Rewritten = false;
499 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
500 {
501 if (stringcasecmp(Start,End,Rewrite[J].Tag) == 0)
502 {
503 Visited[J] |= 2;
504 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
505 {
506 if (isspace(Rewrite[J].Rewrite[0]))
507 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
508 else
509 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
510 }
511
512 Rewritten = true;
513 break;
514 }
515 }
516
517 if (Rewritten == true)
518 continue;
519
520 // Write out this element
521 if (fwrite(Start,Stop - Start,1,Output) != 1)
522 return _error->Errno("fwrite","IO Error to output");
523 if (Stop[-1] != '\n')
524 fprintf(Output,"\n");
525 }
526
527 // Now write all the rewrites that were missed
528 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
529 {
530 if ((Visited[J] & 2) == 2)
531 continue;
532
533 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
534 {
535 if (isspace(Rewrite[J].Rewrite[0]))
536 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
537 else
538 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
539 }
540 }
541
542 return true;
543 }
544 /*}}}*/