Join with aliencode
[ntk/apt.git] / apt-pkg / tagfile.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: tagfile.cc,v 1.26 2001/02/20 07:03:17 jgg Exp $
4 /* ######################################################################
5
6 Fast scanner for RFC-822 type header information
7
8 This uses a rotating buffer to load the package information into.
9 The scanner runs over it and isolates and indexes a single section.
10
11 ##################################################################### */
12 /*}}}*/
13 // Include Files /*{{{*/
14 #ifdef __GNUG__
15 #pragma implementation "apt-pkg/tagfile.h"
16 #endif
17
18 #include <apt-pkg/tagfile.h>
19 #include <apt-pkg/error.h>
20 #include <apt-pkg/strutl.h>
21
22 #include <apti18n.h>
23
24 #include <string>
25 #include <stdio.h>
26 /*}}}*/
27
28 // TagFile::pkgTagFile - Constructor /*{{{*/
29 // ---------------------------------------------------------------------
30 /* */
31 pkgTagFile::pkgTagFile(FileFd *pFd,unsigned long Size) : Fd(*pFd), Size(Size)
32 {
33 Buffer = new char[Size];
34 Start = End = Buffer;
35 Left = Fd.Size();
36 TotalSize = Fd.Size();
37 iOffset = 0;
38 Fill();
39 }
40 /*}}}*/
41 // TagFile::~pkgTagFile - Destructor /*{{{*/
42 // ---------------------------------------------------------------------
43 /* */
44 pkgTagFile::~pkgTagFile()
45 {
46 delete [] Buffer;
47 }
48 /*}}}*/
49 // TagFile::Step - Advance to the next section /*{{{*/
50 // ---------------------------------------------------------------------
51 /* If the Section Scanner fails we refill the buffer and try again. */
52 bool pkgTagFile::Step(pkgTagSection &Tag)
53 {
54 if (Tag.Scan(Start,End - Start) == false)
55 {
56 if (Fill() == false)
57 return false;
58
59 if (Tag.Scan(Start,End - Start) == false)
60 return _error->Error(_("Unable to parse package file %s (1)"),Fd.Name().c_str());
61 }
62 Start += Tag.size();
63 iOffset += Tag.size();
64
65 Tag.Trim();
66
67 return true;
68 }
69 /*}}}*/
70 // TagFile::Fill - Top up the buffer /*{{{*/
71 // ---------------------------------------------------------------------
72 /* This takes the bit at the end of the buffer and puts it at the start
73 then fills the rest from the file */
74 bool pkgTagFile::Fill()
75 {
76 unsigned long EndSize = End - Start;
77
78 memmove(Buffer,Start,EndSize);
79 Start = Buffer;
80 End = Buffer + EndSize;
81
82 if (Left == 0)
83 {
84 if (EndSize <= 3)
85 return false;
86 if (Size - (End - Buffer) < 4)
87 return true;
88
89 // Append a double new line if one does not exist
90 unsigned int LineCount = 0;
91 for (const char *E = End - 1; E - End < 6 && (*E == '\n' || *E == '\r'); E--)
92 if (*E == '\n')
93 LineCount++;
94 for (; LineCount < 2; LineCount++)
95 *End++ = '\n';
96
97 return true;
98 }
99
100 // See if only a bit of the file is left
101 if (Left < Size - (End - Buffer))
102 {
103 if (Fd.Read(End,Left) == false)
104 return false;
105
106 End += Left;
107 Left = 0;
108 }
109 else
110 {
111 if (Fd.Read(End,Size - (End - Buffer)) == false)
112 return false;
113
114 Left -= Size - (End - Buffer);
115 End = Buffer + Size;
116 }
117 return true;
118 }
119 /*}}}*/
120 // TagFile::Jump - Jump to a pre-recorded location in the file /*{{{*/
121 // ---------------------------------------------------------------------
122 /* This jumps to a pre-recorded file location and reads the record
123 that is there */
124 bool pkgTagFile::Jump(pkgTagSection &Tag,unsigned long Offset)
125 {
126 // We are within a buffer space of the next hit..
127 if (Offset >= iOffset && iOffset + (End - Start) > Offset)
128 {
129 unsigned long Dist = Offset - iOffset;
130 Start += Dist;
131 iOffset += Dist;
132 return Step(Tag);
133 }
134
135 // Reposition and reload..
136 iOffset = Offset;
137 Left = TotalSize - Offset;
138 if (Fd.Seek(Offset) == false)
139 return false;
140 End = Start = Buffer;
141
142 if (Fill() == false)
143 return false;
144
145 if (Tag.Scan(Start,End - Start) == true)
146 return true;
147
148 // This appends a double new line (for the real eof handling)
149 if (Fill() == false)
150 return false;
151
152 if (Tag.Scan(Start,End - Start) == false)
153 return _error->Error(_("Unable to parse package file %s (2)"),Fd.Name().c_str());
154
155 return true;
156 }
157 /*}}}*/
158 // TagSection::Scan - Scan for the end of the header information /*{{{*/
159 // ---------------------------------------------------------------------
160 /* This looks for the first double new line in the data stream. It also
161 indexes the tags in the section. A very simple hash function over the tag
162 name gives very good performance on the debian package files */
// Hashes a tag name into the range 0..255.
//
// Hashing stops at End, at the first ':' or at a NUL byte, whichever comes
// first, so the function can be fed either a bare tag ("Package") or a
// pointer into a record line ("Package: foo"). Each byte is masked with 0xDF,
// which maps ASCII lower-case letters onto their upper-case counterparts:
// tag lookups compare names case-insensitively, so names that differ only in
// case have to land in the same bucket.
inline static unsigned long AlphaHash(const char *Text, const char *End = 0)
{
   unsigned long Res = 0;
   for (; Text != End && *Text != ':' && *Text != 0; Text++)
      Res = ((unsigned long)(unsigned char)(*Text) & 0xDF) ^ (Res << 2);
   return Res & 0xFF;
}
170
171 bool pkgTagSection::Scan(const char *Start,unsigned long MaxLength)
172 {
173 const char *End = Start + MaxLength;
174 Stop = Section = Start;
175 memset(AlphaIndexes,0,sizeof(AlphaIndexes));
176
177 if (Stop == 0)
178 return false;
179
180 TagCount = 0;
181 while (TagCount < sizeof(Indexes)/sizeof(Indexes[0]) && Stop < End)
182 {
183 // Start a new index and add it to the hash
184 if (isspace(Stop[0]) == 0)
185 {
186 Indexes[TagCount++] = Stop - Section;
187 AlphaIndexes[AlphaHash(Stop,End)] = TagCount;
188 }
189
190 Stop = (const char *)memchr(Stop,'\n',End - Stop);
191
192 if (Stop == 0)
193 return false;
194
195 for (; Stop[1] == '\r' && Stop+1 < End; Stop++);
196
197 // Double newline marks the end of the record
198 if (Stop+1 < End && Stop[1] == '\n')
199 {
200 Indexes[TagCount] = Stop - Section;
201 for (; (Stop[0] == '\n' || Stop[0] == '\r') && Stop < End; Stop++);
202 return true;
203 }
204
205 Stop++;
206 }
207
208 return false;
209 }
210 /*}}}*/
211 // TagSection::Trim - Trim off any trailing garbage /*{{{*/
212 // ---------------------------------------------------------------------
213 /* There should be exactly 1 newline at the end of the buffer, no more. */
214 void pkgTagSection::Trim()
215 {
216 for (; Stop > Section + 2 && (Stop[-2] == '\n' || Stop[-2] == '\r'); Stop--);
217 }
218 /*}}}*/
219 // TagSection::Find - Locate a tag /*{{{*/
220 // ---------------------------------------------------------------------
221 /* This searches the section for a tag that matches the given string. */
222 bool pkgTagSection::Find(const char *Tag,unsigned &Pos) const
223 {
224 unsigned int Length = strlen(Tag);
225 unsigned int I = AlphaIndexes[AlphaHash(Tag)];
226 if (I == 0)
227 return false;
228 I--;
229
230 for (unsigned int Counter = 0; Counter != TagCount; Counter++,
231 I = (I+1)%TagCount)
232 {
233 const char *St;
234 St = Section + Indexes[I];
235 if (strncasecmp(Tag,St,Length) != 0)
236 continue;
237
238 // Make sure the colon is in the right place
239 const char *C = St + Length;
240 for (; isspace(*C) != 0; C++);
241 if (*C != ':')
242 continue;
243 Pos = I;
244 return true;
245 }
246
247 Pos = 0;
248 return false;
249 }
250 /*}}}*/
251 // TagSection::Find - Locate a tag /*{{{*/
252 // ---------------------------------------------------------------------
253 /* This searches the section for a tag that matches the given string. */
254 bool pkgTagSection::Find(const char *Tag,const char *&Start,
255 const char *&End) const
256 {
257 unsigned int Length = strlen(Tag);
258 unsigned int I = AlphaIndexes[AlphaHash(Tag)];
259 if (I == 0)
260 return false;
261 I--;
262
263 for (unsigned int Counter = 0; Counter != TagCount; Counter++,
264 I = (I+1)%TagCount)
265 {
266 const char *St;
267 St = Section + Indexes[I];
268 if (strncasecmp(Tag,St,Length) != 0)
269 continue;
270
271 // Make sure the colon is in the right place
272 const char *C = St + Length;
273 for (; isspace(*C) != 0; C++);
274 if (*C != ':')
275 continue;
276
277 // Strip off the gunk from the start end
278 Start = C;
279 End = Section + Indexes[I+1];
280 if (Start >= End)
281 return _error->Error("Internal parsing error");
282
283 for (; (isspace(*Start) != 0 || *Start == ':') && Start < End; Start++);
284 for (; isspace(End[-1]) != 0 && End > Start; End--);
285
286 return true;
287 }
288
289 Start = End = 0;
290 return false;
291 }
292 /*}}}*/
293 // TagSection::FindS - Find a string /*{{{*/
294 // ---------------------------------------------------------------------
295 /* */
296 string pkgTagSection::FindS(const char *Tag) const
297 {
298 const char *Start;
299 const char *End;
300 if (Find(Tag,Start,End) == false)
301 return string();
302 return string(Start,End);
303 }
304 /*}}}*/
305 // TagSection::FindI - Find an integer /*{{{*/
306 // ---------------------------------------------------------------------
307 /* */
308 signed int pkgTagSection::FindI(const char *Tag,signed long Default) const
309 {
310 const char *Start;
311 const char *Stop;
312 if (Find(Tag,Start,Stop) == false)
313 return Default;
314
315 // Copy it into a temp buffer so we can use strtol
316 char S[300];
317 if ((unsigned)(Stop - Start) >= sizeof(S))
318 return Default;
319 strncpy(S,Start,Stop-Start);
320 S[Stop - Start] = 0;
321
322 char *End;
323 signed long Result = strtol(S,&End,10);
324 if (S == End)
325 return Default;
326 return Result;
327 }
328 /*}}}*/
329 // TagSection::FindFlag - Locate a yes/no type flag /*{{{*/
330 // ---------------------------------------------------------------------
331 /* The bits marked in Flag are masked on/off in Flags */
332 bool pkgTagSection::FindFlag(const char *Tag,unsigned long &Flags,
333 unsigned long Flag) const
334 {
335 const char *Start;
336 const char *Stop;
337 if (Find(Tag,Start,Stop) == false)
338 return true;
339
340 switch (StringToBool(string(Start,Stop)))
341 {
342 case 0:
343 Flags &= ~Flag;
344 return true;
345
346 case 1:
347 Flags |= Flag;
348 return true;
349
350 default:
351 _error->Warning("Unknown flag value: %s",string(Start,Stop).c_str());
352 return true;
353 }
354 return true;
355 }
356 /*}}}*/
357
358 // TFRewrite - Rewrite a control record /*{{{*/
359 // ---------------------------------------------------------------------
360 /* This writes the control record to Output, rewriting it as necessary. The
361 override map item specifies the rewriting rules to follow. This also
362 takes the time to sort the field list. */
363
364 /* The order of this list is taken from dpkg source lib/parse.c the fieldinfos
365 array. */
// Output order for the fields of a binary package record; the list is
// terminated by a null pointer.
static const char *iTFRewritePackageOrder[] = {
   "Package", "Essential", "Status", "Priority", "Section",
   "Installed-Size", "Maintainer", "Architecture", "Source", "Version",
   "Revision",          // Obsolete
   "Config-Version",    // Obsolete
   "Replaces", "Provides", "Depends", "Pre-Depends", "Recommends",
   "Suggests", "Conflicts", "Conffiles", "Filename", "Size", "MD5Sum",
   "MSDOS-Filename",    // Obsolete
   "Description",
   0
};
// Output order for the fields of a source package record; the list is
// terminated by a null pointer.
static const char *iTFRewriteSourceOrder[] = {
   "Package", "Source", "Binary", "Version", "Priority", "Section",
   "Maintainer",
   "Build-Depends", "Build-Depends-Indep",
   "Build-Conflicts", "Build-Conflicts-Indep",
   "Architecture", "Standards-Version", "Format", "Directory", "Files",
   0
};
410
411 /* Two levels of initialization are used because gcc will set the symbol
412 size of an array to the length of the array, causing dynamic relinking
413 errors. Doing this makes the symbol size constant */
414 const char **TFRewritePackageOrder = iTFRewritePackageOrder;
415 const char **TFRewriteSourceOrder = iTFRewriteSourceOrder;
416
417 bool TFRewrite(FILE *Output,pkgTagSection const &Tags,const char *Order[],
418 TFRewriteData *Rewrite)
419 {
420 unsigned char Visited[256]; // Bit 1 is Order, Bit 2 is Rewrite
421 for (unsigned I = 0; I != 256; I++)
422 Visited[I] = 0;
423
424 // Set new tag up as necessary.
425 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
426 {
427 if (Rewrite[J].NewTag == 0)
428 Rewrite[J].NewTag = Rewrite[J].Tag;
429 }
430
431 // Write all all of the tags, in order.
432 for (unsigned int I = 0; Order[I] != 0; I++)
433 {
434 bool Rewritten = false;
435
436 // See if this is a field that needs to be rewritten
437 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
438 {
439 if (strcasecmp(Rewrite[J].Tag,Order[I]) == 0)
440 {
441 Visited[J] |= 2;
442 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
443 {
444 if (isspace(Rewrite[J].Rewrite[0]))
445 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
446 else
447 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
448 }
449
450 Rewritten = true;
451 break;
452 }
453 }
454
455 // See if it is in the fragment
456 unsigned Pos;
457 if (Tags.Find(Order[I],Pos) == false)
458 continue;
459 Visited[Pos] |= 1;
460
461 if (Rewritten == true)
462 continue;
463
464 /* Write out this element, taking a moment to rewrite the tag
465 in case of changes of case. */
466 const char *Start;
467 const char *Stop;
468 Tags.Get(Start,Stop,Pos);
469
470 if (fputs(Order[I],Output) < 0)
471 return _error->Errno("fputs","IO Error to output");
472 Start += strlen(Order[I]);
473 if (fwrite(Start,Stop - Start,1,Output) != 1)
474 return _error->Errno("fwrite","IO Error to output");
475 if (Stop[-1] != '\n')
476 fprintf(Output,"\n");
477 }
478
479 // Now write all the old tags that were missed.
480 for (unsigned int I = 0; I != Tags.Count(); I++)
481 {
482 if ((Visited[I] & 1) == 1)
483 continue;
484
485 const char *Start;
486 const char *Stop;
487 Tags.Get(Start,Stop,I);
488 const char *End = Start;
489 for (; End < Stop && *End != ':'; End++);
490
491 // See if this is a field that needs to be rewritten
492 bool Rewritten = false;
493 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
494 {
495 if (stringcasecmp(Start,End,Rewrite[J].Tag) == 0)
496 {
497 Visited[J] |= 2;
498 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
499 {
500 if (isspace(Rewrite[J].Rewrite[0]))
501 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
502 else
503 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
504 }
505
506 Rewritten = true;
507 break;
508 }
509 }
510
511 if (Rewritten == true)
512 continue;
513
514 // Write out this element
515 if (fwrite(Start,Stop - Start,1,Output) != 1)
516 return _error->Errno("fwrite","IO Error to output");
517 if (Stop[-1] != '\n')
518 fprintf(Output,"\n");
519 }
520
521 // Now write all the rewrites that were missed
522 for (unsigned int J = 0; Rewrite != 0 && Rewrite[J].Tag != 0; J++)
523 {
524 if ((Visited[J] & 2) == 2)
525 continue;
526
527 if (Rewrite[J].Rewrite != 0 && Rewrite[J].Rewrite[0] != 0)
528 {
529 if (isspace(Rewrite[J].Rewrite[0]))
530 fprintf(Output,"%s:%s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
531 else
532 fprintf(Output,"%s: %s\n",Rewrite[J].NewTag,Rewrite[J].Rewrite);
533 }
534 }
535
536 return true;
537 }
538 /*}}}*/