* support non-mmapable files again
[ntk/apt.git] / apt-pkg / contrib / strutl.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
5
6 String Util - Some useful string functions.
7
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
11
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
14
15 ##################################################################### */
16 /*}}}*/
17 // Includes /*{{{*/
18 #ifdef __GNUG__
19 #pragma implementation "apt-pkg/strutl.h"
20 #endif
21
22 #include <apt-pkg/strutl.h>
23 #include <apt-pkg/fileutl.h>
24 #include <apt-pkg/error.h>
25
26 #include <apti18n.h>
27
28 #include <ctype.h>
29 #include <string.h>
30 #include <stdio.h>
31 #include <unistd.h>
32 #include <regex.h>
33 #include <errno.h>
34 #include <stdarg.h>
35 #include <iconv.h>
36
37 #include "config.h"
38
39 using namespace std;
40 /*}}}*/
41
42 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
43 // ---------------------------------------------------------------------
44 /* This is handy to use before display some information for enduser */
45 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
46 {
47 iconv_t cd;
48 const char *inbuf;
49 char *inptr, *outbuf, *outptr;
50 size_t insize, outsize;
51
52 cd = iconv_open(codeset, "UTF-8");
53 if (cd == (iconv_t)(-1)) {
54 // Something went wrong
55 if (errno == EINVAL)
56 _error->Error("conversion from 'UTF-8' to '%s' not available",
57 codeset);
58 else
59 perror("iconv_open");
60
61 // Clean the destination string
62 *dest = "";
63
64 return false;
65 }
66
67 insize = outsize = orig.size();
68 inbuf = orig.data();
69 inptr = (char *)inbuf;
70 outbuf = new char[insize+1];
71 outptr = outbuf;
72
73 iconv(cd, &inptr, &insize, &outptr, &outsize);
74 *outptr = '\0';
75
76 *dest = outbuf;
77 delete[] outbuf;
78
79 iconv_close(cd);
80
81 return true;
82 }
83 /*}}}*/
// strstrip - Remove white space from the front and back of a string	/*{{{*/
// ---------------------------------------------------------------------
/* This is handy to use when parsing a file. Leading spaces and tabs are
   skipped, trailing spaces, tabs, \n and \r (as left over from fgets and
   company) are cut off by writing a terminator into String. The returned
   pointer points into String. */
char *_strstrip(char *String)
{
   while (*String == ' ' || *String == '\t')
      String++;

   // Nothing left, and nothing to write either
   if (*String == 0)
      return String;

   /* Walk back from the terminator. End is compared with String before
      End[-1] is looked at, so the pointer never leaves the buffer even if
      all that remains are line end characters. */
   char *End = String + strlen(String);
   while (End != String && (End[-1] == ' ' || End[-1] == '\t' ||
                            End[-1] == '\n' || End[-1] == '\r'))
      End--;
   *End = 0;
   return String;
}
102 /*}}}*/
// strtabexpand - Expand tabs to the next 8 column stop		/*{{{*/
// ---------------------------------------------------------------------
/* Every tab in String is replaced, in place, by the blanks needed to reach
   the next multiple of 8 columns, the start of the string being column 0.
   Len is the size of the buffer holding String. If an expansion (together
   with the text following it and the terminator) does not fit into Len
   bytes the string is cut off at that tab. String is returned. */
char *_strtabexpand(char *String,size_t Len)
{
   char * const Limit = String + Len;
   for (char *I = String; I < Limit && *I != 0; ++I)
   {
      if (*I != '\t')
         continue;

      // Blanks needed to reach the next 8 char division
      size_t const Fill = 8 - ((I - String) % 8);

      // Text behind the tab, including its terminator
      size_t const Tail = strlen(I + 1) + 1;

      // No room for blanks + tail: end the string here
      if ((size_t)(Limit - I) < Fill + Tail)
      {
         *I = 0;
         return String;
      }

      memmove(I + Fill,I + 1,Tail);
      memset(I,' ',Fill);

      // Continue right behind the blanks (the loop adds the last step)
      I += Fill - 1;
   }
   return String;
}
137 /*}}}*/
// ParseQuoteWord - Parse a single word out of a string		/*{{{*/
// ---------------------------------------------------------------------
/* This grabs a single word, converts any % escaped characters to their
   proper values and advances the pointer. Double quotes are understood
   and stripped out as well. This is for URI/URL parsing. It also can
   understand [] brackets; both quotes and brackets may contain spaces.
   false is returned (String untouched) if only blanks are left or a quote
   or bracket is never closed. A word is limited to 1023 characters, the
   rest of a longer one is dropped. */
bool ParseQuoteWord(const char *&String,string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      C++;
   if (*C == 0)
      return false;

   // Jump to the next word
   for (;*C != 0 && isspace((unsigned char)*C) == 0; C++)
   {
      if (*C == '"')
      {
         for (C++; *C != 0 && *C != '"'; C++);
         if (*C == 0)
            return false;
      }
      if (*C == '[')
      {
         for (C++; *C != 0 && *C != ']'; C++);
         if (*C == 0)
            return false;
      }
   }

   /* Now de-quote characters. Copying starts at String, not at the first
      non-blank, so leading blanks end up in the result. One byte of the
      buffer is kept back for the terminator. */
   char Buffer[1024];
   char * const Last = Buffer + sizeof(Buffer) - 1;
   char *I = Buffer;
   for (const char *Start = String; Start != C && I < Last;)
   {
      if (*Start == '%' && Start + 2 < C)
      {
         char const Tmp[3] = {Start[1],Start[2],0};
         *I++ = (char)strtol(Tmp,0,16);
         Start += 3;
         continue;
      }

      // Quotes are dropped, everything else is taken over as is
      if (*Start != '"')
         *I++ = *Start;
      Start++;
   }
   *I = 0;
   Res = Buffer;

   // Skip ending white space
   for (;*C != 0 && isspace((unsigned char)*C) != 0; C++);
   String = C;
   return true;
}
199 /*}}}*/
// ParseCWord - Parses a string like a C "" expression		/*{{{*/
// ---------------------------------------------------------------------
/* The input has to be a series of "" enclosed strings separated by white
   space. The contents of the ""'s are joined into a single string, each
   run of white space between them turning into one blank. Anything else
   outside of the quotes, an unterminated quote, an empty input or an input
   of 1024 or more characters makes this return false. On success String is
   moved to the end of the input. */
bool ParseCWord(const char *&String,string &Res)
{
   // Leading blanks do not count
   const char *Cur = String;
   while (*Cur == ' ')
      ++Cur;
   if (*Cur == 0)
      return false;

   // The output never gets longer than the input, so this is enough
   char Buffer[1024];
   if (strlen(String) >= sizeof(Buffer))
      return false;

   char *Out = Buffer;
   while (*Cur != 0)
   {
      if (*Cur == '"')
      {
         // Take over everything up to the closing quote
         for (++Cur; *Cur != 0 && *Cur != '"'; ++Cur)
            *Out++ = *Cur;

         if (*Cur == 0)
            return false;

         ++Cur;
         continue;
      }

      bool const Blank = (isspace(*Cur) != 0);

      // Second and later blanks of a run are swallowed
      if (Cur != String && Blank == true && isspace(Cur[-1]) != 0)
      {
         ++Cur;
         continue;
      }

      // Only white space may live between the quoted parts
      if (Blank == false)
         return false;

      *Out++ = ' ';
      ++Cur;
   }

   *Out = 0;
   Res = Buffer;
   String = Cur;
   return true;
}
241 /*}}}*/
// QuoteString - Convert a string into quoted form			/*{{{*/
// ---------------------------------------------------------------------
/* Every character that is listed in Bad, is not printable, or lies outside
   of 0x21..0x7E is replaced by %xx, xx being the two digit lower case hex
   value of the byte. All other characters are copied. */
string QuoteString(const string &Str, const char *Bad)
{
   string Res;
   for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      /* Work on the unsigned value: a plain (signed) char above 0x7F
         would otherwise be printed sign extended as %ffffffxx. */
      unsigned char const C = (unsigned char)*I;
      if (strchr(Bad,*I) != 0 || isprint(C) == 0 ||
          C <= 0x20 || C >= 0x7F)
      {
         char Buf[4];
         snprintf(Buf,sizeof(Buf),"%%%02x",(unsigned int)C);
         Res += Buf;
      }
      else
         Res += *I;
   }
   return Res;
}
262 /*}}}*/
// DeQuoteString - Convert a string from quoted form			/*{{{*/
// ---------------------------------------------------------------------
/* This undoes QuoteString: each % followed by two hex digits is replaced
   by the byte with that value. A % that is not followed by two hex digits
   is not an escape and is copied unchanged. */
string DeQuoteString(const string &Str)
{
   string Res;
   for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      if (*I == '%' && Str.end() - I > 2 &&
          isxdigit((unsigned char)I[1]) != 0 &&
          isxdigit((unsigned char)I[2]) != 0)
      {
         char const Tmp[3] = {I[1],I[2],0};
         Res += (char)strtol(Tmp,0,16);

         // The loop steps over the third character of the escape
         I += 2;
         continue;
      }

      Res += *I;
   }
   return Res;
}
286
287 /*}}}*/
// SizeToStr - Convert a size into a human readable string		/*{{{*/
// ---------------------------------------------------------------------
/* The absolute value of Size is divided by 1000 until less than 10000 is
   left, so a max of 4 digits are shown before conversion to the next
   highest unit. Scaled values below 100 get one decimal place. The max
   length of the string will be 5 chars unless the size is > 10 YottaBytes
   (E24); such sizes are shown as a longer number of Y. */
string SizeToStr(double Size)
{
   double ASize = (Size >= 0) ? Size : -1*Size;

   /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
      ExaBytes, ZettaBytes, YottaBytes */
   static const char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
   unsigned int const Last = sizeof(Ext) - 1;

   // Stop at the biggest unit instead of running off the table
   unsigned int I = 0;
   while (I < Last && ASize >= 10000)
   {
      ASize /= 1000.0;
      I++;
   }

   char S[350];
   if (I != 0 && ASize < 100)
      snprintf(S,sizeof(S),"%.1f",ASize);
   else
      snprintf(S,sizeof(S),"%.0f",ASize);

   string Res = S;
   if (Ext[I] != 0)
      Res += Ext[I];
   return Res;
}
325 /*}}}*/
// TimeToStr - Convert the time into a string				/*{{{*/
// ---------------------------------------------------------------------
/* Converts a number of seconds to a d/h/m/s format. A bigger unit is only
   used once the time is strictly above one of it, so 60 seconds are shown
   as 60s and 61 seconds as 1m1s. */
string TimeToStr(unsigned long Sec)
{
   unsigned long const Minutes = Sec/60;
   unsigned long const Hours = Minutes/60;
   char S[300];

   // %lu as all the values are unsigned long
   if (Sec > 60*60*24)
      snprintf(S,sizeof(S),"%lud %luh%lum%lus",Hours/24,Hours % 24,
               Minutes % 60,Sec % 60);
   else if (Sec > 60*60)
      snprintf(S,sizeof(S),"%luh%lum%lus",Hours,Minutes % 60,Sec % 60);
   else if (Sec > 60)
      snprintf(S,sizeof(S),"%lum%lus",Minutes,Sec % 60);
   else
      snprintf(S,sizeof(S),"%lus",Sec);

   return S;
}
359 /*}}}*/
// SubstVar - Substitute a string for another string			/*{{{*/
// ---------------------------------------------------------------------
/* This replaces all occurrences of Subst with Contents in Str and returns
   the result. Str is returned unchanged if Subst is empty or if the first
   thing that happens is that no occurrence is found. */
string SubstVar(const string &Str,const string &Subst,const string &Contents)
{
   // An empty pattern is found everywhere and the scan would never advance
   if (Subst.empty() == true)
      return Str;

   string::size_type Pos = 0;
   string::size_type OldPos = 0;
   string Temp;

   while (OldPos < Str.length() &&
          (Pos = Str.find(Subst,OldPos)) != string::npos)
   {
      // Text between the previous match and this one; a length is needed
      Temp.append(Str,OldPos,Pos - OldPos);
      Temp += Contents;
      OldPos = Pos + Subst.length();
   }

   if (OldPos == 0)
      return Str;

   return Temp + string(Str,OldPos);
}
381
382 string SubstVar(string Str,const struct SubstVar *Vars)
383 {
384 for (; Vars->Subst != 0; Vars++)
385 Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
386 return Str;
387 }
388 /*}}}*/
389 // URItoFileName - Convert the uri into a unique file name /*{{{*/
390 // ---------------------------------------------------------------------
391 /* This converts a URI into a safe filename. It quotes all unsafe characters
392 and converts / to _ and removes the scheme identifier. The resulting
393 file name should be unique and never occur again for a different file */
394 string URItoFileName(const string &URI)
395 {
396 // Nuke 'sensitive' items
397 ::URI U(URI);
398 U.User.clear();
399 U.Password.clear();
400 U.Access.clear();
401
402 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
403 string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
404 replace(NewURI.begin(),NewURI.end(),'/','_');
405 return NewURI;
406 }
407 /*}}}*/
// Base64Encode - Base64 Encoding routine for short strings		/*{{{*/
// ---------------------------------------------------------------------
/* This routine performs a base64 transformation on a string. It was ripped
   from wget and then patched and bug fixed.

   The spec can be found in rfc2045 */
string Base64Encode(const string &S)
{
   // Conversion table.
   static const char tbl[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

   // Pre-allocate some space
   string Final;
   Final.reserve((4*S.length() + 2)/3 + 2);

   /* Transform the 3x8 bits to 4x6 bits, as required by base64. The bytes
      are handled as unsigned values, shifting a negative char would give a
      negative index into the table. */
   string::size_type const Len = S.length();
   for (string::size_type I = 0; I < Len; I += 3)
   {
      string::size_type const Left = Len - I;
      unsigned char const B0 = (unsigned char)S[I];
      unsigned char const B1 = (Left > 1) ? (unsigned char)S[I + 1] : 0;
      unsigned char const B2 = (Left > 2) ? (unsigned char)S[I + 2] : 0;

      Final += tbl[B0 >> 2];
      Final += tbl[((B0 & 3) << 4) + (B1 >> 4)];

      /* A short last group is closed with the padding elements, this
         tells how many bytes the remote end should discard */
      if (Left == 1)
      {
         Final += "==";
         break;
      }

      Final += tbl[((B1 & 0xf) << 2) + (B2 >> 6)];

      if (Left == 2)
      {
         Final += '=';
         break;
      }

      Final += tbl[B2 & 0x3f];
   }

   return Final;
}
464 /*}}}*/
// stringcmp - Arbitrary string compare				/*{{{*/
// ---------------------------------------------------------------------
/* This safely compares two non-null terminated strings of arbitrary
   length. 0 means equal. If one string is a proper prefix of the other, 1
   is returned if A is the shorter one and -1 if B is. Otherwise the first
   differing characters decide: -1 if A's is smaller, 1 if not. */
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // Step over the part both strings have in common
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (*A < *B) ? -1 : 1;
}
485
486 #if __GNUC__ >= 3
/* Same as the pointer variant, A being given by string iterators: 0 for
   equal strings, 1 / -1 if A / B is a proper prefix of the other one, else
   -1 or 1 depending on the first differing characters. */
int stringcmp(string::const_iterator A,string::const_iterator AEnd,
              const char *B,const char *BEnd)
{
   // Step over the part both strings have in common
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (*A < *B) ? -1 : 1;
}
/* Same as the pointer variant, both strings being given by string
   iterators: 0 for equal strings, 1 / -1 if A / B is a proper prefix of
   the other one, else -1 or 1 depending on the first differing
   characters. */
int stringcmp(string::const_iterator A,string::const_iterator AEnd,
              string::const_iterator B,string::const_iterator BEnd)
{
   // Step over the part both strings have in common
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (*A < *B) ? -1 : 1;
}
521 #endif
522 /*}}}*/
// stringcasecmp - Arbitrary case insensitive string compare		/*{{{*/
// ---------------------------------------------------------------------
/* Works like stringcmp, but the characters are passed through toupper
   before they are compared. */
int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // Step over the part both strings have in common
   while (A != AEnd && B != BEnd && toupper(*A) == toupper(*B))
   {
      ++A;
      ++B;
   }

   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (toupper(*A) < toupper(*B)) ? -1 : 1;
}
542 #if __GNUC__ >= 3
/* Case insensitive compare of a string iterator range with a character
   range, see the pointer variant for the meaning of the result. */
int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
                  const char *B,const char *BEnd)
{
   // Step over the part both strings have in common
   while (A != AEnd && B != BEnd && toupper(*A) == toupper(*B))
   {
      ++A;
      ++B;
   }

   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (toupper(*A) < toupper(*B)) ? -1 : 1;
}
/* Case insensitive compare of two string iterator ranges, see the pointer
   variant for the meaning of the result. */
int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
                  string::const_iterator B,string::const_iterator BEnd)
{
   // Step over the part both strings have in common
   while (A != AEnd && B != BEnd && toupper(*A) == toupper(*B))
   {
      ++A;
      ++B;
   }

   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (toupper(*A) < toupper(*B)) ? -1 : 1;
}
577 #endif
578 /*}}}*/
579 // LookupTag - Lookup the value of a tag in a taged string /*{{{*/
580 // ---------------------------------------------------------------------
581 /* The format is like those used in package files and the method
582 communication system */
583 string LookupTag(const string &Message,const char *Tag,const char *Default)
584 {
585 // Look for a matching tag.
586 int Length = strlen(Tag);
587 for (string::const_iterator I = Message.begin(); I + Length < Message.end(); I++)
588 {
589 // Found the tag
590 if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
591 {
592 // Find the end of line and strip the leading/trailing spaces
593 string::const_iterator J;
594 I += Length + 1;
595 for (; isspace(*I) != 0 && I < Message.end(); I++);
596 for (J = I; *J != '\n' && J < Message.end(); J++);
597 for (; J > I && isspace(J[-1]) != 0; J--);
598
599 return string(I,J);
600 }
601
602 for (; *I != '\n' && I < Message.end(); I++);
603 }
604
605 // Failed to find a match
606 if (Default == 0)
607 return string();
608 return Default;
609 }
610 /*}}}*/
// StringToBool - Converts a string into a boolean			/*{{{*/
// ---------------------------------------------------------------------
/* This inspects the string to see if it is true or if it is false and
   then returns the result as 1 or 0. Several variants on true/false are
   checked, ignoring case, as well as the numbers 0 and 1. Anything else
   gives Default. */
int StringToBool(const string &Text,int Default)
{
   const char * const Str = Text.c_str();

   /* A number only counts if all of the text was converted, so that things
      like '0ad' are not taken for a 0. The value is kept as a long, a big
      number cut down to an int could end up as 0 or 1. */
   char *End;
   long const Num = strtol(Str,&End,0);
   if (End != Str && End == Str + Text.length() && (Num == 0 || Num == 1))
      return (int)Num;

   // Check for negatives
   static const char * const No[] = {"no","false","without","off","disable"};
   for (unsigned int I = 0; I != sizeof(No)/sizeof(No[0]); ++I)
      if (strcasecmp(Str,No[I]) == 0)
         return 0;

   // Check for positives
   static const char * const Yes[] = {"yes","true","with","on","enable"};
   for (unsigned int I = 0; I != sizeof(Yes)/sizeof(Yes[0]); ++I)
      if (strcasecmp(Str,Yes[I]) == 0)
         return 1;

   return Default;
}
640 /*}}}*/
// TimeRFC1123 - Convert a time_t into RFC1123 format			/*{{{*/
// ---------------------------------------------------------------------
/* This converts a time_t into a string time representation that is
   year 2000 compliant and timezone neutral, like
   'Sun, 06 Nov 1994 08:49:37 GMT'. An empty string is returned if Date
   cannot be broken down into a calendar time. */
string TimeRFC1123(time_t Date)
{
   // gmtime_r reports failure instead of handing back a pointer to deref
   struct tm Conv;
   if (gmtime_r(&Date,&Conv) == 0)
      return string();

   static const char * const Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
   static const char * const Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
                                        "Aug","Sep","Oct","Nov","Dec"};

   char Buf[300];
   snprintf(Buf,sizeof(Buf),"%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
            Conv.tm_min,Conv.tm_sec);
   return Buf;
}
659 /*}}}*/
660 // ReadMessages - Read messages from the FD /*{{{*/
661 // ---------------------------------------------------------------------
662 /* This pulls full messages from the input FD into the message buffer.
663 It assumes that messages will not pause during transit so no
664 fancy buffering is used. */
665 bool ReadMessages(int Fd, vector<string> &List)
666 {
667 char Buffer[64000];
668 char *End = Buffer;
669
670 while (1)
671 {
672 int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
673 if (Res < 0 && errno == EINTR)
674 continue;
675
676 // Process is dead, this is kind of bad..
677 if (Res == 0)
678 return false;
679
680 // No data
681 if (Res < 0 && errno == EAGAIN)
682 return true;
683 if (Res < 0)
684 return false;
685
686 End += Res;
687
688 // Look for the end of the message
689 for (char *I = Buffer; I + 1 < End; I++)
690 {
691 if (I[0] != '\n' || I[1] != '\n')
692 continue;
693
694 // Pull the message out
695 string Message(Buffer,I-Buffer);
696
697 // Fix up the buffer
698 for (; I < End && *I == '\n'; I++);
699 End -= I-Buffer;
700 memmove(Buffer,I,End-Buffer);
701 I = Buffer;
702
703 List.push_back(Message);
704 }
705 if (End == Buffer)
706 return true;
707
708 if (WaitFd(Fd) == false)
709 return false;
710 }
711 }
712 /*}}}*/
// MonthConv - Converts a month string into a number			/*{{{*/
// ---------------------------------------------------------------------
/* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
   Made it a bit more robust with a few touppers though. Only as many
   characters as are needed to tell the english month names apart are
   looked at, ignoring case. The result is 0 (Jan) to 11 (Dec). */
static int MonthConv(char *Month)
{
   int const First = toupper(*Month);

   // Jan, Jun, Jul
   if (First == 'J')
   {
      if (toupper(Month[1]) == 'A')
         return 0;
      return (toupper(Month[2]) == 'N') ? 5 : 6;
   }

   // Mar, May
   if (First == 'M')
      return (toupper(Month[2]) == 'R') ? 2 : 4;

   // Apr, Aug
   if (First == 'A')
      return (toupper(Month[1]) == 'P') ? 3 : 7;

   if (First == 'F')
      return 1;
   if (First == 'S')
      return 8;
   if (First == 'O')
      return 9;
   if (First == 'N')
      return 10;
   if (First == 'D')
      return 11;

   // Pretend it is January..
   return 0;
}
745 /*}}}*/
746 // timegm - Internal timegm function if gnu is not available /*{{{*/
747 // ---------------------------------------------------------------------
748 /* Ripped this evil little function from wget - I prefer the use of
749 GNU timegm if possible as this technique will have interesting problems
750 with leap seconds, timezones and other.
751
752 Converts struct tm to time_t, assuming the data in tm is UTC rather
753 than local timezone (mktime assumes the latter).
754
755 Contributed by Roger Beeman <beeman@cisco.com>, with the help of
756 Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO. */
757
758 /* Turned it into an autoconf check, because GNU is not the only thing which
759 can provide timegm. -- 2002-09-22, Joel Baker */
760
761 #ifndef HAVE_TIMEGM // Now with autoconf!
762 static time_t timegm(struct tm *t)
763 {
764 time_t tl, tb;
765
766 tl = mktime (t);
767 if (tl == -1)
768 return -1;
769 tb = mktime (gmtime (&tl));
770 return (tl <= tb ? (tl + (tl - tb)) : (tl - (tb - tl)));
771 }
772 #endif
773 /*}}}*/
774 // StrToTime - Converts a string into a time_t /*{{{*/
775 // ---------------------------------------------------------------------
776 /* This handles all 3 populare time formats including RFC 1123, RFC 1036
777 and the C library asctime format. It requires the GNU library function
778 'timegm' to convert a struct tm in UTC to a time_t. For some bizzar
779 reason the C library does not provide any such function :< This also
780 handles the weird, but unambiguous FTP time format*/
781 bool StrToTime(const string &Val,time_t &Result)
782 {
783 struct tm Tm;
784 char Month[10];
785 const char *I = Val.c_str();
786
787 // Skip the day of the week
788 for (;*I != 0 && *I != ' '; I++);
789
790 // Handle RFC 1123 time
791 Month[0] = 0;
792 if (sscanf(I," %d %3s %d %d:%d:%d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
793 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
794 {
795 // Handle RFC 1036 time
796 if (sscanf(I," %d-%3s-%d %d:%d:%d GMT",&Tm.tm_mday,Month,
797 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
798 Tm.tm_year += 1900;
799 else
800 {
801 // asctime format
802 if (sscanf(I," %3s %d %d:%d:%d %d",Month,&Tm.tm_mday,
803 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
804 {
805 // 'ftp' time
806 if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
807 &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
808 return false;
809 Tm.tm_mon--;
810 }
811 }
812 }
813
814 Tm.tm_isdst = 0;
815 if (Month[0] != 0)
816 Tm.tm_mon = MonthConv(Month);
817 Tm.tm_year -= 1900;
818
819 // Convert to local time and then to GMT
820 Result = timegm(&Tm);
821 return true;
822 }
823 /*}}}*/
// StrToNum - Convert a fixed length string to a number		/*{{{*/
// ---------------------------------------------------------------------
/* This is used in decoding the crazy fixed length string headers in
   tar and ar files. The first Len characters of Str (which need not be
   terminated) are converted with strtoul in the given Base. A field of
   nothing but blanks is a 0. false is returned if Len is 30 or more or if
   no number could be converted. */
bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
{
   // Get a terminated copy of the field
   char S[30];
   if (Len >= sizeof(S))
      return false;
   memcpy(S,Str,Len);
   S[Len] = 0;

   // All spaces is a zero
   Res = 0;
   const char *First = S;
   while (*First == ' ')
      ++First;
   if (*First == 0)
      return true;

   char *End;
   Res = strtoul(S,&End,Base);
   return End != S;
}
850 /*}}}*/
// HexDigit - Convert a hex character into an integer			/*{{{*/
// ---------------------------------------------------------------------
/* Helper for Hex2Num. Gives the value (0-15) of an upper or lower case hex
   digit and 0 for every other character. */
static int HexDigit(int c)
{
   int Value = 0;
   if (c >= 'A' && c <= 'F')
      Value = 10 + (c - 'A');
   else if (c >= 'a' && c <= 'f')
      Value = 10 + (c - 'a');
   else if (c >= '0' && c <= '9')
      Value = c - '0';
   return Value;
}
864 /*}}}*/
865 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
866 // ---------------------------------------------------------------------
867 /* The length of the buffer must be exactly 1/2 the length of the string. */
868 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
869 {
870 if (Str.length() != Length*2)
871 return false;
872
873 // Convert each digit. We store it in the same order as the string
874 int J = 0;
875 for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
876 {
877 if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
878 return false;
879
880 Num[J] = HexDigit(I[0]) << 4;
881 Num[J] += HexDigit(I[1]);
882 }
883
884 return true;
885 }
886 /*}}}*/
// TokSplitString - Split a string up by a given token		/*{{{*/
// ---------------------------------------------------------------------
/* This is intended to be a faster splitter, it does not use dynamic
   memory. Input is changed to insert nulls at each token location, List
   receives pointers to the pieces (without the white space around them)
   followed by a 0 entry. false is returned if List, which has room for
   ListMax entries, is too small for the pieces and the closing 0. */
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   // The end is taken before any null is written into the input
   char *Start = Input;
   char * const Stop = Start + strlen(Start);

   // Strip any leading spaces
   while (*Start != 0 && isspace(*Start) != 0)
      ++Start;

   unsigned long Count = 0;
   for (char *Pos = Start; Pos != Stop; Start = Pos)
   {
      // Skip to the next Token
      while (Pos != Stop && *Pos != Tok)
         ++Pos;

      // Back remove spaces
      char *End = Pos;
      while (End > Start && (End[-1] == Tok || isspace(End[-1]) != 0))
         --End;
      *End = 0;

      List[Count] = Start;
      ++Count;
      if (Count >= ListMax)
      {
         List[Count-1] = 0;
         return false;
      }

      // Advance pos, also over the null that may just have been written
      while (Pos != Stop && (*Pos == Tok || isspace(*Pos) != 0 || *Pos == 0))
         ++Pos;
   }

   List[Count] = 0;
   return true;
}
926 /*}}}*/
927 // RegexChoice - Simple regex list/list matcher /*{{{*/
928 // ---------------------------------------------------------------------
929 /* */
930 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
931 const char **ListEnd)
932 {
933 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
934 R->Hit = false;
935
936 unsigned long Hits = 0;
937 for (; ListBegin != ListEnd; ListBegin++)
938 {
939 // Check if the name is a regex
940 const char *I;
941 bool Regex = true;
942 for (I = *ListBegin; *I != 0; I++)
943 if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
944 break;
945 if (*I == 0)
946 Regex = false;
947
948 // Compile the regex pattern
949 regex_t Pattern;
950 if (Regex == true)
951 if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
952 REG_NOSUB) != 0)
953 Regex = false;
954
955 // Search the list
956 bool Done = false;
957 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
958 {
959 if (R->Str[0] == 0)
960 continue;
961
962 if (strcasecmp(R->Str,*ListBegin) != 0)
963 {
964 if (Regex == false)
965 continue;
966 if (regexec(&Pattern,R->Str,0,0,0) != 0)
967 continue;
968 }
969 Done = true;
970
971 if (R->Hit == false)
972 Hits++;
973
974 R->Hit = true;
975 }
976
977 if (Regex == true)
978 regfree(&Pattern);
979
980 if (Done == false)
981 _error->Warning(_("Selection %s not found"),*ListBegin);
982 }
983
984 return Hits;
985 }
986 /*}}}*/
// ioprintf - C format string outputter to C++ iostreams		/*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters. The formatted text is written to
   out as a whole, nothing is written if formatting fails. */
void ioprintf(ostream &out,const char *format,...)
{
   // Most messages fit into this
   char S[400];
   va_list args;
   va_start(args,format);
   int const Need = vsnprintf(S,sizeof(S),format,args);
   va_end(args);

   if (Need < 0)
      return;

   if ((size_t)Need < sizeof(S))
   {
      out << S;
      return;
   }

   /* It did not fit. vsnprintf has told us how much room is needed, so
      format again into a buffer of that size instead of cutting the
      message short. The argument list has to be restarted for that. */
   string Big(Need + 1,'\0');
   va_start(args,format);
   vsnprintf(&Big[0],Big.size(),format,args);
   va_end(args);
   out << Big.c_str();
}
1001 /*}}}*/
// safe_snprintf - Safer snprintf					/*{{{*/
// ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == end. End is returned if the output had to
   be cut short or could not be formatted at all. This is a better
   alternative to using consecutive snprintfs. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   // No room for anything, not even for the terminator
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   // The result is signed, so an error (< 0) can really be noticed
   if (Did < 0 || Did > End - Buffer)
      return End;
   return Buffer + Did;
}
1023 /*}}}*/
1024
1025 // CheckDomainList - See if Host is in a , seperate list /*{{{*/
1026 // ---------------------------------------------------------------------
1027 /* The domain list is a comma seperate list of domains that are suffix
1028 matched against the argument */
1029 bool CheckDomainList(const string &Host,const string &List)
1030 {
1031 string::const_iterator Start = List.begin();
1032 for (string::const_iterator Cur = List.begin(); Cur <= List.end(); Cur++)
1033 {
1034 if (Cur < List.end() && *Cur != ',')
1035 continue;
1036
1037 // Match the end of the string..
1038 if ((Host.size() >= (unsigned)(Cur - Start)) &&
1039 Cur - Start != 0 &&
1040 stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1041 return true;
1042
1043 Start = Cur + 1;
1044 }
1045 return false;
1046 }
1047 /*}}}*/
1048
1049 // URI::CopyFrom - Copy from an object /*{{{*/
1050 // ---------------------------------------------------------------------
1051 /* This parses the URI into all of its components */
1052 void URI::CopyFrom(const string &U)
1053 {
1054 string::const_iterator I = U.begin();
1055
1056 // Locate the first colon, this separates the scheme
1057 for (; I < U.end() && *I != ':' ; I++);
1058 string::const_iterator FirstColon = I;
1059
1060 /* Determine if this is a host type URI with a leading double //
1061 and then search for the first single / */
1062 string::const_iterator SingleSlash = I;
1063 if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1064 SingleSlash += 3;
1065
1066 /* Find the / indicating the end of the hostname, ignoring /'s in the
1067 square brackets */
1068 bool InBracket = false;
1069 for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); SingleSlash++)
1070 {
1071 if (*SingleSlash == '[')
1072 InBracket = true;
1073 if (InBracket == true && *SingleSlash == ']')
1074 InBracket = false;
1075 }
1076
1077 if (SingleSlash > U.end())
1078 SingleSlash = U.end();
1079
1080 // We can now write the access and path specifiers
1081 Access.assign(U.begin(),FirstColon);
1082 if (SingleSlash != U.end())
1083 Path.assign(SingleSlash,U.end());
1084 if (Path.empty() == true)
1085 Path = "/";
1086
1087 // Now we attempt to locate a user:pass@host fragment
1088 if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1089 FirstColon += 3;
1090 else
1091 FirstColon += 1;
1092 if (FirstColon >= U.end())
1093 return;
1094
1095 if (FirstColon > SingleSlash)
1096 FirstColon = SingleSlash;
1097
1098 // Find the colon...
1099 I = FirstColon + 1;
1100 if (I > SingleSlash)
1101 I = SingleSlash;
1102 for (; I < SingleSlash && *I != ':'; I++);
1103 string::const_iterator SecondColon = I;
1104
1105 // Search for the @ after the colon
1106 for (; I < SingleSlash && *I != '@'; I++);
1107 string::const_iterator At = I;
1108
1109 // Now write the host and user/pass
1110 if (At == SingleSlash)
1111 {
1112 if (FirstColon < SingleSlash)
1113 Host.assign(FirstColon,SingleSlash);
1114 }
1115 else
1116 {
1117 Host.assign(At+1,SingleSlash);
1118 User.assign(FirstColon,SecondColon);
1119 if (SecondColon < At)
1120 Password.assign(SecondColon+1,At);
1121 }
1122
1123 // Now we parse the RFC 2732 [] hostnames.
1124 unsigned long PortEnd = 0;
1125 InBracket = false;
1126 for (unsigned I = 0; I != Host.length();)
1127 {
1128 if (Host[I] == '[')
1129 {
1130 InBracket = true;
1131 Host.erase(I,1);
1132 continue;
1133 }
1134
1135 if (InBracket == true && Host[I] == ']')
1136 {
1137 InBracket = false;
1138 Host.erase(I,1);
1139 PortEnd = I;
1140 continue;
1141 }
1142 I++;
1143 }
1144
1145 // Tsk, weird.
1146 if (InBracket == true)
1147 {
1148 Host.clear();
1149 return;
1150 }
1151
1152 // Now we parse off a port number from the hostname
1153 Port = 0;
1154 string::size_type Pos = Host.rfind(':');
1155 if (Pos == string::npos || Pos < PortEnd)
1156 return;
1157
1158 Port = atoi(string(Host,Pos+1).c_str());
1159 Host.assign(Host,0,Pos);
1160 }
1161 /*}}}*/
1162 // URI::operator string - Convert the URI to a string /*{{{*/
1163 // ---------------------------------------------------------------------
1164 /* */
1165 URI::operator string()
1166 {
1167 string Res;
1168
1169 if (Access.empty() == false)
1170 Res = Access + ':';
1171
1172 if (Host.empty() == false)
1173 {
1174 if (Access.empty() == false)
1175 Res += "//";
1176
1177 if (User.empty() == false)
1178 {
1179 Res += User;
1180 if (Password.empty() == false)
1181 Res += ":" + Password;
1182 Res += "@";
1183 }
1184
1185 // Add RFC 2732 escaping characters
1186 if (Access.empty() == false &&
1187 (Host.find('/') != string::npos || Host.find(':') != string::npos))
1188 Res += '[' + Host + ']';
1189 else
1190 Res += Host;
1191
1192 if (Port != 0)
1193 {
1194 char S[30];
1195 sprintf(S,":%u",Port);
1196 Res += S;
1197 }
1198 }
1199
1200 if (Path.empty() == false)
1201 {
1202 if (Path[0] != '/')
1203 Res += "/" + Path;
1204 else
1205 Res += Path;
1206 }
1207
1208 return Res;
1209 }
1210 /*}}}*/
1211 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1212 // ---------------------------------------------------------------------
1213 /* */
1214 string URI::SiteOnly(const string &URI)
1215 {
1216 ::URI U(URI);
1217 U.User.clear();
1218 U.Password.clear();
1219 U.Path.clear();
1220 U.Port = 0;
1221 return U;
1222 }
1223 /*}}}*/