* apt-pkg/contrib/strutl.cc:
[ntk/apt.git] / apt-pkg / contrib / strutl.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
5
6 String Util - Some useful string functions.
7
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
11
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
14
15 ##################################################################### */
16 /*}}}*/
17 // Includes /*{{{*/
18 #include <apt-pkg/strutl.h>
19 #include <apt-pkg/fileutl.h>
20 #include <apt-pkg/error.h>
21
22 #include <apti18n.h>
23
24 #include <ctype.h>
25 #include <string.h>
26 #include <stdio.h>
27 #include <algorithm>
28 #include <unistd.h>
29 #include <regex.h>
30 #include <errno.h>
31 #include <stdarg.h>
32 #include <iconv.h>
33
34 #include "config.h"
35
36 using namespace std;
37 /*}}}*/
38
39 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
40 // ---------------------------------------------------------------------
41 /* This is handy to use before display some information for enduser */
42 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
43 {
44 iconv_t cd;
45 const char *inbuf;
46 char *inptr, *outbuf;
47 size_t insize, bufsize;
48 dest->clear();
49
50 cd = iconv_open(codeset, "UTF-8");
51 if (cd == (iconv_t)(-1)) {
52 // Something went wrong
53 if (errno == EINVAL)
54 _error->Error("conversion from 'UTF-8' to '%s' not available",
55 codeset);
56 else
57 perror("iconv_open");
58
59 return false;
60 }
61
62 insize = bufsize = orig.size();
63 inbuf = orig.data();
64 inptr = (char *)inbuf;
65 outbuf = new char[bufsize];
66 size_t lastError = -1;
67
68 while (insize != 0)
69 {
70 char *outptr = outbuf;
71 size_t outsize = bufsize;
72 size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
73 dest->append(outbuf, outptr - outbuf);
74 if (err == (size_t)(-1))
75 {
76 switch (errno)
77 {
78 case EILSEQ:
79 insize--;
80 inptr++;
81 // replace a series of unknown multibytes with a single "?"
82 if (lastError != insize) {
83 lastError = insize - 1;
84 dest->append("?");
85 }
86 break;
87 case EINVAL:
88 insize = 0;
89 break;
90 case E2BIG:
91 if (outptr == outbuf)
92 {
93 bufsize *= 2;
94 delete[] outbuf;
95 outbuf = new char[bufsize];
96 }
97 break;
98 }
99 }
100 }
101
102 delete[] outbuf;
103
104 iconv_close(cd);
105
106 return true;
107 }
108 /*}}}*/
// strstrip - Remove white space from the front and back of a string	/*{{{*/
// ---------------------------------------------------------------------
/* Skips leading blanks and tabs and cuts off trailing blanks, tabs, line
   feeds and carriage returns (such as those left over from fgets and
   company). The string is modified in place; the returned pointer is the
   first character kept and points into the caller's buffer. */
char *_strstrip(char *String)
{
   // Step over the leading blanks and tabs
   while (*String == ' ' || *String == '\t')
      ++String;

   if (*String == 0)
      return String;

   // Walk back from the terminator over everything that has to go
   char *Tail = String + strlen(String);
   while (Tail != String &&
          (Tail[-1] == ' ' || Tail[-1] == '\t' ||
           Tail[-1] == '\n' || Tail[-1] == '\r'))
      --Tail;

   *Tail = 0;
   return String;
}
127 /*}}}*/
// strtabexpand - Expand tabs to the next 8 column tab stop		/*{{{*/
// ---------------------------------------------------------------------
/* Replaces every tab in the NUL terminated String by the number of spaces
   needed to reach the next multiple of 8 columns, counting the start of
   the string as column 0. The expansion is done in place.

   Len is the capacity of the buffer String points to, including room for
   the terminator. If expanding a tab would make the string exceed that
   capacity the string is cut off at that tab instead.

   Returns String. */
char *_strtabexpand(char *String,size_t Len)
{
   for (char *I = String; I != String + Len && *I != 0; I++)
   {
      if (*I != '\t')
         continue;

      size_t const Column = I - String;
      size_t const Width = 8 - (Column % 8);
      // Everything behind the tab, terminator included
      size_t const Tail = strlen(I + 1) + 1;

      // No room left for the expanded text: drop the rest
      if (Column + Width + Tail > Len)
      {
         *I = 0;
         return String;
      }

      // Open a gap of Width characters and fill it with spaces
      memmove(I + Width,I + 1,Tail);
      memset(I,' ',Width);

      // Continue right behind the gap (the loop adds the final step)
      I += Width - 1;
   }
   return String;
}
162 /*}}}*/
// ParseQuoteWord - Parse a single word out of a string			/*{{{*/
// ---------------------------------------------------------------------
/* This grabs a single word, converts any % escaped characters to their
   proper values and advances the pointer. Double quotes are understood
   and stripped out as well. This is for URI/URL parsing. It also can
   understand [] brackets: white space inside "..." or [...] does not end
   the word.

   On success Res holds the word and String points at the first character
   after the white space following it. False is returned, with String and
   Res untouched, if only blanks are left or a quote/bracket is not closed.

   Note that de-quoting starts at the original String position, so blanks
   skipped at the front end up in Res. */
bool ParseQuoteWord(const char *&String,string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      C++;
   if (*C == 0)
      return false;

   // Jump to the next word
   for (; *C != 0 && isspace((unsigned char)*C) == 0; C++)
   {
      if (*C == '"')
      {
         for (C++; *C != 0 && *C != '"'; C++);
         if (*C == 0)
            return false;
      }
      if (*C == '[')
      {
         for (C++; *C != 0 && *C != ']'; C++);
         if (*C == 0)
            return false;
      }
   }

   // Now de-quote characters; a string grows as needed, so no length limit
   string Word;
   Word.reserve(C - String);
   for (const char *Start = String; Start != C; )
   {
      if (*Start == '%' && Start + 2 < C)
      {
         char Tmp[3] = {Start[1],Start[2],0};
         Word += (char)strtol(Tmp,0,16);
         Start += 3;
         continue;
      }
      if (*Start != '"')
         Word += *Start;
      Start++;
   }
   // As before, a decoded NUL byte ends the result
   Res = Word.c_str();

   // Skip ending white space
   while (*C != 0 && isspace((unsigned char)*C) != 0)
      C++;
   String = C;
   return true;
}
224 /*}}}*/
// ParseCWord - Parses a string like a C "" expression			/*{{{*/
// ---------------------------------------------------------------------
/* This expects a series of white space separated strings enclosed in ""'s
   and joins their contents into Res; every run of white space between two
   quoted strings becomes a single blank.

   Returns false, leaving String and Res untouched, if the input consists
   of blanks only, is 1024 characters or longer, has an unterminated quote
   or has anything but white space outside of the quotes. On success String
   is moved to the end of the input. */
bool ParseCWord(const char *&String,string &Res)
{
   // Leading blanks carry no information
   const char *Cur = String;
   while (*Cur == ' ')
      ++Cur;
   if (*Cur == 0)
      return false;

   // No output character is produced without consuming an input character,
   // so limiting the input length keeps Buffer from overflowing
   char Buffer[1024];
   if (strlen(String) >= sizeof(Buffer))
      return false;

   size_t Used = 0;
   while (*Cur != 0)
   {
      if (*Cur == '"')
      {
         // Take over the quoted part verbatim, without the quotes
         ++Cur;
         while (*Cur != 0 && *Cur != '"')
            Buffer[Used++] = *Cur++;

         if (*Cur == 0)
            return false;

         ++Cur;
         continue;
      }

      bool const Blank = isspace(*Cur) != 0;
      bool const FollowsBlank = Cur != String && isspace(Cur[-1]) != 0;
      if (Blank == false || FollowsBlank == false)
      {
         // Only white space may live outside of the quotes
         if (Blank == false)
            return false;
         Buffer[Used++] = ' ';
      }
      ++Cur;
   }

   Buffer[Used] = 0;
   Res = Buffer;
   String = Cur;
   return true;
}
266 /*}}}*/
// QuoteString - Convert a string into quoted form			/*{{{*/
// ---------------------------------------------------------------------
/* Returns a copy of Str in which every character that is listed in Bad,
   is not printable, or lies outside of the range 0x21..0x7E is replaced
   by a '%' followed by its two digit lower case hex value. The character
   is treated as an unsigned byte, so bytes of 0x80 and above come out as
   e.g. %e9 as well. */
string QuoteString(const string &Str, const char *Bad)
{
   string Res;
   for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      // ctype functions and the hex conversion need the unsigned value
      unsigned char const Ch = (unsigned char)*I;
      if (strchr(Bad,*I) != 0 || isprint(Ch) == 0 ||
          Ch <= 0x20 || Ch >= 0x7F)
      {
         char Buf[10];
         snprintf(Buf,sizeof(Buf),"%%%02x",(unsigned int)Ch);
         Res += Buf;
      }
      else
         Res += *I;
   }
   return Res;
}
287 /*}}}*/
// DeQuoteString - Convert a string from quoted form			/*{{{*/
// ---------------------------------------------------------------------
/* This undoes QuoteString: every '%' that is followed by two hex digits is
   replaced by the byte these digits encode. A '%' that is not followed by
   two hex digits is copied unchanged. */
string DeQuoteString(const string &Str)
{
   string Res;
   for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      if (*I == '%' && I + 2 < Str.end() &&
          isxdigit((unsigned char)I[1]) != 0 &&
          isxdigit((unsigned char)I[2]) != 0)
      {
         char Tmp[3];
         Tmp[0] = I[1];
         Tmp[1] = I[2];
         Tmp[2] = 0;
         Res += (char)strtol(Tmp,0,16);
         // The loop increment steps over the second digit
         I += 2;
         continue;
      }

      Res += *I;
   }
   return Res;
}
311
312 /*}}}*/
// SizeToStr - Convert a long into a human readable size		/*{{{*/
// ---------------------------------------------------------------------
/* The absolute value of Size is scaled down in steps of 1000 until it is
   below 10000 and then printed with the matching unit letter. A max of 4
   digits are shown before conversion to the next highest unit; scaled
   values below 100 get one decimal place. The max length of the string
   will be 5 chars unless the size is > 10 YottaBytes (E24), in which case
   the value is printed in YottaBytes with as many digits as needed.
   The sign of Size is not part of the result. */
string SizeToStr(double Size)
{
   double ASize = (Size >= 0) ? Size : -1*Size;

   /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
      ExaBytes, ZettaBytes, YottaBytes */
   static const char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
   unsigned int const LastUnit = sizeof(Ext)/sizeof(Ext[0]) - 1;

   // Stop at the last unit no matter what, so something is always printed
   unsigned int I = 0;
   while (I < LastUnit && !(ASize < 10000))
   {
      ASize /= 1000.0;
      I++;
   }

   char S[300];
   if (ASize < 100 && I != 0)
      snprintf(S,sizeof(S),"%'.1f%c",ASize,Ext[I]);
   else
      snprintf(S,sizeof(S),"%'.0f%c",ASize,Ext[I]);

   return S;
}
350 /*}}}*/
351 // TimeToStr - Convert the time into a string /*{{{*/
352 // ---------------------------------------------------------------------
353 /* Converts a number of seconds to a hms format */
354 string TimeToStr(unsigned long Sec)
355 {
356 char S[300];
357
358 while (1)
359 {
360 if (Sec > 60*60*24)
361 {
362 //d means days, h means hours, min means minutes, s means seconds
363 sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
364 break;
365 }
366
367 if (Sec > 60*60)
368 {
369 //h means hours, min means minutes, s means seconds
370 sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
371 break;
372 }
373
374 if (Sec > 60)
375 {
376 //min means minutes, s means seconds
377 sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
378 break;
379 }
380
381 //s means seconds
382 sprintf(S,_("%lis"),Sec);
383 break;
384 }
385
386 return S;
387 }
388 /*}}}*/
// SubstVar - Substitute a string for another string			/*{{{*/
// ---------------------------------------------------------------------
/* This replaces all occurrences of Subst with Contents in Str and returns
   the result. The search continues behind each replacement, so text
   inserted from Contents is never searched again. An empty Subst matches
   nothing and Str is returned unchanged. */
string SubstVar(const string &Str,const string &Subst,const string &Contents)
{
   // An empty pattern would be found everywhere without ever advancing
   if (Subst.empty() == true)
      return Str;

   string::size_type Pos = Str.find(Subst);
   if (Pos == string::npos)
      return Str;

   string Temp;
   string::size_type OldPos = 0;
   do
   {
      // Text between the previous match and this one, then the replacement
      Temp.append(Str,OldPos,Pos - OldPos);
      Temp += Contents;
      OldPos = Pos + Subst.length();
      Pos = Str.find(Subst,OldPos);
   }
   while (Pos != string::npos);

   // Whatever follows the last match
   Temp.append(Str,OldPos,string::npos);
   return Temp;
}
410
411 string SubstVar(string Str,const struct SubstVar *Vars)
412 {
413 for (; Vars->Subst != 0; Vars++)
414 Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
415 return Str;
416 }
417 /*}}}*/
// OutputInDepth - return a string with separator multiplied with depth	/*{{{*/
// ---------------------------------------------------------------------
/* Returns a string with the supplied separator depth + 1 times in it */
std::string OutputInDepth(const unsigned long Depth, const char* Separator)
{
   std::string Line;
   unsigned long Left = Depth + 1;
   while (Left != 0)
   {
      Line += Separator;
      --Left;
   }
   return Line;
}
428 /*}}}*/
429 // URItoFileName - Convert the uri into a unique file name /*{{{*/
430 // ---------------------------------------------------------------------
431 /* This converts a URI into a safe filename. It quotes all unsafe characters
432 and converts / to _ and removes the scheme identifier. The resulting
433 file name should be unique and never occur again for a different file */
434 string URItoFileName(const string &URI)
435 {
436 // Nuke 'sensitive' items
437 ::URI U(URI);
438 U.User.clear();
439 U.Password.clear();
440 U.Access.clear();
441
442 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
443 string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
444 replace(NewURI.begin(),NewURI.end(),'/','_');
445 return NewURI;
446 }
447 /*}}}*/
// Base64Encode - Base64 Encoding routine for short strings		/*{{{*/
// ---------------------------------------------------------------------
/* This routine performs a base64 transformation on a string. It was ripped
   from wget and then patched and bug fixed.

   The input is treated as a sequence of unsigned bytes, so any byte value
   may occur in S. The output is padded with '=' to a multiple of four
   characters and contains no line breaks.

   This spec can be found in rfc2045 */
string Base64Encode(const string &S)
{
   // Conversion table.
   static const char tbl[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

   string::size_type const Len = S.length();

   // Pre-allocate some space
   string Final;
   Final.reserve(4*((Len + 2)/3));

   /* Transform the 3x8 bits to 4x6 bits, as required by
      base64. */
   for (string::size_type Pos = 0; Pos < Len; Pos += 3)
   {
      string::size_type const Avail = Len - Pos;
      unsigned char const B0 = (unsigned char)S[Pos];
      unsigned char const B1 = (Avail > 1) ? (unsigned char)S[Pos + 1] : 0;
      unsigned char const B2 = (Avail > 2) ? (unsigned char)S[Pos + 2] : 0;

      Final += tbl[B0 >> 2];
      Final += tbl[((B0 & 3) << 4) | (B1 >> 4)];

      /* Apply the padding elements, this tells how many bytes the remote
         end should discard */
      if (Avail == 1)
      {
         Final += "==";
         break;
      }

      Final += tbl[((B1 & 0xf) << 2) | (B2 >> 6)];

      if (Avail == 2)
      {
         Final += '=';
         break;
      }

      Final += tbl[B2 & 0x3f];
   }

   return Final;
}
504 /*}}}*/
// stringcmp - Arbitrary string compare					/*{{{*/
// ---------------------------------------------------------------------
/* This safely compares two non-null terminated strings of arbitrary
   length, given as [A,AEnd) and [B,BEnd). Returns 0 if both are equal,
   otherwise -1 or 1 depending on the first differing character.
   Note the result if one range is a proper prefix of the other: the
   shorter range compares as greater (1 if A is the shorter one, -1 if B
   is). */
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // Walk both ranges in lock-step over their common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   bool const ADone = (A == AEnd);
   bool const BDone = (B == BEnd);
   if (ADone == true)
      return (BDone == true) ? 0 : 1;
   if (BDone == true)
      return -1;
   return (*A < *B) ? -1 : 1;
}
525
526 #if __GNUC__ >= 3
/* Same as the pointer based stringcmp, with the first range given by
   string iterators. A range that is a proper prefix of the other one
   compares as greater. */
int stringcmp(string::const_iterator A,string::const_iterator AEnd,
              const char *B,const char *BEnd)
{
   // Walk both ranges in lock-step over their common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   bool const ADone = (A == AEnd);
   bool const BDone = (B == BEnd);
   if (ADone == true)
      return (BDone == true) ? 0 : 1;
   if (BDone == true)
      return -1;
   return (*A < *B) ? -1 : 1;
}
/* Same as the pointer based stringcmp, with both ranges given by string
   iterators. A range that is a proper prefix of the other one compares
   as greater. */
int stringcmp(string::const_iterator A,string::const_iterator AEnd,
              string::const_iterator B,string::const_iterator BEnd)
{
   // Walk both ranges in lock-step over their common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   bool const ADone = (A == AEnd);
   bool const BDone = (B == BEnd);
   if (ADone == true)
      return (BDone == true) ? 0 : 1;
   if (BDone == true)
      return -1;
   return (*A < *B) ? -1 : 1;
}
561 #endif
562 /*}}}*/
// stringcasecmp - Arbitrary case insensitive string compare		/*{{{*/
// ---------------------------------------------------------------------
/* Compares the ranges [A,AEnd) and [B,BEnd) after passing each character
   through toupper(). Returns 0 if they are equal, otherwise -1 or 1
   depending on the first differing character. A range that is a proper
   prefix of the other one compares as greater (1 if A is the shorter one,
   -1 if B is). */
int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // Walk both ranges in lock-step over their common prefix
   while (A != AEnd && B != BEnd && toupper(*A) == toupper(*B))
   {
      ++A;
      ++B;
   }

   bool const ADone = (A == AEnd);
   bool const BDone = (B == BEnd);
   if (ADone == true)
      return (BDone == true) ? 0 : 1;
   if (BDone == true)
      return -1;
   return (toupper(*A) < toupper(*B)) ? -1 : 1;
}
582 #if __GNUC__ >= 3
/* Same as the pointer based stringcasecmp, with the first range given by
   string iterators. A range that is a proper prefix of the other one
   compares as greater. */
int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
                  const char *B,const char *BEnd)
{
   // Walk both ranges in lock-step over their common prefix
   while (A != AEnd && B != BEnd && toupper(*A) == toupper(*B))
   {
      ++A;
      ++B;
   }

   bool const ADone = (A == AEnd);
   bool const BDone = (B == BEnd);
   if (ADone == true)
      return (BDone == true) ? 0 : 1;
   if (BDone == true)
      return -1;
   return (toupper(*A) < toupper(*B)) ? -1 : 1;
}
/* Same as the pointer based stringcasecmp, with both ranges given by
   string iterators. A range that is a proper prefix of the other one
   compares as greater. */
int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
                  string::const_iterator B,string::const_iterator BEnd)
{
   // Walk both ranges in lock-step over their common prefix
   while (A != AEnd && B != BEnd && toupper(*A) == toupper(*B))
   {
      ++A;
      ++B;
   }

   bool const ADone = (A == AEnd);
   bool const BDone = (B == BEnd);
   if (ADone == true)
      return (BDone == true) ? 0 : 1;
   if (BDone == true)
      return -1;
   return (toupper(*A) < toupper(*B)) ? -1 : 1;
}
617 #endif
618 /*}}}*/
619 // LookupTag - Lookup the value of a tag in a taged string /*{{{*/
620 // ---------------------------------------------------------------------
621 /* The format is like those used in package files and the method
622 communication system */
623 string LookupTag(const string &Message,const char *Tag,const char *Default)
624 {
625 // Look for a matching tag.
626 int Length = strlen(Tag);
627 for (string::const_iterator I = Message.begin(); I + Length < Message.end(); I++)
628 {
629 // Found the tag
630 if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
631 {
632 // Find the end of line and strip the leading/trailing spaces
633 string::const_iterator J;
634 I += Length + 1;
635 for (; isspace(*I) != 0 && I < Message.end(); I++);
636 for (J = I; *J != '\n' && J < Message.end(); J++);
637 for (; J > I && isspace(J[-1]) != 0; J--);
638
639 return string(I,J);
640 }
641
642 for (; *I != '\n' && I < Message.end(); I++);
643 }
644
645 // Failed to find a match
646 if (Default == 0)
647 return string();
648 return Default;
649 }
650 /*}}}*/
// StringToBool - Converts a string into a boolean			/*{{{*/
// ---------------------------------------------------------------------
/* This inspects the string to see if it is true or if it is false and
   then returns the result. Several variants on true/false are checked.

   Returns 1 for the number 1 and the words yes, true, with, on and enable,
   0 for the number 0 and the words no, false, without, off and disable.
   Words are compared without regard to case. A number only counts if the
   whole string is that number, so something like "0ad" is not mistaken
   for 0. Default is returned for everything else. */
int StringToBool(const string &Text,int Default)
{
   const char * const Str = Text.c_str();

   // A plain 0 or 1; nothing may follow the number
   char *End;
   long const Res = strtol(Str,&End,0);
   if (End != Str && *End == 0 && Res >= 0 && Res <= 1)
      return (int)Res;

   // Check for negatives
   if (strcasecmp(Str,"no") == 0 ||
       strcasecmp(Str,"false") == 0 ||
       strcasecmp(Str,"without") == 0 ||
       strcasecmp(Str,"off") == 0 ||
       strcasecmp(Str,"disable") == 0)
      return 0;

   // Check for positives
   if (strcasecmp(Str,"yes") == 0 ||
       strcasecmp(Str,"true") == 0 ||
       strcasecmp(Str,"with") == 0 ||
       strcasecmp(Str,"on") == 0 ||
       strcasecmp(Str,"enable") == 0)
      return 1;

   return Default;
}
680 /*}}}*/
// TimeRFC1123 - Convert a time_t into RFC1123 format			/*{{{*/
// ---------------------------------------------------------------------
/* This converts a time_t into a string time representation that is
   year 2000 compliant and timezone neutral, for example
   "Thu, 01 Jan 1970 00:00:00 GMT". Day and month names are always the
   English abbreviations. An empty string is returned if Date cannot be
   broken down into a calendar time. */
string TimeRFC1123(time_t Date)
{
   // The reentrant variant does not share a static result buffer
   struct tm Conv;
   if (gmtime_r(&Date,&Conv) == 0)
      return string();

   static const char * const Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
   static const char * const Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
                                        "Aug","Sep","Oct","Nov","Dec"};

   char Buf[300];
   snprintf(Buf,sizeof(Buf),"%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
            Conv.tm_min,Conv.tm_sec);
   return Buf;
}
699 /*}}}*/
700 // ReadMessages - Read messages from the FD /*{{{*/
701 // ---------------------------------------------------------------------
702 /* This pulls full messages from the input FD into the message buffer.
703 It assumes that messages will not pause during transit so no
704 fancy buffering is used.
705
706 In particular: this reads blocks from the input until it believes
707 that it's run out of input text. Each block is terminated by a
708 double newline ('\n' followed by '\n'). As noted below, there is a
709 bug in this code: it assumes that all the blocks have been read if
710 it doesn't see additional text in the buffer after the last one is
711 parsed, which will cause it to lose blocks if the last block
712 coincides with the end of the buffer.
713 */
714 bool ReadMessages(int Fd, vector<string> &List)
715 {
716 char Buffer[64000];
717 char *End = Buffer;
718 // Represents any left-over from the previous iteration of the
719 // parse loop. (i.e., if a message is split across the end
720 // of the buffer, it goes here)
721 string PartialMessage;
722
723 while (1)
724 {
725 int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
726 if (Res < 0 && errno == EINTR)
727 continue;
728
729 // Process is dead, this is kind of bad..
730 if (Res == 0)
731 return false;
732
733 // No data
734 if (Res < 0 && errno == EAGAIN)
735 return true;
736 if (Res < 0)
737 return false;
738
739 End += Res;
740
741 // Look for the end of the message
742 for (char *I = Buffer; I + 1 < End; I++)
743 {
744 if (I[0] != '\n' || I[1] != '\n')
745 continue;
746
747 // Pull the message out
748 string Message(Buffer,I-Buffer);
749 PartialMessage += Message;
750
751 // Fix up the buffer
752 for (; I < End && *I == '\n'; I++);
753 End -= I-Buffer;
754 memmove(Buffer,I,End-Buffer);
755 I = Buffer;
756
757 List.push_back(PartialMessage);
758 PartialMessage.clear();
759 }
760 if (End != Buffer)
761 {
762 // If there's text left in the buffer, store it
763 // in PartialMessage and throw the rest of the buffer
764 // away. This allows us to handle messages that
765 // are longer than the static buffer size.
766 PartialMessage += string(Buffer, End);
767 End = Buffer;
768 }
769 else
770 {
771 // BUG ALERT: if a message block happens to end at a
772 // multiple of 64000 characters, this will cause it to
773 // terminate early, leading to a badly formed block and
774 // probably crashing the method. However, this is the only
775 // way we have to find the end of the message block. I have
776 // an idea of how to fix this, but it will require changes
777 // to the protocol (essentially to mark the beginning and
778 // end of the block).
779 //
780 // -- dburrows 2008-04-02
781 return true;
782 }
783
784 if (WaitFd(Fd) == false)
785 return false;
786 }
787 }
788 /*}}}*/
// MonthConv - Converts a month string into a number			/*{{{*/
// ---------------------------------------------------------------------
/* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
   Made it a bit more robust with a few touppers though.

   Maps an English three letter month abbreviation, in any case, to the
   month number 0 (January) to 11 (December). Only as many letters as are
   needed to tell the months apart are looked at; a name with an unknown
   first letter is treated as January. */
static int MonthConv(char *Month)
{
   int const First = toupper(*Month);

   // Months whose first letter is unique
   if (First == 'F')
      return 1;
   if (First == 'S')
      return 8;
   if (First == 'O')
      return 9;
   if (First == 'N')
      return 10;
   if (First == 'D')
      return 11;

   // Apr / Aug
   if (First == 'A')
      return (toupper(Month[1]) == 'P') ? 3 : 7;

   // Mar / May
   if (First == 'M')
      return (toupper(Month[2]) == 'R') ? 2 : 4;

   // Jan / Jun / Jul
   if (First == 'J')
   {
      if (toupper(Month[1]) == 'A')
         return 0;
      return (toupper(Month[2]) == 'N') ? 5 : 6;
   }

   // Pretend it is January..
   return 0;
}
821 /*}}}*/
822 // timegm - Internal timegm function if gnu is not available /*{{{*/
823 // ---------------------------------------------------------------------
824 /* Ripped this evil little function from wget - I prefer the use of
825 GNU timegm if possible as this technique will have interesting problems
826 with leap seconds, timezones and other.
827
828 Converts struct tm to time_t, assuming the data in tm is UTC rather
829 than local timezone (mktime assumes the latter).
830
831 Contributed by Roger Beeman <beeman@cisco.com>, with the help of
832 Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO. */
833
834 /* Turned it into an autoconf check, because GNU is not the only thing which
835 can provide timegm. -- 2002-09-22, Joel Baker */
836
837 #ifndef HAVE_TIMEGM // Now with autoconf!
838 static time_t timegm(struct tm *t)
839 {
840 time_t tl, tb;
841
842 tl = mktime (t);
843 if (tl == -1)
844 return -1;
845 tb = mktime (gmtime (&tl));
846 return (tl <= tb ? (tl + (tl - tb)) : (tl - (tb - tl)));
847 }
848 #endif
849 /*}}}*/
850 // StrToTime - Converts a string into a time_t /*{{{*/
851 // ---------------------------------------------------------------------
852 /* This handles all 3 populare time formats including RFC 1123, RFC 1036
853 and the C library asctime format. It requires the GNU library function
854 'timegm' to convert a struct tm in UTC to a time_t. For some bizzar
855 reason the C library does not provide any such function :< This also
856 handles the weird, but unambiguous FTP time format*/
857 bool StrToTime(const string &Val,time_t &Result)
858 {
859 struct tm Tm;
860 char Month[10];
861 const char *I = Val.c_str();
862
863 // Skip the day of the week
864 for (;*I != 0 && *I != ' '; I++);
865
866 // Handle RFC 1123 time
867 Month[0] = 0;
868 if (sscanf(I," %d %3s %d %d:%d:%d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
869 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
870 {
871 // Handle RFC 1036 time
872 if (sscanf(I," %d-%3s-%d %d:%d:%d GMT",&Tm.tm_mday,Month,
873 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
874 Tm.tm_year += 1900;
875 else
876 {
877 // asctime format
878 if (sscanf(I," %3s %d %d:%d:%d %d",Month,&Tm.tm_mday,
879 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
880 {
881 // 'ftp' time
882 if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
883 &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
884 return false;
885 Tm.tm_mon--;
886 }
887 }
888 }
889
890 Tm.tm_isdst = 0;
891 if (Month[0] != 0)
892 Tm.tm_mon = MonthConv(Month);
893 Tm.tm_year -= 1900;
894
895 // Convert to local time and then to GMT
896 Result = timegm(&Tm);
897 return true;
898 }
899 /*}}}*/
// StrToNum - Convert a fixed length string to a number			/*{{{*/
// ---------------------------------------------------------------------
/* This is used in decoding the crazy fixed length string headers in
   tar and ar files. The first Len characters of Str, which need not be
   NUL terminated, are converted with strtoul in the given Base. A field
   consisting of spaces only (or of length 0) yields 0. Returns false if
   Len is 30 or more or if the field does not start with a number. */
bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
{
   // Work on a terminated copy of the field
   char Field[30];
   if (Len >= sizeof(Field))
      return false;
   memcpy(Field,Str,Len);
   Field[Len] = 0;

   // All spaces is a zero
   Res = 0;
   if (Field[strspn(Field," ")] == 0)
      return true;

   char *Stop = 0;
   Res = strtoul(Field,&Stop,Base);
   return Stop != Field;
}
926 /*}}}*/
// HexDigit - Convert a hex character into an integer			/*{{{*/
// ---------------------------------------------------------------------
/* Helper for Hex2Num. Returns the value 0..15 of the hex digit c, upper
   and lower case letters are both accepted. Anything that is not a hex
   digit yields 0. */
static int HexDigit(int c)
{
   switch (c)
   {
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
         return c - '0';

      case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
         return 10 + (c - 'a');

      case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
         return 10 + (c - 'A');

      default:
         return 0;
   }
}
940 /*}}}*/
941 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
942 // ---------------------------------------------------------------------
943 /* The length of the buffer must be exactly 1/2 the length of the string. */
944 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
945 {
946 if (Str.length() != Length*2)
947 return false;
948
949 // Convert each digit. We store it in the same order as the string
950 int J = 0;
951 for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
952 {
953 if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
954 return false;
955
956 Num[J] = HexDigit(I[0]) << 4;
957 Num[J] += HexDigit(I[1]);
958 }
959
960 return true;
961 }
962 /*}}}*/
// TokSplitString - Split a string up by a given token			/*{{{*/
// ---------------------------------------------------------------------
/* This is intended to be a faster splitter, it does not use dynamic
   memories. Input is changed to insert nulls at each token location.

   The pieces between occurrences of Tok are stored in List as pointers
   into Input, with white space around them removed, followed by a null
   pointer. ListMax is the number of slots in List including the one for
   the null pointer. False is returned if the pieces do not fit; List
   then holds the first ListMax - 1 pieces and the null pointer. */
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   char * const Stop = Input + strlen(Input);

   // Strip any leading spaces
   char *Field = Input;
   while (*Field != 0 && isspace(*Field) != 0)
      ++Field;

   unsigned long Used = 0;
   char *Cur = Field;
   while (Cur != Stop)
   {
      // Skip to the next Token
      while (Cur != Stop && *Cur != Tok)
         ++Cur;

      // Back remove spaces
      char *Tail = Cur;
      while (Tail > Field && (Tail[-1] == Tok || isspace(Tail[-1]) != 0))
         --Tail;
      *Tail = 0;

      List[Used] = Field;
      ++Used;
      if (Used >= ListMax)
      {
         // The last slot is needed for the terminator
         List[Used - 1] = 0;
         return false;
      }

      // Advance to the start of the next piece; this also steps over the
      // terminator that may just have been written onto the token
      while (Cur != Stop && (*Cur == Tok || isspace(*Cur) != 0 || *Cur == 0))
         ++Cur;
      Field = Cur;
   }

   List[Used] = 0;
   return true;
}
1002 /*}}}*/
1003 // RegexChoice - Simple regex list/list matcher /*{{{*/
1004 // ---------------------------------------------------------------------
1005 /* */
1006 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1007 const char **ListEnd)
1008 {
1009 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1010 R->Hit = false;
1011
1012 unsigned long Hits = 0;
1013 for (; ListBegin != ListEnd; ListBegin++)
1014 {
1015 // Check if the name is a regex
1016 const char *I;
1017 bool Regex = true;
1018 for (I = *ListBegin; *I != 0; I++)
1019 if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1020 break;
1021 if (*I == 0)
1022 Regex = false;
1023
1024 // Compile the regex pattern
1025 regex_t Pattern;
1026 if (Regex == true)
1027 if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1028 REG_NOSUB) != 0)
1029 Regex = false;
1030
1031 // Search the list
1032 bool Done = false;
1033 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1034 {
1035 if (R->Str[0] == 0)
1036 continue;
1037
1038 if (strcasecmp(R->Str,*ListBegin) != 0)
1039 {
1040 if (Regex == false)
1041 continue;
1042 if (regexec(&Pattern,R->Str,0,0,0) != 0)
1043 continue;
1044 }
1045 Done = true;
1046
1047 if (R->Hit == false)
1048 Hits++;
1049
1050 R->Hit = true;
1051 }
1052
1053 if (Regex == true)
1054 regfree(&Pattern);
1055
1056 if (Done == false)
1057 _error->Warning(_("Selection %s not found"),*ListBegin);
1058 }
1059
1060 return Hits;
1061 }
1062 /*}}}*/
// ioprintf - C format string outputter to C++ iostreams		/*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters. The arguments are formatted like
   printf would do and the resulting text is written to out. Text of any
   length is handled; nothing is written if formatting fails. */
void ioprintf(ostream &out,const char *format,...)
{
   va_list args;

   // Most messages are short, so try a buffer on the stack first
   char S[4096];
   va_start(args,format);
   int const Need = vsnprintf(S,sizeof(S),format,args);
   va_end(args);

   if (Need < 0)
      return;
   if ((size_t)Need < sizeof(S))
   {
      out << S;
      return;
   }

   // Too long: format once more into a buffer of exactly the right size
   string Big(Need + 1,'\0');
   va_start(args,format);
   vsnprintf(&Big[0],Big.size(),format,args);
   va_end(args);
   out << Big.c_str();
}
1077 /*}}}*/
// strprintf - C format string outputter to C++ strings		/*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters. The arguments are formatted like
   printf would do and the resulting text replaces the contents of out.
   Text of any length is handled; out is left empty if formatting fails. */
void strprintf(string &out,const char *format,...)
{
   va_list args;

   // Most messages are short, so try a buffer on the stack first
   char S[4096];
   va_start(args,format);
   int const Need = vsnprintf(S,sizeof(S),format,args);
   va_end(args);

   if (Need < 0)
   {
      out.clear();
      return;
   }
   if ((size_t)Need < sizeof(S))
   {
      out = string(S);
      return;
   }

   // Too long: format once more into a buffer of exactly the right size
   string Big(Need + 1,'\0');
   va_start(args,format);
   vsnprintf(&Big[0],Big.size(),format,args);
   va_end(args);
   out = Big.c_str();
}
1092 /*}}}*/
// safe_snprintf - Safer snprintf					/*{{{*/
// ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == end. This is a better alternative to using
   consecutive snprintfs.

   If the text did not fit completely, or formatting failed, End is
   returned, so a following call with the returned pointer as Buffer does
   nothing and returns End again. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   // Compare lengths rather than pointers, Buffer + Did may be way off
   if (Did < 0 || Did >= End - Buffer)
      return End;
   return Buffer + Did;
}
1114 /*}}}*/
1115
// tolower_ascii - tolower() function that ignores the locale		/*{{{*/
// ---------------------------------------------------------------------
/* Maps 'A'..'Z' to the value 32 above it, which is the matching lower
   case letter in ASCII, and returns everything else unchanged. */
int tolower_ascii(int c)
{
   bool const IsUpper = not (c < 'A' or c > 'Z');
   return IsUpper ? c + 0x20 : c;
}
1125 /*}}}*/
1126
1127 // CheckDomainList - See if Host is in a , seperate list /*{{{*/
1128 // ---------------------------------------------------------------------
1129 /* The domain list is a comma seperate list of domains that are suffix
1130 matched against the argument */
1131 bool CheckDomainList(const string &Host,const string &List)
1132 {
1133 string::const_iterator Start = List.begin();
1134 for (string::const_iterator Cur = List.begin(); Cur <= List.end(); Cur++)
1135 {
1136 if (Cur < List.end() && *Cur != ',')
1137 continue;
1138
1139 // Match the end of the string..
1140 if ((Host.size() >= (unsigned)(Cur - Start)) &&
1141 Cur - Start != 0 &&
1142 stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1143 return true;
1144
1145 Start = Cur + 1;
1146 }
1147 return false;
1148 }
1149 /*}}}*/
1150
1151 // URI::CopyFrom - Copy from an object /*{{{*/
1152 // ---------------------------------------------------------------------
1153 /* This parses the URI into all of its components */
1154 void URI::CopyFrom(const string &U)
1155 {
1156 string::const_iterator I = U.begin();
1157
1158 // Locate the first colon, this separates the scheme
1159 for (; I < U.end() && *I != ':' ; I++);
1160 string::const_iterator FirstColon = I;
1161
1162 /* Determine if this is a host type URI with a leading double //
1163 and then search for the first single / */
1164 string::const_iterator SingleSlash = I;
1165 if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1166 SingleSlash += 3;
1167
1168 /* Find the / indicating the end of the hostname, ignoring /'s in the
1169 square brackets */
1170 bool InBracket = false;
1171 for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); SingleSlash++)
1172 {
1173 if (*SingleSlash == '[')
1174 InBracket = true;
1175 if (InBracket == true && *SingleSlash == ']')
1176 InBracket = false;
1177 }
1178
1179 if (SingleSlash > U.end())
1180 SingleSlash = U.end();
1181
1182 // We can now write the access and path specifiers
1183 Access.assign(U.begin(),FirstColon);
1184 if (SingleSlash != U.end())
1185 Path.assign(SingleSlash,U.end());
1186 if (Path.empty() == true)
1187 Path = "/";
1188
1189 // Now we attempt to locate a user:pass@host fragment
1190 if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1191 FirstColon += 3;
1192 else
1193 FirstColon += 1;
1194 if (FirstColon >= U.end())
1195 return;
1196
1197 if (FirstColon > SingleSlash)
1198 FirstColon = SingleSlash;
1199
1200 // Find the colon...
1201 I = FirstColon + 1;
1202 if (I > SingleSlash)
1203 I = SingleSlash;
1204 for (; I < SingleSlash && *I != ':'; I++);
1205 string::const_iterator SecondColon = I;
1206
1207 // Search for the @ after the colon
1208 for (; I < SingleSlash && *I != '@'; I++);
1209 string::const_iterator At = I;
1210
1211 // Now write the host and user/pass
1212 if (At == SingleSlash)
1213 {
1214 if (FirstColon < SingleSlash)
1215 Host.assign(FirstColon,SingleSlash);
1216 }
1217 else
1218 {
1219 Host.assign(At+1,SingleSlash);
1220 User.assign(FirstColon,SecondColon);
1221 if (SecondColon < At)
1222 Password.assign(SecondColon+1,At);
1223 }
1224
1225 // Now we parse the RFC 2732 [] hostnames.
1226 unsigned long PortEnd = 0;
1227 InBracket = false;
1228 for (unsigned I = 0; I != Host.length();)
1229 {
1230 if (Host[I] == '[')
1231 {
1232 InBracket = true;
1233 Host.erase(I,1);
1234 continue;
1235 }
1236
1237 if (InBracket == true && Host[I] == ']')
1238 {
1239 InBracket = false;
1240 Host.erase(I,1);
1241 PortEnd = I;
1242 continue;
1243 }
1244 I++;
1245 }
1246
1247 // Tsk, weird.
1248 if (InBracket == true)
1249 {
1250 Host.clear();
1251 return;
1252 }
1253
1254 // Now we parse off a port number from the hostname
1255 Port = 0;
1256 string::size_type Pos = Host.rfind(':');
1257 if (Pos == string::npos || Pos < PortEnd)
1258 return;
1259
1260 Port = atoi(string(Host,Pos+1).c_str());
1261 Host.assign(Host,0,Pos);
1262 }
1263 /*}}}*/
1264 // URI::operator string - Convert the URI to a string /*{{{*/
1265 // ---------------------------------------------------------------------
1266 /* */
1267 URI::operator string()
1268 {
1269 string Res;
1270
1271 if (Access.empty() == false)
1272 Res = Access + ':';
1273
1274 if (Host.empty() == false)
1275 {
1276 if (Access.empty() == false)
1277 Res += "//";
1278
1279 if (User.empty() == false)
1280 {
1281 Res += User;
1282 if (Password.empty() == false)
1283 Res += ":" + Password;
1284 Res += "@";
1285 }
1286
1287 // Add RFC 2732 escaping characters
1288 if (Access.empty() == false &&
1289 (Host.find('/') != string::npos || Host.find(':') != string::npos))
1290 Res += '[' + Host + ']';
1291 else
1292 Res += Host;
1293
1294 if (Port != 0)
1295 {
1296 char S[30];
1297 sprintf(S,":%u",Port);
1298 Res += S;
1299 }
1300 }
1301
1302 if (Path.empty() == false)
1303 {
1304 if (Path[0] != '/')
1305 Res += "/" + Path;
1306 else
1307 Res += Path;
1308 }
1309
1310 return Res;
1311 }
1312 /*}}}*/
1313 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1314 // ---------------------------------------------------------------------
1315 /* */
1316 string URI::SiteOnly(const string &URI)
1317 {
1318 ::URI U(URI);
1319 U.User.clear();
1320 U.Password.clear();
1321 U.Path.clear();
1322 U.Port = 0;
1323 return U;
1324 }
1325 /*}}}*/