merge Goswin Brederlow "support download of index files for different archs"
[ntk/apt.git] / apt-pkg / contrib / strutl.cc
CommitLineData
6c139d6e
AL
1// -*- mode: cpp; mode: fold -*-
2// Description /*{{{*/
d48c6a7d 3// $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
6c139d6e
AL
4/* ######################################################################
5
b2e465d6 6 String Util - Some useful string functions.
6c139d6e 7
b2e465d6
AL
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
24231681 10 especially in APT methods.
6c139d6e
AL
11
12 This source is placed in the Public Domain, do with it what you will
24231681 13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
6c139d6e
AL
14
15 ##################################################################### */
16 /*}}}*/
17// Includes /*{{{*/
cdcc6d34 18#include <apt-pkg/strutl.h>
7049d16d 19#include <apt-pkg/fileutl.h>
b2e465d6 20#include <apt-pkg/error.h>
0a8a80e5 21
b2e465d6
AL
22#include <apti18n.h>
23
6c139d6e
AL
24#include <ctype.h>
25#include <string.h>
26#include <stdio.h>
152ab79e 27#include <algorithm>
2b154e53 28#include <unistd.h>
b2e465d6 29#include <regex.h>
b0db36b1 30#include <errno.h>
b2e465d6 31#include <stdarg.h>
a52f938b 32#include <iconv.h>
0db4a45b 33
41b6caf4
AL
34#include "config.h"
35
0db4a45b 36using namespace std;
6c139d6e
AL
37 /*}}}*/
38
a52f938b
OS
39// UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
40// ---------------------------------------------------------------------
41/* This is handy to use before display some information for enduser */
42bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
43{
44 iconv_t cd;
45 const char *inbuf;
46 char *inptr, *outbuf, *outptr;
d9f5f288 47 size_t insize, outsize;
a52f938b
OS
48
49 cd = iconv_open(codeset, "UTF-8");
50 if (cd == (iconv_t)(-1)) {
51 // Something went wrong
52 if (errno == EINVAL)
53 _error->Error("conversion from 'UTF-8' to '%s' not available",
54 codeset);
55 else
56 perror("iconv_open");
57
58 // Clean the destination string
59 *dest = "";
60
61 return false;
62 }
63
64 insize = outsize = orig.size();
65 inbuf = orig.data();
66 inptr = (char *)inbuf;
67 outbuf = new char[insize+1];
68 outptr = outbuf;
69
1f99b6d3
DK
70 while (insize != 0)
71 {
72 size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
73 if (err == (size_t)(-1))
74 {
75 insize--;
76 outsize++;
77 inptr++;
78 *outptr = '?';
79 outptr++;
80 }
81 }
a52f938b 82
1f99b6d3 83 *outptr = '\0';
a52f938b
OS
84 *dest = outbuf;
85 delete[] outbuf;
86
87 iconv_close(cd);
88
89 return true;
90}
91 /*}}}*/
6c139d6e
AL
92// strstrip - Remove white space from the front and back of a string /*{{{*/
93// ---------------------------------------------------------------------
94/* This is handy to use when parsing a file. It also removes \n's left
95 over from fgets and company */
// Strip whitespace from both ends of String, in place.
//
// Leading blanks and tabs are skipped by advancing the returned pointer;
// trailing blanks, tabs, '\n' and '\r' are cut off by writing a terminator
// into the buffer. The return value points into the caller's buffer.
char *_strstrip(char *String)
{
   while (*String == ' ' || *String == '\t')
      String++;

   // Nothing left: return without writing to the buffer
   if (*String == 0)
      return String;

   // Walk back from the terminator; never step before String itself
   char *End = String + strlen(String);
   while (End != String &&
          (End[-1] == ' ' || End[-1] == '\t' ||
           End[-1] == '\n' || End[-1] == '\r'))
      End--;

   *End = 0;
   return String;
}
110 /*}}}*/
111// strtabexpand - Converts tabs into 8 spaces /*{{{*/
112// ---------------------------------------------------------------------
113/* */
// Expand every tab in String to spaces up to the next multiple-of-8 column,
// in place. Column 0 is the start of String.
//
// String - NUL terminated text stored in a buffer of Len bytes
// Len    - total size of that buffer, terminator included
//
// If expanding a tab would not fit into the buffer the string is cut off at
// that tab. Returns String.
char *_strtabexpand(char *String,size_t Len)
{
   for (char *I = String; I != String + Len && *I != 0; I++)
   {
      if (*I != '\t')
         continue;

      size_t const Column = I - String;
      size_t const Pad = 8 - (Column % 8);
      size_t const Tail = strlen(I + 1);     // text following the tab

      // prefix + padding + tail + terminator has to fit in the buffer
      if (Column + Pad + Tail + 1 > Len)
      {
         *I = 0;
         return String;
      }

      // Shift the tail (and its terminator) right, then fill the gap
      memmove(I + Pad,I + 1,Tail + 1);
      memset(I,' ',Pad);

      // The loop increment moves on to the first character after the padding
      I += Pad - 1;
   }
   return String;
}
145 /*}}}*/
146// ParseQuoteWord - Parse a single word out of a string /*{{{*/
147// ---------------------------------------------------------------------
148/* This grabs a single word, converts any % escaped characters to their
149 proper values and advances the pointer. Double quotes are understood
7834cb57
AL
150 and stripped out as well. This is for URI/URL parsing. It also can
151 understand [] brackets.*/
6c139d6e
AL
// Parse one word from String into Res and advance String past it.
//
// A word ends at the first whitespace character that is not inside a
// "..." or [...] section. While copying, %XX escapes are replaced by the
// byte they encode and double quotes are dropped. Text from the original
// String position is copied, so blanks in front of the word end up in Res.
//
// Returns false, leaving String and Res untouched, when only blanks remain
// or when a quote or bracket is not closed. On success String points at
// the start of the next word (or at the terminator).
bool ParseQuoteWord(const char *&String,std::string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      C++;
   if (*C == 0)
      return false;

   // Jump to the end of the word
   for (; *C != 0 && isspace(static_cast<unsigned char>(*C)) == 0; C++)
   {
      if (*C == '"')
      {
         for (C++; *C != 0 && *C != '"'; C++);
         if (*C == 0)
            return false;
      }
      if (*C == '[')
      {
         for (C++; *C != 0 && *C != ']'; C++);
         if (*C == 0)
            return false;
      }
   }

   // Now de-quote characters; a std::string has no fixed size to overrun
   std::string Word;
   Word.reserve(C - String);
   for (const char *Start = String; Start != C; )
   {
      if (*Start == '%' && Start + 2 < C)
      {
         char const Tmp[3] = {Start[1], Start[2], 0};
         Word += static_cast<char>(strtol(Tmp,0,16));
         Start += 3;
         continue;
      }
      if (*Start != '"')
         Word += *Start;
      Start++;
   }

   // Assign through c_str() so that a decoded NUL still ends the result,
   // as it did when the word was built in a C buffer
   Res = Word.c_str();

   // Skip ending white space
   while (*C != 0 && isspace(static_cast<unsigned char>(*C)) != 0)
      C++;
   String = C;
   return true;
}
207 /*}}}*/
08e8f724
AL
208// ParseCWord - Parses a string like a C "" expression /*{{{*/
209// ---------------------------------------------------------------------
b2e465d6 210/* This expects a series of space separated strings enclosed in ""'s.
08e8f724 211 It concatenates the ""'s into a single string. */
// Parse a run of "..." sections separated by whitespace into Res.
//
// The text of each quoted section is copied; each run of whitespace
// between sections contributes a single blank. Any other character outside
// quotes makes the parse fail.
//
// Returns false, leaving String and Res untouched, when only blanks remain,
// the input is 1024 characters or longer, a quote is not closed or a stray
// character is found. On success String is advanced to the terminator.
bool ParseCWord(const char *&String,std::string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      C++;
   if (*C == 0)
      return false;

   // The output never gets longer than the input, so this bounds Buffer
   char Buffer[1024];
   char *Buf = Buffer;
   if (strlen(String) >= sizeof(Buffer))
      return false;

   for (; *C != 0; C++)
   {
      if (*C == '"')
      {
         for (C++; *C != 0 && *C != '"'; C++)
            *Buf++ = *C;

         if (*C == 0)
            return false;

         continue;
      }

      // Outside quotes only whitespace is acceptable
      if (isspace(static_cast<unsigned char>(*C)) == 0)
         return false;

      // Collapse a run of whitespace into one blank
      if (C != String && isspace(static_cast<unsigned char>(C[-1])) != 0)
         continue;
      *Buf++ = ' ';
   }
   *Buf = 0;
   Res = Buffer;
   String = C;
   return true;
}
249 /*}}}*/
6d5dd02a 250// QuoteString - Convert a string into quoted form /*{{{*/
1bc849af 251// ---------------------------------------------------------------------
6d5dd02a 252/* */
// Return Str with unsafe bytes replaced by %xx escapes (two lower-case hex
// digits of the byte value).
//
// A byte is escaped when it is listed in Bad, is a control character or
// blank (<= 0x20), or is 0x7F or above. Everything else is copied as is.
std::string QuoteString(const std::string &Str, const char *Bad)
{
   static const char Hex[] = "0123456789abcdef";

   std::string Res;
   Res.reserve(Str.size());
   for (std::string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      // Work on the unsigned value so bytes >= 0x80 give exactly two digits
      unsigned char const Ch = static_cast<unsigned char>(*I);
      if (Ch <= 0x20 || Ch >= 0x7F || strchr(Bad,*I) != 0)
      {
         Res += '%';
         Res += Hex[Ch >> 4];
         Res += Hex[Ch & 0x0F];
      }
      else
         Res += *I;
   }
   return Res;
}
270 /*}}}*/
6d5dd02a 271// DeQuoteString - Convert a string from quoted form /*{{{*/
6c139d6e 272// ---------------------------------------------------------------------
6d5dd02a 273/* This undoes QuoteString */
// Return Str with every %XX escape replaced by the byte it encodes.
//
// An escape is only decoded when '%' is followed by two hexadecimal digits
// and at least one more character position exists after them (the bound
// the original code used); any other '%' is copied through unchanged.
std::string DeQuoteString(const std::string &Str)
{
   std::string::size_type const Size = Str.size();

   std::string Res;
   Res.reserve(Size);
   for (std::string::size_type I = 0; I < Size; ++I)
   {
      if (Str[I] == '%' && Size - I > 2 &&
          isxdigit(static_cast<unsigned char>(Str[I + 1])) != 0 &&
          isxdigit(static_cast<unsigned char>(Str[I + 2])) != 0)
      {
         char const Tmp[3] = {Str[I + 1], Str[I + 2], 0};
         Res += static_cast<char>(strtol(Tmp,0,16));
         I += 2;     // the loop increment consumes the '%' itself
         continue;
      }
      Res += Str[I];
   }
   return Res;
}
6d5dd02a
AL
294
295 /*}}}*/
6c139d6e
AL
296// SizeToStr - Convert a long into a human readable size /*{{{*/
297// ---------------------------------------------------------------------
24231681
AL
298/* A max of 4 digits are shown before conversion to the next highest unit.
299 The max length of the string will be 5 chars unless the size is > 10
6c139d6e
AL
300 YottaBytes (E24) */
// Format the magnitude of Size with a decimal (power of 1000) unit suffix.
//
// The value is divided by 1000 until it drops below 10000 or the largest
// unit (Y) is reached. Scaled values below 100 are printed with one
// decimal, everything else without decimals. The sign of Size is dropped.
std::string SizeToStr(double Size)
{
   double ASize = Size >= 0 ? Size : -1*Size;

   /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
      ExaBytes, ZettaBytes, YottaBytes */
   static const char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
   unsigned int const Last = sizeof(Ext)/sizeof(Ext[0]) - 1;

   // Stop at the last unit so that a result is always produced
   unsigned int I = 0;
   while (ASize >= 10000 && I < Last)
   {
      ASize /= 1000.0;
      I++;
   }

   // For plain bytes Ext[0] is NUL, which simply ends the string early
   char S[300];
   if (ASize < 100 && I != 0)
      snprintf(S,sizeof(S),"%'.1f%c",ASize,Ext[I]);
   else
      snprintf(S,sizeof(S),"%'.0f%c",ASize,Ext[I]);

   return S;
}
333 /*}}}*/
334// TimeToStr - Convert the time into a string /*{{{*/
335// ---------------------------------------------------------------------
336/* Converts a number of seconds to a hms format */
337string TimeToStr(unsigned long Sec)
338{
339 char S[300];
340
341 while (1)
342 {
343 if (Sec > 60*60*24)
344 {
09fab244
MV
345 //d means days, h means hours, min means minutes, s means seconds
346 sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
6c139d6e
AL
347 break;
348 }
349
350 if (Sec > 60*60)
351 {
09fab244
MV
352 //h means hours, min means minutes, s means seconds
353 sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
6c139d6e
AL
354 break;
355 }
356
357 if (Sec > 60)
358 {
09fab244
MV
359 //min means minutes, s means seconds
360 sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
6c139d6e
AL
361 break;
362 }
09fab244
MV
363
364 //s means seconds
365 sprintf(S,_("%lis"),Sec);
6c139d6e
AL
366 break;
367 }
368
369 return S;
370}
371 /*}}}*/
372// SubstVar - Substitute a string for another string /*{{{*/
373// ---------------------------------------------------------------------
374/* This replaces all occurrences of Subst with Contents in Str. */
// Return a copy of Str in which every occurrence of Subst is replaced by
// Contents. Matches are searched left to right and do not overlap; the
// replacement text itself is not searched again. An empty Subst matches
// nothing, so Str is returned unchanged.
std::string SubstVar(const std::string &Str,const std::string &Subst,const std::string &Contents)
{
   // An empty pattern would match at every position without advancing
   if (Subst.empty() == true)
      return Str;

   std::string::size_type Pos = Str.find(Subst);
   if (Pos == std::string::npos)
      return Str;

   std::string Temp;
   std::string::size_type OldPos = 0;
   do
   {
      // Text between the previous match and this one, then the replacement
      Temp.append(Str,OldPos,Pos - OldPos);
      Temp.append(Contents);
      OldPos = Pos + Subst.length();
      Pos = Str.find(Subst,OldPos);
   }
   while (Pos != std::string::npos);

   // Whatever follows the last match
   Temp.append(Str,OldPos,std::string::npos);
   return Temp;
}
b2e465d6
AL
393
394string SubstVar(string Str,const struct SubstVar *Vars)
395{
396 for (; Vars->Subst != 0; Vars++)
397 Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
398 return Str;
399}
6c139d6e 400 /*}}}*/
fa3b0945
MV
401// OutputInDepth - return a string with separator multiplied with depth /*{{{*/
402// ---------------------------------------------------------------------
403/* Returns a string with the supplied separator depth + 1 times in it */
// Build a string made of Separator repeated Depth + 1 times.
std::string OutputInDepth(const unsigned long Depth, const char* Separator)
{
   std::string output;
   unsigned long remaining = Depth + 1;
   while (remaining > 0)
   {
      output.append(Separator);
      --remaining;
   }
   return output;
}
411 /*}}}*/
ad00ae81
AL
412// URItoFileName - Convert the uri into a unique file name /*{{{*/
413// ---------------------------------------------------------------------
414/* This converts a URI into a safe filename. It quotes all unsafe characters
415 and converts / to _ and removes the scheme identifier. The resulting
416 file name should be unique and never occur again for a different file */
171c75f1 417string URItoFileName(const string &URI)
ad00ae81 418{
54cf15cb
AL
419 // Nuke 'sensitive' items
420 ::URI U(URI);
171c75f1
MV
421 U.User.clear();
422 U.Password.clear();
423 U.Access.clear();
54cf15cb 424
ad00ae81 425 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
171c75f1
MV
426 string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
427 replace(NewURI.begin(),NewURI.end(),'/','_');
428 return NewURI;
ad00ae81
AL
429}
430 /*}}}*/
6c139d6e
AL
431// Base64Encode - Base64 Encoding routine for short strings /*{{{*/
432// ---------------------------------------------------------------------
433/* This routine performs a base64 transformation on a string. It was ripped
434 from wget and then patched and bug fixed.
435
436 This spec can be found in rfc2045 */
// Return the base64 encoding of S (alphabet A-Z a-z 0-9 + /), padded with
// '=' so that the output length is a multiple of four.
std::string Base64Encode(const std::string &S)
{
   // Conversion table.
   static const char tbl[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "abcdefghijklmnopqrstuvwxyz"
      "0123456789+/";

   std::string::size_type const Len = S.length();

   // Pre-allocate some space
   std::string Final;
   Final.reserve((4*Len + 2)/3 + 2);

   /* Transform the 3x8 bits to 4x6 bits, as required by
      base64. */
   for (std::string::size_type I = 0; I < Len; I += 3)
   {
      // Unsigned, so that bytes >= 0x80 shift into valid table indexes
      unsigned char Bits[3] = {0,0,0};
      Bits[0] = S[I];
      if (I + 1 < Len)
         Bits[1] = S[I + 1];
      if (I + 2 < Len)
         Bits[2] = S[I + 2];

      Final += tbl[Bits[0] >> 2];
      Final += tbl[((Bits[0] & 3) << 4) | (Bits[1] >> 4)];

      if (I + 1 >= Len)
         break;

      Final += tbl[((Bits[1] & 0xf) << 2) | (Bits[2] >> 6)];

      if (I + 2 >= Len)
         break;

      Final += tbl[Bits[2] & 0x3f];
   }

   /* Apply the padding elements, this tells how many bytes the remote
      end should discard */
   if (Len % 3 == 2)
      Final += '=';
   if (Len % 3 == 1)
      Final += "==";

   return Final;
}
487 /*}}}*/
0da8987a 488// stringcmp - Arbitrary string compare /*{{{*/
6c139d6e 489// ---------------------------------------------------------------------
7365ff46 490/* This safely compares two non-null terminated strings of arbitrary
6c139d6e
AL
491 length */
// Shared body of the stringcmp overloads: compare the ranges [A,AEnd) and
// [B,BEnd) element by element.
//
// Returns 0 when both ranges are equal. When one range is a prefix of the
// other, the result is 1 if A is the shorter one and -1 if B is. Otherwise
// the first differing elements decide: -1 if *A < *B, else 1.
template <typename IterA, typename IterB>
static int stringcmp_impl(IterA A,IterA AEnd,IterB B,IterB BEnd)
{
   for (; A != AEnd && B != BEnd; ++A, ++B)
      if (*A != *B)
         break;

   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (*A < *B) ? -1 : 1;
}

// Compare two character ranges that need not be NUL terminated.
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   return stringcmp_impl(A,AEnd,B,BEnd);
}

#if __GNUC__ >= 3
// Same comparison for a std::string range against a character range.
int stringcmp(std::string::const_iterator A,std::string::const_iterator AEnd,
              const char *B,const char *BEnd)
{
   return stringcmp_impl(A,AEnd,B,BEnd);
}
// Same comparison for two std::string ranges.
int stringcmp(std::string::const_iterator A,std::string::const_iterator AEnd,
              std::string::const_iterator B,std::string::const_iterator BEnd)
{
   return stringcmp_impl(A,AEnd,B,BEnd);
}
#endif
6c139d6e 545 /*}}}*/
0da8987a 546// stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
6c139d6e
AL
547// ---------------------------------------------------------------------
548/* */
// Upper-case one character; the value is passed to toupper() as an
// unsigned char, which is what the C library requires.
static inline int stringcasecmp_upper(char C)
{
   return toupper(static_cast<unsigned char>(C));
}

// Shared body of the stringcasecmp overloads: compare the ranges [A,AEnd)
// and [B,BEnd) element by element, ignoring case.
//
// Returns 0 when both ranges are equal. When one range is a prefix of the
// other, the result is 1 if A is the shorter one and -1 if B is. Otherwise
// the first differing elements decide: -1 if upper(*A) < upper(*B), else 1.
template <typename IterA, typename IterB>
static int stringcasecmp_impl(IterA A,IterA AEnd,IterB B,IterB BEnd)
{
   for (; A != AEnd && B != BEnd; ++A, ++B)
      if (stringcasecmp_upper(*A) != stringcasecmp_upper(*B))
         break;

   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (stringcasecmp_upper(*A) < stringcasecmp_upper(*B)) ? -1 : 1;
}

// Case insensitive compare of two character ranges that need not be NUL
// terminated.
int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   return stringcasecmp_impl(A,AEnd,B,BEnd);
}
#if __GNUC__ >= 3
// Same comparison for a std::string range against a character range.
int stringcasecmp(std::string::const_iterator A,std::string::const_iterator AEnd,
                  const char *B,const char *BEnd)
{
   return stringcasecmp_impl(A,AEnd,B,BEnd);
}
// Same comparison for two std::string ranges.
int stringcasecmp(std::string::const_iterator A,std::string::const_iterator AEnd,
                  std::string::const_iterator B,std::string::const_iterator BEnd)
{
   return stringcasecmp_impl(A,AEnd,B,BEnd);
}
#endif
6c139d6e 601 /*}}}*/
3b5421b4
AL
602// LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
603// ---------------------------------------------------------------------
604/* The format is like those used in package files and the method
605 communication system */
// Look up the value of "Tag:" in a block of newline separated lines.
//
// Message - the text to search
// Tag     - tag name without the colon, compared ignoring case; it only
//           matches at the start of a line
// Default - returned when no line carries the tag; may be null, in which
//           case an empty string is returned
//
// The value is the rest of the matching line with surrounding whitespace
// removed.
// NOTE(review): the leading-whitespace skip also steps over '\n', so a tag
// with an empty value yields the content of the following line. That is
// how the original behaved and it is kept here - confirm it is intended.
std::string LookupTag(const std::string &Message,const char *Tag,const char *Default)
{
   std::string::size_type const Length = strlen(Tag);
   std::string::size_type const Size = Message.size();

   // A candidate line needs room for the tag and the colon after it
   std::string::size_type Line = 0;
   while (Line < Size && Size - Line > Length)
   {
      // Found the tag
      if (Message[Line + Length] == ':' &&
          strncasecmp(Message.c_str() + Line,Tag,Length) == 0)
      {
         // Find the end of line and strip the leading/trailing spaces
         std::string::size_type Start = Line + Length + 1;
         while (Start < Size &&
                isspace(static_cast<unsigned char>(Message[Start])) != 0)
            Start++;

         std::string::size_type Stop = Start;
         while (Stop < Size && Message[Stop] != '\n')
            Stop++;
         while (Stop > Start &&
                isspace(static_cast<unsigned char>(Message[Stop - 1])) != 0)
            Stop--;

         return Message.substr(Start,Stop - Start);
      }

      // Move on to the start of the next line
      std::string::size_type const NewLine = Message.find('\n',Line);
      if (NewLine == std::string::npos)
         break;
      Line = NewLine + 1;
   }

   // Failed to find a match
   if (Default == 0)
      return std::string();
   return Default;
}
633 /*}}}*/
634// StringToBool - Converts a string into a boolean /*{{{*/
635// ---------------------------------------------------------------------
636/* This inspects the string to see if it is true or if it is false and
637 then returns the result. Several variants on true/false are checked. */
// Interpret Text as a boolean.
//
// Returns 0 or 1 when Text starts with a number that strtol() (base
// auto-detected) reads as 0 or 1, or when Text equals one of the false /
// true words below, ignoring case. Anything else returns Default.
int StringToBool(const std::string &Text,int Default)
{
   const char * const Str = Text.c_str();

   // Keep the full long value so that large numbers are not narrowed into
   // the 0..1 range before the check
   char *End;
   long const Res = strtol(Str,&End,0);
   if (End != Str && Res >= 0 && Res <= 1)
      return static_cast<int>(Res);

   // Check for negatives
   if (strcasecmp(Str,"no") == 0 ||
       strcasecmp(Str,"false") == 0 ||
       strcasecmp(Str,"without") == 0 ||
       strcasecmp(Str,"off") == 0 ||
       strcasecmp(Str,"disable") == 0)
      return 0;

   // Check for positives
   if (strcasecmp(Str,"yes") == 0 ||
       strcasecmp(Str,"true") == 0 ||
       strcasecmp(Str,"with") == 0 ||
       strcasecmp(Str,"on") == 0 ||
       strcasecmp(Str,"enable") == 0)
      return 1;

   return Default;
}
663 /*}}}*/
0a8a80e5
AL
664// TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
665// ---------------------------------------------------------------------
666/* This converts a time_t into a string time representation that is
667 year 2000 compliant and timezone neutral */
// Format Date as an RFC 1123 timestamp in GMT, for example
// "Thu, 01 Jan 1970 00:00:00 GMT". Day and month names are fixed English
// abbreviations. Returns an empty string if Date cannot be broken down
// into calendar fields.
std::string TimeRFC1123(time_t Date)
{
   struct tm Conv;
   if (gmtime_r(&Date,&Conv) == 0)
      return std::string();

   static const char * const Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
   static const char * const Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
                                        "Aug","Sep","Oct","Nov","Dec"};

   char Buf[300];
   snprintf(Buf,sizeof(Buf),"%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
            Conv.tm_min,Conv.tm_sec);
   return Buf;
}
682 /*}}}*/
683// ReadMessages - Read messages from the FD /*{{{*/
684// ---------------------------------------------------------------------
685/* This pulls full messages from the input FD into the message buffer.
686 It assumes that messages will not pause during transit so no
ffc36991
DB
687 fancy buffering is used.
688
689 In particular: this reads blocks from the input until it believes
690 that it's run out of input text. Each block is terminated by a
691 double newline ('\n' followed by '\n'). As noted below, there is a
692 bug in this code: it assumes that all the blocks have been read if
693 it doesn't see additional text in the buffer after the last one is
694 parsed, which will cause it to lose blocks if the last block
695 coincides with the end of the buffer.
696 */
0a8a80e5
AL
697bool ReadMessages(int Fd, vector<string> &List)
698{
aee70518 699 char Buffer[64000];
0a8a80e5 700 char *End = Buffer;
ffc36991
DB
701 // Represents any left-over from the previous iteration of the
702 // parse loop. (i.e., if a message is split across the end
703 // of the buffer, it goes here)
704 string PartialMessage;
0a8a80e5
AL
705
706 while (1)
707 {
708 int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
b0db36b1
AL
709 if (Res < 0 && errno == EINTR)
710 continue;
0a8a80e5
AL
711
712 // Process is dead, this is kind of bad..
713 if (Res == 0)
714 return false;
715
716 // No data
b2e465d6 717 if (Res < 0 && errno == EAGAIN)
0a8a80e5 718 return true;
b2e465d6
AL
719 if (Res < 0)
720 return false;
721
0a8a80e5
AL
722 End += Res;
723
724 // Look for the end of the message
c88edf1d 725 for (char *I = Buffer; I + 1 < End; I++)
0a8a80e5
AL
726 {
727 if (I[0] != '\n' || I[1] != '\n')
728 continue;
729
730 // Pull the message out
d48c6a7d 731 string Message(Buffer,I-Buffer);
ffc36991 732 PartialMessage += Message;
0a8a80e5
AL
733
734 // Fix up the buffer
735 for (; I < End && *I == '\n'; I++);
736 End -= I-Buffer;
737 memmove(Buffer,I,End-Buffer);
738 I = Buffer;
739
ffc36991
DB
740 List.push_back(PartialMessage);
741 PartialMessage.clear();
0a8a80e5 742 }
ffc36991
DB
743 if (End != Buffer)
744 {
745 // If there's text left in the buffer, store it
746 // in PartialMessage and throw the rest of the buffer
747 // away. This allows us to handle messages that
748 // are longer than the static buffer size.
749 PartialMessage += string(Buffer, End);
750 End = Buffer;
751 }
752 else
753 {
754 // BUG ALERT: if a message block happens to end at a
755 // multiple of 64000 characters, this will cause it to
756 // terminate early, leading to a badly formed block and
757 // probably crashing the method. However, this is the only
758 // way we have to find the end of the message block. I have
759 // an idea of how to fix this, but it will require changes
760 // to the protocol (essentially to mark the beginning and
761 // end of the block).
762 //
763 // -- dburrows 2008-04-02
764 return true;
765 }
0a8a80e5
AL
766
767 if (WaitFd(Fd) == false)
768 return false;
769 }
770}
771 /*}}}*/
24231681
AL
772// MonthConv - Converts a month string into a number /*{{{*/
773// ---------------------------------------------------------------------
774/* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
775 Made it a bit more robust with a few touppers though. */
// Map an English month abbreviation ("Jan", "feb", ...) to 0..11.
//
// Only as many of the first three characters as are needed to tell the
// months apart are looked at, ignoring case. Unrecognised input is treated
// as January (0).
static int MonthConv(char *Month)
{
   // Upper-cased copy of up to three characters; characters the string
   // does not have stay NUL, so nothing is read past its terminator
   char Up[3] = {0,0,0};
   for (int I = 0; I != 3 && Month[I] != 0; ++I)
      Up[I] = static_cast<char>(toupper(static_cast<unsigned char>(Month[I])));

   switch (Up[0])
   {
      case 'A':
      return Up[1] == 'P'?3:7;
      case 'D':
      return 11;
      case 'F':
      return 1;
      case 'J':
      if (Up[1] == 'A')
         return 0;
      return Up[2] == 'N'?5:6;
      case 'M':
      return Up[2] == 'R'?2:4;
      case 'N':
      return 10;
      case 'O':
      return 9;
      case 'S':
      return 8;

      // Pretend it is January..
      default:
      return 0;
   }
}
804 /*}}}*/
6d5dd02a
AL
805// timegm - Internal timegm function if gnu is not available /*{{{*/
806// ---------------------------------------------------------------------
807/* Ripped this evil little function from wget - I prefer the use of
808 GNU timegm if possible as this technique will have interesting problems
809 with leap seconds, timezones and other.
810
811 Converts struct tm to time_t, assuming the data in tm is UTC rather
812 than local timezone (mktime assumes the latter).
813
814 Contributed by Roger Beeman <beeman@cisco.com>, with the help of
815 Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO. */
41b6caf4
AL
816
817/* Turned it into an autoconf check, because GNU is not the only thing which
818 can provide timegm. -- 2002-09-22, Joel Baker */
819
820#ifndef HAVE_TIMEGM // Now with autoconf!
6d5dd02a
AL
821static time_t timegm(struct tm *t)
822{
823 time_t tl, tb;
824
825 tl = mktime (t);
826 if (tl == -1)
827 return -1;
828 tb = mktime (gmtime (&tl));
829 return (tl <= tb ? (tl + (tl - tb)) : (tl - (tb - tl)));
830}
831#endif
832 /*}}}*/
24231681
AL
833// StrToTime - Converts a string into a time_t /*{{{*/
834// ---------------------------------------------------------------------
835/* This handles all 3 popular time formats including RFC 1123, RFC 1036
836 and the C library asctime format. It requires the GNU library function
837 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
f58a97d3
AL
838 reason the C library does not provide any such function :< This also
839 handles the weird, but unambiguous FTP time format*/
171c75f1 840bool StrToTime(const string &Val,time_t &Result)
24231681
AL
841{
842 struct tm Tm;
843 char Month[10];
844 const char *I = Val.c_str();
845
846 // Skip the day of the week
847 for (;*I != 0 && *I != ' '; I++);
848
849 // Handle RFC 1123 time
f58a97d3 850 Month[0] = 0;
24231681
AL
851 if (sscanf(I," %d %3s %d %d:%d:%d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
852 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
853 {
854 // Handle RFC 1036 time
855 if (sscanf(I," %d-%3s-%d %d:%d:%d GMT",&Tm.tm_mday,Month,
856 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
857 Tm.tm_year += 1900;
858 else
859 {
860 // asctime format
861 if (sscanf(I," %3s %d %d:%d:%d %d",Month,&Tm.tm_mday,
862 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
f58a97d3
AL
863 {
864 // 'ftp' time
7ef72446 865 if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
f58a97d3
AL
866 &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
867 return false;
868 Tm.tm_mon--;
869 }
24231681
AL
870 }
871 }
872
873 Tm.tm_isdst = 0;
f58a97d3
AL
874 if (Month[0] != 0)
875 Tm.tm_mon = MonthConv(Month);
24231681
AL
876 Tm.tm_year -= 1900;
877
878 // Convert to local time and then to GMT
879 Result = timegm(&Tm);
880 return true;
881}
882 /*}}}*/
ddc1d8d0
AL
883// StrToNum - Convert a fixed length string to a number /*{{{*/
884// ---------------------------------------------------------------------
885/* This is used in decoding the crazy fixed length string headers in
886 tar and ar files. */
// Convert the first Len characters of Str (not necessarily terminated)
// into a number in the given Base.
//
// Returns false if Len is 30 or more, or if strtoul() cannot read any
// digits. A field that consists only of blanks counts as zero. Res is set
// to 0 before parsing starts, but is left alone when Len is rejected.
bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
{
   char Copy[30];
   if (Len >= sizeof(Copy))
      return false;
   memcpy(Copy,Str,Len);
   Copy[Len] = 0;

   // All spaces is a zero
   Res = 0;
   const char *First = Copy;
   while (*First == ' ')
      ++First;
   if (*First == 0)
      return true;

   char *End;
   Res = strtoul(Copy,&End,Base);
   return End != Copy;
}
909 /*}}}*/
6e52073f
AL
910// HexDigit - Convert a hex character into an integer /*{{{*/
911// ---------------------------------------------------------------------
912/* Helper for Hex2Num */
// Value of one hexadecimal digit character (either case); 0 for anything
// that is not a hex digit.
static int HexDigit(int c)
{
   if (c < '0')
      return 0;
   if (c <= '9')
      return c - '0';
   if (c >= 'A' && c <= 'F')
      return 10 + (c - 'A');
   if (c >= 'a' && c <= 'f')
      return 10 + (c - 'a');
   return 0;
}
923 /*}}}*/
924// Hex2Num - Convert a long hex number into a buffer /*{{{*/
925// ---------------------------------------------------------------------
926/* The length of the buffer must be exactly 1/2 the length of the string. */
171c75f1 927bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
6e52073f 928{
0db4a45b 929 if (Str.length() != Length*2)
6e52073f
AL
930 return false;
931
932 // Convert each digit. We store it in the same order as the string
933 int J = 0;
0db4a45b 934 for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
6e52073f
AL
935 {
936 if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
937 return false;
938
939 Num[J] = HexDigit(I[0]) << 4;
940 Num[J] += HexDigit(I[1]);
941 }
942
943 return true;
944}
945 /*}}}*/
b2e465d6
AL
946// TokSplitString - Split a string up by a given token /*{{{*/
947// ---------------------------------------------------------------------
948/* This is intended to be a faster splitter, it does not use dynamic
949 memories. Input is changed to insert nulls at each token location. */
// Split Input at every Tok character without allocating memory.
//
// Input   - modified in place: a terminator is written after each token
// List    - receives pointers into Input, followed by a null pointer
// ListMax - number of slots in List, including the one for the null
//
// Whitespace around tokens is dropped. Returns false when List is too
// small; List is then still null terminated (unless ListMax is 0, in which
// case it is not touched at all).
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   // Without a single slot there is no room even for the terminator
   if (ListMax == 0)
      return false;

   // Strip any leading spaces
   char *Start = Input;
   char *Stop = Start + strlen(Start);
   while (*Start != 0 && isspace(static_cast<unsigned char>(*Start)) != 0)
      Start++;

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      for (; Pos != Stop && *Pos != Tok; Pos++);

      // Back remove spaces
      char *End = Pos;
      while (End > Start &&
             (End[-1] == Tok || isspace(static_cast<unsigned char>(End[-1])) != 0))
         End--;
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
         // The last slot is needed for the terminator
         List[Count-1] = 0;
         return false;
      }

      // Advance pos over separators, whitespace and terminators written
      // by earlier rounds
      while (Pos != Stop &&
             (*Pos == Tok || *Pos == 0 ||
              isspace(static_cast<unsigned char>(*Pos)) != 0))
         Pos++;
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
985 /*}}}*/
5dd4c8b8
DK
986// ExplodeString - Split a string up into a vector /*{{{*/
987// ---------------------------------------------------------------------
988/* This can be used to split a given string up into a vector, so the
989 purpose is the same as in the method above and this one is a bit slower
990 also, but the advantage is that we get an iterable vector */
// Split haystack at every occurrence of split and return the pieces.
//
// The separator is not part of any piece. Two separators in a row give an
// empty piece; a separator at the very end does not add a trailing empty
// piece. An empty haystack yields one empty piece.
std::vector<std::string> ExplodeString(std::string const &haystack, char const &split) {
   std::vector<std::string> exploded;
   std::string::size_type start = 0;
   do {
      std::string::size_type const end = haystack.find(split, start);
      if (end == std::string::npos)
      {
         // Last piece: everything that is left
         exploded.push_back(haystack.substr(start));
         break;
      }
      exploded.push_back(haystack.substr(start, end - start));
      // Continue behind the separator, not on it
      start = end + 1;
   } while (start < haystack.size());
   return exploded;
}
1002 /*}}}*/
b2e465d6
AL
1003// RegexChoice - Simple regex list/list matcher /*{{{*/
1004// ---------------------------------------------------------------------
1005/* */
1006unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1007 const char **ListEnd)
1008{
1009 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1010 R->Hit = false;
1011
1012 unsigned long Hits = 0;
1013 for (; ListBegin != ListEnd; ListBegin++)
1014 {
1015 // Check if the name is a regex
1016 const char *I;
1017 bool Regex = true;
1018 for (I = *ListBegin; *I != 0; I++)
1019 if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1020 break;
1021 if (*I == 0)
1022 Regex = false;
1023
1024 // Compile the regex pattern
1025 regex_t Pattern;
1026 if (Regex == true)
1027 if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1028 REG_NOSUB) != 0)
1029 Regex = false;
1030
1031 // Search the list
1032 bool Done = false;
1033 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1034 {
1035 if (R->Str[0] == 0)
1036 continue;
1037
1038 if (strcasecmp(R->Str,*ListBegin) != 0)
1039 {
1040 if (Regex == false)
1041 continue;
1042 if (regexec(&Pattern,R->Str,0,0,0) != 0)
1043 continue;
1044 }
1045 Done = true;
1046
1047 if (R->Hit == false)
1048 Hits++;
1049
1050 R->Hit = true;
1051 }
1052
1053 if (Regex == true)
1054 regfree(&Pattern);
1055
1056 if (Done == false)
1057 _error->Warning(_("Selection %s not found"),*ListBegin);
1058 }
1059
1060 return Hits;
1061}
1062 /*}}}*/
// ioprintf - C format string outputter to C++ iostreams		/*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters. The formatted text is written to
   out; nothing is written if the formatting itself fails. Results which do
   not fit into the internal stack buffer are formatted a second time into
   a buffer of the required size instead of being cut off. */
void ioprintf(ostream &out,const char *format,...)
{
   va_list args;

   // sprintf the description, the common case fits on the stack
   char S[4096];
   va_start(args,format);
   int const Needed = vsnprintf(S,sizeof(S),format,args);
   va_end(args);

   // Formatting failed, the content of S is not usable
   if (Needed < 0)
      return;

   if ((size_t)Needed < sizeof(S))
   {
      out << S;
      return;
   }

   /* vsnprintf reported the length the complete result needs, so format
      again with enough room. The argument list has to be restarted as the
      first pass consumed it. */
   string Big(Needed + 1,'\0');
   va_start(args,format);
   vsnprintf(&Big[0],Big.size(),format,args);
   va_end(args);
   out << Big.c_str();
}
1077 /*}}}*/
d4cd303e
MV
// strprintf - C format string outputter to C++ strings		/*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters. out is replaced by the formatted
   text; it is emptied if the formatting itself fails. Results which do not
   fit into the internal stack buffer are formatted a second time into a
   buffer of the required size instead of being cut off. */
void strprintf(string &out,const char *format,...)
{
   va_list args;

   // sprintf the description, the common case fits on the stack
   char S[4096];
   va_start(args,format);
   int const Needed = vsnprintf(S,sizeof(S),format,args);
   va_end(args);

   // Formatting failed, the content of S is not usable
   if (Needed < 0)
   {
      out.clear();
      return;
   }

   if ((size_t)Needed < sizeof(S))
   {
      out = S;
      return;
   }

   /* vsnprintf reported the length the complete result needs, so format
      again with enough room. A temporary is used rather than out itself
      as one of the arguments may point into out. */
   string Big(Needed + 1,'\0');
   va_start(args,format);
   vsnprintf(&Big[0],Big.size(),format,args);
   va_end(args);
   out = Big.c_str();
}
1092 /*}}}*/
1168596f
AL
// safe_snprintf - Safer snprintf					/*{{{*/
// ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == End. This is a better alternative to using
   consecutive snprintfs: the result can be passed as Buffer of the next
   call. End is returned if there is no room left, if the text had to be
   cut off or if the formatting failed. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   // No room at all, not even for the terminator
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   /* Kept signed on purpose: vsnprintf reports failures with a negative
      value which must not be mistaken for a (huge) length */
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   if (Did < 0)
   {
      // Nothing usable was produced, keep the text in front of Buffer intact
      *Buffer = '\0';
      return End;
   }

   // The text was cut off (or fills the buffer exactly), nothing more fits
   if (Did >= End - Buffer)
      return End;

   return Buffer + Did;
}
1114 /*}}}*/
93bf083d 1115
4e86942a
MV
// tolower_ascii - tolower() function that ignores the locale		/*{{{*/
// ---------------------------------------------------------------------
/* Maps the ASCII letters A-Z to a-z and hands back every other value
   untouched, independent of the locale in effect. */
int tolower_ascii(int c)
{
   bool const IsUpper = ('A' <= c && c <= 'Z');
   return IsUpper ? c + ('a' - 'A') : c;
}
1125 /*}}}*/
1126
f8081133
AL
1127// CheckDomainList - See if Host is in a , seperate list /*{{{*/
1128// ---------------------------------------------------------------------
1129/* The domain list is a comma seperate list of domains that are suffix
1130 matched against the argument */
171c75f1 1131bool CheckDomainList(const string &Host,const string &List)
f8081133 1132{
47db8997
AL
1133 string::const_iterator Start = List.begin();
1134 for (string::const_iterator Cur = List.begin(); Cur <= List.end(); Cur++)
f8081133 1135 {
47db8997 1136 if (Cur < List.end() && *Cur != ',')
f8081133
AL
1137 continue;
1138
1139 // Match the end of the string..
e2c7e6b5 1140 if ((Host.size() >= (unsigned)(Cur - Start)) &&
f8081133 1141 Cur - Start != 0 &&
47db8997 1142 stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
f8081133
AL
1143 return true;
1144
1145 Start = Cur + 1;
1146 }
1147 return false;
1148}
1149 /*}}}*/
1150
be4401bf 1151// URI::CopyFrom - Copy from an object /*{{{*/
93bf083d
AL
1152// ---------------------------------------------------------------------
1153/* This parses the URI into all of its components */
171c75f1 1154void URI::CopyFrom(const string &U)
93bf083d 1155{
5933aab2 1156 string::const_iterator I = U.begin();
93bf083d 1157
b2e465d6 1158 // Locate the first colon, this separates the scheme
5933aab2
AL
1159 for (; I < U.end() && *I != ':' ; I++);
1160 string::const_iterator FirstColon = I;
93bf083d 1161
bfd22fc0
AL
1162 /* Determine if this is a host type URI with a leading double //
1163 and then search for the first single / */
5933aab2
AL
1164 string::const_iterator SingleSlash = I;
1165 if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
bfd22fc0 1166 SingleSlash += 3;
67ff87bf
AL
1167
1168 /* Find the / indicating the end of the hostname, ignoring /'s in the
1169 square brackets */
1170 bool InBracket = false;
5933aab2 1171 for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); SingleSlash++)
67ff87bf
AL
1172 {
1173 if (*SingleSlash == '[')
1174 InBracket = true;
1175 if (InBracket == true && *SingleSlash == ']')
1176 InBracket = false;
1177 }
1178
5933aab2
AL
1179 if (SingleSlash > U.end())
1180 SingleSlash = U.end();
93bf083d
AL
1181
1182 // We can now write the access and path specifiers
171c75f1 1183 Access.assign(U.begin(),FirstColon);
5933aab2 1184 if (SingleSlash != U.end())
171c75f1 1185 Path.assign(SingleSlash,U.end());
92e889c8
AL
1186 if (Path.empty() == true)
1187 Path = "/";
1188
93bf083d 1189 // Now we attempt to locate a user:pass@host fragment
d48c6a7d 1190 if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
f46e7681
AL
1191 FirstColon += 3;
1192 else
1193 FirstColon += 1;
5933aab2 1194 if (FirstColon >= U.end())
93bf083d
AL
1195 return;
1196
1197 if (FirstColon > SingleSlash)
1198 FirstColon = SingleSlash;
1199
3856756b
AL
1200 // Find the colon...
1201 I = FirstColon + 1;
1d38d0e9
AL
1202 if (I > SingleSlash)
1203 I = SingleSlash;
3856756b 1204 for (; I < SingleSlash && *I != ':'; I++);
5933aab2 1205 string::const_iterator SecondColon = I;
3856756b
AL
1206
1207 // Search for the @ after the colon
93bf083d 1208 for (; I < SingleSlash && *I != '@'; I++);
5933aab2 1209 string::const_iterator At = I;
93bf083d 1210
93bf083d
AL
1211 // Now write the host and user/pass
1212 if (At == SingleSlash)
1213 {
1214 if (FirstColon < SingleSlash)
171c75f1 1215 Host.assign(FirstColon,SingleSlash);
93bf083d
AL
1216 }
1217 else
1218 {
171c75f1
MV
1219 Host.assign(At+1,SingleSlash);
1220 User.assign(FirstColon,SecondColon);
93bf083d 1221 if (SecondColon < At)
171c75f1 1222 Password.assign(SecondColon+1,At);
93bf083d
AL
1223 }
1224
67ff87bf
AL
1225 // Now we parse the RFC 2732 [] hostnames.
1226 unsigned long PortEnd = 0;
1227 InBracket = false;
1228 for (unsigned I = 0; I != Host.length();)
1229 {
1230 if (Host[I] == '[')
1231 {
1232 InBracket = true;
1233 Host.erase(I,1);
1234 continue;
1235 }
1236
1237 if (InBracket == true && Host[I] == ']')
1238 {
1239 InBracket = false;
1240 Host.erase(I,1);
1241 PortEnd = I;
1242 continue;
1243 }
1244 I++;
1245 }
1246
1247 // Tsk, weird.
1248 if (InBracket == true)
1249 {
171c75f1 1250 Host.clear();
67ff87bf
AL
1251 return;
1252 }
1253
1d38d0e9 1254 // Now we parse off a port number from the hostname
93bf083d
AL
1255 Port = 0;
1256 string::size_type Pos = Host.rfind(':');
67ff87bf 1257 if (Pos == string::npos || Pos < PortEnd)
93bf083d
AL
1258 return;
1259
1260 Port = atoi(string(Host,Pos+1).c_str());
171c75f1 1261 Host.assign(Host,0,Pos);
93bf083d
AL
1262}
1263 /*}}}*/
1264// URI::operator string - Convert the URI to a string /*{{{*/
1265// ---------------------------------------------------------------------
1266/* */
1267URI::operator string()
1268{
54cf15cb
AL
1269 string Res;
1270
1271 if (Access.empty() == false)
1272 Res = Access + ':';
1273
93bf083d 1274 if (Host.empty() == false)
7834cb57 1275 {
54cf15cb
AL
1276 if (Access.empty() == false)
1277 Res += "//";
7834cb57 1278
93bf083d
AL
1279 if (User.empty() == false)
1280 {
54cf15cb 1281 Res += User;
93bf083d
AL
1282 if (Password.empty() == false)
1283 Res += ":" + Password;
1284 Res += "@";
1285 }
54cf15cb 1286
7834cb57
AL
1287 // Add RFC 2732 escaping characters
1288 if (Access.empty() == false &&
1289 (Host.find('/') != string::npos || Host.find(':') != string::npos))
1290 Res += '[' + Host + ']';
1291 else
1292 Res += Host;
1293
492f957a
AL
1294 if (Port != 0)
1295 {
1296 char S[30];
1297 sprintf(S,":%u",Port);
1298 Res += S;
1299 }
93bf083d
AL
1300 }
1301
1302 if (Path.empty() == false)
492f957a
AL
1303 {
1304 if (Path[0] != '/')
1305 Res += "/" + Path;
1306 else
1307 Res += Path;
1308 }
93bf083d
AL
1309
1310 return Res;
1311}
1312 /*}}}*/
b2e465d6
AL
1313// URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1314// ---------------------------------------------------------------------
1315/* */
171c75f1 1316string URI::SiteOnly(const string &URI)
b2e465d6
AL
1317{
1318 ::URI U(URI);
171c75f1
MV
1319 U.User.clear();
1320 U.Password.clear();
1321 U.Path.clear();
b2e465d6
AL
1322 U.Port = 0;
1323 return U;
1324}
1325 /*}}}*/