Working acquire code
[ntk/apt.git] / apt-pkg / contrib / strutl.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: strutl.cc,v 1.13 1998/11/05 07:21:44 jgg Exp $
4 /* ######################################################################
5
6 String Util - Some useful string functions.
7
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
11
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
14
15 ##################################################################### */
16 /*}}}*/
17 // Includes /*{{{*/
18 #ifdef __GNUG__
19 #pragma implementation "strutl.h"
20 #endif
21
22 #include <strutl.h>
23 #include <apt-pkg/fileutl.h>
24
25 #include <ctype.h>
26 #include <string.h>
27 #include <stdio.h>
28 /*}}}*/
29
// strstrip - Remove white space from the front and back of a string	/*{{{*/
// ---------------------------------------------------------------------
/* Strips leading spaces and tabs and trailing spaces, tabs, newlines and
   carriage returns, which makes it handy for lines read with fgets and
   company. The buffer is modified in place (a terminator is written
   after the last character kept) and the returned pointer points into
   that same buffer, at the first character kept. */
char *_strstrip(char *String)
{
   // Step over the leading blanks
   while (*String == ' ' || *String == '\t')
      String++;

   if (*String == 0)
      return String;

   /* Walk back from the terminator while the character before it is
      white space. End never drops below String, so no pointer in front
      of the buffer is ever formed. */
   char *End = String + strlen(String);
   while (End != String && (End[-1] == ' ' || End[-1] == '\t' ||
                            End[-1] == '\n' || End[-1] == '\r'))
      End--;

   *End = 0;
   return String;
}
48 /*}}}*/
// strtabexpand - Expands tabs to the next 8 column tab stop		/*{{{*/
// ---------------------------------------------------------------------
/* Every tab in String is replaced, in place, by the number of spaces
   needed to reach the next column that is a multiple of 8, counting the
   start of the string as column 0. Len is the total size of the buffer
   holding String, terminator included. If an expansion would not fit in
   the buffer the string is cut off at that tab. A buffer without a
   terminator inside its first Len bytes is returned untouched. */
char *_strtabexpand(char *String,size_t Len)
{
   // Locate the terminator without ever looking past the buffer
   const char *Nul = static_cast<const char *>(memchr(String,0,Len));
   if (Nul == 0)
      return String;
   size_t Used = Nul - String;

   for (size_t Pos = 0; Pos < Used; Pos++)
   {
      if (String[Pos] != '\t')
         continue;

      // Spaces needed to reach the next multiple of 8
      const size_t Fill = 8 - (Pos % 8);

      /* The tab itself is overwritten, so the text grows by Fill - 1
         and the terminator must still land inside the buffer */
      if (Used + Fill - 1 >= Len)
      {
         String[Pos] = 0;
         return String;
      }

      // Shift the tail (terminator included) and blank out the gap
      memmove(String + Pos + Fill,String + Pos + 1,Used - Pos);
      memset(String + Pos,' ',Fill);
      Used += Fill - 1;

      // Resume right after the spaces just written
      Pos += Fill - 1;
   }
   return String;
}
83 /*}}}*/
// ParseQuoteWord - Parse a single word out of a string			/*{{{*/
// ---------------------------------------------------------------------
/* This grabs a single space delimited word, converts any %XX escaped
   characters to their proper values and advances String past the word
   and the spaces that follow it. Double quotes are understood (a quoted
   section may contain spaces) and are stripped out as well. This is for
   URI/URL parsing.

   Returns false, leaving String and Res alone, if only spaces remain or
   if a quote is never closed. The copy starts at String itself, so any
   leading spaces end up in Res. At most sizeof(Buffer)-1 characters are
   kept; the rest of an over long word is dropped. */
bool ParseQuoteWord(const char *&String,string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      C++;
   if (*C == 0)
      return false;

   // Jump to the end of the word, stepping over quoted sections
   for (;*C != 0 && *C != ' '; C++)
   {
      if (*C == '"')
      {
         for (C++;*C != 0 && *C != '"'; C++);
         if (*C == 0)
            return false;
      }
   }

   // Now de-quote characters, always leaving room for the terminator
   char Buffer[1024];
   char Tmp[3];
   char *Out = Buffer;
   char * const Limit = Buffer + sizeof(Buffer) - 1;
   for (const char *In = String; In != C && Out != Limit;)
   {
      if (*In == '%' && In + 2 < C)
      {
         // Two hex digits follow, hand them to strtol as a C string
         Tmp[0] = In[1];
         Tmp[1] = In[2];
         Tmp[2] = 0;
         *Out++ = (char)strtol(Tmp,0,16);
         In += 3;
         continue;
      }

      // Quote characters are dropped, everything else is copied
      if (*In != '"')
         *Out++ = *In;
      In++;
   }
   *Out = 0;
   Res = Buffer;

   // Skip ending white space
   while (*C == ' ')
      C++;
   String = C;
   return true;
}
138 /*}}}*/
// ParseCWord - Parses a string like a C "" expression			/*{{{*/
// ---------------------------------------------------------------------
/* This expects a series of white space separated strings enclosed in
   ""'s. The contents of the ""'s are concatenated into Res, with each
   run of white space between them collapsed into a single space.

   Returns false if the input is empty or only spaces, is too long for
   the internal buffer, has an unterminated quote or has any non white
   space character outside of the quotes. */
bool ParseCWord(const char *String,string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      C++;
   if (*C == 0)
      return false;

   /* The output is never longer than the input, so checking the input
      length up front keeps every write below inside Buffer */
   char Buffer[1024];
   char *Buf = Buffer;
   if (strlen(String) >= sizeof(Buffer))
      return false;

   for (; *C != 0; C++)
   {
      if (*C == '"')
      {
         // Copy the quoted section verbatim
         for (C++; *C != 0 && *C != '"'; C++)
            *Buf++ = *C;

         if (*C == 0)
            return false;

         continue;
      }

      /* isspace is only defined for values representable as an
         unsigned char, so convert before calling it */
      const bool Space = isspace((unsigned char)*C) != 0;

      // Only the first character of a white space run produces output
      if (C != String && Space == true &&
          isspace((unsigned char)C[-1]) != 0)
         continue;
      if (Space == false)
         return false;
      *Buf++ = ' ';
   }
   *Buf = 0;
   Res = Buffer;
   return true;
}
179 /*}}}*/
// QuoteString - Convert a string into quoted form			/*{{{*/
// ---------------------------------------------------------------------
/* Returns a copy of Str in which every character that appears in Bad,
   is not printable, or lies outside the range 0x21..0x7E is replaced by
   a '%' followed by its two digit lower case hex value. Bad must be a
   valid C string. */
string QuoteString(string Str,const char *Bad)
{
   string Res;
   for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      /* Work on the unsigned value: a negative char would otherwise be
         sign extended into eight hex digits and is not a valid argument
         for isprint */
      const unsigned char Ch = (unsigned char)*I;

      if (strchr(Bad,Ch) != 0 || isprint(Ch) == 0 ||
          Ch <= 0x20 || Ch >= 0x7F)
      {
         char Buf[10];
         sprintf(Buf,"%%%02x",(unsigned int)Ch);
         Res += Buf;
      }
      else
         Res += *I;
   }
   return Res;
}
200 /*}}}*/
// SizeToStr - Convert a long into a human readable size		/*{{{*/
// ---------------------------------------------------------------------
/* The magnitude of Size (the sign is dropped) is divided by 1000 until
   it is below 10000 or the largest unit is reached, then printed
   followed by a one letter unit. A scaled value below 100 is shown with
   one decimal, anything else with none, so a max of 4 digits are shown
   before conversion to the next highest unit. The max length of the
   string will be 5 chars unless the size is >= 10000 YottaBytes, in
   which case all the digits are printed with the Y unit. */
string SizeToStr(double Size)
{
   double ASize = (Size >= 0) ? Size : -1*Size;

   /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
      ExaBytes, ZettaBytes, YottaBytes */
   static const char Ext[] = {'b','k','M','G','T','P','E','Z','Y'};
   const int Last = sizeof(Ext)/sizeof(Ext[0]) - 1;

   /* Scale down, but never step past the last unit so that a result is
      always produced */
   int I = 0;
   while (I < Last && ASize >= 10000)
   {
      ASize /= 1000.0;
      I++;
   }

   // Large enough for every digit of the biggest double plus the unit
   char S[300];
   if (I != 0 && ASize < 100)
      snprintf(S,sizeof(S),"%.1f%c",ASize,Ext[I]);
   else
      snprintf(S,sizeof(S),"%.0f%c",ASize,Ext[I]);

   return S;
}
238 /*}}}*/
// TimeToStr - Convert the time into a string				/*{{{*/
// ---------------------------------------------------------------------
/* Converts a number of seconds to a d/h/m/s format. A larger unit is
   only shown once Sec is strictly greater than one whole such unit, so
   exactly 60 seconds is printed as "60s" and exactly one day as
   "24h0m0s". */
string TimeToStr(unsigned long Sec)
{
   char S[300];

   // %lu, the values are unsigned long
   if (Sec > 60*60*24)
      snprintf(S,sizeof(S),"%lud %luh%lum%lus",Sec/60/60/24,
               (Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
   else if (Sec > 60*60)
      snprintf(S,sizeof(S),"%luh%lum%lus",Sec/60/60,(Sec/60) % 60,
               Sec % 60);
   else if (Sec > 60)
      snprintf(S,sizeof(S),"%lum%lus",Sec/60,Sec % 60);
   else
      snprintf(S,sizeof(S),"%lus",Sec);

   return S;
}
272 /*}}}*/
// SubstVar - Substitute a string for another string			/*{{{*/
// ---------------------------------------------------------------------
/* This replaces all occurrences of Subst with Contents in Str and
   returns the result. The search resumes after each match, so text
   brought in by Contents is never itself substituted. An empty Subst
   matches nothing and Str is returned as is. */
string SubstVar(string Str,string Subst,string Contents)
{
   // An empty pattern would match at every position without advancing
   if (Subst.empty() == true)
      return Str;

   string::size_type Pos = 0;
   string::size_type OldPos = 0;
   string Temp;

   while (OldPos < Str.length() &&
          (Pos = Str.find(Subst,OldPos)) != string::npos)
   {
      // Copy the text between the previous match and this one
      Temp.append(Str,OldPos,Pos - OldPos);
      Temp += Contents;
      OldPos = Pos + Subst.length();
   }

   // Nothing was replaced
   if (OldPos == 0)
      return Str;

   return Temp + string(Str,OldPos);
}
294 /*}}}*/
295 // URItoFileName - Convert the uri into a unique file name /*{{{*/
296 // ---------------------------------------------------------------------
297 /* This converts a URI into a safe filename. It quotes all unsafe characters
298 and converts / to _ and removes the scheme identifier. The resulting
299 file name should be unique and never occur again for a different file */
300 string URItoFileName(string URI)
301 {
302 string::const_iterator I = URI.begin() + URI.find(':') + 1;
303 for (; I < URI.end() && *I == '/'; I++);
304
305 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
306 URI = QuoteString(string(I,URI.end() - I),"\\|{}[]<>\"^~_=!@#$%^&*");
307 string::iterator J = URI.begin();
308 for (; J != URI.end(); J++)
309 if (*J == '/')
310 *J = '_';
311 return URI;
312 }
313 /*}}}*/
// Base64Encode - Base64 Encoding routine for short strings		/*{{{*/
// ---------------------------------------------------------------------
/* This routine performs a base64 transformation on a string. It was
   ripped from wget and then patched and bug fixed. The output is padded
   with '=' to a multiple of 4 characters and contains no line breaks.

   This spec can be found in rfc2045 */
string Base64Encode(string S)
{
   // Conversion table.
   static const char tbl[64] = {'A','B','C','D','E','F','G','H',
                                'I','J','K','L','M','N','O','P',
                                'Q','R','S','T','U','V','W','X',
                                'Y','Z','a','b','c','d','e','f',
                                'g','h','i','j','k','l','m','n',
                                'o','p','q','r','s','t','u','v',
                                'w','x','y','z','0','1','2','3',
                                '4','5','6','7','8','9','+','/'};

   // Pre-allocate some space
   const string::size_type Len = S.length();
   string Final;
   Final.reserve((4*Len + 2)/3 + 2);

   /* Transform the 3x8 bits to 4x6 bits, as required by
      base64. */
   for (string::size_type Pos = 0; Pos < Len; Pos += 3)
   {
      /* Unsigned, so that bytes of 0x80 and above shift into small
         positive table indexes. Missing trailing bytes count as 0. */
      unsigned char Bits[3] = {0,0,0};
      Bits[0] = S[Pos];
      if (Pos + 1 < Len)
         Bits[1] = S[Pos + 1];
      if (Pos + 2 < Len)
         Bits[2] = S[Pos + 2];

      Final += tbl[Bits[0] >> 2];
      Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];

      // Only one byte was left, padding supplies the rest
      if (Pos + 1 >= Len)
         break;

      Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];

      // Only two bytes were left
      if (Pos + 2 >= Len)
         break;

      Final += tbl[Bits[2] & 0x3f];
   }

   /* Apply the padding elements, this tells how many bytes the remote
      end should discard */
   if (Len % 3 == 2)
      Final += '=';
   if (Len % 3 == 1)
      Final += "==";

   return Final;
}
370 /*}}}*/
// stringcmp - Arbitrary string compare					/*{{{*/
// ---------------------------------------------------------------------
/* This compares the two ranges [A,AEnd) and [B,BEnd), neither of which
   has to be null terminated, and returns 0 when they are identical.
   When they first differ inside both ranges the result is -1 if the
   character in A is the smaller one and 1 otherwise. When one range is
   a proper prefix of the other the result is 1 if A is the shorter one
   and -1 if B is. */
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // Advance over the part the two ranges have in common
   while (A != AEnd && B != BEnd && *A == *B)
   {
      A++;
      B++;
   }

   const bool ADone = (A == AEnd);
   const bool BDone = (B == BEnd);

   if (ADone == true)
      return (BDone == true) ? 0 : 1;
   if (BDone == true)
      return -1;

   // Both ranges still have a character and those characters differ
   return (*A < *B) ? -1 : 1;
}
391 /*}}}*/
// stringcasecmp - Arbitrary case insensitive string compare		/*{{{*/
// ---------------------------------------------------------------------
/* Works like stringcmp on the ranges [A,AEnd) and [B,BEnd) except that
   every character is passed through toupper before it is compared.
   Returns 0 when the ranges match, -1 or 1 according to the first
   differing character, 1 when A is a proper prefix of B and -1 when B
   is a proper prefix of A. */
int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // Advance over the part that matches when case is ignored
   while (A != AEnd && B != BEnd && toupper(*A) == toupper(*B))
   {
      A++;
      B++;
   }

   const bool ADone = (A == AEnd);
   const bool BDone = (B == BEnd);

   if (ADone == true)
      return (BDone == true) ? 0 : 1;
   if (BDone == true)
      return -1;

   // Both ranges still have a character and those characters differ
   return (toupper(*A) < toupper(*B)) ? -1 : 1;
}
411 /*}}}*/
412 // LookupTag - Lookup the value of a tag in a taged string /*{{{*/
413 // ---------------------------------------------------------------------
414 /* The format is like those used in package files and the method
415 communication system */
416 string LookupTag(string Message,const char *Tag,const char *Default)
417 {
418 // Look for a matching tag.
419 int Length = strlen(Tag);
420 for (string::iterator I = Message.begin(); I + Length < Message.end(); I++)
421 {
422 // Found the tag
423 if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
424 {
425 // Find the end of line and strip the leading/trailing spaces
426 string::iterator J;
427 I += Length + 1;
428 for (; isspace(*I) != 0 && I < Message.end(); I++);
429 for (J = I; *J != '\n' && J < Message.end(); J++);
430 for (; J > I && isspace(J[-1]) != 0; J--);
431
432 return string(I,J-I);
433 }
434
435 for (; *I != '\n' && I < Message.end(); I++);
436 }
437
438 // Failed to find a match
439 if (Default == 0)
440 return string();
441 return Default;
442 }
443 /*}}}*/
// StringToBool - Converts a string into a boolean			/*{{{*/
// ---------------------------------------------------------------------
/* This inspects the string to see if it is true or if it is false and
   then returns 1 or 0 respectively. A number is accepted when the whole
   string converts (strtol, base auto-detected) to exactly 0 or 1. The
   words no/false/without/disable give 0 and yes/true/with/enable give
   1, compared without regard to case. Anything else, including the
   empty string and text that merely starts with a digit, returns
   Default. */
int StringToBool(string Text,int Default = -1)
{
   const char * const Start = Text.c_str();

   /* Keep the full long so a large number cannot wrap around into 0 or
      1, and insist that the conversion consumed the whole string */
   char *End;
   const long Res = strtol(Start,&End,0);
   if (End != Start && *End == 0 && Res >= 0 && Res <= 1)
      return (int)Res;

   // Check for negatives
   if (strcasecmp(Start,"no") == 0 ||
       strcasecmp(Start,"false") == 0 ||
       strcasecmp(Start,"without") == 0 ||
       strcasecmp(Start,"disable") == 0)
      return 0;

   // Check for positives
   if (strcasecmp(Start,"yes") == 0 ||
       strcasecmp(Start,"true") == 0 ||
       strcasecmp(Start,"with") == 0 ||
       strcasecmp(Start,"enable") == 0)
      return 1;

   return Default;
}
471 /*}}}*/
// TimeRFC1123 - Convert a time_t into RFC1123 format			/*{{{*/
// ---------------------------------------------------------------------
/* This converts a time_t into a string time representation that is
   year 2000 compliant and timezone neutral, for example
   "Sun, 06 Nov 1994 08:49:37 GMT". An empty string is returned if
   gmtime is unable to convert Date. */
string TimeRFC1123(time_t Date)
{
   static const char * const Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri",
                                      "Sat"};
   static const char * const Month[] = {"Jan","Feb","Mar","Apr","May","Jun",
                                        "Jul","Aug","Sep","Oct","Nov","Dec"};

   // gmtime hands back a null pointer when it cannot represent the date
   const struct tm *Conv = gmtime(&Date);
   if (Conv == 0)
      return string();

   char Buf[300];
   snprintf(Buf,sizeof(Buf),"%s, %02i %s %i %02i:%02i:%02i GMT",
            Day[Conv->tm_wday],Conv->tm_mday,Month[Conv->tm_mon],
            Conv->tm_year+1900,Conv->tm_hour,Conv->tm_min,Conv->tm_sec);
   return Buf;
}
490 /*}}}*/
491 // ReadMessages - Read messages from the FD /*{{{*/
492 // ---------------------------------------------------------------------
493 /* This pulls full messages from the input FD into the message buffer.
494 It assumes that messages will not pause during transit so no
495 fancy buffering is used. */
496 bool ReadMessages(int Fd, vector<string> &List)
497 {
498 char Buffer[4000];
499 char *End = Buffer;
500
501 while (1)
502 {
503 int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
504
505 // Process is dead, this is kind of bad..
506 if (Res == 0)
507 return false;
508
509 // No data
510 if (Res <= 0)
511 return true;
512
513 End += Res;
514
515 // Look for the end of the message
516 for (char *I = Buffer; I + 1 < End; I++)
517 {
518 if (I[0] != '\n' || I[1] != '\n')
519 continue;
520
521 // Pull the message out
522 string Message(Buffer,0,I-Buffer);
523
524 // Fix up the buffer
525 for (; I < End && *I == '\n'; I++);
526 End -= I-Buffer;
527 memmove(Buffer,I,End-Buffer);
528 I = Buffer;
529
530 List.push_back(Message);
531 }
532 if (End == Buffer)
533 return true;
534
535 if (WaitFd(Fd) == false)
536 return false;
537 }
538 }
539 /*}}}*/
// MonthConv - Converts a month string into a number			/*{{{*/
// ---------------------------------------------------------------------
/* Maps an English month abbreviation to a month number counted from 0
   (Jan) to 11 (Dec), ignoring case. Only as many of the first three
   characters as are needed to tell the months apart are looked at, and
   an unknown first letter is treated as January. This was lifted from
   the boa webserver which lifted it from 'wn-v1.07'. */
static int MonthConv(char *Month)
{
   const int First = toupper(*Month);

   // Jan, Jun, Jul
   if (First == 'J')
   {
      if (toupper(Month[1]) == 'A')
         return 0;
      return (toupper(Month[2]) == 'N') ? 5 : 6;
   }

   if (First == 'F')
      return 1;

   // Mar, May
   if (First == 'M')
      return (toupper(Month[2]) == 'R') ? 2 : 4;

   // Apr, Aug
   if (First == 'A')
      return (toupper(Month[1]) == 'P') ? 3 : 7;

   if (First == 'S')
      return 8;
   if (First == 'O')
      return 9;
   if (First == 'N')
      return 10;
   if (First == 'D')
      return 11;

   // Pretend it is January..
   return 0;
}
572 /*}}}*/
573 // StrToTime - Converts a string into a time_t /*{{{*/
574 // ---------------------------------------------------------------------
575 /* This handles all 3 populare time formats including RFC 1123, RFC 1036
576 and the C library asctime format. It requires the GNU library function
577 'timegm' to convert a struct tm in UTC to a time_t. For some bizzar
578 reason the C library does not provide any such function :<*/
579 bool StrToTime(string Val,time_t &Result)
580 {
581 struct tm Tm;
582 char Month[10];
583 const char *I = Val.c_str();
584
585 // Skip the day of the week
586 for (;*I != 0 && *I != ' '; I++);
587
588 // Handle RFC 1123 time
589 if (sscanf(I," %d %3s %d %d:%d:%d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
590 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
591 {
592 // Handle RFC 1036 time
593 if (sscanf(I," %d-%3s-%d %d:%d:%d GMT",&Tm.tm_mday,Month,
594 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
595 Tm.tm_year += 1900;
596 else
597 {
598 // asctime format
599 if (sscanf(I," %3s %d %d:%d:%d %d",Month,&Tm.tm_mday,
600 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
601 return false;
602 }
603 }
604
605 Tm.tm_isdst = 0;
606 Tm.tm_mon = MonthConv(Month);
607 Tm.tm_year -= 1900;
608
609 // Convert to local time and then to GMT
610 Result = timegm(&Tm);
611 return true;
612 }
613 /*}}}*/
614
615 // URI::CopyFrom - Copy from an object /*{{{*/
616 // ---------------------------------------------------------------------
617 /* This parses the URI into all of its components */
618 void URI::CopyFrom(string U)
619 {
620 string::const_iterator I = U.begin();
621
622 // Locate the first colon, this seperates the scheme
623 for (; I < U.end() && *I != ':' ; I++);
624 string::const_iterator FirstColon = I;
625
626 /* Determine if this is a host type URI with a leading double //
627 and then search for the first single / */
628 string::const_iterator SingleSlash = I;
629 if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
630 SingleSlash += 3;
631 for (; SingleSlash < U.end() && *SingleSlash != '/'; SingleSlash++);
632 if (SingleSlash > U.end())
633 SingleSlash = U.end();
634
635 // We can now write the access and path specifiers
636 Access = string(U,0,FirstColon - U.begin());
637 if (SingleSlash != U.end())
638 Path = string(U,SingleSlash - U.begin());
639 if (Path.empty() == true)
640 Path = "/";
641
642 // Now we attempt to locate a user:pass@host fragment
643 FirstColon += 3;
644 if (FirstColon >= U.end())
645 return;
646
647 if (FirstColon > SingleSlash)
648 FirstColon = SingleSlash;
649
650 // Search for the @
651 I = FirstColon;
652 for (; I < SingleSlash && *I != '@'; I++);
653 string::const_iterator At = I;
654
655 // Colon in the @ section
656 I = FirstColon + 1;
657 for (; I < At && *I != ':'; I++);
658 string::const_iterator SecondColon = I;
659
660 // Now write the host and user/pass
661 if (At == SingleSlash)
662 {
663 if (FirstColon < SingleSlash)
664 Host = string(U,FirstColon - U.begin(),SingleSlash - FirstColon);
665 }
666 else
667 {
668 Host = string(U,At - U.begin() + 1,SingleSlash - At - 1);
669 User = string(U,FirstColon - U.begin(),SecondColon - FirstColon);
670 if (SecondColon < At)
671 Password = string(U,SecondColon - U.begin() + 1,At - SecondColon - 1);
672 }
673
674 // Now we parse off a pot number from the hostname
675 Port = 0;
676 string::size_type Pos = Host.rfind(':');
677 if (Pos == string::npos)
678 return;
679
680 Port = atoi(string(Host,Pos+1).c_str());
681 Host = string(Host,0,Pos);
682 }
683 /*}}}*/
684 // URI::operator string - Convert the URI to a string /*{{{*/
685 // ---------------------------------------------------------------------
686 /* */
687 URI::operator string()
688 {
689 string Res = Access + ':';
690 if (Host.empty() == false)
691 {
692 Res += "//";
693 if (User.empty() == false)
694 {
695 Res += "//" + User;
696 if (Password.empty() == false)
697 Res += ":" + Password;
698 Res += "@";
699 }
700 Res += Host;
701 if (Port != 0)
702 {
703 char S[30];
704 sprintf(S,":%u",Port);
705 Res += S;
706 }
707 }
708
709 if (Path.empty() == false)
710 {
711 if (Path[0] != '/')
712 Res += "/" + Path;
713 else
714 Res += Path;
715 }
716
717 return Res;
718 }
719 /*}}}*/