Working acquire code
[ntk/apt.git] / apt-pkg / contrib / strutl.cc
CommitLineData
6c139d6e
AL
1// -*- mode: cpp; mode: fold -*-
2// Description /*{{{*/
bfd22fc0 3// $Id: strutl.cc,v 1.13 1998/11/05 07:21:44 jgg Exp $
6c139d6e
AL
4/* ######################################################################
5
6 String Util - Some useful string functions.
7
24231681
AL
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
6c139d6e
AL
11
12 This source is placed in the Public Domain, do with it what you will
24231681 13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
6c139d6e
AL
14
15 ##################################################################### */
16 /*}}}*/
17// Includes /*{{{*/
492f957a
AL
18#ifdef __GNUG__
19#pragma implementation "strutl.h"
20#endif
21
6c139d6e 22#include <strutl.h>
0a8a80e5
AL
23#include <apt-pkg/fileutl.h>
24
6c139d6e
AL
25#include <ctype.h>
26#include <string.h>
27#include <stdio.h>
28 /*}}}*/
29
// strstrip - Remove white space from the front and back of a string /*{{{*/
// ---------------------------------------------------------------------
/* The string is stripped in place. Leading spaces and tabs are skipped by
   advancing the returned pointer; trailing spaces, tabs, \n and \r (such
   as those left over from fgets and company) are cut off by writing a new
   terminator into the buffer. The result points into String. */
char *_strstrip(char *String)
{
   while (*String == ' ' || *String == '\t')
      String++;

   if (*String == 0)
      return String;

   /* Walk backwards from the terminator. End never moves below String so
      no pointer in front of the buffer is ever formed. */
   char *End = String + strlen(String);
   while (End != String && (End[-1] == ' ' || End[-1] == '\t' ||
                            End[-1] == '\n' || End[-1] == '\r'))
      End--;
   *End = 0;
   return String;
}
48 /*}}}*/
// strtabexpand - Expands tabs to 8 column tab stops /*{{{*/
// ---------------------------------------------------------------------
/* Every tab is replaced in place by enough spaces to reach the next
   multiple of 8, counting columns from the start of String. Len is the
   total size of the buffer holding String, terminator included. When an
   expansion would not fit, the string is cut off at that tab. A buffer
   that holds no terminator within Len bytes is returned untouched. */
char *_strtabexpand(char *String,size_t Len)
{
   // Without a terminator inside the buffer nothing can be moved safely
   const char *Term = (const char *)memchr(String,0,Len);
   if (Term == 0)
      return String;
   size_t Used = Term - String;

   for (char *I = String; *I != 0; I++)
   {
      if (*I != '\t')
         continue;

      // Number of blanks needed to reach the next 8 char division
      const size_t Col = I - String;
      const size_t Spaces = 8 - (Col % 8);

      /* The tab turns into Spaces blanks, so the string grows by Spaces-1
         and still needs room for its terminator */
      if (Used + Spaces > Len)
      {
         *I = 0;
         return String;
      }

      // Shift the tail (terminator included) and fill the gap
      memmove(I + Spaces,I + 1,Used - Col);
      memset(I,' ',Spaces);
      Used += Spaces - 1;

      // Leave I on the last blank, the loop increment steps past it
      I += Spaces - 1;
   }
   return String;
}
83 /*}}}*/
// ParseQuoteWord - Parse a single word out of a string /*{{{*/
// ---------------------------------------------------------------------
/* This grabs a single space delimited word, converts any % escaped
   characters (%XX, two hex digits) to their proper values and advances
   String past the word and the blanks that follow it. Double quotes are
   understood and stripped out as well; blanks inside quotes do not end
   the word. This is for URI/URL parsing.

   Returns false, leaving String and Res untouched, when only blanks
   remain or when a quote is never closed. The word is assembled in a
   string so its length is not limited. */
bool ParseQuoteWord(const char *&String,string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   for (;*C == ' '; C++);
   if (*C == 0)
      return false;

   // Jump to the end of the word, quoted sections are stepped over whole
   for (;*C != 0 && *C != ' '; C++)
   {
      if (*C == '"')
      {
         for (C++;*C != 0 && *C != '"'; C++);
         if (*C == 0)
            return false;
      }
   }

   /* Now de-quote characters. The scan starts at String, as it always
      has, so blanks in front of the word end up in the result. */
   string Word;
   char Tmp[3];
   const char *Start = String;
   while (Start != C)
   {
      // A full %XX escape must lie completely inside the word
      if (*Start == '%' && Start + 2 < C)
      {
         Tmp[0] = Start[1];
         Tmp[1] = Start[2];
         Tmp[2] = 0;
         Word += (char)strtol(Tmp,0,16);
         Start += 3;
         continue;
      }

      // Quotes themselves are dropped, everything else is copied
      if (*Start != '"')
         Word += *Start;
      Start++;
   }
   Res = Word;

   // Skip ending white space
   for (;*C == ' '; C++);
   String = C;
   return true;
}
138 /*}}}*/
08e8f724
AL
// ParseCWord - Parses a string like a C "" expression /*{{{*/
// ---------------------------------------------------------------------
/* This expects a series of white space separated strings enclosed in
   ""'s and joins their contents into Res. Each run of white space found
   between two quoted pieces contributes a single blank; pieces that touch
   ("a""b") are joined directly.

   Returns false, leaving Res untouched, when the input is empty or all
   blanks, when anything other than white space appears outside of quotes
   or when a quote is never closed. The result is assembled in a string so
   the input length is not limited. */
bool ParseCWord(const char *String,string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   for (;*C == ' '; C++);
   if (*C == 0)
      return false;

   string Joined;
   for (; *C != 0; C++)
   {
      if (*C == '"')
      {
         // Copy everything up to the closing quote
         for (C++; *C != 0 && *C != '"'; C++)
            Joined += *C;

         if (*C == 0)
            return false;

         continue;
      }

      // Only white space may appear outside of the quotes
      if (isspace((unsigned char)*C) == 0)
         return false;

      // Emit one blank per run of white space
      if (C != String && isspace((unsigned char)C[-1]) != 0)
         continue;
      Joined += ' ';
   }

   Res = Joined;
   return true;
}
179 /*}}}*/
6c139d6e
AL
// QuoteString - Convert a string into quoted form /*{{{*/
// ---------------------------------------------------------------------
/* Returns a copy of Str in which every character that is listed in Bad,
   is not printable, is <= 0x20 or is >= 0x7F has been replaced by a
   %XX escape (two lower case hex digits of the byte value). */
string QuoteString(string Str,const char *Bad)
{
   string Res;
   for (string::size_type I = 0; I != Str.length(); I++)
   {
      /* Work on the unsigned byte value: the ctype functions need it and
         it keeps the escape at exactly two hex digits for bytes >= 0x80 */
      const unsigned char Ch = (unsigned char)Str[I];
      if (strchr(Bad,Ch) != 0 || isprint(Ch) == 0 ||
          Ch <= 0x20 || Ch >= 0x7F)
      {
         char Buf[10];
         snprintf(Buf,sizeof(Buf),"%%%02x",(unsigned int)Ch);
         Res += Buf;
      }
      else
         Res += (char)Ch;
   }
   return Res;
}
200 /*}}}*/
// SizeToStr - Convert a long into a human readable size /*{{{*/
// ---------------------------------------------------------------------
/* A max of 4 digits are shown before conversion to the next highest unit
   (each unit is a factor of 1000). In any unit above bytes, values below
   100 are shown with one decimal. The sign of Size is dropped. Sizes
   beyond the YottaByte (E24) range are still reported in Y, with as many
   digits as that takes. */
string SizeToStr(double Size)
{
   double ASize = (Size >= 0) ? Size : -1*Size;

   /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
      ExaBytes, ZettaBytes, YottaBytes */
   static const char Ext[] = {'b','k','M','G','T','P','E','Z','Y'};
   const int Last = (int)sizeof(Ext) - 1;

   // Scale down until 4 digits suffice or the units run out
   int I = 0;
   while (I < Last && ASize >= 10000)
   {
      ASize /= 1000.0;
      I++;
   }

   // Large enough for the biggest double expressed in Y
   char S[350];
   if (ASize < 100 && I != 0)
      snprintf(S,sizeof(S),"%.1f%c",ASize,Ext[I]);
   else
      snprintf(S,sizeof(S),"%.0f%c",ASize,Ext[I]);

   return S;
}
238 /*}}}*/
// TimeToStr - Convert the time into a string /*{{{*/
// ---------------------------------------------------------------------
/* Converts a number of seconds to a d/h/m/s format. A larger unit is only
   introduced once the value exceeds it, so exactly 60 seconds is shown
   as "60s" and exactly one hour as "60m0s". */
string TimeToStr(unsigned long Sec)
{
   char S[300];

   const unsigned long Secs = Sec % 60;
   const unsigned long Mins = Sec/60;
   const unsigned long Hours = Sec/60/60;
   const unsigned long Days = Sec/60/60/24;

   // The value is unsigned, so it is printed with %lu throughout
   if (Sec > 60*60*24)
      snprintf(S,sizeof(S),"%lud %luh%lum%lus",Days,Hours % 24,Mins % 60,Secs);
   else if (Sec > 60*60)
      snprintf(S,sizeof(S),"%luh%lum%lus",Hours,Mins % 60,Secs);
   else if (Sec > 60)
      snprintf(S,sizeof(S),"%lum%lus",Mins,Secs);
   else
      snprintf(S,sizeof(S),"%lus",Sec);

   return S;
}
272 /*}}}*/
// SubstVar - Substitute a string for another string /*{{{*/
// ---------------------------------------------------------------------
/* This replaces all occurrences of Subst with Contents in Str and returns
   the result. The search resumes after each replaced occurrence, so
   inserted text is never scanned again. Str is returned unchanged when
   Subst is empty or does not occur. */
string SubstVar(string Str,string Subst,string Contents)
{
   // An empty pattern matches everywhere and would never advance
   if (Subst.empty() == true)
      return Str;

   string::size_type Pos = 0;
   string::size_type OldPos = 0;
   string Temp;

   while (OldPos < Str.length() &&
          (Pos = Str.find(Subst,OldPos)) != string::npos)
   {
      // Copy the text between the previous match and this one
      Temp += string(Str,OldPos,Pos - OldPos) + Contents;
      OldPos = Pos + Subst.length();
   }

   // Nothing was found
   if (OldPos == 0)
      return Str;

   return Temp + string(Str,OldPos);
}
294 /*}}}*/
ad00ae81
AL
295// URItoFileName - Convert the uri into a unique file name /*{{{*/
296// ---------------------------------------------------------------------
297/* This converts a URI into a safe filename. It quotes all unsafe characters
298 and converts / to _ and removes the scheme identifier. The resulting
299 file name should be unique and never occur again for a different file */
300string URItoFileName(string URI)
301{
302 string::const_iterator I = URI.begin() + URI.find(':') + 1;
303 for (; I < URI.end() && *I == '/'; I++);
304
305 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
306 URI = QuoteString(string(I,URI.end() - I),"\\|{}[]<>\"^~_=!@#$%^&*");
307 string::iterator J = URI.begin();
308 for (; J != URI.end(); J++)
309 if (*J == '/')
310 *J = '_';
311 return URI;
312}
313 /*}}}*/
6c139d6e
AL
// Base64Encode - Base64 Encoding routine for short strings /*{{{*/
// ---------------------------------------------------------------------
/* This routine performs a base64 transformation on a string and returns
   the encoded text including the trailing = padding. It was ripped from
   wget and then patched and bug fixed.

   This spec can be found in rfc2045 */
string Base64Encode(string S)
{
   // Conversion table.
   static const char tbl[64] = {'A','B','C','D','E','F','G','H',
                                'I','J','K','L','M','N','O','P',
                                'Q','R','S','T','U','V','W','X',
                                'Y','Z','a','b','c','d','e','f',
                                'g','h','i','j','k','l','m','n',
                                'o','p','q','r','s','t','u','v',
                                'w','x','y','z','0','1','2','3',
                                '4','5','6','7','8','9','+','/'};

   // Pre-allocate some space
   const string::size_type Len = S.length();
   string Final;
   Final.reserve((4*Len + 2)/3 + 2);

   /* Transform the 3x8 bits to 4x6 bits, as required by
      base64. */
   for (string::size_type I = 0; I < Len; I += 3)
   {
      /* The bytes must be unsigned, otherwise values >= 0x80 shift into
         negative table indexes. Missing bytes of the last group are 0. */
      unsigned char Bits[3] = {0,0,0};
      Bits[0] = (unsigned char)S[I];
      if (I + 1 < Len)
         Bits[1] = (unsigned char)S[I + 1];
      if (I + 2 < Len)
         Bits[2] = (unsigned char)S[I + 2];

      Final += tbl[Bits[0] >> 2];
      Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];

      // Only one input byte was left
      if (I + 1 >= Len)
         break;

      Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];

      // Only two input bytes were left
      if (I + 2 >= Len)
         break;

      Final += tbl[Bits[2] & 0x3f];
   }

   /* Apply the padding elements, this tells how many bytes the remote
      end should discard */
   if (Len % 3 == 2)
      Final += '=';
   if (Len % 3 == 1)
      Final += "==";

   return Final;
}
370 /*}}}*/
// stringcmp - Arbitrary string compare /*{{{*/
// ---------------------------------------------------------------------
/* This compares the two character ranges [A,AEnd) and [B,BEnd), neither
   of which needs to be null terminated. The result is 0 when the ranges
   are identical. At the first differing character it is -1 if the
   character in A is the smaller one and 1 otherwise. When one range is a
   prefix of the other the result is 1 if A is the one that ran out and
   -1 if B is. */
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // Step over the part both ranges have in common
   while (A != AEnd && B != BEnd && *A == *B)
   {
      A++;
      B++;
   }

   const bool DoneA = (A == AEnd);
   const bool DoneB = (B == BEnd);

   if (DoneA == true)
      return (DoneB == true) ? 0 : 1;
   if (DoneB == true)
      return -1;

   // Both ranges still hold a character and the two differ
   return (*A < *B) ? -1 : 1;
}
391 /*}}}*/
// stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
// ---------------------------------------------------------------------
/* This compares the two character ranges [A,AEnd) and [B,BEnd) ignoring
   case (both sides are passed through toupper). Neither range needs to
   be null terminated. The result is 0 when the ranges match. At the first
   mismatch it is -1 if the character in A is the smaller one and 1
   otherwise. When one range is a prefix of the other the result is 1 if
   A is the one that ran out and -1 if B is. */
int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   /* toupper is only defined for unsigned char values and EOF, so the
      characters are converted before they are handed over */
   for (; A != AEnd && B != BEnd; A++, B++)
      if (toupper((unsigned char)*A) != toupper((unsigned char)*B))
         break;

   if (A == AEnd && B == BEnd)
      return 0;
   if (A == AEnd)
      return 1;
   if (B == BEnd)
      return -1;
   if (toupper((unsigned char)*A) < toupper((unsigned char)*B))
      return -1;
   return 1;
}
411 /*}}}*/
3b5421b4
AL
412// LookupTag - Lookup the value of a tag in a taged string /*{{{*/
413// ---------------------------------------------------------------------
414/* The format is like those used in package files and the method
415 communication system */
416string LookupTag(string Message,const char *Tag,const char *Default)
417{
418 // Look for a matching tag.
419 int Length = strlen(Tag);
420 for (string::iterator I = Message.begin(); I + Length < Message.end(); I++)
421 {
422 // Found the tag
423 if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
424 {
425 // Find the end of line and strip the leading/trailing spaces
426 string::iterator J;
427 I += Length + 1;
428 for (; isspace(*I) != 0 && I < Message.end(); I++);
429 for (J = I; *J != '\n' && J < Message.end(); J++);
430 for (; J > I && isspace(J[-1]) != 0; J--);
431
432 return string(I,J-I);
433 }
434
435 for (; *I != '\n' && I < Message.end(); I++);
436 }
437
438 // Failed to find a match
439 if (Default == 0)
440 return string();
441 return Default;
442}
443 /*}}}*/
// StringToBool - Converts a string into a boolean /*{{{*/
// ---------------------------------------------------------------------
/* This inspects the string to see if it is true or if it is false and
   then returns the result: 1 for true, 0 for false and Default if the
   text is neither. Accepted are a number (any base strtol understands)
   whose value is 0 or 1, and, ignoring case, no/false/without/disable
   and yes/true/with/enable. The number must make up the whole string, so
   text that merely starts with a digit, like "0ad", is not a boolean. */
int StringToBool(string Text,int Default = -1)
{
   const char *Start = Text.c_str();

   /* Kept as a long so that an oversized number cannot wrap around into
      the 0..1 range */
   char *End;
   const long Res = strtol(Start,&End,0);
   if (End != Start && *End == 0 && Res >= 0 && Res <= 1)
      return (int)Res;

   // Check for negatives
   if (strcasecmp(Start,"no") == 0 ||
       strcasecmp(Start,"false") == 0 ||
       strcasecmp(Start,"without") == 0 ||
       strcasecmp(Start,"disable") == 0)
      return 0;

   // Check for positives
   if (strcasecmp(Start,"yes") == 0 ||
       strcasecmp(Start,"true") == 0 ||
       strcasecmp(Start,"with") == 0 ||
       strcasecmp(Start,"enable") == 0)
      return 1;

   return Default;
}
471 /*}}}*/
0a8a80e5
AL
// TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
// ---------------------------------------------------------------------
/* This converts a time_t into a string time representation that is
   year 2000 compliant and timezone neutral, for example
   "Sun, 06 Nov 1994 08:49:37 GMT". The day and month names are fixed
   English abbreviations. An empty string is returned if the C library
   cannot break the time down. */
string TimeRFC1123(time_t Date)
{
   static const char * const Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
   static const char * const Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
                                        "Aug","Sep","Oct","Nov","Dec"};

   // gmtime hands back a null pointer for times it cannot represent
   const struct tm *Conv = gmtime(&Date);
   if (Conv == 0)
      return string();

   char Buf[300];
   snprintf(Buf,sizeof(Buf),"%s, %02i %s %i %02i:%02i:%02i GMT",
            Day[Conv->tm_wday],Conv->tm_mday,Month[Conv->tm_mon],
            Conv->tm_year+1900,Conv->tm_hour,Conv->tm_min,Conv->tm_sec);
   return Buf;
}
490 /*}}}*/
491// ReadMessages - Read messages from the FD /*{{{*/
492// ---------------------------------------------------------------------
493/* This pulls full messages from the input FD into the message buffer.
494 It assumes that messages will not pause during transit so no
495 fancy buffering is used. */
496bool ReadMessages(int Fd, vector<string> &List)
497{
498 char Buffer[4000];
499 char *End = Buffer;
500
501 while (1)
502 {
503 int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
504
505 // Process is dead, this is kind of bad..
506 if (Res == 0)
507 return false;
508
509 // No data
510 if (Res <= 0)
511 return true;
512
513 End += Res;
514
515 // Look for the end of the message
c88edf1d 516 for (char *I = Buffer; I + 1 < End; I++)
0a8a80e5
AL
517 {
518 if (I[0] != '\n' || I[1] != '\n')
519 continue;
520
521 // Pull the message out
522 string Message(Buffer,0,I-Buffer);
523
524 // Fix up the buffer
525 for (; I < End && *I == '\n'; I++);
526 End -= I-Buffer;
527 memmove(Buffer,I,End-Buffer);
528 I = Buffer;
529
530 List.push_back(Message);
531 }
532 if (End == Buffer)
533 return true;
534
535 if (WaitFd(Fd) == false)
536 return false;
537 }
538}
539 /*}}}*/
24231681
AL
// MonthConv - Converts a month string into a number /*{{{*/
// ---------------------------------------------------------------------
/* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
   Made it a bit more robust with a few touppers though.

   Month is a null terminated English month name or abbreviation; only
   its first three characters are looked at, ignoring case. The result is
   the month number with January being 0. Anything that is not recognised
   is treated as January. */
static int MonthConv(char *Month)
{
   /* Fetch the characters one after another and stop at the terminator so
      that short input is never read beyond its end. toupper needs
      unsigned char values. */
   const int First = toupper((unsigned char)Month[0]);
   const int Second = (First != 0) ? toupper((unsigned char)Month[1]) : 0;
   const int Third = (Second != 0) ? toupper((unsigned char)Month[2]) : 0;

   switch (First)
   {
      // Apr or Aug
      case 'A':
      return Second == 'P'?3:7;
      case 'D':
      return 11;
      case 'F':
      return 1;
      // Jan, Jun or Jul
      case 'J':
      if (Second == 'A')
         return 0;
      return Third == 'N'?5:6;
      // Mar or May
      case 'M':
      return Third == 'R'?2:4;
      case 'N':
      return 10;
      case 'O':
      return 9;
      case 'S':
      return 8;

      // Pretend it is January..
      default:
      return 0;
   }
}
572 /*}}}*/
573// StrToTime - Converts a string into a time_t /*{{{*/
574// ---------------------------------------------------------------------
575/* This handles all 3 populare time formats including RFC 1123, RFC 1036
576 and the C library asctime format. It requires the GNU library function
577 'timegm' to convert a struct tm in UTC to a time_t. For some bizzar
578 reason the C library does not provide any such function :<*/
579bool StrToTime(string Val,time_t &Result)
580{
581 struct tm Tm;
582 char Month[10];
583 const char *I = Val.c_str();
584
585 // Skip the day of the week
586 for (;*I != 0 && *I != ' '; I++);
587
588 // Handle RFC 1123 time
589 if (sscanf(I," %d %3s %d %d:%d:%d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
590 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
591 {
592 // Handle RFC 1036 time
593 if (sscanf(I," %d-%3s-%d %d:%d:%d GMT",&Tm.tm_mday,Month,
594 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
595 Tm.tm_year += 1900;
596 else
597 {
598 // asctime format
599 if (sscanf(I," %3s %d %d:%d:%d %d",Month,&Tm.tm_mday,
600 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
601 return false;
602 }
603 }
604
605 Tm.tm_isdst = 0;
606 Tm.tm_mon = MonthConv(Month);
607 Tm.tm_year -= 1900;
608
609 // Convert to local time and then to GMT
610 Result = timegm(&Tm);
611 return true;
612}
613 /*}}}*/
93bf083d 614
be4401bf 615// URI::CopyFrom - Copy from an object /*{{{*/
93bf083d
AL
616// ---------------------------------------------------------------------
617/* This parses the URI into all of its components */
be4401bf 618void URI::CopyFrom(string U)
93bf083d
AL
619{
620 string::const_iterator I = U.begin();
621
622 // Locate the first colon, this seperates the scheme
623 for (; I < U.end() && *I != ':' ; I++);
624 string::const_iterator FirstColon = I;
625
bfd22fc0
AL
626 /* Determine if this is a host type URI with a leading double //
627 and then search for the first single / */
93bf083d
AL
628 string::const_iterator SingleSlash = I;
629 if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
bfd22fc0
AL
630 SingleSlash += 3;
631 for (; SingleSlash < U.end() && *SingleSlash != '/'; SingleSlash++);
632 if (SingleSlash > U.end())
633 SingleSlash = U.end();
93bf083d
AL
634
635 // We can now write the access and path specifiers
636 Access = string(U,0,FirstColon - U.begin());
637 if (SingleSlash != U.end())
92e889c8
AL
638 Path = string(U,SingleSlash - U.begin());
639 if (Path.empty() == true)
640 Path = "/";
641
93bf083d
AL
642 // Now we attempt to locate a user:pass@host fragment
643 FirstColon += 3;
644 if (FirstColon >= U.end())
645 return;
646
647 if (FirstColon > SingleSlash)
648 FirstColon = SingleSlash;
649
650 // Search for the @
651 I = FirstColon;
652 for (; I < SingleSlash && *I != '@'; I++);
653 string::const_iterator At = I;
654
655 // Colon in the @ section
656 I = FirstColon + 1;
657 for (; I < At && *I != ':'; I++);
658 string::const_iterator SecondColon = I;
659
660 // Now write the host and user/pass
661 if (At == SingleSlash)
662 {
663 if (FirstColon < SingleSlash)
664 Host = string(U,FirstColon - U.begin(),SingleSlash - FirstColon);
665 }
666 else
667 {
668 Host = string(U,At - U.begin() + 1,SingleSlash - At - 1);
669 User = string(U,FirstColon - U.begin(),SecondColon - FirstColon);
670 if (SecondColon < At)
671 Password = string(U,SecondColon - U.begin() + 1,At - SecondColon - 1);
672 }
673
674 // Now we parse off a pot number from the hostname
675 Port = 0;
676 string::size_type Pos = Host.rfind(':');
677 if (Pos == string::npos)
678 return;
679
680 Port = atoi(string(Host,Pos+1).c_str());
681 Host = string(Host,0,Pos);
682}
683 /*}}}*/
684// URI::operator string - Convert the URI to a string /*{{{*/
685// ---------------------------------------------------------------------
686/* */
687URI::operator string()
688{
689 string Res = Access + ':';
690 if (Host.empty() == false)
691 {
492f957a 692 Res += "//";
93bf083d
AL
693 if (User.empty() == false)
694 {
695 Res += "//" + User;
696 if (Password.empty() == false)
697 Res += ":" + Password;
698 Res += "@";
699 }
700 Res += Host;
492f957a
AL
701 if (Port != 0)
702 {
703 char S[30];
704 sprintf(S,":%u",Port);
705 Res += S;
706 }
93bf083d
AL
707 }
708
709 if (Path.empty() == false)
492f957a
AL
710 {
711 if (Path[0] != '/')
712 Res += "/" + Path;
713 else
714 Res += Path;
715 }
93bf083d
AL
716
717 return Res;
718}
719 /*}}}*/