New http method
[ntk/apt.git] / apt-pkg / contrib / strutl.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: strutl.cc,v 1.10 1998/11/01 05:27:37 jgg Exp $
4 /* ######################################################################
5
6 String Util - Some useful string functions.
7
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
11
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
14
15 ##################################################################### */
16 /*}}}*/
17 // Includes /*{{{*/
18 #include <strutl.h>
19 #include <apt-pkg/fileutl.h>
20
21 #include <ctype.h>
22 #include <string.h>
23 #include <stdio.h>
24 #include <time.h>
25 /*}}}*/
26
// strstrip - Remove white space from the front and back of a string	/*{{{*/
// ---------------------------------------------------------------------
/* This is handy to use when parsing a file. Leading blanks and tabs are
   skipped by advancing the returned pointer; trailing blanks, tabs and
   the \n / \r left over from fgets and company are cut off by writing a
   terminator into the caller's buffer. The returned pointer refers to
   storage inside String, nothing is allocated. */
char *_strstrip(char *String)
{
   // Only blanks and tabs are stripped from the front
   while (*String == ' ' || *String == '\t')
      String++;

   if (*String == 0)
      return String;

   /* End is kept one past the last retained character so that it never
      has to step in front of String, even when everything that is left
      is line ending junk. */
   char *End = String + strlen(String);
   while (End != String && (End[-1] == ' ' || End[-1] == '\t' ||
                            End[-1] == '\n' || End[-1] == '\r'))
      End--;

   *End = 0;
   return String;
}
45 /*}}}*/
// strtabexpand - Expand tabs to the next 8 column tab stop		/*{{{*/
// ---------------------------------------------------------------------
/* Every tab in String is replaced, in place, by the number of blanks
   needed to reach the next multiple of 8 columns, counting columns from
   the start of String. Len is the total capacity of the buffer holding
   String, terminator included. If an expansion would not fit, the string
   is cut at that tab and returned. A buffer with no terminator inside
   the first Len bytes is returned untouched. */
char *_strtabexpand(char *String,size_t Len)
{
   // The text must already be terminated inside the buffer
   const char *Nul = (const char *)memchr(String,0,Len);
   if (Nul == 0)
      return String;
   size_t Size = Nul - String;

   for (size_t Pos = 0; Pos < Size; Pos++)
   {
      if (String[Pos] != '\t')
         continue;

      // Blanks needed to reach the next tab stop, always 1..8
      const size_t Pad = 8 - (Pos % 8);

      // The tab goes away, Pad blanks arrive and the terminator stays
      if (Size + Pad > Len)
      {
         String[Pos] = 0;
         return String;
      }

      // Shift the tail (with its terminator) and fill the gap
      memmove(String + Pos + Pad,String + Pos + 1,Size - Pos);
      memset(String + Pos,' ',Pad);
      Size += Pad - 1;

      // Continue after the blanks just written
      Pos += Pad - 1;
   }
   return String;
}
80 /*}}}*/
// ParseQuoteWord - Parse a single word out of a string			/*{{{*/
// ---------------------------------------------------------------------
/* This grabs a single blank delimited word, converts any % escaped
   characters (%XX, two hex digits) to their proper values and advances
   the pointer past the word and the blanks that follow it. Double quotes
   are understood (blanks inside them do not end the word) and are
   stripped out as well. This is for URI/URL parsing.

   Returns false, leaving String and Res alone, if only blanks remain or
   if a quote is never closed. The decoded word is limited to 1023
   characters, anything beyond that is dropped. Decoding starts at the
   incoming String, so blanks in front of the word end up in Res. */
bool ParseQuoteWord(const char *&String,string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      C++;
   if (*C == 0)
      return false;

   // Jump to the end of the word, quoted sections are taken whole
   for (; *C != 0 && *C != ' '; C++)
   {
      if (*C != '"')
         continue;

      for (C++; *C != 0 && *C != '"'; C++);
      if (*C == 0)
         return false;
   }

   // Now de-quote characters, always leaving room for the terminator
   char Buffer[1024];
   char *Out = Buffer;
   char *const Limit = Buffer + sizeof(Buffer) - 1;
   for (const char *Start = String; Start != C && Out != Limit; )
   {
      if (*Start == '%' && Start + 2 < C)
      {
         const char Hex[3] = {Start[1],Start[2],0};
         *Out++ = (char)strtol(Hex,0,16);
         Start += 3;
         continue;
      }

      // Quote marks are dropped, everything else is copied verbatim
      if (*Start != '"')
         *Out++ = *Start;
      Start++;
   }
   *Out = 0;
   Res = Buffer;

   // Skip ending white space
   while (*C == ' ')
      C++;
   String = C;
   return true;
}
135 /*}}}*/
// ParseCWord - Parses a string like a C "" expression			/*{{{*/
// ---------------------------------------------------------------------
/* This expects a series of whitespace separated strings enclosed in ""'s.
   The contents of the ""'s are joined into a single string; each run of
   whitespace between them contributes one blank. No escape sequences are
   interpreted inside the quotes.

   Returns false, leaving Res alone, if the input is empty or all blanks,
   is 1024 characters or longer, has an unterminated quote, or has
   anything other than whitespace outside of the quotes. */
bool ParseCWord(const char *String,string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      C++;
   if (*C == 0)
      return false;

   // The output is never longer than the input, so this bounds Buffer
   char Buffer[1024];
   if (strlen(String) >= sizeof(Buffer))
      return false;

   char *Buf = Buffer;
   for (; *C != 0; C++)
   {
      if (*C == '"')
      {
         // Copy up to the closing quote
         for (C++; *C != 0 && *C != '"'; C++)
            *Buf++ = *C;

         if (*C == 0)
            return false;

         continue;
      }

      /* Only whitespace may appear between quoted sections. The casts
         keep bytes above 0x7F inside the domain of isspace. */
      if (isspace((unsigned char)*C) == 0)
         return false;

      // Collapse a run of whitespace into a single blank
      if (C != String && isspace((unsigned char)C[-1]) != 0)
         continue;
      *Buf++ = ' ';
   }
   *Buf = 0;
   Res = Buffer;
   return true;
}
176 /*}}}*/
// QuoteString - Convert a string into quoted form			/*{{{*/
// ---------------------------------------------------------------------
/* Returns a copy of Str in which every character that is listed in Bad,
   is not printable, or lies outside the range 0x21..0x7E is replaced by
   a % followed by its two digit lower case hex value. Each character is
   handled as an unsigned byte so values above 0x7F come out as %80..%ff
   rather than sign extended. */
string QuoteString(string Str,const char *Bad)
{
   string Res;
   for (string::size_type I = 0; I != Str.length(); I++)
   {
      const unsigned char Ch = Str[I];
      const bool Unsafe = strchr(Bad,Ch) != 0 || isprint(Ch) == 0 ||
                          Ch <= 0x20 || Ch >= 0x7F;
      if (Unsafe == false)
      {
         Res += (char)Ch;
         continue;
      }

      char Buf[10];
      snprintf(Buf,sizeof(Buf),"%%%02x",(unsigned int)Ch);
      Res += Buf;
   }
   return Res;
}
197 /*}}}*/
// SizeToStr - Convert a size into a human readable string		/*{{{*/
// ---------------------------------------------------------------------
/* The magnitude of Size is scaled by powers of 1000 and printed with a
   one letter unit. A max of 4 digits are shown before conversion to the
   next highest unit, and values below 100 in any unit other than bytes
   get one decimal place. The sign of Size is not shown. The string is at
   most 5 chars long unless the size is >= 10000 YottaBytes, in which
   case the full value is printed in YottaBytes. */
string SizeToStr(double Size)
{
   char S[300];
   double ASize = (Size >= 0) ? Size : -1*Size;

   /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
      ExaBytes, ZettaBytes, YottaBytes */
   static const char Ext[] = {'b','k','M','G','T','P','E','Z','Y'};
   const unsigned int Last = sizeof(Ext) - 1;

   for (unsigned int I = 0; ; I++)
   {
      if (ASize < 100 && I != 0)
      {
         snprintf(S,sizeof(S),"%.1f%c",ASize,Ext[I]);
         break;
      }

      // The largest unit takes whatever is left so S is always written
      if (ASize < 10000 || I == Last)
      {
         snprintf(S,sizeof(S),"%.0f%c",ASize,Ext[I]);
         break;
      }
      ASize /= 1000.0;
   }

   return S;
}
235 /*}}}*/
// TimeToStr - Convert the time into a string				/*{{{*/
// ---------------------------------------------------------------------
/* Converts a number of seconds to a d/h/m/s format. A larger unit is
   only shown once Sec is strictly greater than one of that unit, so
   exactly 60 seconds is still printed as "60s". */
string TimeToStr(unsigned long Sec)
{
   char S[300];

   const unsigned long Seconds = Sec % 60;
   const unsigned long Minutes = (Sec/60) % 60;

   // The arguments are unsigned long, so the conversions are %lu
   if (Sec > 60*60*24)
      snprintf(S,sizeof(S),"%lud %luh%lum%lus",Sec/60/60/24,
               (Sec/60/60) % 24,Minutes,Seconds);
   else if (Sec > 60*60)
      snprintf(S,sizeof(S),"%luh%lum%lus",Sec/60/60,Minutes,Seconds);
   else if (Sec > 60)
      snprintf(S,sizeof(S),"%lum%lus",Sec/60,Seconds);
   else
      snprintf(S,sizeof(S),"%lus",Sec);

   return S;
}
269 /*}}}*/
// SubstVar - Substitute a string for another string			/*{{{*/
// ---------------------------------------------------------------------
/* This replaces all occurrences of Subst with Contents in Str and returns
   the result. The scan resumes after each replaced occurrence, so text
   inserted from Contents is never searched again. If Subst is empty or
   does not occur, Str is returned as is. */
string SubstVar(string Str,string Subst,string Contents)
{
   // An empty pattern matches everywhere and would never let the scan advance
   if (Subst.empty() == true)
      return Str;

   string::size_type Pos = 0;
   string::size_type OldPos = 0;
   string Temp;

   while (OldPos < Str.length() &&
          (Pos = Str.find(Subst,OldPos)) != string::npos)
   {
      // Copy the text between the previous match and this one
      Temp += string(Str,OldPos,Pos - OldPos) + Contents;
      OldPos = Pos + Subst.length();
   }

   // Nothing was replaced
   if (OldPos == 0)
      return Str;

   return Temp + string(Str,OldPos);
}
291 /*}}}*/
292 // URItoFileName - Convert the uri into a unique file name /*{{{*/
293 // ---------------------------------------------------------------------
294 /* This converts a URI into a safe filename. It quotes all unsafe characters
295 and converts / to _ and removes the scheme identifier. The resulting
296 file name should be unique and never occur again for a different file */
297 string URItoFileName(string URI)
298 {
299 string::const_iterator I = URI.begin() + URI.find(':') + 1;
300 for (; I < URI.end() && *I == '/'; I++);
301
302 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
303 URI = QuoteString(string(I,URI.end() - I),"\\|{}[]<>\"^~_=!@#$%^&*");
304 string::iterator J = URI.begin();
305 for (; J != URI.end(); J++)
306 if (*J == '/')
307 *J = '_';
308 return URI;
309 }
310 /*}}}*/
// Base64Encode - Base64 Encoding routine for short strings		/*{{{*/
// ---------------------------------------------------------------------
/* This routine performs a base64 transformation on a string. It was ripped
   from wget and then patched and bug fixed.

   Every group of three input bytes becomes four output characters; a
   final group of one or two bytes is padded with "==" or "=". Bytes are
   handled as unsigned values so input above 0x7F indexes the table
   correctly. No line breaks are inserted.

   This spec can be found in rfc2045 */
string Base64Encode(string S)
{
   // Conversion table.
   static const char tbl[64] = {'A','B','C','D','E','F','G','H',
                                'I','J','K','L','M','N','O','P',
                                'Q','R','S','T','U','V','W','X',
                                'Y','Z','a','b','c','d','e','f',
                                'g','h','i','j','k','l','m','n',
                                'o','p','q','r','s','t','u','v',
                                'w','x','y','z','0','1','2','3',
                                '4','5','6','7','8','9','+','/'};

   // Pre-allocate some space
   string Final;
   Final.reserve((4*S.length() + 2)/3 + 2);

   /* Transform the 3x8 bits to 4x6 bits, as required by
      base64. */
   const string::size_type Len = S.length();
   for (string::size_type I = 0; I < Len; I += 3)
   {
      // Number of input bytes available for this group
      const string::size_type Left = Len - I;

      unsigned char Bits[3] = {0,0,0};
      Bits[0] = S[I];
      if (Left > 1)
         Bits[1] = S[I + 1];
      if (Left > 2)
         Bits[2] = S[I + 2];

      Final += tbl[Bits[0] >> 2];
      Final += tbl[((Bits[0] & 3) << 4) | (Bits[1] >> 4)];

      /* A short final group gets padding, this tells how many bytes the
         remote end should discard */
      if (Left == 1)
      {
         Final += "==";
         break;
      }

      Final += tbl[((Bits[1] & 0xf) << 2) | (Bits[2] >> 6)];

      if (Left == 2)
      {
         Final += '=';
         break;
      }

      Final += tbl[Bits[2] & 0x3f];
   }

   return Final;
}
367 /*}}}*/
// stringcmp - Arbitrary string compare					/*{{{*/
// ---------------------------------------------------------------------
/* This compares two character ranges [A,AEnd) and [B,BEnd) that need not
   be null terminated. The result is 0 when they are identical. At the
   first differing character it is -1 if the one in A is smaller and 1
   otherwise. When one range is a proper prefix of the other, the shorter
   range is the one reported as greater: 1 if A ran out first, -1 if B
   did. */
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // Walk over the part both ranges have in common
   while (A != AEnd && B != BEnd && *A == *B)
   {
      A++;
      B++;
   }

   const bool DoneA = (A == AEnd);
   const bool DoneB = (B == BEnd);

   if (DoneA == true)
      return (DoneB == true) ? 0 : 1;
   if (DoneB == true)
      return -1;

   return (*A < *B) ? -1 : 1;
}
388 /*}}}*/
// stringcasecmp - Arbitrary case insensitive string compare		/*{{{*/
// ---------------------------------------------------------------------
/* This compares the ranges [A,AEnd) and [B,BEnd) after folding each
   character with toupper. The result is 0 when they match. At the first
   difference it is -1 if the folded character of A is smaller and 1
   otherwise. When one range is a proper prefix of the other, the shorter
   range is reported as greater: 1 if A ran out first, -1 if B did. */
int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // Walk over the part that matches when case is ignored
   while (A != AEnd && B != BEnd && toupper(*A) == toupper(*B))
   {
      A++;
      B++;
   }

   const bool DoneA = (A == AEnd);
   const bool DoneB = (B == BEnd);

   if (DoneA == true)
      return (DoneB == true) ? 0 : 1;
   if (DoneB == true)
      return -1;

   return (toupper(*A) < toupper(*B)) ? -1 : 1;
}
408 /*}}}*/
409 // LookupTag - Lookup the value of a tag in a taged string /*{{{*/
410 // ---------------------------------------------------------------------
411 /* The format is like those used in package files and the method
412 communication system */
413 string LookupTag(string Message,const char *Tag,const char *Default)
414 {
415 // Look for a matching tag.
416 int Length = strlen(Tag);
417 for (string::iterator I = Message.begin(); I + Length < Message.end(); I++)
418 {
419 // Found the tag
420 if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
421 {
422 // Find the end of line and strip the leading/trailing spaces
423 string::iterator J;
424 I += Length + 1;
425 for (; isspace(*I) != 0 && I < Message.end(); I++);
426 for (J = I; *J != '\n' && J < Message.end(); J++);
427 for (; J > I && isspace(J[-1]) != 0; J--);
428
429 return string(I,J-I);
430 }
431
432 for (; *I != '\n' && I < Message.end(); I++);
433 }
434
435 // Failed to find a match
436 if (Default == 0)
437 return string();
438 return Default;
439 }
440 /*}}}*/
// StringToBool - Converts a string into a boolean			/*{{{*/
// ---------------------------------------------------------------------
/* This inspects the string to see if it is true or if it is false and
   then returns the result: 1 for true, 0 for false and Default when the
   text is not recognised. Several variants on true/false are checked,
   without regard to case. A number is accepted only if the whole string
   is numeric and its value is 0 or 1, so text that merely starts with a
   digit falls through to Default. */
int StringToBool(string Text,int Default = -1)
{
   const char *Str = Text.c_str();

   // Keep the full long so large values cannot wrap into 0 or 1
   char *End;
   long Res = strtol(Str,&End,0);
   if (End != Str && End == Str + Text.length() && Res >= 0 && Res <= 1)
      return (int)Res;

   // Check for negatives
   if (strcasecmp(Str,"no") == 0 ||
       strcasecmp(Str,"false") == 0 ||
       strcasecmp(Str,"without") == 0 ||
       strcasecmp(Str,"disable") == 0)
      return 0;

   // Check for positives
   if (strcasecmp(Str,"yes") == 0 ||
       strcasecmp(Str,"true") == 0 ||
       strcasecmp(Str,"with") == 0 ||
       strcasecmp(Str,"enable") == 0)
      return 1;

   return Default;
}
468 /*}}}*/
// TimeRFC1123 - Convert a time_t into RFC1123 format			/*{{{*/
// ---------------------------------------------------------------------
/* This converts a time_t into a string time representation that is
   year 2000 compliant and timezone neutral, in the form
   "Sun, 06 Nov 1994 08:49:37 GMT". The day and month names are fixed
   English abbreviations and do not depend on the locale. An empty string
   is returned if gmtime cannot convert Date. */
string TimeRFC1123(time_t Date)
{
   // gmtime hands back a null pointer when the date cannot be represented
   const struct tm *Broken = gmtime(&Date);
   if (Broken == 0)
      return string();
   const struct tm Conv = *Broken;

   static const char * const Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
   static const char * const Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
                                        "Aug","Sep","Oct","Nov","Dec"};

   char Buf[300];
   snprintf(Buf,sizeof(Buf),"%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
            Conv.tm_min,Conv.tm_sec);
   return Buf;
}
487 /*}}}*/
488 // ReadMessages - Read messages from the FD /*{{{*/
489 // ---------------------------------------------------------------------
490 /* This pulls full messages from the input FD into the message buffer.
491 It assumes that messages will not pause during transit so no
492 fancy buffering is used. */
493 bool ReadMessages(int Fd, vector<string> &List)
494 {
495 char Buffer[4000];
496 char *End = Buffer;
497
498 while (1)
499 {
500 int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
501
502 // Process is dead, this is kind of bad..
503 if (Res == 0)
504 return false;
505
506 // No data
507 if (Res <= 0)
508 return true;
509
510 End += Res;
511
512 // Look for the end of the message
513 for (char *I = Buffer; I + 1 < End; I++)
514 {
515 if (I[0] != '\n' || I[1] != '\n')
516 continue;
517
518 // Pull the message out
519 string Message(Buffer,0,I-Buffer);
520
521 // Fix up the buffer
522 for (; I < End && *I == '\n'; I++);
523 End -= I-Buffer;
524 memmove(Buffer,I,End-Buffer);
525 I = Buffer;
526
527 List.push_back(Message);
528 }
529 if (End == Buffer)
530 return true;
531
532 if (WaitFd(Fd) == false)
533 return false;
534 }
535 }
536 /*}}}*/
// MonthConv - Converts a month string into a number			/*{{{*/
// ---------------------------------------------------------------------
/* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
   Made it a bit more robust with a few touppers though.

   Returns 0 (January) to 11 (December). Only as many of the first three
   characters as are needed to tell the English month names apart are
   looked at, without regard to case, so misspelt names can still map to
   a month. A name with an unknown first letter is treated as January. */
static int MonthConv(char *Month)
{
   const int First = toupper(*Month);

   // Apr / Aug
   if (First == 'A')
      return (toupper(Month[1]) == 'P') ? 3 : 7;
   if (First == 'D')
      return 11;
   if (First == 'F')
      return 1;

   // Jan / Jun / Jul
   if (First == 'J')
   {
      if (toupper(Month[1]) == 'A')
         return 0;
      return (toupper(Month[2]) == 'N') ? 5 : 6;
   }

   // Mar / May
   if (First == 'M')
      return (toupper(Month[2]) == 'R') ? 2 : 4;
   if (First == 'N')
      return 10;
   if (First == 'O')
      return 9;
   if (First == 'S')
      return 8;

   // Pretend it is January..
   return 0;
}
569 /*}}}*/
570 // StrToTime - Converts a string into a time_t /*{{{*/
571 // ---------------------------------------------------------------------
572 /* This handles all 3 populare time formats including RFC 1123, RFC 1036
573 and the C library asctime format. It requires the GNU library function
574 'timegm' to convert a struct tm in UTC to a time_t. For some bizzar
575 reason the C library does not provide any such function :<*/
576 bool StrToTime(string Val,time_t &Result)
577 {
578 struct tm Tm;
579 char Month[10];
580 const char *I = Val.c_str();
581
582 // Skip the day of the week
583 for (;*I != 0 && *I != ' '; I++);
584
585 // Handle RFC 1123 time
586 if (sscanf(I," %d %3s %d %d:%d:%d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
587 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
588 {
589 // Handle RFC 1036 time
590 if (sscanf(I," %d-%3s-%d %d:%d:%d GMT",&Tm.tm_mday,Month,
591 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
592 Tm.tm_year += 1900;
593 else
594 {
595 // asctime format
596 if (sscanf(I," %3s %d %d:%d:%d %d",Month,&Tm.tm_mday,
597 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
598 return false;
599 }
600 }
601
602 Tm.tm_isdst = 0;
603 Tm.tm_mon = MonthConv(Month);
604 Tm.tm_year -= 1900;
605
606 // Convert to local time and then to GMT
607 Result = timegm(&Tm);
608 return true;
609 }
610 /*}}}*/
611
612 // URI::CopyFrom - Copy from an object /*{{{*/
613 // ---------------------------------------------------------------------
614 /* This parses the URI into all of its components */
615 void URI::CopyFrom(string U)
616 {
617 string::const_iterator I = U.begin();
618
619 // Locate the first colon, this seperates the scheme
620 for (; I < U.end() && *I != ':' ; I++);
621 string::const_iterator FirstColon = I;
622
623 // Determine if this is a host type URI with a leading double //
624 string::const_iterator SingleSlash = I;
625 if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
626 {
627 // Locate the single / that starts the path
628 for (; I < U.end(); I++)
629 {
630 if (*I == '/' && I[1] == '/')
631 I += 2;
632 else
633 if (*I == '/')
634 break;
635 }
636 if (I > U.end())
637 I = U.end();
638 SingleSlash = I;
639 }
640
641 // We can now write the access and path specifiers
642 Access = string(U,0,FirstColon - U.begin());
643 if (SingleSlash != U.end())
644 Path = string(U,SingleSlash - U.begin() + 1);
645
646 // Now we attempt to locate a user:pass@host fragment
647 FirstColon += 3;
648 if (FirstColon >= U.end())
649 return;
650
651 if (FirstColon > SingleSlash)
652 FirstColon = SingleSlash;
653
654 // Search for the @
655 I = FirstColon;
656 for (; I < SingleSlash && *I != '@'; I++);
657 string::const_iterator At = I;
658
659 // Colon in the @ section
660 I = FirstColon + 1;
661 for (; I < At && *I != ':'; I++);
662 string::const_iterator SecondColon = I;
663
664 // Now write the host and user/pass
665 if (At == SingleSlash)
666 {
667 if (FirstColon < SingleSlash)
668 Host = string(U,FirstColon - U.begin(),SingleSlash - FirstColon);
669 }
670 else
671 {
672 Host = string(U,At - U.begin() + 1,SingleSlash - At - 1);
673 User = string(U,FirstColon - U.begin(),SecondColon - FirstColon);
674 if (SecondColon < At)
675 Password = string(U,SecondColon - U.begin() + 1,At - SecondColon - 1);
676 }
677
678 // Now we parse off a pot number from the hostname
679 Port = 0;
680 string::size_type Pos = Host.rfind(':');
681 if (Pos == string::npos)
682 return;
683
684 Port = atoi(string(Host,Pos+1).c_str());
685 Host = string(Host,0,Pos);
686 }
687 /*}}}*/
688 // URI::operator string - Convert the URI to a string /*{{{*/
689 // ---------------------------------------------------------------------
690 /* */
691 URI::operator string()
692 {
693 string Res = Access + ':';
694 if (Host.empty() == false)
695 {
696 if (User.empty() == false)
697 {
698 Res += "//" + User;
699 if (Password.empty() == false)
700 Res += ":" + Password;
701 Res += "@";
702 }
703 Res += Host;
704 }
705
706 if (Path.empty() == false)
707 Res += "/" + Path;
708
709 return Res;
710 }
711 /*}}}*/