Fixed or handling bug
[ntk/apt.git] / apt-pkg / contrib / strutl.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: strutl.cc,v 1.34 2000/01/16 05:36:17 jgg Exp $
4 /* ######################################################################
5
6 String Util - Some useful string functions.
7
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
11
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
14
15 ##################################################################### */
16 /*}}}*/
17 // Includes /*{{{*/
18 #ifdef __GNUG__
19 #pragma implementation "apt-pkg/strutl.h"
20 #endif
21
22 #include <apt-pkg/strutl.h>
23 #include <apt-pkg/fileutl.h>
24
25 #include <ctype.h>
26 #include <string.h>
27 #include <stdio.h>
28 #include <unistd.h>
29 #include <errno.h>
30 /*}}}*/
31
32 // strstrip - Remove white space from the front and back of a string /*{{{*/
33 // ---------------------------------------------------------------------
34 /* This is handy to use when parsing a file. It also removes \n's left
35 over from fgets and company */
/* Trims the string in place and returns a pointer to the first kept
   character. Leading blanks and tabs are skipped; trailing blanks, tabs,
   carriage returns and newlines are cut off by writing a terminator. */
char *_strstrip(char *String)
{
   // Step over the leading blanks and tabs
   char *Begin = String;
   while (*Begin == ' ' || *Begin == '\t')
      ++Begin;

   // Nothing but blanks: hand back the (empty) tail
   if (*Begin == '\0')
      return Begin;

   // Shrink the kept length while the last character is trailing junk
   size_t Keep = strlen(Begin);
   while (Keep > 0)
   {
      const char Last = Begin[Keep - 1];
      if (Last != ' ' && Last != '\t' && Last != '\n' && Last != '\r')
         break;
      --Keep;
   }

   Begin[Keep] = '\0';
   return Begin;
}
50 /*}}}*/
51 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
52 // ---------------------------------------------------------------------
53 /* */
/* Expands every tab in String, in place, into the number of spaces needed
   to reach the next multiple-of-8 column (column 0 is the first character
   of String). Len is the total size of the buffer holding String,
   including room for the terminator. If expanding a tab would not fit in
   the buffer the string is cut off at that tab. Returns String. */
char *_strtabexpand(char *String,size_t Len)
{
   for (size_t Pos = 0; Pos < Len && String[Pos] != 0; Pos++)
   {
      if (String[Pos] != '\t')
         continue;

      // Spaces needed to reach the next tab stop (always 1..8)
      const size_t Pad = 8 - (Pos % 8);

      // Characters following the tab, not counting the terminator
      const size_t Tail = strlen(String + Pos + 1);

      // Everything (prefix, padding, tail and terminator) must fit in Len
      if (Pos + Pad + Tail + 1 > Len)
      {
         String[Pos] = 0;
         return String;
      }

      // Shift the tail (with its terminator) right and fill the gap
      memmove(String + Pos + Pad,String + Pos + 1,Tail + 1);
      memset(String + Pos,' ',Pad);

      // Resume after the padding; the loop increment adds the final step
      Pos += Pad - 1;
   }
   return String;
}
85 /*}}}*/
86 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
87 // ---------------------------------------------------------------------
88 /* This grabs a single word, converts any % escaped characters to their
89 proper values and advances the pointer. Double quotes are understood
90 and stripped out as well. This is for URI/URL parsing. It can also
91 understand [] brackets. */
/* Extracts the next word from String into Res and advances String past
   the word and any white space following it.

   A word ends at the first white space character that is not inside a
   "..." or [...] group. Double quotes are removed from the result, the
   brackets are kept. %XX sequences (two hex digits) are replaced by the
   byte they encode; a '%' not followed by two hex digits is copied as is.

   Returns false, leaving String and Res untouched, when there is no word
   left or when a quote or bracket is never closed. */
bool ParseQuoteWord(const char *&String,string &Res)
{
   // Skip leading blanks; the word proper starts here
   const char *Start = String;
   while (*Start == ' ')
      Start++;
   if (*Start == 0)
      return false;

   // Find the end of the word, treating quoted/bracketed runs as opaque
   const char *End = Start;
   for (;*End != 0 && isspace((unsigned char)*End) == 0; End++)
   {
      if (*End == '"')
      {
         for (End++; *End != 0 && *End != '"'; End++);
         if (*End == 0)
            return false;
      }
      if (*End == '[')
      {
         for (End++; *End != 0 && *End != ']'; End++);
         if (*End == 0)
            return false;
      }
   }

   // De-quote into a growable string so long words cannot overflow
   string Word;
   for (const char *I = Start; I != End;)
   {
      if (*I == '%' && End - I > 2 &&
          isxdigit((unsigned char)I[1]) != 0 &&
          isxdigit((unsigned char)I[2]) != 0)
      {
         char Tmp[3];
         Tmp[0] = I[1];
         Tmp[1] = I[2];
         Tmp[2] = 0;
         Word += (char)strtol(Tmp,0,16);
         I += 3;
         continue;
      }
      if (*I != '"')
         Word += *I;
      I++;
   }

   /* Going through c_str() cuts the result at a decoded %00, which is what
      the fixed C buffer used previously did as well */
   Res = Word.c_str();

   // Skip the white space that ended the word
   for (;*End != 0 && isspace((unsigned char)*End) != 0; End++);
   String = End;
   return true;
}
147 /*}}}*/
148 // ParseCWord - Parses a string like a C "" expression /*{{{*/
149 // ---------------------------------------------------------------------
150 /* This expects a series of space separated strings enclosed in ""'s.
151 It concatenates the ""'s into a single string. */
/* Joins a sequence of "..." groups into Res. The text inside each pair of
   quotes is copied verbatim; a run of white space between groups becomes
   one space. Anything else outside quotes, an unterminated quote, an
   empty/blank input or an input of 1024 characters or more makes the
   function return false with Res untouched. */
bool ParseCWord(const char *String,string &Res)
{
   // Leading blanks are dropped without producing output
   const char *Cur = String;
   while (*Cur == ' ')
      Cur++;
   if (*Cur == 0)
      return false;

   // Inputs that would not have fit the historic 1024 byte buffer fail
   if (strlen(String) >= 1024)
      return false;

   string Joined;
   while (*Cur != 0)
   {
      if (*Cur == '"')
      {
         // Copy everything up to the matching quote
         const char *Close = Cur + 1;
         while (*Close != 0 && *Close != '"')
            Close++;
         if (*Close == 0)
            return false;
         Joined.append(Cur + 1,Close);
         Cur = Close + 1;
         continue;
      }

      const bool IsSpace = isspace(*Cur) != 0;

      // Second and later white space characters of a run are swallowed
      if (Cur != String && IsSpace == true && isspace(Cur[-1]) != 0)
      {
         Cur++;
         continue;
      }

      // Only white space may appear between the quoted groups
      if (IsSpace == false)
         return false;

      Joined += ' ';
      Cur++;
   }

   Res = Joined;
   return true;
}
188 /*}}}*/
189 // QuoteString - Convert a string into quoted from /*{{{*/
190 // ---------------------------------------------------------------------
191 /* */
/* Returns a copy of Str in which every byte that is listed in Bad, is a
   space or control character (<= 0x20), or is DEL or above (>= 0x7F) has
   been replaced by %xx, where xx is the byte value as two lower case hex
   digits. All other bytes are copied unchanged. */
string QuoteString(string Str,const char *Bad)
{
   string Res;
   for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      /* Work on the unsigned byte value: a negative char would otherwise
         be sign extended and printed as eight hex digits */
      const unsigned char Ch = (unsigned char)*I;

      if (Ch <= 0x20 || Ch >= 0x7F || strchr(Bad,Ch) != 0)
      {
         char Buf[4];
         snprintf(Buf,sizeof(Buf),"%%%02x",(unsigned int)Ch);
         Res += Buf;
      }
      else
         Res += (char)Ch;
   }
   return Res;
}
209 /*}}}*/
210 // DeQuoteString - Convert a string from quoted from /*{{{*/
211 // ---------------------------------------------------------------------
212 /* This undoes QuoteString */
/* Reverses QuoteString: every %XX sequence, where XX are two hex digits,
   is replaced by the byte it encodes. A '%' that is not followed by two
   hex digits is copied through unchanged rather than decoded to a bogus
   byte. */
string DeQuoteString(string Str)
{
   string Res;
   const string::size_type Len = Str.length();
   for (string::size_type Pos = 0; Pos < Len; Pos++)
   {
      if (Str[Pos] == '%' && Len - Pos > 2 &&
          isxdigit((unsigned char)Str[Pos + 1]) != 0 &&
          isxdigit((unsigned char)Str[Pos + 2]) != 0)
      {
         char Tmp[3];
         Tmp[0] = Str[Pos + 1];
         Tmp[1] = Str[Pos + 2];
         Tmp[2] = 0;
         Res += (char)strtol(Tmp,0,16);

         // Skip the two digits; the loop increment skips the '%'
         Pos += 2;
         continue;
      }
      Res += Str[Pos];
   }
   return Res;
}
233
234 /*}}}*/
235 // SizeToStr - Convert a long into a human readable size /*{{{*/
236 // ---------------------------------------------------------------------
237 /* A max of 4 digits are shown before conversion to the next highest unit.
238 The max length of the string will be 5 chars unless the size is > 10
239 YottaBytes (E24) */
/* Formats the magnitude of Size (the sign is dropped) using decimal unit
   suffixes k, M, G, T, P, E, Z, Y. Plain bytes and values of 100 or more
   in a unit are printed without decimals, values below 100 in a suffixed
   unit get one decimal. Values too large even for 'Y' are printed in 'Y'
   with as many digits as needed. */
string SizeToStr(double Size)
{
   double ASize = (Size >= 0) ? Size : -1*Size;

   /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
      ExaBytes, ZettaBytes, YottaBytes */
   static const char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
   const int Last = sizeof(Ext)/sizeof(Ext[0]) - 1;

   char S[350];
   S[0] = 0;
   for (int I = 0; ; I++)
   {
      if (ASize < 100 && I != 0)
      {
         snprintf(S,sizeof(S),"%.1f%c",ASize,Ext[I]);
         break;
      }

      // The largest unit takes whatever is left so S is always filled in
      if (ASize < 10000 || I == Last)
      {
         snprintf(S,sizeof(S),"%.0f%c",ASize,Ext[I]);
         break;
      }
      ASize /= 1000.0;
   }

   return S;
}
272 /*}}}*/
273 // TimeToStr - Convert the time into a string /*{{{*/
274 // ---------------------------------------------------------------------
275 /* Converts a number of seconds to a hms format */
/* Formats a number of seconds as "Dd HhMmSs", "HhMmSs", "MmSs" or "Ss",
   using the largest unit that is strictly exceeded. Because the
   comparisons are strict, exactly 60 seconds prints as "60s", exactly one
   hour as "60m0s" and exactly one day as "24h0m0s". */
string TimeToStr(unsigned long Sec)
{
   char S[300];

   const unsigned long Minutes = Sec/60;
   const unsigned long Hours = Minutes/60;

   // The arguments are unsigned long, so they are printed with %lu
   if (Sec > 60*60*24)
      snprintf(S,sizeof(S),"%lud %luh%lum%lus",Hours/24,Hours % 24,
               Minutes % 60,Sec % 60);
   else if (Sec > 60*60)
      snprintf(S,sizeof(S),"%luh%lum%lus",Hours,Minutes % 60,Sec % 60);
   else if (Sec > 60)
      snprintf(S,sizeof(S),"%lum%lus",Minutes,Sec % 60);
   else
      snprintf(S,sizeof(S),"%lus",Sec);

   return S;
}
306 /*}}}*/
307 // SubstVar - Substitute a string for another string /*{{{*/
308 // ---------------------------------------------------------------------
309 /* This replaces all occurrences of Subst with Contents in Str. */
/* Returns Str with every non-overlapping occurrence of Subst, scanning
   left to right, replaced by Contents. Str is returned unchanged when
   Subst is empty or does not occur. */
string SubstVar(string Str,string Subst,string Contents)
{
   // An empty pattern matches everywhere and would never advance
   if (Subst.empty() == true)
      return Str;

   string::size_type Pos = 0;
   string::size_type OldPos = 0;
   string Temp;

   while (OldPos < Str.length() &&
          (Pos = Str.find(Subst,OldPos)) != string::npos)
   {
      // Copy the text between the previous match and this one
      Temp.append(Str,OldPos,Pos - OldPos);
      Temp += Contents;
      OldPos = Pos + Subst.length();
   }

   // No match at all
   if (OldPos == 0)
      return Str;

   // Whatever follows the last match
   Temp.append(Str,OldPos,string::npos);
   return Temp;
}
328 /*}}}*/
329 // URItoFileName - Convert the uri into a unique file name /*{{{*/
330 // ---------------------------------------------------------------------
331 /* This converts a URI into a safe filename. It quotes all unsafe characters
332 and converts / to _ and removes the scheme identifier. The resulting
333 file name should be unique and never occur again for a different file */
334 string URItoFileName(string URI)
335 {
336 // Nuke 'sensitive' items
337 ::URI U(URI);
338 U.User = string();
339 U.Password = string();
340 U.Access = "";
341
342 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
343 URI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
344 string::iterator J = URI.begin();
345 for (; J != URI.end(); J++)
346 if (*J == '/')
347 *J = '_';
348 return URI;
349 }
350 /*}}}*/
351 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
352 // ---------------------------------------------------------------------
353 /* This routine performs a base64 transformation on a string. It was ripped
354 from wget and then patched and bug fixed.
355
356 This spec can be found in rfc2045 */
/* Returns the base64 (RFC 2045 alphabet) encoding of S, padded with '='
   to a multiple of four characters. No line breaks are inserted. */
string Base64Encode(string S)
{
   // Conversion table.
   static const char tbl[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

   const string::size_type Len = S.length();

   // Four output characters for every (started) group of three bytes
   string Final;
   Final.reserve(4*((Len + 2)/3));

   /* Transform the 3x8 bits to 4x6 bits, as required by base64. The bytes
      are handled as unsigned so values >= 0x80 index the table correctly */
   for (string::size_type Pos = 0; Pos < Len; Pos += 3)
   {
      const string::size_type Left = Len - Pos;

      unsigned char Bits[3] = {0,0,0};
      Bits[0] = (unsigned char)S[Pos];
      if (Left > 1)
         Bits[1] = (unsigned char)S[Pos + 1];
      if (Left > 2)
         Bits[2] = (unsigned char)S[Pos + 2];

      Final += tbl[Bits[0] >> 2];
      Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];

      // One input byte left: two padding characters complete the group
      if (Left == 1)
      {
         Final += "==";
         break;
      }

      Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];

      // Two input bytes left: one padding character completes the group
      if (Left == 2)
      {
         Final += '=';
         break;
      }

      Final += tbl[Bits[2] & 0x3f];
   }

   return Final;
}
407 /*}}}*/
408 // stringcmp - Arbitary string compare /*{{{*/
409 // ---------------------------------------------------------------------
410 /* This safely compares two non-null terminated strings of arbitrary
411 length */
/* Compares the ranges [A,AEnd) and [B,BEnd) character by character.
   Returns 0 when both are equal. When one range is a proper prefix of the
   other, the SHORTER one compares greater (1 if A ran out, -1 if B ran
   out). Otherwise the first differing pair decides: -1 if *A < *B, else
   1. */
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // Walk over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   const bool ADone = (A == AEnd);
   const bool BDone = (B == BEnd);

   if (ADone == true)
      return (BDone == true) ? 0 : 1;
   if (BDone == true)
      return -1;

   return (*A < *B) ? -1 : 1;
}
428 /*}}}*/
429 // stringcasecmp - Arbitary case insensitive string compare /*{{{*/
430 // ---------------------------------------------------------------------
431 /* */
/* Same contract as stringcmp, but every character is passed through
   toupper() before it is compared. Returns 0 for equal ranges, 1 if A is
   a proper prefix of B, -1 if B is a proper prefix of A; otherwise -1 or
   1 according to the first differing (upper cased) pair. */
int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // Walk over the prefix that matches when case is ignored
   while (A != AEnd && B != BEnd && toupper(*A) == toupper(*B))
   {
      ++A;
      ++B;
   }

   const bool ADone = (A == AEnd);
   const bool BDone = (B == BEnd);

   if (ADone == true)
      return (BDone == true) ? 0 : 1;
   if (BDone == true)
      return -1;

   return (toupper(*A) < toupper(*B)) ? -1 : 1;
}
448 /*}}}*/
449 // LookupTag - Lookup the value of a tag in a taged string /*{{{*/
450 // ---------------------------------------------------------------------
451 /* The format is like those used in package files and the method
452 communication system */
453 string LookupTag(string Message,const char *Tag,const char *Default)
454 {
455 // Look for a matching tag.
456 int Length = strlen(Tag);
457 for (string::iterator I = Message.begin(); I + Length < Message.end(); I++)
458 {
459 // Found the tag
460 if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
461 {
462 // Find the end of line and strip the leading/trailing spaces
463 string::iterator J;
464 I += Length + 1;
465 for (; isspace(*I) != 0 && I < Message.end(); I++);
466 for (J = I; *J != '\n' && J < Message.end(); J++);
467 for (; J > I && isspace(J[-1]) != 0; J--);
468
469 return string(I,J-I);
470 }
471
472 for (; *I != '\n' && I < Message.end(); I++);
473 }
474
475 // Failed to find a match
476 if (Default == 0)
477 return string();
478 return Default;
479 }
480 /*}}}*/
481 // StringToBool - Converts a string into a boolean /*{{{*/
482 // ---------------------------------------------------------------------
483 /* This inspects the string to see if it is true or if it is false and
484 then returns the result. Several variants of true/false are checked. */
/* Maps Text to 0 or 1. A leading number (as parsed by strtol with base 0)
   that is 0 or 1 is returned directly. Otherwise the whole text is
   compared, ignoring case, against the known negative and positive words.
   Default is returned when nothing matches. */
int StringToBool(string Text,int Default = -1)
{
   const char *Raw = Text.c_str();

   // Numeric form
   char *Stop;
   int Num = strtol(Raw,&Stop,0);
   if (Stop != Raw && Num >= 0 && Num <= 1)
      return Num;

   // Words meaning false
   static const char * const Negative[] = {"no","false","without","off",
                                           "disable"};
   for (unsigned K = 0; K != sizeof(Negative)/sizeof(Negative[0]); K++)
      if (strcasecmp(Raw,Negative[K]) == 0)
         return 0;

   // Words meaning true
   static const char * const Positive[] = {"yes","true","with","on",
                                           "enable"};
   for (unsigned K = 0; K != sizeof(Positive)/sizeof(Positive[0]); K++)
      if (strcasecmp(Raw,Positive[K]) == 0)
         return 1;

   return Default;
}
510 /*}}}*/
511 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
512 // ---------------------------------------------------------------------
513 /* This converts a time_t into a string time representation that is
514 year 2000 compliant and timezone neutral */
/* Breaks Date down with gmtime() and prints it in the form
   "Sun, 06 Nov 1994 08:49:37 GMT" using fixed English day and month
   abbreviations. */
string TimeRFC1123(time_t Date)
{
   static const char * const DayNames[] = {"Sun","Mon","Tue","Wed","Thu",
                                           "Fri","Sat"};
   static const char * const MonthNames[] = {"Jan","Feb","Mar","Apr","May",
                                             "Jun","Jul","Aug","Sep","Oct",
                                             "Nov","Dec"};

   // Take a private copy; gmtime hands out a shared static buffer
   const struct tm Utc = *gmtime(&Date);

   char Text[300];
   sprintf(Text,"%s, %02i %s %i %02i:%02i:%02i GMT",
           DayNames[Utc.tm_wday],Utc.tm_mday,MonthNames[Utc.tm_mon],
           Utc.tm_year+1900,Utc.tm_hour,Utc.tm_min,Utc.tm_sec);
   return Text;
}
529 /*}}}*/
530 // ReadMessages - Read messages from the FD /*{{{*/
531 // ---------------------------------------------------------------------
532 /* This pulls full messages from the input FD into the message buffer.
533 It assumes that messages will not pause during transit so no
534 fancy buffering is used. */
535 bool ReadMessages(int Fd, vector<string> &List)
536 {
537 char Buffer[4000];
538 char *End = Buffer;
539
540 while (1)
541 {
542 int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
543 if (Res < 0 && errno == EINTR)
544 continue;
545
546 // Process is dead, this is kind of bad..
547 if (Res == 0)
548 return false;
549
550 // No data
551 if (Res <= 0)
552 return true;
553
554 End += Res;
555
556 // Look for the end of the message
557 for (char *I = Buffer; I + 1 < End; I++)
558 {
559 if (I[0] != '\n' || I[1] != '\n')
560 continue;
561
562 // Pull the message out
563 string Message(Buffer,0,I-Buffer);
564
565 // Fix up the buffer
566 for (; I < End && *I == '\n'; I++);
567 End -= I-Buffer;
568 memmove(Buffer,I,End-Buffer);
569 I = Buffer;
570
571 List.push_back(Message);
572 }
573 if (End == Buffer)
574 return true;
575
576 if (WaitFd(Fd) == false)
577 return false;
578 }
579 }
580 /*}}}*/
581 // MonthConv - Converts a month string into a number /*{{{*/
582 // ---------------------------------------------------------------------
583 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
584 Made it a bit more robust with a few touppers though. */
/* Maps an English month abbreviation to 0..11 by looking at as few
   characters as needed, ignoring case. The first character selects the
   candidates; the second or third character is consulted only where the
   first is ambiguous (A, J, M). Anything unrecognised is treated as
   January (0). */
static int MonthConv(char *Month)
{
   const int First = toupper(*Month);

   if (First == 'A')                                // Apr / Aug
      return (toupper(Month[1]) == 'P') ? 3 : 7;
   if (First == 'D')                                // Dec
      return 11;
   if (First == 'F')                                // Feb
      return 1;
   if (First == 'J')                                // Jan / Jun / Jul
   {
      if (toupper(Month[1]) == 'A')
         return 0;
      return (toupper(Month[2]) == 'N') ? 5 : 6;
   }
   if (First == 'M')                                // Mar / May
      return (toupper(Month[2]) == 'R') ? 2 : 4;
   if (First == 'N')                                // Nov
      return 10;
   if (First == 'O')                                // Oct
      return 9;
   if (First == 'S')                                // Sep
      return 8;

   // Pretend it is January..
   return 0;
}
613 /*}}}*/
614 // timegm - Internal timegm function if gnu is not available /*{{{*/
615 // ---------------------------------------------------------------------
616 /* Ripped this evil little function from wget - I prefer the use of
617 GNU timegm if possible as this technique will have interesting problems
618 with leap seconds, timezones and other.
619
620 Converts struct tm to time_t, assuming the data in tm is UTC rather
621 than local timezone (mktime assumes the latter).
622
623 Contributed by Roger Beeman <beeman@cisco.com>, with the help of
624 Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO. */
#ifndef __USE_MISC // glib sets this
/* Fallback used when the C library does not supply timegm(). It converts
   *t with mktime(), feeds the gmtime() breakdown of that result through
   mktime() once more, and uses the difference between the two values to
   correct the first one. Returns -1 if the first mktime() call fails. */
static time_t timegm(struct tm *t)
{
   const time_t AsLocal = mktime(t);
   if (AsLocal == -1)
      return -1;

   const time_t Shifted = mktime(gmtime(&AsLocal));

   // Both branches move AsLocal by the distance between the two values
   if (AsLocal <= Shifted)
      return AsLocal + (AsLocal - Shifted);
   return AsLocal - (Shifted - AsLocal);
}
#endif
637 /*}}}*/
638 // StrToTime - Converts a string into a time_t /*{{{*/
639 // ---------------------------------------------------------------------
640 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
641 and the C library asctime format. It requires the GNU library function
642 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
643 reason the C library does not provide any such function :< This also
644 handles the weird, but unambiguous FTP time format */
645 bool StrToTime(string Val,time_t &Result)
646 {
647 struct tm Tm;
648 char Month[10];
649 const char *I = Val.c_str();
650
651 // Skip the day of the week
652 for (;*I != 0 && *I != ' '; I++);
653
654 // Handle RFC 1123 time
655 Month[0] = 0;
656 if (sscanf(I," %d %3s %d %d:%d:%d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
657 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
658 {
659 // Handle RFC 1036 time
660 if (sscanf(I," %d-%3s-%d %d:%d:%d GMT",&Tm.tm_mday,Month,
661 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
662 Tm.tm_year += 1900;
663 else
664 {
665 // asctime format
666 if (sscanf(I," %3s %d %d:%d:%d %d",Month,&Tm.tm_mday,
667 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
668 {
669 // 'ftp' time
670 if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
671 &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
672 return false;
673 Tm.tm_mon--;
674 }
675 }
676 }
677
678 Tm.tm_isdst = 0;
679 if (Month[0] != 0)
680 Tm.tm_mon = MonthConv(Month);
681 Tm.tm_year -= 1900;
682
683 // Convert to local time and then to GMT
684 Result = timegm(&Tm);
685 return true;
686 }
687 /*}}}*/
688 // StrToNum - Convert a fixed length string to a number /*{{{*/
689 // ---------------------------------------------------------------------
690 /* This is used in decoding the crazy fixed length string headers in
691 tar and ar files. */
/* Converts the first Len characters of Str (which need not be null
   terminated) to a number in the given Base using strtoul(). A field
   consisting only of spaces, or an empty field, yields 0 and true.
   Returns false if Len is 30 or more (Res is then left untouched) or if
   strtoul() could not convert anything. */
bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
{
   char Field[30];
   if (Len >= sizeof(Field))
      return false;

   // Make a terminated private copy of the fixed width field
   memcpy(Field,Str,Len);
   Field[Len] = 0;

   // All spaces is a zero
   Res = 0;
   const char *Scan = Field;
   while (*Scan == ' ')
      Scan++;
   if (*Scan == 0)
      return true;

   char *Stop;
   Res = strtoul(Field,&Stop,Base);
   return Stop != Field;
}
714 /*}}}*/
715 // HexDigit - Convert a hex character into an integer /*{{{*/
716 // ---------------------------------------------------------------------
717 /* Helper for Hex2Num */
/* Returns the value (0..15) of the hex digit c, accepting both upper and
   lower case letters. Any other character yields 0. */
static int HexDigit(int c)
{
   int Value = 0;
   if (c >= 'A' && c <= 'F')
      Value = 10 + (c - 'A');
   else if (c >= 'a' && c <= 'f')
      Value = 10 + (c - 'a');
   else if (c >= '0' && c <= '9')
      Value = c - '0';
   return Value;
}
728 /*}}}*/
729 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
730 // ---------------------------------------------------------------------
731 /* The length of the buffer must be exactly 1/2 the length of the string. */
732 bool Hex2Num(const char *Start,const char *End,unsigned char *Num,
733 unsigned int Length)
734 {
735 if (End - Start != (signed)(Length*2))
736 return false;
737
738 // Convert each digit. We store it in the same order as the string
739 int J = 0;
740 for (const char *I = Start; I < End;J++, I += 2)
741 {
742 if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
743 return false;
744
745 Num[J] = HexDigit(I[0]) << 4;
746 Num[J] += HexDigit(I[1]);
747 }
748
749 return true;
750 }
751 /*}}}*/
752
753 // URI::CopyFrom - Copy from an object /*{{{*/
754 // ---------------------------------------------------------------------
755 /* This parses the URI into all of its components */
756 void URI::CopyFrom(string U)
757 {
758 string::const_iterator I = U.begin();
759
760 // Locate the first colon, this seperates the scheme
761 for (; I < U.end() && *I != ':' ; I++);
762 string::const_iterator FirstColon = I;
763
764 /* Determine if this is a host type URI with a leading double //
765 and then search for the first single / */
766 string::const_iterator SingleSlash = I;
767 if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
768 SingleSlash += 3;
769
770 /* Find the / indicating the end of the hostname, ignoring /'s in the
771 square brackets */
772 bool InBracket = false;
773 for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); SingleSlash++)
774 {
775 if (*SingleSlash == '[')
776 InBracket = true;
777 if (InBracket == true && *SingleSlash == ']')
778 InBracket = false;
779 }
780
781 if (SingleSlash > U.end())
782 SingleSlash = U.end();
783
784 // We can now write the access and path specifiers
785 Access = string(U,0,FirstColon - U.begin());
786 if (SingleSlash != U.end())
787 Path = string(U,SingleSlash - U.begin());
788 if (Path.empty() == true)
789 Path = "/";
790
791 // Now we attempt to locate a user:pass@host fragment
792 if (FirstColon[1] == '/' && FirstColon[2] == '/')
793 FirstColon += 3;
794 else
795 FirstColon += 1;
796 if (FirstColon >= U.end())
797 return;
798
799 if (FirstColon > SingleSlash)
800 FirstColon = SingleSlash;
801
802 // Find the colon...
803 I = FirstColon + 1;
804 if (I > SingleSlash)
805 I = SingleSlash;
806 for (; I < SingleSlash && *I != ':'; I++);
807 string::const_iterator SecondColon = I;
808
809 // Search for the @ after the colon
810 for (; I < SingleSlash && *I != '@'; I++);
811 string::const_iterator At = I;
812
813 // Now write the host and user/pass
814 if (At == SingleSlash)
815 {
816 if (FirstColon < SingleSlash)
817 Host = string(U,FirstColon - U.begin(),SingleSlash - FirstColon);
818 }
819 else
820 {
821 Host = string(U,At - U.begin() + 1,SingleSlash - At - 1);
822 User = string(U,FirstColon - U.begin(),SecondColon - FirstColon);
823 if (SecondColon < At)
824 Password = string(U,SecondColon - U.begin() + 1,At - SecondColon - 1);
825 }
826
827 // Now we parse the RFC 2732 [] hostnames.
828 unsigned long PortEnd = 0;
829 InBracket = false;
830 for (unsigned I = 0; I != Host.length();)
831 {
832 if (Host[I] == '[')
833 {
834 InBracket = true;
835 Host.erase(I,1);
836 continue;
837 }
838
839 if (InBracket == true && Host[I] == ']')
840 {
841 InBracket = false;
842 Host.erase(I,1);
843 PortEnd = I;
844 continue;
845 }
846 I++;
847 }
848
849 // Tsk, weird.
850 if (InBracket == true)
851 {
852 Host = string();
853 return;
854 }
855
856 // Now we parse off a port number from the hostname
857 Port = 0;
858 string::size_type Pos = Host.rfind(':');
859 if (Pos == string::npos || Pos < PortEnd)
860 return;
861
862 Port = atoi(string(Host,Pos+1).c_str());
863 Host = string(Host,0,Pos);
864 }
865 /*}}}*/
866 // URI::operator string - Convert the URI to a string /*{{{*/
867 // ---------------------------------------------------------------------
868 /* */
869 URI::operator string()
870 {
871 string Res;
872
873 if (Access.empty() == false)
874 Res = Access + ':';
875
876 if (Host.empty() == false)
877 {
878 if (Access.empty() == false)
879 Res += "//";
880
881 if (User.empty() == false)
882 {
883 Res += User;
884 if (Password.empty() == false)
885 Res += ":" + Password;
886 Res += "@";
887 }
888
889 // Add RFC 2732 escaping characters
890 if (Access.empty() == false &&
891 (Host.find('/') != string::npos || Host.find(':') != string::npos))
892 Res += '[' + Host + ']';
893 else
894 Res += Host;
895
896 if (Port != 0)
897 {
898 char S[30];
899 sprintf(S,":%u",Port);
900 Res += S;
901 }
902 }
903
904 if (Path.empty() == false)
905 {
906 if (Path[0] != '/')
907 Res += "/" + Path;
908 else
909 Res += Path;
910 }
911
912 return Res;
913 }
914 /*}}}*/