Commit | Line | Data |
---|---|---|
e015f748 CE |
1 | /* |
2 | webalizer - a web server log analysis program | |
3 | ||
4 | Copyright (C) 1997-2011 Bradford L. Barrett | |
5 | ||
6 | This program is free software; you can redistribute it and/or modify | |
7 | it under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 2 of the License, or | |
9 | (at your option) any later version, and provided that the above | |
10 | copyright and permission notice is included with all distributed | |
11 | copies of this or derived software. | |
12 | ||
13 | This program is distributed in the hope that it will be useful, | |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | GNU General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
19 | along with this program; if not, write to the Free Software | |
20 | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA | |
21 | ||
22 | */ | |
23 | ||
24 | /*********************************************/ | |
25 | /* STANDARD INCLUDES */ | |
26 | /*********************************************/ | |
27 | ||
28 | #include <time.h> | |
29 | #include <stdio.h> | |
30 | #include <stdlib.h> | |
31 | #include <string.h> | |
32 | #include <unistd.h> /* normal stuff */ | |
33 | #include <ctype.h> | |
34 | #include <sys/utsname.h> | |
35 | ||
36 | /* ensure sys/types */ | |
37 | #ifndef _SYS_TYPES_H | |
38 | #include <sys/types.h> | |
39 | #endif | |
40 | ||
41 | /* need socket header? */ | |
42 | #ifdef HAVE_SYS_SOCKET_H | |
43 | #include <sys/socket.h> | |
44 | #endif | |
45 | ||
46 | /* some systems need this */ | |
47 | #ifdef HAVE_MATH_H | |
48 | #include <math.h> | |
49 | #endif | |
50 | ||
51 | #include "webalizer.h" /* main header */ | |
52 | #include "lang.h" | |
53 | #include "parser.h" | |
54 | ||
55 | /* internal function prototypes */ | |
56 | void fmt_logrec(char *); | |
57 | int parse_record_clf(char *); | |
58 | int parse_record_ftp(char *); | |
59 | int parse_record_squid(char *); | |
60 | int parse_record_w3c(char *); | |
61 | ||
62 | /*********************************************/ | |
63 | /* FMT_LOGREC - terminate log fields w/zeros */ | |
64 | /*********************************************/ | |
65 | ||
/*
 * fmt_logrec - split a raw log line into fields, in place.
 *
 * Walks the NUL-terminated string in 'buffer' and overwrites every space
 * or tab with '\0', unless that character sits inside double quotes,
 * [brackets] or (parentheses).  Afterwards each field is its own C string.
 *
 *   - A double quote directly preceded by a backslash does not open or
 *     close a quoted section.
 *   - Brackets and parentheses are counted (they nest) and are ignored
 *     while inside quotes.
 *
 * The line keeps its length, so callers must compute the end of the
 * buffer BEFORE calling this function.
 */
void fmt_logrec(char *buffer)
{
   char *cp=buffer;
   int  q=0,b=0,p=0;          /* inside quotes / bracket depth / paren depth */

   while (*cp != '\0')
   {
      /* break record up, terminate fields with '\0' */
      switch (*cp)
      {
       case '\t':
       case ' ':  if (!(b || q || p)) *cp='\0';
                  break;
       /* only look at the previous character when there is one; the */
       /* first byte of the buffer can never be an escaped quote     */
       case '"':  if (cp==buffer || *(cp-1)!='\\') q^=1;
                  break;
       case '[':  if (!q) b++;
                  break;
       case ']':  if (!q && b>0) b--;
                  break;
       case '(':  if (!q) p++;
                  break;
       case ')':  if (!q && p>0) p--;
                  break;
       default:   break;
      }
      cp++;
   }
}
87 | ||
88 | /*********************************************/ | |
89 | /* PARSE_RECORD - dispatch to log handler */ | |
90 | /*********************************************/ | |
91 | ||
92 | int parse_record(char *buffer) | |
93 | { | |
94 | /* clear out structure */ | |
95 | memset(&log_rec,0,sizeof(struct log_struct)); | |
96 | ||
97 | /* call appropriate handler */ | |
98 | switch (log_type) | |
99 | { | |
100 | default: | |
101 | case LOG_CLF: return parse_record_clf(buffer); break; /* clf */ | |
102 | case LOG_FTP: return parse_record_ftp(buffer); break; /* ftp */ | |
103 | case LOG_SQUID: return parse_record_squid(buffer); break; /* squid */ | |
104 | case LOG_W3C: return parse_record_w3c(buffer); break; /* w3c */ | |
105 | } | |
106 | } | |
107 | ||
108 | /*********************************************/ | |
109 | /* PARSE_RECORD_FTP - ftp log handler */ | |
110 | /*********************************************/ | |
111 | ||
112 | int parse_record_ftp(char *buffer) | |
113 | { | |
114 | int size; | |
115 | int i,j,count; | |
116 | char *cp1, *cp2, *cpx, *cpy, *eob; | |
117 | ||
118 | size = strlen(buffer); /* get length of buffer */ | |
119 | eob = buffer+size; /* calculate end of buffer */ | |
120 | fmt_logrec(buffer); /* seperate fields with \0's */ | |
121 | ||
122 | /* Start out with date/time */ | |
123 | cp1=buffer; | |
124 | while (*cp1!=0 && cp1<eob) cp1++; | |
125 | while (*cp1==0 && cp1<eob) cp1++; | |
126 | cpx=cp1; /* save month name */ | |
127 | while (*cp1!=0 && cp1<eob) cp1++; | |
128 | while (*cp1==0 && cp1<eob) cp1++; | |
129 | i=atoi(cp1); /* get day number */ | |
130 | while (*cp1!=0 && cp1<eob) cp1++; | |
131 | while (*cp1==0 && cp1<eob) cp1++; | |
132 | cpy=cp1; /* get timestamp */ | |
133 | while (*cp1!=0 && cp1<eob) cp1++; | |
134 | while (*cp1==0 && cp1<eob) cp1++; | |
135 | j=atoi(cp1); /* get year */ | |
136 | ||
137 | /* minimal sanity check */ | |
138 | if (*(cpy+2)!=':' || *(cpy+5)!=':') return 0; | |
139 | if (j<1990 || j>2100) return 0; | |
140 | if (i<1 || i>31) return 0; | |
141 | ||
142 | /* format date/time field */ | |
143 | snprintf(log_rec.datetime,sizeof(log_rec.datetime), | |
144 | "[%02d/%s/%4d:%s -0000]",i,cpx,j,cpy); | |
145 | ||
146 | /* skip seconds... */ | |
147 | while (*cp1!=0 && cp1<eob) cp1++; | |
148 | while (*cp1==0 && cp1<eob) cp1++; | |
149 | while (*cp1!=0 && cp1<eob) cp1++; | |
150 | ||
151 | /* get hostname */ | |
152 | if (*(cp1+1)==0) | |
153 | { | |
154 | /* Blank? That's weird.. */ | |
155 | strcpy(log_rec.hostname,"NONE"); | |
156 | if (debug_mode) fprintf(stderr, "Warning: Blank hostname found!\n"); | |
157 | } | |
158 | else | |
159 | { | |
160 | /* good hostname */ | |
161 | strncpy(log_rec.hostname, ++cp1, MAXHOST); | |
162 | log_rec.hostname[MAXHOST-1]=0; | |
163 | while (*cp1!=0 && cp1<eob) cp1++; | |
164 | } | |
165 | while (*cp1==0 && cp1<eob) cp1++; | |
166 | ||
167 | /* get filesize */ | |
168 | if (*cp1<'0'||*cp1>'9') log_rec.xfer_size=0; | |
169 | else log_rec.xfer_size = strtoul(cp1,NULL,10); | |
170 | ||
171 | /* URL stuff */ | |
172 | while (*cp1!=0 && cp1<eob) cp1++; | |
173 | while (*cp1==0 && cp1<eob) cp1++; | |
174 | cpx=cp1; | |
175 | /* get next field for later */ | |
176 | while (*cp1!=0 && cp1<eob) cp1++; | |
177 | while (*cp1==0 && cp1<eob) cp1++; | |
178 | ||
179 | /* skip next two */ | |
180 | while (*cp1!=0 && cp1<eob) cp1++; | |
181 | while (*cp1==0) cp1++; | |
182 | while (*cp1!=0 && cp1<eob) cp1++; | |
183 | while (*cp1==0) cp1++; | |
184 | ||
185 | /* fabricate an appropriate request string based on direction */ | |
186 | if (*cp1=='i') | |
187 | snprintf(log_rec.url,sizeof(log_rec.url),"\"POST %s\"",cpx); | |
188 | else | |
189 | snprintf(log_rec.url,sizeof(log_rec.url),"\"GET %s\"",cpx); | |
190 | ||
191 | if (cp1<eob) cp1++; | |
192 | if (cp1<eob) cp1++; | |
193 | while (*cp1!=0 && cp1<eob) cp1++; | |
194 | if (cp1<eob) cp1++; | |
195 | cp2=log_rec.ident;count=MAXIDENT-1; | |
196 | while (*cp1!=0 && cp1<eob && count) { *cp2++ = *cp1++; count--; } | |
197 | *cp2='\0'; | |
198 | ||
199 | /* return appropriate response code */ | |
200 | log_rec.resp_code=(*(eob-2)=='i')?206:200; | |
201 | ||
202 | return 1; | |
203 | } | |
204 | ||
205 | /*********************************************/ | |
206 | /* PARSE_RECORD_CLF - CLF web log handler */ | |
207 | /*********************************************/ | |
208 | ||
209 | int parse_record_clf(char *buffer) | |
210 | { | |
211 | int size; | |
212 | char *cp1, *cp2, *cpx, *eob, *eos; | |
213 | ||
214 | size = strlen(buffer); /* get length of buffer */ | |
215 | eob = buffer+size; /* calculate end of buffer */ | |
216 | fmt_logrec(buffer); /* seperate fields with \0's */ | |
217 | ||
218 | /* HOSTNAME */ | |
219 | cp1 = cpx = buffer; cp2=log_rec.hostname; | |
220 | eos = (cp1+MAXHOST)-1; | |
221 | if (eos >= eob) eos=eob-1; | |
222 | ||
223 | while ( (*cp1 != '\0') && (cp1 != eos) ) *cp2++ = *cp1++; | |
224 | *cp2 = '\0'; | |
225 | if (*cp1 != '\0') | |
226 | { | |
227 | if (verbose) | |
228 | { | |
229 | fprintf(stderr,"%s",msg_big_host); | |
230 | if (debug_mode) fprintf(stderr,": %s\n",cpx); | |
231 | else fprintf(stderr,"\n"); | |
232 | } | |
233 | while (*cp1 != '\0') cp1++; | |
234 | } | |
235 | if (cp1 < eob) cp1++; | |
236 | ||
237 | /* skip next field (ident) */ | |
238 | while ( (*cp1 != '\0') && (cp1 < eob) ) cp1++; | |
239 | if (cp1 < eob) cp1++; | |
240 | ||
241 | /* IDENT (authuser) field */ | |
242 | cpx = cp1; | |
243 | cp2 = log_rec.ident; | |
244 | eos = (cp1+MAXIDENT-1); | |
245 | if (eos >= eob) eos=eob-1; | |
246 | ||
247 | while ( (*cp1 != '[') && (cp1 < eos) ) /* remove embeded spaces */ | |
248 | { | |
249 | if (*cp1=='\0') *cp1=' '; | |
250 | *cp2++=*cp1++; | |
251 | } | |
252 | *cp2--='\0'; | |
253 | ||
254 | if (cp1 >= eob) return 0; | |
255 | ||
256 | /* check if oversized username */ | |
257 | if (*cp1 != '[') | |
258 | { | |
259 | if (verbose) | |
260 | { | |
261 | fprintf(stderr,"%s",msg_big_user); | |
262 | if (debug_mode) fprintf(stderr,": %s\n",cpx); | |
263 | else fprintf(stderr,"\n"); | |
264 | } | |
265 | while ( (*cp1 != '[') && (cp1 < eob) ) cp1++; | |
266 | } | |
267 | ||
268 | /* strip trailing space(s) */ | |
269 | while (*cp2==' ') *cp2--='\0'; | |
270 | ||
271 | /* date/time string */ | |
272 | cpx = cp1; | |
273 | cp2 = log_rec.datetime; | |
274 | eos = (cp1+28); | |
275 | if (eos >= eob) eos=eob-1; | |
276 | ||
277 | while ( (*cp1 != '\0') && (cp1 != eos) ) *cp2++ = *cp1++; | |
278 | *cp2 = '\0'; | |
279 | if (*cp1 != '\0') | |
280 | { | |
281 | if (verbose) | |
282 | { | |
283 | fprintf(stderr,"%s",msg_big_date); | |
284 | if (debug_mode) fprintf(stderr,": %s\n",cpx); | |
285 | else fprintf(stderr,"\n"); | |
286 | } | |
287 | while (*cp1 != '\0') cp1++; | |
288 | } | |
289 | if (cp1 < eob) cp1++; | |
290 | ||
291 | /* minimal sanity check on timestamp */ | |
292 | if ( (log_rec.datetime[0] != '[') || | |
293 | (log_rec.datetime[3] != '/') || | |
294 | (cp1 >= eob)) return 0; | |
295 | ||
296 | /* HTTP request */ | |
297 | cpx = cp1; | |
298 | cp2 = log_rec.url; | |
299 | eos = (cp1+MAXURL-1); | |
300 | if (eos >= eob) eos = eob-1; | |
301 | ||
302 | while ( (*cp1 != '\0') && (cp1 != eos) ) *cp2++ = *cp1++; | |
303 | *cp2 = '\0'; | |
304 | if (*cp1 != '\0') | |
305 | { | |
306 | if (verbose) | |
307 | { | |
308 | fprintf(stderr,"%s",msg_big_req); | |
309 | if (debug_mode) fprintf(stderr,": %s\n",cpx); | |
310 | else fprintf(stderr,"\n"); | |
311 | } | |
312 | while (*cp1 != '\0') cp1++; | |
313 | } | |
314 | if (cp1 < eob) cp1++; | |
315 | ||
316 | if ( (log_rec.url[0] != '"') || | |
317 | (cp1 >= eob) ) return 0; | |
318 | ||
319 | /* Strip off HTTP version from URL */ | |
320 | if ( (cp2=strstr(log_rec.url,"HTTP"))!=NULL ) | |
321 | { | |
322 | *cp2='\0'; /* Terminate string */ | |
323 | *(--cp2)='"'; /* change <sp> to " */ | |
324 | } | |
325 | ||
326 | /* response code */ | |
327 | log_rec.resp_code = atoi(cp1); | |
328 | ||
329 | /* xfer size */ | |
330 | while ( (*cp1 != '\0') && (cp1 < eob) ) cp1++; | |
331 | if (cp1 < eob) cp1++; | |
332 | if (*cp1<'0'||*cp1>'9') log_rec.xfer_size=0; | |
333 | else log_rec.xfer_size = strtoul(cp1,NULL,10); | |
334 | ||
335 | /* done with CLF record */ | |
336 | if (cp1>=eob) return 1; | |
337 | ||
338 | while ( (*cp1 != '\0') && (*cp1 != '\n') && (cp1 < eob) ) cp1++; | |
339 | if (cp1 < eob) cp1++; | |
340 | /* get referrer if present */ | |
341 | cpx = cp1; | |
342 | cp2 = log_rec.refer; | |
343 | eos = (cp1+MAXREF-1); | |
344 | if (eos >= eob) eos = eob-1; | |
345 | ||
346 | while ( (*cp1 != '\0') && (*cp1 != '\n') && (cp1 != eos) ) *cp2++ = *cp1++; | |
347 | *cp2 = '\0'; | |
348 | if (*cp1 != '\0') | |
349 | { | |
350 | if (verbose) | |
351 | { | |
352 | fprintf(stderr,"%s",msg_big_ref); | |
353 | if (debug_mode) fprintf(stderr,": %s\n",cpx); | |
354 | else fprintf(stderr,"\n"); | |
355 | } | |
356 | while (*cp1 != '\0') cp1++; | |
357 | } | |
358 | if (cp1 < eob) cp1++; | |
359 | ||
360 | cpx = cp1; | |
361 | cp2 = log_rec.agent; | |
362 | eos = cp1+(MAXAGENT-1); | |
363 | if (eos >= eob) eos = eob-1; | |
364 | ||
365 | while ( (*cp1 != '\0') && (cp1 != eos) ) *cp2++ = *cp1++; | |
366 | *cp2 = '\0'; | |
367 | ||
368 | return 1; /* maybe a valid record, return with TRUE */ | |
369 | } | |
370 | ||
371 | /*********************************************/ | |
372 | /* PARSE_RECORD_SQUID - squid log handler */ | |
373 | /*********************************************/ | |
374 | ||
375 | int parse_record_squid(char *buffer) | |
376 | { | |
377 | int size, slash_count=0; | |
378 | time_t i; | |
379 | char *cp1, *cp2, *cpx, *eob, *eos; | |
380 | ||
381 | size = strlen(buffer); /* get length of buffer */ | |
382 | eob = buffer+size; /* calculate end of buffer */ | |
383 | fmt_logrec(buffer); /* seperate fields with \0's */ | |
384 | ||
385 | /* date/time */ | |
386 | cp1=buffer; | |
387 | i=atoi(cp1); /* get timestamp */ | |
388 | ||
389 | /* format date/time field */ | |
390 | strftime(log_rec.datetime,sizeof(log_rec.datetime), | |
391 | "[%d/%b/%Y:%H:%M:%S -0000]",localtime(&i)); | |
392 | ||
393 | while (*cp1!=0 && cp1<eob) cp1++; | |
394 | while (*cp1==0) cp1++; | |
395 | ||
396 | /* skip request size */ | |
397 | while (*cp1!=0 && cp1<eob) cp1++; | |
398 | while (*cp1==0) cp1++; | |
399 | ||
400 | /* HOSTNAME */ | |
401 | cpx = cp1; cp2=log_rec.hostname; | |
402 | eos = (cp1+MAXHOST)-1; | |
403 | if (eos >= eob) eos=eob-1; | |
404 | ||
405 | while ((*cp1 != '\0') && (cp1 != eos)) *cp2++ = *cp1++; | |
406 | *cp2='\0'; | |
407 | if (*cp1 != '\0') | |
408 | { | |
409 | if (verbose) | |
410 | { | |
411 | fprintf(stderr,"%s",msg_big_host); | |
412 | if (debug_mode) fprintf(stderr,": %s\n",cpx); | |
413 | else fprintf(stderr,"\n"); | |
414 | } | |
415 | while (*cp1 != '\0') cp1++; | |
416 | } | |
417 | if (cp1 < eob) cp1++; | |
418 | ||
419 | /* skip cache status */ | |
420 | while (*cp1!=0 && cp1<eob && *cp1!='/') cp1++; | |
421 | cp1++; | |
422 | ||
423 | /* response code */ | |
424 | log_rec.resp_code = atoi(cp1); | |
425 | while (*cp1!=0 && cp1<eob) cp1++; | |
426 | while (*cp1==0) cp1++; | |
427 | ||
428 | /* xfer size */ | |
429 | if (*cp1<'0'||*cp1>'9') log_rec.xfer_size=0; | |
430 | else log_rec.xfer_size = strtoul(cp1,NULL,10); | |
431 | ||
432 | while (*cp1!=0 && cp1<eob) cp1++; | |
433 | while (*cp1==0) cp1++; | |
434 | ||
435 | /* HTTP request type */ | |
436 | cpx = cp1; | |
437 | cp2 = log_rec.url; | |
438 | *cp2++ = '\"'; | |
439 | eos = (cp1+MAXURL-1); | |
440 | if (eos >= eob) eos = eob-1; | |
441 | ||
442 | while ( (*cp1 != '\0') && (cp1 != eos) ) *cp2++ = *cp1++; | |
443 | *cp2 = '\0'; | |
444 | if (*cp1 != '\0') | |
445 | { | |
446 | if (verbose) | |
447 | { | |
448 | fprintf(stderr,"%s",msg_big_req); | |
449 | if (debug_mode) fprintf(stderr,": %s\n",cpx); | |
450 | else fprintf(stderr,"\n"); | |
451 | } | |
452 | while (*cp1 != '\0') cp1++; | |
453 | } | |
454 | if (cp1 < eob) cp1++; | |
455 | ||
456 | *cp2++ = ' '; | |
457 | ||
458 | /* HTTP URL requested */ | |
459 | cpx = cp1; | |
460 | ||
461 | if (trimsquid>0) | |
462 | { | |
463 | slash_count=trimsquid+2; | |
464 | while ( (*cp1 != '\0') && (cp1 != eos) && slash_count) | |
465 | { | |
466 | *cp2++ = *cp1++; | |
467 | if (*cp1 == '/') slash_count--; | |
468 | } | |
469 | } | |
470 | else while ( (*cp1 != '\0') && (cp1 != eos) ) *cp2++ = *cp1++; | |
471 | ||
472 | *cp2 = '\0'; | |
473 | if ((*cp1 != '\0' && trimsquid==0) || (trimsquid && slash_count) ) | |
474 | { | |
475 | if (verbose) | |
476 | { | |
477 | fprintf(stderr,"%s",msg_big_req); | |
478 | if (debug_mode) fprintf(stderr,": %s\n",cpx); | |
479 | else fprintf(stderr,"\n"); | |
480 | } | |
481 | while (*cp1 != '\0') cp1++; | |
482 | } | |
483 | if (cp1 < eob) cp1++; | |
484 | ||
485 | *cp2++ = '\"'; | |
486 | ||
487 | /* IDENT (authuser) field */ | |
488 | cpx = cp1; | |
489 | cp2 = log_rec.ident; | |
490 | eos = (cp1+MAXIDENT-1); | |
491 | if (eos >= eob) eos=eob-1; | |
492 | ||
493 | while (*cp1 == ' ') cp1++; /* skip white space */ | |
494 | ||
495 | while ( (*cp1 != ' ' && *cp1!='\0') && (cp1 < eos) ) *cp2++=*cp1++; | |
496 | ||
497 | *cp2--='\0'; | |
498 | ||
499 | if (cp1 >= eob) return 0; | |
500 | ||
501 | /* strip trailing space(s) */ | |
502 | while (*cp2==' ') *cp2--='\0'; | |
503 | ||
504 | /* we have no interest in the remaining fields */ | |
505 | return 1; | |
506 | } | |
507 | ||
508 | /*********************************************/ | |
509 | /* PARSE_RECORD_W3C - w3c log handler */ | |
510 | /*********************************************/ | |
511 | ||
/*
 * Column positions of the fields used from a W3C log, as given by the
 * most recent "#Fields:" header.  Positions are 1-based; 0 means the
 * column is not present in the current format.
 */
struct field_index_struct
{
   int date;       /* "date" column            */
   int time;       /* "time" column            */
   int ip;         /* "c-ip" column            */
   int username;   /* "cs-username" column     */
   int method;     /* "cs-method" column       */
   int url;        /* "cs-uri-stem" column     */
   int query;      /* "cs-uri-query" column    */
   int status;     /* "sc-status" column       */
   int size;       /* "sc-bytes" column        */
   int referer;    /* "cs(Referer)" column     */
   int agent;      /* "cs(User-Agent)" column  */
   int fields;     /* total columns per record */
};
528 | ||
/*
 * Pointers to the individual field strings of one W3C data line.  Each
 * member points into the line buffer, or is NULL when the current
 * format has no such column.
 */
struct fields_struct
{
   char *date;       /* date text            */
   char *time;       /* time text            */
   char *ip;         /* client address       */
   char *username;   /* authenticated user   */
   char *method;     /* request method       */
   char *url;        /* requested URL        */
   char *query;      /* query string         */
   char *status;     /* response status code */
   char *size;       /* bytes transferred    */
   char *referer;    /* referrer             */
   char *agent;      /* user agent           */
};
544 | ||
545 | int parse_record_w3c(char *buffer) | |
546 | { | |
547 | int size; | |
548 | char *eob; | |
549 | char *cp; | |
550 | int index; | |
551 | static struct field_index_struct field_index; | |
552 | struct fields_struct fields; | |
553 | struct tm gm_time, *local_time; | |
554 | time_t timestamp; | |
555 | ||
556 | memset(&gm_time, 0, sizeof(struct tm)); | |
557 | size = strlen(buffer); /* get length of buffer */ | |
558 | eob = buffer + size; /* calculate end of buffer */ | |
559 | ||
560 | /* remove line end markers, reduce eob accordingly */ | |
561 | cp = eob; | |
562 | while(cp>buffer) | |
563 | { | |
564 | cp--; | |
565 | if (*cp == '\r' || *cp=='\n') | |
566 | { | |
567 | *cp = '\0'; | |
568 | eob--; | |
569 | } | |
570 | else | |
571 | break; | |
572 | } | |
573 | ||
574 | fmt_logrec(buffer); /* seperate fields with \0's */ | |
575 | ||
576 | cp = buffer; | |
577 | ||
578 | /* Check if the line is empty or a line suffers from the IIS | |
579 | Null-Character bug and abort parsing if found. */ | |
580 | if (*cp == '\0') return 0; | |
581 | ||
582 | /* If it's a header line ignore it or parse the Fields header if found */ | |
583 | if (*cp == '#') | |
584 | { | |
585 | cp++; | |
586 | if (!strcmp(cp, "Fields:")) | |
587 | { | |
588 | /* Reset the field indices */ | |
589 | memset(&field_index, 0, sizeof(struct field_index_struct)); | |
590 | while (*cp) cp++; | |
591 | cp++; | |
592 | index = 1; | |
593 | while (cp < eob) | |
594 | { | |
595 | /* Set the field index */ | |
596 | if (!strcmp(cp, "date")) field_index.date = index; | |
597 | if (!strcmp(cp, "time")) field_index.time = index; | |
598 | if (!strcmp(cp, "c-ip")) field_index.ip = index; | |
599 | if (!strcmp(cp, "cs-method")) field_index.method = index; | |
600 | if (!strcmp(cp, "cs-uri-stem")) field_index.url = index; | |
601 | if (!strcmp(cp, "cs-uri-query")) field_index.query = index; | |
602 | if (!strcmp(cp, "sc-status")) field_index.status = index; | |
603 | if (!strcmp(cp, "cs(Referer)")) field_index.referer = index; | |
604 | if (!strcmp(cp, "sc-bytes")) field_index.size = index; | |
605 | if (!strcmp(cp, "cs(User-Agent)")) field_index.agent = index; | |
606 | if (!strcmp(cp, "cs-username")) field_index.username = index; | |
607 | ||
608 | /* Continue with the next field */ | |
609 | while (*cp) cp++; | |
610 | cp++; | |
611 | index++; | |
612 | } | |
613 | field_index.fields = index -1; | |
614 | } | |
615 | ||
616 | /* Return because this header line is completely parsed */ | |
617 | return 0; | |
618 | } | |
619 | ||
620 | /* A data line has been found */ | |
621 | ||
622 | /* Check if the number of entries in this line are conform to the | |
623 | format specified in the header */ | |
624 | index = 1; | |
625 | while (cp < eob) | |
626 | { | |
627 | while (*cp) cp++; | |
628 | cp++; | |
629 | index++; | |
630 | } | |
631 | if (index-1 != field_index.fields) return 0; | |
632 | ||
633 | /* Reset pointer */ | |
634 | cp = buffer; | |
635 | ||
636 | /* Reset the field pointers and begin parsing the data line */ | |
637 | memset(&fields, 0, sizeof(struct fields_struct)); | |
638 | index = 1; | |
639 | while (cp < eob) | |
640 | { | |
641 | /* Set the field pointers */ | |
642 | if (index == field_index.date) fields.date = cp; | |
643 | if (index == field_index.time) fields.time = cp; | |
644 | if (index == field_index.ip) fields.ip = cp; | |
645 | if (index == field_index.method) fields.method = cp; | |
646 | if (index == field_index.url) fields.url = cp; | |
647 | if (index == field_index.query) fields.query = cp; | |
648 | if (index == field_index.status) fields.status = cp; | |
649 | if (index == field_index.referer) fields.referer = cp; | |
650 | if (index == field_index.size) fields.size = cp; | |
651 | if (index == field_index.agent) fields.agent = cp; | |
652 | if (index == field_index.username) fields.username = cp; | |
653 | ||
654 | /* Continue with the next data field */ | |
655 | while (*cp) cp++; | |
656 | cp++; | |
657 | index++; | |
658 | } | |
659 | ||
660 | /* Save URL */ | |
661 | if (fields.url) | |
662 | { | |
663 | cp = fields.url; | |
664 | while (*cp) { if (*cp=='+') *cp=' '; cp++; } | |
665 | ||
666 | /* If no HTTP Method, force to "NONE" */ | |
667 | if (fields.method && (fields.method[0]=='-')) | |
668 | fields.method="NONE"; | |
669 | ||
670 | if (fields.query && (fields.query[0]!='-')) | |
671 | snprintf(log_rec.url, MAXURL, "\"%s %s?%s\"", | |
672 | fields.method, fields.url, fields.query); | |
673 | else snprintf(log_rec.url, MAXURL, "\"%s %s\"", | |
674 | fields.method, fields.url); | |
675 | } | |
676 | else return 0; | |
677 | ||
678 | /* Save hostname */ | |
679 | if (fields.ip) strncpy(log_rec.hostname, fields.ip, MAXHOST - 1); | |
680 | ||
681 | /* Save response code */ | |
682 | if (fields.status) log_rec.resp_code = atoi(fields.status); | |
683 | ||
684 | /* Save referer */ | |
685 | if (fields.referer) strncpy(log_rec.refer, fields.referer, MAXREF - 1); | |
686 | ||
687 | /* Save transfer size */ | |
688 | if (fields.size) log_rec.xfer_size = strtoul(fields.size, NULL, 10); | |
689 | ||
690 | /* Save user agent */ | |
691 | if (fields.agent) | |
692 | { | |
693 | cp = fields.agent; | |
694 | while (*cp) { if (*cp=='+') *cp=' '; cp++; } | |
695 | strncpy(log_rec.agent, fields.agent, MAXAGENT - 1); | |
696 | } | |
697 | ||
698 | /* Save auth username */ | |
699 | if (fields.username) strncpy(log_rec.ident, fields.username, MAXIDENT - 1); | |
700 | ||
701 | /* Parse date and time and save it */ | |
702 | if (fields.date) | |
703 | { | |
704 | gm_time.tm_year = atoi(fields.date); | |
705 | if (gm_time.tm_year > 1900) gm_time.tm_year-=1900; | |
706 | while ((fields.date[0] != '\0') && (fields.date[0] != '-')) fields.date++; | |
707 | if (fields.date[0] == '\0') return 0; | |
708 | fields.date++; | |
709 | gm_time.tm_mon = atoi(fields.date) - 1; | |
710 | while ((fields.date[0] != '\0') && (fields.date[0] != '-')) fields.date++; | |
711 | if (fields.date[0] == '\0') return 0; | |
712 | fields.date++; | |
713 | gm_time.tm_mday = atoi(fields.date); | |
714 | } | |
715 | if (fields.time) | |
716 | { | |
717 | gm_time.tm_hour = atoi(fields.time); | |
718 | while ((fields.time[0] != '\0') && (fields.time[0] != ':')) fields.time++; | |
719 | if (fields.time[0] == '\0') return 0; | |
720 | fields.time++; | |
721 | gm_time.tm_min = atoi(fields.time); | |
722 | while ((fields.time[0] != '\0') && (fields.time[0] != ':')) fields.time++; | |
723 | if (fields.time[0] == '\0') return 0; | |
724 | fields.time++; | |
725 | gm_time.tm_sec = atoi(fields.time); | |
726 | } | |
727 | ||
728 | /* Convert GMT to localtime */ | |
729 | gm_time.tm_isdst = -1; /* force dst check */ | |
730 | timestamp = mktime(&gm_time); /* get time in sec */ | |
731 | #ifdef HAVE_ALTZONE | |
732 | timestamp-=(gm_time.tm_isdst)?altzone:timezone; /* solaris & friends */ | |
733 | #else | |
734 | timestamp = mktime(&gm_time)+gm_time.tm_gmtoff; /* glibc systems */ | |
735 | #endif | |
736 | local_time = localtime(×tamp); /* update tm struct */ | |
737 | strftime(log_rec.datetime, sizeof(log_rec.datetime),/* and format sting */ | |
738 | "[%d/%b/%Y:%H:%M:%S -0000]", local_time); /* for log_rec field */ | |
739 | return 1; | |
740 | } |