2 webalizer - a web server log analysis program
4 Copyright (C) 1997-2011 Bradford L. Barrett
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version, and provided that the above
10 copyright and permission notice is included with all distributed
11 copies of this or derived software.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
24 /*********************************************/
25 /* STANDARD INCLUDES */
26 /*********************************************/
32 #include <unistd.h> /* normal stuff */
34 #include <sys/utsname.h>
36 /* ensure sys/types */
38 #include <sys/types.h>
41 /* need socket header? */
42 #ifdef HAVE_SYS_SOCKET_H
43 #include <sys/socket.h>
46 /* some systems need this */
51 #include "webalizer.h" /* main header */
55 /* internal function prototypes */
56 void fmt_logrec(char *);
57 int parse_record_clf(char *);
58 int parse_record_ftp(char *);
59 int parse_record_squid(char *);
60 int parse_record_w3c(char *);
62 /*********************************************/
63 /* FMT_LOGREC - terminate log fields w/zeros */
64 /*********************************************/
/*
 * fmt_logrec - split a raw log line into fields, in place.
 *
 * Every space or tab that is NOT inside a "quoted string", a [bracketed]
 * group or a (parenthesised) group is overwritten with '\0', so that each
 * field becomes its own NUL-terminated string inside buffer.
 *
 *   buffer - NUL-terminated log line; modified in place.  NULL is ignored.
 *
 * A double quote toggles the "inside quotes" state unless the character
 * directly before it is a backslash.  Brackets and parentheses are depth
 * counted, are ignored while inside quotes, and an unmatched closer never
 * drives the depth below zero.
 */
void fmt_logrec(char *buffer)
{
   char *cp;
   int  in_quote = 0;                      /* inside "..." ?            */
   int  bracket_depth = 0;                 /* nesting level of [...]    */
   int  paren_depth = 0;                   /* nesting level of (...)    */

   if (buffer == NULL) return;

   for (cp = buffer; *cp != '\0'; cp++)
   {
      /* break record up, terminate fields with '\0' */
      switch (*cp)
      {
         case '\t':
         case ' ':
            if (!in_quote && !bracket_depth && !paren_depth) *cp = '\0';
            break;

         case '"':
            /* Only look at the previous byte when there is one; the     */
            /* first byte of the buffer can never be an escaped quote.   */
            if (cp > buffer && *(cp - 1) == '\\') break;
            in_quote ^= 1;
            break;

         case '[':
            if (!in_quote) bracket_depth++;
            break;

         case ']':
            if (!in_quote && bracket_depth > 0) bracket_depth--;
            break;

         case '(':
            if (!in_quote) paren_depth++;
            break;

         case ')':
            if (!in_quote && paren_depth > 0) paren_depth--;
            break;

         default:
            break;
      }
   }
}
88 /*********************************************/
89 /* PARSE_RECORD - clear log_rec, call format parser */
90 /*********************************************/
/*
 * parse_record - entry point for parsing one raw log line.
 *
 * Zeroes log_rec, then passes buffer to the parser for one of the supported
 * log formats (CLF, FTP, squid, W3C) and returns that parser's result.
 *
 * NOTE(review): the switch header that selects the case is not visible in
 * this listing; confirm the selector variable against the full file.
 */
92 int parse_record(char *buffer
)
94 /* zero the whole record before a format parser fills it in */
95 memset(&log_rec
,0,sizeof(struct log_struct
));
97 /* call appropriate handler */
101 case LOG_CLF
: return parse_record_clf(buffer
); break; /* CLF web log */
102 case LOG_FTP
: return parse_record_ftp(buffer
); break; /* ftp log */
103 case LOG_SQUID
: return parse_record_squid(buffer
); break; /* squid log */
104 case LOG_W3C
: return parse_record_w3c(buffer
); break; /* w3c log */
108 /*********************************************/
109 /* PARSE_RECORD_FTP - ftp log handler */
110 /*********************************************/
/*
 * parse_record_ftp - parse one ftp log line into log_rec.
 *
 * The line is first split in place by fmt_logrec(); the code then walks the
 * '\0'-separated fields with cp1.  From the visible statements it fills:
 *   log_rec.datetime  - "[dd/Mon/yyyy:hh:mm:ss -0000]" built from the day,
 *                       month name, year and time fields
 *   log_rec.hostname  - copied from the line, or "NONE" when blank
 *   log_rec.xfer_size - strtoul() of the size field, 0 if it is not numeric
 *   log_rec.url       - a fabricated "POST <name>" or "GET <name>" request
 *   log_rec.ident     - at most MAXIDENT-1 characters
 *   log_rec.resp_code - 206 or 200 (see below)
 *
 * Returns 0 when the minimal date/time sanity check fails.
 * NOTE(review): several lines (declarations, branch headers, final return)
 * are missing from this listing; confirm details against the full file.
 */
112 int parse_record_ftp(char *buffer
)
116 char *cp1
, *cp2
, *cpx
, *cpy
, *eob
;
118 size
= strlen(buffer
); /* get length of buffer */
119 eob
= buffer
+size
; /* calculate end of buffer */
120 fmt_logrec(buffer
); /* separate fields with \0's */
122 /* Start out with date/time */
/* Each pair of loops below advances to the end of the current field, */
/* then over the '\0' separators to the start of the next field.      */
124 while (*cp1
!=0 && cp1
<eob
) cp1
++;
125 while (*cp1
==0 && cp1
<eob
) cp1
++;
126 cpx
=cp1
; /* save month name */
127 while (*cp1
!=0 && cp1
<eob
) cp1
++;
128 while (*cp1
==0 && cp1
<eob
) cp1
++;
129 i
=atoi(cp1
); /* get day number */
130 while (*cp1
!=0 && cp1
<eob
) cp1
++;
131 while (*cp1
==0 && cp1
<eob
) cp1
++;
132 cpy
=cp1
; /* get timestamp */
133 while (*cp1
!=0 && cp1
<eob
) cp1
++;
134 while (*cp1
==0 && cp1
<eob
) cp1
++;
135 j
=atoi(cp1
); /* get year */
137 /* minimal sanity check: hh:mm:ss colons, year 1990-2100, day 1-31 */
138 if (*(cpy
+2)!=':' || *(cpy
+5)!=':') return 0;
139 if (j
<1990 || j
>2100) return 0;
140 if (i
<1 || i
>31) return 0;
142 /* format date/time field */
143 snprintf(log_rec
.datetime
,sizeof(log_rec
.datetime
),
144 "[%02d/%s/%4d:%s -0000]",i
,cpx
,j
,cpy
);
146 /* skip seconds... */
147 while (*cp1
!=0 && cp1
<eob
) cp1
++;
148 while (*cp1
==0 && cp1
<eob
) cp1
++;
149 while (*cp1
!=0 && cp1
<eob
) cp1
++;
154 /* Blank? That's weird.. */
155 strcpy(log_rec
.hostname
,"NONE");
156 if (debug_mode
) fprintf(stderr
, "Warning: Blank hostname found!\n");
/* copy hostname; strncpy may not terminate, so force a final '\0' */
161 strncpy(log_rec
.hostname
, ++cp1
, MAXHOST
);
162 log_rec
.hostname
[MAXHOST
-1]=0;
163 while (*cp1
!=0 && cp1
<eob
) cp1
++;
165 while (*cp1
==0 && cp1
<eob
) cp1
++;
/* transfer size: 0 unless the field starts with a digit */
168 if (*cp1
<'0'||*cp1
>'9') log_rec
.xfer_size
=0;
169 else log_rec
.xfer_size
= strtoul(cp1
,NULL
,10);
172 while (*cp1
!=0 && cp1
<eob
) cp1
++;
173 while (*cp1
==0 && cp1
<eob
) cp1
++;
175 /* get next field for later */
176 while (*cp1
!=0 && cp1
<eob
) cp1
++;
177 while (*cp1
==0 && cp1
<eob
) cp1
++;
180 while (*cp1
!=0 && cp1
<eob
) cp1
++;
/* NOTE(review): unlike the loops above, the two separator-skip loops  */
/* below have no cp1<eob bound; confirm they cannot run past the end   */
/* of the buffer on a short line.                                      */
181 while (*cp1
==0) cp1
++;
182 while (*cp1
!=0 && cp1
<eob
) cp1
++;
183 while (*cp1
==0) cp1
++;
185 /* fabricate an appropriate request string based on direction */
187 snprintf(log_rec
.url
,sizeof(log_rec
.url
),"\"POST %s\"",cpx
);
189 snprintf(log_rec
.url
,sizeof(log_rec
.url
),"\"GET %s\"",cpx
);
193 while (*cp1
!=0 && cp1
<eob
) cp1
++;
/* copy up to MAXIDENT-1 characters of the current field into ident */
195 cp2
=log_rec
.ident
;count
=MAXIDENT
-1;
196 while (*cp1
!=0 && cp1
<eob
&& count
) { *cp2
++ = *cp1
++; count
--; }
199 /* response code: 206 if the character two before the end of the */
/* buffer is 'i', otherwise 200                                   */
200 log_rec
.resp_code
=(*(eob
-2)=='i')?206:200;
205 /*********************************************/
206 /* PARSE_RECORD_CLF - CLF web log handler */
207 /*********************************************/
/*
 * parse_record_clf - parse one CLF web log line into log_rec.
 *
 * The line is split in place by fmt_logrec() and then walked field by field
 * with cp1.  Each copied field is limited by an end-of-string pointer (eos)
 * derived from the matching MAX* constant and clamped to the end of the
 * buffer; oversized fields produce a msg_big_* warning on stderr.
 *
 * Visible results: log_rec.hostname, log_rec.datetime, log_rec.url (with
 * the HTTP version stripped), log_rec.resp_code and log_rec.xfer_size,
 * followed by an optional referrer and a further field limited by MAXAGENT.
 *
 * Returns 0 when a sanity check fails, 1 otherwise.
 * NOTE(review): several lines (declarations, destination assignments,
 * branch headers) are missing from this listing; confirm against the full
 * file.
 */
209 int parse_record_clf(char *buffer
)
212 char *cp1
, *cp2
, *cpx
, *eob
, *eos
;
214 size
= strlen(buffer
); /* get length of buffer */
215 eob
= buffer
+size
; /* calculate end of buffer */
216 fmt_logrec(buffer
); /* separate fields with \0's */
/* hostname: first field, copied until '\0' or the eos limit */
219 cp1
= cpx
= buffer
; cp2
=log_rec
.hostname
;
220 eos
= (cp1
+MAXHOST
)-1;
221 if (eos
>= eob
) eos
=eob
-1;
223 while ( (*cp1
!= '\0') && (cp1
!= eos
) ) *cp2
++ = *cp1
++;
229 fprintf(stderr
,"%s",msg_big_host
);
230 if (debug_mode
) fprintf(stderr
,": %s\n",cpx
);
231 else fprintf(stderr
,"\n");
233 while (*cp1
!= '\0') cp1
++;
235 if (cp1
< eob
) cp1
++;
237 /* skip next field (ident) */
238 while ( (*cp1
!= '\0') && (cp1
< eob
) ) cp1
++;
239 if (cp1
< eob
) cp1
++;
241 /* IDENT (authuser) field */
244 eos
= (cp1
+MAXIDENT
-1);
245 if (eos
>= eob
) eos
=eob
-1;
247 while ( (*cp1
!= '[') && (cp1
< eos
) ) /* remove embedded spaces */
249 if (*cp1
=='\0') *cp1
=' ';
254 if (cp1
>= eob
) return 0;
256 /* check if oversized username */
261 fprintf(stderr
,"%s",msg_big_user
);
262 if (debug_mode
) fprintf(stderr
,": %s\n",cpx
);
263 else fprintf(stderr
,"\n");
265 while ( (*cp1
!= '[') && (cp1
< eob
) ) cp1
++;
268 /* strip trailing space(s) */
/* NOTE(review): this backward walk has no lower bound on cp2; confirm */
/* it cannot step before the start of the destination field.           */
269 while (*cp2
==' ') *cp2
--='\0';
271 /* date/time string */
273 cp2
= log_rec
.datetime
;
275 if (eos
>= eob
) eos
=eob
-1;
277 while ( (*cp1
!= '\0') && (cp1
!= eos
) ) *cp2
++ = *cp1
++;
283 fprintf(stderr
,"%s",msg_big_date
);
284 if (debug_mode
) fprintf(stderr
,": %s\n",cpx
);
285 else fprintf(stderr
,"\n");
287 while (*cp1
!= '\0') cp1
++;
289 if (cp1
< eob
) cp1
++;
291 /* minimal sanity check on timestamp: "[dd/..." and more data follows */
292 if ( (log_rec
.datetime
[0] != '[') ||
293 (log_rec
.datetime
[3] != '/') ||
294 (cp1
>= eob
)) return 0;
299 eos
= (cp1
+MAXURL
-1);
300 if (eos
>= eob
) eos
= eob
-1;
302 while ( (*cp1
!= '\0') && (cp1
!= eos
) ) *cp2
++ = *cp1
++;
308 fprintf(stderr
,"%s",msg_big_req
);
309 if (debug_mode
) fprintf(stderr
,": %s\n",cpx
);
310 else fprintf(stderr
,"\n");
312 while (*cp1
!= '\0') cp1
++;
314 if (cp1
< eob
) cp1
++;
/* the request must start with a double quote and more data must follow */
316 if ( (log_rec
.url
[0] != '"') ||
317 (cp1
>= eob
) ) return 0;
319 /* Strip off HTTP version from URL */
/* NOTE(review): strstr() finds the first "HTTP" anywhere in the request, */
/* including inside the path; confirm that truncating there is intended.  */
320 if ( (cp2
=strstr(log_rec
.url
,"HTTP"))!=NULL
)
322 *cp2
='\0'; /* Terminate string */
323 *(--cp2
)='"'; /* change <sp> to " */
/* response code */
327 log_rec
.resp_code
= atoi(cp1
);
330 while ( (*cp1
!= '\0') && (cp1
< eob
) ) cp1
++;
331 if (cp1
< eob
) cp1
++;
/* transfer size: 0 unless the field starts with a digit */
332 if (*cp1
<'0'||*cp1
>'9') log_rec
.xfer_size
=0;
333 else log_rec
.xfer_size
= strtoul(cp1
,NULL
,10);
335 /* done with CLF record */
336 if (cp1
>=eob
) return 1;
338 while ( (*cp1
!= '\0') && (*cp1
!= '\n') && (cp1
< eob
) ) cp1
++;
339 if (cp1
< eob
) cp1
++;
340 /* get referrer if present */
343 eos
= (cp1
+MAXREF
-1);
344 if (eos
>= eob
) eos
= eob
-1;
346 while ( (*cp1
!= '\0') && (*cp1
!= '\n') && (cp1
!= eos
) ) *cp2
++ = *cp1
++;
352 fprintf(stderr
,"%s",msg_big_ref
);
353 if (debug_mode
) fprintf(stderr
,": %s\n",cpx
);
354 else fprintf(stderr
,"\n");
356 while (*cp1
!= '\0') cp1
++;
358 if (cp1
< eob
) cp1
++;
/* final field, limited to MAXAGENT-1 characters */
362 eos
= cp1
+(MAXAGENT
-1);
363 if (eos
>= eob
) eos
= eob
-1;
365 while ( (*cp1
!= '\0') && (cp1
!= eos
) ) *cp2
++ = *cp1
++;
368 return 1; /* maybe a valid record, return with TRUE */
371 /*********************************************/
372 /* PARSE_RECORD_SQUID - squid log handler */
373 /*********************************************/
/*
 * parse_record_squid - parse one squid log line into log_rec.
 *
 * The line is split in place by fmt_logrec() and walked with cp1.  Visible
 * results: log_rec.datetime (formatted from the numeric timestamp field),
 * log_rec.hostname, log_rec.resp_code (taken after the cache status),
 * log_rec.xfer_size, the request type and URL (the URL copy is controlled
 * by trimsquid / slash_count), and an ident field limited by MAXIDENT.
 * Oversized fields produce a msg_big_* warning on stderr.
 *
 * Returns 0 if the end of the buffer is reached while reading the ident.
 * NOTE(review): several lines (declarations, destination assignments,
 * branch headers, final return) are missing from this listing; confirm
 * against the full file.
 */
375 int parse_record_squid(char *buffer
)
377 int size
, slash_count
=0;
379 char *cp1
, *cp2
, *cpx
, *eob
, *eos
;
381 size
= strlen(buffer
); /* get length of buffer */
382 eob
= buffer
+size
; /* calculate end of buffer */
383 fmt_logrec(buffer
); /* separate fields with \0's */
387 i
=atoi(cp1
); /* get timestamp */
389 /* format date/time field */
/* NOTE(review): the zone is written as a literal "-0000" although the */
/* broken-down time comes from localtime(); confirm this is intended.  */
390 strftime(log_rec
.datetime
,sizeof(log_rec
.datetime
),
391 "[%d/%b/%Y:%H:%M:%S -0000]",localtime(&i
));
393 while (*cp1
!=0 && cp1
<eob
) cp1
++;
/* NOTE(review): this and the other "while (*cp1==0)" loops in this     */
/* function have no cp1<eob bound; confirm they cannot run past the end */
/* of the buffer on a short line.                                       */
394 while (*cp1
==0) cp1
++;
396 /* skip request size */
397 while (*cp1
!=0 && cp1
<eob
) cp1
++;
398 while (*cp1
==0) cp1
++;
/* hostname: copied until '\0' or the eos limit */
401 cpx
= cp1
; cp2
=log_rec
.hostname
;
402 eos
= (cp1
+MAXHOST
)-1;
403 if (eos
>= eob
) eos
=eob
-1;
405 while ((*cp1
!= '\0') && (cp1
!= eos
)) *cp2
++ = *cp1
++;
411 fprintf(stderr
,"%s",msg_big_host
);
412 if (debug_mode
) fprintf(stderr
,": %s\n",cpx
);
413 else fprintf(stderr
,"\n");
415 while (*cp1
!= '\0') cp1
++;
417 if (cp1
< eob
) cp1
++;
419 /* skip cache status (everything up to the '/') */
420 while (*cp1
!=0 && cp1
<eob
&& *cp1
!='/') cp1
++;
/* response code */
424 log_rec
.resp_code
= atoi(cp1
);
425 while (*cp1
!=0 && cp1
<eob
) cp1
++;
426 while (*cp1
==0) cp1
++;
/* transfer size: 0 unless the field starts with a digit */
429 if (*cp1
<'0'||*cp1
>'9') log_rec
.xfer_size
=0;
430 else log_rec
.xfer_size
= strtoul(cp1
,NULL
,10);
432 while (*cp1
!=0 && cp1
<eob
) cp1
++;
433 while (*cp1
==0) cp1
++;
435 /* HTTP request type */
439 eos
= (cp1
+MAXURL
-1);
440 if (eos
>= eob
) eos
= eob
-1;
442 while ( (*cp1
!= '\0') && (cp1
!= eos
) ) *cp2
++ = *cp1
++;
448 fprintf(stderr
,"%s",msg_big_req
);
449 if (debug_mode
) fprintf(stderr
,": %s\n",cpx
);
450 else fprintf(stderr
,"\n");
452 while (*cp1
!= '\0') cp1
++;
454 if (cp1
< eob
) cp1
++;
458 /* HTTP URL requested */
/* slash_count starts at trimsquid+2 and is decremented on each '/' */
463 slash_count
=trimsquid
+2;
464 while ( (*cp1
!= '\0') && (cp1
!= eos
) && slash_count
)
467 if (*cp1
== '/') slash_count
--;
470 else while ( (*cp1
!= '\0') && (cp1
!= eos
) ) *cp2
++ = *cp1
++;
/* the copy stopped before the end of the field: report it as oversized */
473 if ((*cp1
!= '\0' && trimsquid
==0) || (trimsquid
&& slash_count
) )
477 fprintf(stderr
,"%s",msg_big_req
);
478 if (debug_mode
) fprintf(stderr
,": %s\n",cpx
);
479 else fprintf(stderr
,"\n");
481 while (*cp1
!= '\0') cp1
++;
483 if (cp1
< eob
) cp1
++;
487 /* IDENT (authuser) field */
490 eos
= (cp1
+MAXIDENT
-1);
491 if (eos
>= eob
) eos
=eob
-1;
493 while (*cp1
== ' ') cp1
++; /* skip white space */
495 while ( (*cp1
!= ' ' && *cp1
!='\0') && (cp1
< eos
) ) *cp2
++=*cp1
++;
499 if (cp1
>= eob
) return 0;
501 /* strip trailing space(s) */
/* NOTE(review): no lower bound on cp2 in this backward walk; confirm */
/* it cannot step before the start of the destination field.          */
502 while (*cp2
==' ') *cp2
--='\0';
504 /* we have no interest in the remaining fields */
508 /*********************************************/
509 /* PARSE_RECORD_W3C - w3c log handler */
510 /*********************************************/
512 /* field index structure */
/*
 * Position of each recognised field within a W3C data line.  The members
 * are assigned from the "Fields:" header line in parse_record_w3c() and
 * compared against the running field index when a data line is split.
 */
513 struct field_index_struct
515 int date
; /* Date field index */
516 int time
; /* Time field index */
517 int ip
; /* IP field index */
518 int username
; /* Username field index */
519 int method
; /* Method field index */
520 int url
; /* URL field index */
521 int query
; /* Querystring field index */
522 int status
; /* Status code field index */
523 int size
; /* Size field index */
524 int referer
; /* Referrer field index */
525 int agent
; /* User agent field index */
526 int fields
; /* Number of fields in this format */
529 /* field structure: pointers to the text of each recognised field in  */
/* the data line currently being parsed; zeroed before each line and  */
/* set by parse_record_w3c(), so a member stays NULL when not matched */
532 char *date
; /* Date field */
533 char *time
; /* Time field */
534 char *ip
; /* IP field */
535 char *username
; /* Username field */
536 char *method
; /* Method field */
537 char *url
; /* URL field */
538 char *query
; /* Querystring */
539 char *status
; /* Status code */
540 char *size
; /* Size field */
541 char *referer
; /* Referrer field */
542 char *agent
; /* User agent field */
/*
 * parse_record_w3c - parse one line of a W3C format log into log_rec.
 *
 * A "Fields:" header line resets and refills the static field_index table,
 * recording the position of each recognised column name.  For a data line
 * the number of fields must match the header (otherwise 0 is returned);
 * pointers to the recognised fields are collected in a fields_struct and
 * copied or converted into log_rec: url (method, stem and optional query),
 * hostname, resp_code, refer, xfer_size, agent, ident and datetime.
 *
 * The date and time fields are parsed into a struct tm, converted with
 * mktime() plus a zone adjustment, and formatted through localtime() and
 * strftime().
 *
 * Returns 0 for an empty line, a field-count mismatch or a malformed
 * date/time field.
 * NOTE(review): many lines (declarations, loop and branch headers, the
 * other return statements) are missing from this listing; confirm details
 * against the full file.
 */
545 int parse_record_w3c(char *buffer
)
/* field_index is static so the layout from the last "Fields:" header */
/* is kept across calls                                               */
551 static struct field_index_struct field_index
;
552 struct fields_struct fields
;
553 struct tm gm_time
, *local_time
;
556 memset(&gm_time
, 0, sizeof(struct tm
));
557 size
= strlen(buffer
); /* get length of buffer */
558 eob
= buffer
+ size
; /* calculate end of buffer */
560 /* remove line end markers, reduce eob accordingly */
565 if (*cp
== '\r' || *cp
=='\n')
574 fmt_logrec(buffer
); /* separate fields with \0's */
578 /* Abort parsing (return 0) if the line is empty or suffers from the IIS
Null-Character bug. */
580 if (*cp
== '\0') return 0;
582 /* If it's a header line ignore it or parse the Fields header if found */
586 if (!strcmp(cp
, "Fields:"))
588 /* Reset the field indices */
589 memset(&field_index
, 0, sizeof(struct field_index_struct
));
595 /* Set the field index for each column name that is recognised */
596 if (!strcmp(cp
, "date")) field_index
.date
= index
;
597 if (!strcmp(cp
, "time")) field_index
.time
= index
;
598 if (!strcmp(cp
, "c-ip")) field_index
.ip
= index
;
599 if (!strcmp(cp
, "cs-method")) field_index
.method
= index
;
600 if (!strcmp(cp
, "cs-uri-stem")) field_index
.url
= index
;
601 if (!strcmp(cp
, "cs-uri-query")) field_index
.query
= index
;
602 if (!strcmp(cp
, "sc-status")) field_index
.status
= index
;
603 if (!strcmp(cp
, "cs(Referer)")) field_index
.referer
= index
;
604 if (!strcmp(cp
, "sc-bytes")) field_index
.size
= index
;
605 if (!strcmp(cp
, "cs(User-Agent)")) field_index
.agent
= index
;
606 if (!strcmp(cp
, "cs-username")) field_index
.username
= index
;
608 /* Continue with the next field */
613 field_index
.fields
= index
-1;
616 /* Return because this header line is completely parsed */
620 /* A data line has been found */
622 /* Check that the number of entries in this line conforms to the
format specified in the header */
631 if (index
-1 != field_index
.fields
) return 0;
636 /* Reset the field pointers and begin parsing the data line */
637 memset(&fields
, 0, sizeof(struct fields_struct
));
641 /* Set the field pointers */
642 if (index
== field_index
.date
) fields
.date
= cp
;
643 if (index
== field_index
.time
) fields
.time
= cp
;
644 if (index
== field_index
.ip
) fields
.ip
= cp
;
645 if (index
== field_index
.method
) fields
.method
= cp
;
646 if (index
== field_index
.url
) fields
.url
= cp
;
647 if (index
== field_index
.query
) fields
.query
= cp
;
648 if (index
== field_index
.status
) fields
.status
= cp
;
649 if (index
== field_index
.referer
) fields
.referer
= cp
;
650 if (index
== field_index
.size
) fields
.size
= cp
;
651 if (index
== field_index
.agent
) fields
.agent
= cp
;
652 if (index
== field_index
.username
) fields
.username
= cp
;
654 /* Continue with the next data field */
/* replace every '+' with a space in the string cp points to */
664 while (*cp
) { if (*cp
=='+') *cp
=' '; cp
++; }
666 /* If no HTTP Method, force to "NONE" */
/* NOTE(review): only a "-" method is replaced here; confirm that      */
/* fields.method cannot still be NULL when passed to snprintf's %s.    */
667 if (fields
.method
&& (fields
.method
[0]=='-'))
668 fields
.method
="NONE";
/* build the quoted request string, with "?query" only if one exists */
670 if (fields
.query
&& (fields
.query
[0]!='-'))
671 snprintf(log_rec
.url
, MAXURL
, "\"%s %s?%s\"",
672 fields
.method
, fields
.url
, fields
.query
);
673 else snprintf(log_rec
.url
, MAXURL
, "\"%s %s\"",
674 fields
.method
, fields
.url
);
/* strncpy() with MAX-1 does not terminate on truncation; the copies    */
/* below rely on log_rec having been zeroed (parse_record does so)      */
679 if (fields
.ip
) strncpy(log_rec
.hostname
, fields
.ip
, MAXHOST
- 1);
681 /* Save response code */
682 if (fields
.status
) log_rec
.resp_code
= atoi(fields
.status
);
685 if (fields
.referer
) strncpy(log_rec
.refer
, fields
.referer
, MAXREF
- 1);
687 /* Save transfer size */
688 if (fields
.size
) log_rec
.xfer_size
= strtoul(fields
.size
, NULL
, 10);
690 /* Save user agent */
694 while (*cp
) { if (*cp
=='+') *cp
=' '; cp
++; }
695 strncpy(log_rec
.agent
, fields
.agent
, MAXAGENT
- 1);
698 /* Save auth username */
699 if (fields
.username
) strncpy(log_rec
.ident
, fields
.username
, MAXIDENT
- 1);
701 /* Parse date and time and save it */
/* date: year, then month and day separated by '-' */
704 gm_time
.tm_year
= atoi(fields
.date
);
705 if (gm_time
.tm_year
> 1900) gm_time
.tm_year
-=1900;
706 while ((fields
.date
[0] != '\0') && (fields
.date
[0] != '-')) fields
.date
++;
707 if (fields
.date
[0] == '\0') return 0;
709 gm_time
.tm_mon
= atoi(fields
.date
) - 1;
710 while ((fields
.date
[0] != '\0') && (fields
.date
[0] != '-')) fields
.date
++;
711 if (fields
.date
[0] == '\0') return 0;
713 gm_time
.tm_mday
= atoi(fields
.date
);
/* time: hour, minute and second separated by ':' */
717 gm_time
.tm_hour
= atoi(fields
.time
);
718 while ((fields
.time
[0] != '\0') && (fields
.time
[0] != ':')) fields
.time
++;
719 if (fields
.time
[0] == '\0') return 0;
721 gm_time
.tm_min
= atoi(fields
.time
);
722 while ((fields
.time
[0] != '\0') && (fields
.time
[0] != ':')) fields
.time
++;
723 if (fields
.time
[0] == '\0') return 0;
725 gm_time
.tm_sec
= atoi(fields
.time
);
728 /* Convert GMT to localtime */
729 gm_time
.tm_isdst
= -1; /* force dst check */
730 timestamp
= mktime(&gm_time
); /* get time in sec */
/* NOTE(review): the next two statements look like per-platform        */
/* alternatives; the conditionals selecting them are not visible here. */
732 timestamp
-=(gm_time
.tm_isdst
)?altzone
:timezone
; /* solaris & friends */
734 timestamp
= mktime(&gm_time
)+gm_time
.tm_gmtoff
; /* glibc systems */
/* NOTE(review): "×tamp" below looks like a mis-encoded "&timestamp" */
/* (HTML entity damage); confirm against the original file.          */
736 local_time
= localtime(×tamp
); /* update tm struct */
737 strftime(log_rec
.datetime
, sizeof(log_rec
.datetime
),/* and format string */
738 "[%d/%b/%Y:%H:%M:%S -0000]", local_time
); /* for log_rec field */