| 1 | /* |
| 2 | webalizer - a web server log analysis program |
| 3 | |
| 4 | Copyright (C) 1997-2011 Bradford L. Barrett |
| 5 | |
| 6 | This program is free software; you can redistribute it and/or modify |
| 7 | it under the terms of the GNU General Public License as published by |
| 8 | the Free Software Foundation; either version 2 of the License, or |
| 9 | (at your option) any later version, and provided that the above |
| 10 | copyright and permission notice is included with all distributed |
| 11 | copies of this or derived software. |
| 12 | |
| 13 | This program is distributed in the hope that it will be useful, |
| 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 16 | GNU General Public License for more details. |
| 17 | |
| 18 | You should have received a copy of the GNU General Public License |
| 19 | along with this program; if not, write to the Free Software |
| 20 | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA |
| 21 | |
| 22 | */ |
| 23 | |
| 24 | /*********************************************/ |
| 25 | /* STANDARD INCLUDES */ |
| 26 | /*********************************************/ |
| 27 | |
| 28 | #include <time.h> |
| 29 | #include <stdio.h> |
| 30 | #include <stdlib.h> |
| 31 | #include <string.h> |
| 32 | #include <unistd.h> /* normal stuff */ |
| 33 | #include <ctype.h> |
| 34 | #include <sys/utsname.h> |
| 35 | |
| 36 | /* ensure sys/types */ |
| 37 | #ifndef _SYS_TYPES_H |
| 38 | #include <sys/types.h> |
| 39 | #endif |
| 40 | |
| 41 | /* need socket header? */ |
| 42 | #ifdef HAVE_SYS_SOCKET_H |
| 43 | #include <sys/socket.h> |
| 44 | #endif |
| 45 | |
| 46 | /* some systems need this */ |
| 47 | #ifdef HAVE_MATH_H |
| 48 | #include <math.h> |
| 49 | #endif |
| 50 | |
| 51 | #include "webalizer.h" /* main header */ |
| 52 | #include "lang.h" |
| 53 | #include "parser.h" |
| 54 | |
/* forward declarations for the helpers and per-format parsers defined below */
void fmt_logrec(char *buffer);
int  parse_record_clf(char *buffer);
int  parse_record_ftp(char *buffer);
int  parse_record_squid(char *buffer);
int  parse_record_w3c(char *buffer);
| 61 | |
| 62 | /*********************************************/ |
| 63 | /* FMT_LOGREC - terminate log fields w/zeros */ |
| 64 | /*********************************************/ |
| 65 | |
/*
 * fmt_logrec - split a log record into fields, in place.
 *
 * Walks the NUL-terminated string in 'buffer' and overwrites every space
 * and tab with '\0', except those found inside a "quoted" section or
 * inside [brackets] / (parentheses).  A double quote preceded by a
 * backslash does not open or close a quoted section.  Brackets and
 * parentheses are counted (so they may nest) and are ignored while inside
 * quotes.
 *
 * The scan stops at the first '\0' of the original string, so callers
 * must record the original length beforehand if they need to walk the
 * resulting fields.
 */
void fmt_logrec(char *buffer)
{
   char *cp;
   int  in_quote=0, brackets=0, parens=0;

   for (cp=buffer; *cp!='\0'; cp++)
   {
      switch (*cp)
      {
         case '\t':
         case ' ':
            /* field separator only when not inside any grouping */
            if (!in_quote && !brackets && !parens) *cp='\0';
            break;

         case '"':
            /* look at the previous character only if there is one;   */
            /* the first byte of the buffer can never be escaped      */
            if (cp==buffer || *(cp-1)!='\\') in_quote^=1;
            break;

         case '[': if (!in_quote) brackets++;                break;
         case ']': if (!in_quote && brackets>0) brackets--;  break;
         case '(': if (!in_quote) parens++;                  break;
         case ')': if (!in_quote && parens>0) parens--;      break;

         default:  break;
      }
   }
}
| 87 | |
| 88 | /*********************************************/ |
| 89 | /* PARSE_RECORD - clear log_rec, dispatch on log_type */ |
| 90 | /*********************************************/ |
| 91 | |
| 92 | int parse_record(char *buffer) |
| 93 | { |
| 94 | /* clear out structure */ |
| 95 | memset(&log_rec,0,sizeof(struct log_struct)); |
| 96 | |
| 97 | /* call appropriate handler */ |
| 98 | switch (log_type) |
| 99 | { |
| 100 | default: |
| 101 | case LOG_CLF: return parse_record_clf(buffer); break; /* clf */ |
| 102 | case LOG_FTP: return parse_record_ftp(buffer); break; /* ftp */ |
| 103 | case LOG_SQUID: return parse_record_squid(buffer); break; /* squid */ |
| 104 | case LOG_W3C: return parse_record_w3c(buffer); break; /* w3c */ |
| 105 | } |
| 106 | } |
| 107 | |
| 108 | /*********************************************/ |
| 109 | /* PARSE_RECORD_FTP - ftp log handler */ |
| 110 | /*********************************************/ |
| 111 | |
| 112 | int parse_record_ftp(char *buffer) |
| 113 | { |
| 114 | int size; |
| 115 | int i,j,count; |
| 116 | char *cp1, *cp2, *cpx, *cpy, *eob; |
| 117 | |
| 118 | size = strlen(buffer); /* get length of buffer */ |
| 119 | eob = buffer+size; /* calculate end of buffer */ |
| 120 | fmt_logrec(buffer); /* seperate fields with \0's */ |
| 121 | |
| 122 | /* Start out with date/time */ |
| 123 | cp1=buffer; |
| 124 | while (*cp1!=0 && cp1<eob) cp1++; |
| 125 | while (*cp1==0 && cp1<eob) cp1++; |
| 126 | cpx=cp1; /* save month name */ |
| 127 | while (*cp1!=0 && cp1<eob) cp1++; |
| 128 | while (*cp1==0 && cp1<eob) cp1++; |
| 129 | i=atoi(cp1); /* get day number */ |
| 130 | while (*cp1!=0 && cp1<eob) cp1++; |
| 131 | while (*cp1==0 && cp1<eob) cp1++; |
| 132 | cpy=cp1; /* get timestamp */ |
| 133 | while (*cp1!=0 && cp1<eob) cp1++; |
| 134 | while (*cp1==0 && cp1<eob) cp1++; |
| 135 | j=atoi(cp1); /* get year */ |
| 136 | |
| 137 | /* minimal sanity check */ |
| 138 | if (*(cpy+2)!=':' || *(cpy+5)!=':') return 0; |
| 139 | if (j<1990 || j>2100) return 0; |
| 140 | if (i<1 || i>31) return 0; |
| 141 | |
| 142 | /* format date/time field */ |
| 143 | snprintf(log_rec.datetime,sizeof(log_rec.datetime), |
| 144 | "[%02d/%s/%4d:%s -0000]",i,cpx,j,cpy); |
| 145 | |
| 146 | /* skip seconds... */ |
| 147 | while (*cp1!=0 && cp1<eob) cp1++; |
| 148 | while (*cp1==0 && cp1<eob) cp1++; |
| 149 | while (*cp1!=0 && cp1<eob) cp1++; |
| 150 | |
| 151 | /* get hostname */ |
| 152 | if (*(cp1+1)==0) |
| 153 | { |
| 154 | /* Blank? That's weird.. */ |
| 155 | strcpy(log_rec.hostname,"NONE"); |
| 156 | if (debug_mode) fprintf(stderr, "Warning: Blank hostname found!\n"); |
| 157 | } |
| 158 | else |
| 159 | { |
| 160 | /* good hostname */ |
| 161 | strncpy(log_rec.hostname, ++cp1, MAXHOST); |
| 162 | log_rec.hostname[MAXHOST-1]=0; |
| 163 | while (*cp1!=0 && cp1<eob) cp1++; |
| 164 | } |
| 165 | while (*cp1==0 && cp1<eob) cp1++; |
| 166 | |
| 167 | /* get filesize */ |
| 168 | if (*cp1<'0'||*cp1>'9') log_rec.xfer_size=0; |
| 169 | else log_rec.xfer_size = strtoul(cp1,NULL,10); |
| 170 | |
| 171 | /* URL stuff */ |
| 172 | while (*cp1!=0 && cp1<eob) cp1++; |
| 173 | while (*cp1==0 && cp1<eob) cp1++; |
| 174 | cpx=cp1; |
| 175 | /* get next field for later */ |
| 176 | while (*cp1!=0 && cp1<eob) cp1++; |
| 177 | while (*cp1==0 && cp1<eob) cp1++; |
| 178 | |
| 179 | /* skip next two */ |
| 180 | while (*cp1!=0 && cp1<eob) cp1++; |
| 181 | while (*cp1==0) cp1++; |
| 182 | while (*cp1!=0 && cp1<eob) cp1++; |
| 183 | while (*cp1==0) cp1++; |
| 184 | |
| 185 | /* fabricate an appropriate request string based on direction */ |
| 186 | if (*cp1=='i') |
| 187 | snprintf(log_rec.url,sizeof(log_rec.url),"\"POST %s\"",cpx); |
| 188 | else |
| 189 | snprintf(log_rec.url,sizeof(log_rec.url),"\"GET %s\"",cpx); |
| 190 | |
| 191 | if (cp1<eob) cp1++; |
| 192 | if (cp1<eob) cp1++; |
| 193 | while (*cp1!=0 && cp1<eob) cp1++; |
| 194 | if (cp1<eob) cp1++; |
| 195 | cp2=log_rec.ident;count=MAXIDENT-1; |
| 196 | while (*cp1!=0 && cp1<eob && count) { *cp2++ = *cp1++; count--; } |
| 197 | *cp2='\0'; |
| 198 | |
| 199 | /* return appropriate response code */ |
| 200 | log_rec.resp_code=(*(eob-2)=='i')?206:200; |
| 201 | |
| 202 | return 1; |
| 203 | } |
| 204 | |
| 205 | /*********************************************/ |
| 206 | /* PARSE_RECORD_CLF - CLF web log handler */ |
| 207 | /*********************************************/ |
| 208 | |
| 209 | int parse_record_clf(char *buffer) |
| 210 | { |
| 211 | int size; |
| 212 | char *cp1, *cp2, *cpx, *eob, *eos; |
| 213 | |
| 214 | size = strlen(buffer); /* get length of buffer */ |
| 215 | eob = buffer+size; /* calculate end of buffer */ |
| 216 | fmt_logrec(buffer); /* seperate fields with \0's */ |
| 217 | |
| 218 | /* HOSTNAME */ |
| 219 | cp1 = cpx = buffer; cp2=log_rec.hostname; |
| 220 | eos = (cp1+MAXHOST)-1; |
| 221 | if (eos >= eob) eos=eob-1; |
| 222 | |
| 223 | while ( (*cp1 != '\0') && (cp1 != eos) ) *cp2++ = *cp1++; |
| 224 | *cp2 = '\0'; |
| 225 | if (*cp1 != '\0') |
| 226 | { |
| 227 | if (verbose) |
| 228 | { |
| 229 | fprintf(stderr,"%s",msg_big_host); |
| 230 | if (debug_mode) fprintf(stderr,": %s\n",cpx); |
| 231 | else fprintf(stderr,"\n"); |
| 232 | } |
| 233 | while (*cp1 != '\0') cp1++; |
| 234 | } |
| 235 | if (cp1 < eob) cp1++; |
| 236 | |
| 237 | /* skip next field (ident) */ |
| 238 | while ( (*cp1 != '\0') && (cp1 < eob) ) cp1++; |
| 239 | if (cp1 < eob) cp1++; |
| 240 | |
| 241 | /* IDENT (authuser) field */ |
| 242 | cpx = cp1; |
| 243 | cp2 = log_rec.ident; |
| 244 | eos = (cp1+MAXIDENT-1); |
| 245 | if (eos >= eob) eos=eob-1; |
| 246 | |
| 247 | while ( (*cp1 != '[') && (cp1 < eos) ) /* remove embeded spaces */ |
| 248 | { |
| 249 | if (*cp1=='\0') *cp1=' '; |
| 250 | *cp2++=*cp1++; |
| 251 | } |
| 252 | *cp2--='\0'; |
| 253 | |
| 254 | if (cp1 >= eob) return 0; |
| 255 | |
| 256 | /* check if oversized username */ |
| 257 | if (*cp1 != '[') |
| 258 | { |
| 259 | if (verbose) |
| 260 | { |
| 261 | fprintf(stderr,"%s",msg_big_user); |
| 262 | if (debug_mode) fprintf(stderr,": %s\n",cpx); |
| 263 | else fprintf(stderr,"\n"); |
| 264 | } |
| 265 | while ( (*cp1 != '[') && (cp1 < eob) ) cp1++; |
| 266 | } |
| 267 | |
| 268 | /* strip trailing space(s) */ |
| 269 | while (*cp2==' ') *cp2--='\0'; |
| 270 | |
| 271 | /* date/time string */ |
| 272 | cpx = cp1; |
| 273 | cp2 = log_rec.datetime; |
| 274 | eos = (cp1+28); |
| 275 | if (eos >= eob) eos=eob-1; |
| 276 | |
| 277 | while ( (*cp1 != '\0') && (cp1 != eos) ) *cp2++ = *cp1++; |
| 278 | *cp2 = '\0'; |
| 279 | if (*cp1 != '\0') |
| 280 | { |
| 281 | if (verbose) |
| 282 | { |
| 283 | fprintf(stderr,"%s",msg_big_date); |
| 284 | if (debug_mode) fprintf(stderr,": %s\n",cpx); |
| 285 | else fprintf(stderr,"\n"); |
| 286 | } |
| 287 | while (*cp1 != '\0') cp1++; |
| 288 | } |
| 289 | if (cp1 < eob) cp1++; |
| 290 | |
| 291 | /* minimal sanity check on timestamp */ |
| 292 | if ( (log_rec.datetime[0] != '[') || |
| 293 | (log_rec.datetime[3] != '/') || |
| 294 | (cp1 >= eob)) return 0; |
| 295 | |
| 296 | /* HTTP request */ |
| 297 | cpx = cp1; |
| 298 | cp2 = log_rec.url; |
| 299 | eos = (cp1+MAXURL-1); |
| 300 | if (eos >= eob) eos = eob-1; |
| 301 | |
| 302 | while ( (*cp1 != '\0') && (cp1 != eos) ) *cp2++ = *cp1++; |
| 303 | *cp2 = '\0'; |
| 304 | if (*cp1 != '\0') |
| 305 | { |
| 306 | if (verbose) |
| 307 | { |
| 308 | fprintf(stderr,"%s",msg_big_req); |
| 309 | if (debug_mode) fprintf(stderr,": %s\n",cpx); |
| 310 | else fprintf(stderr,"\n"); |
| 311 | } |
| 312 | while (*cp1 != '\0') cp1++; |
| 313 | } |
| 314 | if (cp1 < eob) cp1++; |
| 315 | |
| 316 | if ( (log_rec.url[0] != '"') || |
| 317 | (cp1 >= eob) ) return 0; |
| 318 | |
| 319 | /* Strip off HTTP version from URL */ |
| 320 | if ( (cp2=strstr(log_rec.url,"HTTP"))!=NULL ) |
| 321 | { |
| 322 | *cp2='\0'; /* Terminate string */ |
| 323 | *(--cp2)='"'; /* change <sp> to " */ |
| 324 | } |
| 325 | |
| 326 | /* response code */ |
| 327 | log_rec.resp_code = atoi(cp1); |
| 328 | |
| 329 | /* xfer size */ |
| 330 | while ( (*cp1 != '\0') && (cp1 < eob) ) cp1++; |
| 331 | if (cp1 < eob) cp1++; |
| 332 | if (*cp1<'0'||*cp1>'9') log_rec.xfer_size=0; |
| 333 | else log_rec.xfer_size = strtoul(cp1,NULL,10); |
| 334 | |
| 335 | /* done with CLF record */ |
| 336 | if (cp1>=eob) return 1; |
| 337 | |
| 338 | while ( (*cp1 != '\0') && (*cp1 != '\n') && (cp1 < eob) ) cp1++; |
| 339 | if (cp1 < eob) cp1++; |
| 340 | /* get referrer if present */ |
| 341 | cpx = cp1; |
| 342 | cp2 = log_rec.refer; |
| 343 | eos = (cp1+MAXREF-1); |
| 344 | if (eos >= eob) eos = eob-1; |
| 345 | |
| 346 | while ( (*cp1 != '\0') && (*cp1 != '\n') && (cp1 != eos) ) *cp2++ = *cp1++; |
| 347 | *cp2 = '\0'; |
| 348 | if (*cp1 != '\0') |
| 349 | { |
| 350 | if (verbose) |
| 351 | { |
| 352 | fprintf(stderr,"%s",msg_big_ref); |
| 353 | if (debug_mode) fprintf(stderr,": %s\n",cpx); |
| 354 | else fprintf(stderr,"\n"); |
| 355 | } |
| 356 | while (*cp1 != '\0') cp1++; |
| 357 | } |
| 358 | if (cp1 < eob) cp1++; |
| 359 | |
| 360 | cpx = cp1; |
| 361 | cp2 = log_rec.agent; |
| 362 | eos = cp1+(MAXAGENT-1); |
| 363 | if (eos >= eob) eos = eob-1; |
| 364 | |
| 365 | while ( (*cp1 != '\0') && (cp1 != eos) ) *cp2++ = *cp1++; |
| 366 | *cp2 = '\0'; |
| 367 | |
| 368 | return 1; /* maybe a valid record, return with TRUE */ |
| 369 | } |
| 370 | |
| 371 | /*********************************************/ |
| 372 | /* PARSE_RECORD_SQUID - squid log handler */ |
| 373 | /*********************************************/ |
| 374 | |
| 375 | int parse_record_squid(char *buffer) |
| 376 | { |
| 377 | int size, slash_count=0; |
| 378 | time_t i; |
| 379 | char *cp1, *cp2, *cpx, *eob, *eos; |
| 380 | |
| 381 | size = strlen(buffer); /* get length of buffer */ |
| 382 | eob = buffer+size; /* calculate end of buffer */ |
| 383 | fmt_logrec(buffer); /* seperate fields with \0's */ |
| 384 | |
| 385 | /* date/time */ |
| 386 | cp1=buffer; |
| 387 | i=atoi(cp1); /* get timestamp */ |
| 388 | |
| 389 | /* format date/time field */ |
| 390 | strftime(log_rec.datetime,sizeof(log_rec.datetime), |
| 391 | "[%d/%b/%Y:%H:%M:%S -0000]",localtime(&i)); |
| 392 | |
| 393 | while (*cp1!=0 && cp1<eob) cp1++; |
| 394 | while (*cp1==0) cp1++; |
| 395 | |
| 396 | /* skip request size */ |
| 397 | while (*cp1!=0 && cp1<eob) cp1++; |
| 398 | while (*cp1==0) cp1++; |
| 399 | |
| 400 | /* HOSTNAME */ |
| 401 | cpx = cp1; cp2=log_rec.hostname; |
| 402 | eos = (cp1+MAXHOST)-1; |
| 403 | if (eos >= eob) eos=eob-1; |
| 404 | |
| 405 | while ((*cp1 != '\0') && (cp1 != eos)) *cp2++ = *cp1++; |
| 406 | *cp2='\0'; |
| 407 | if (*cp1 != '\0') |
| 408 | { |
| 409 | if (verbose) |
| 410 | { |
| 411 | fprintf(stderr,"%s",msg_big_host); |
| 412 | if (debug_mode) fprintf(stderr,": %s\n",cpx); |
| 413 | else fprintf(stderr,"\n"); |
| 414 | } |
| 415 | while (*cp1 != '\0') cp1++; |
| 416 | } |
| 417 | if (cp1 < eob) cp1++; |
| 418 | |
| 419 | /* skip cache status */ |
| 420 | while (*cp1!=0 && cp1<eob && *cp1!='/') cp1++; |
| 421 | cp1++; |
| 422 | |
| 423 | /* response code */ |
| 424 | log_rec.resp_code = atoi(cp1); |
| 425 | while (*cp1!=0 && cp1<eob) cp1++; |
| 426 | while (*cp1==0) cp1++; |
| 427 | |
| 428 | /* xfer size */ |
| 429 | if (*cp1<'0'||*cp1>'9') log_rec.xfer_size=0; |
| 430 | else log_rec.xfer_size = strtoul(cp1,NULL,10); |
| 431 | |
| 432 | while (*cp1!=0 && cp1<eob) cp1++; |
| 433 | while (*cp1==0) cp1++; |
| 434 | |
| 435 | /* HTTP request type */ |
| 436 | cpx = cp1; |
| 437 | cp2 = log_rec.url; |
| 438 | *cp2++ = '\"'; |
| 439 | eos = (cp1+MAXURL-1); |
| 440 | if (eos >= eob) eos = eob-1; |
| 441 | |
| 442 | while ( (*cp1 != '\0') && (cp1 != eos) ) *cp2++ = *cp1++; |
| 443 | *cp2 = '\0'; |
| 444 | if (*cp1 != '\0') |
| 445 | { |
| 446 | if (verbose) |
| 447 | { |
| 448 | fprintf(stderr,"%s",msg_big_req); |
| 449 | if (debug_mode) fprintf(stderr,": %s\n",cpx); |
| 450 | else fprintf(stderr,"\n"); |
| 451 | } |
| 452 | while (*cp1 != '\0') cp1++; |
| 453 | } |
| 454 | if (cp1 < eob) cp1++; |
| 455 | |
| 456 | *cp2++ = ' '; |
| 457 | |
| 458 | /* HTTP URL requested */ |
| 459 | cpx = cp1; |
| 460 | |
| 461 | if (trimsquid>0) |
| 462 | { |
| 463 | slash_count=trimsquid+2; |
| 464 | while ( (*cp1 != '\0') && (cp1 != eos) && slash_count) |
| 465 | { |
| 466 | *cp2++ = *cp1++; |
| 467 | if (*cp1 == '/') slash_count--; |
| 468 | } |
| 469 | } |
| 470 | else while ( (*cp1 != '\0') && (cp1 != eos) ) *cp2++ = *cp1++; |
| 471 | |
| 472 | *cp2 = '\0'; |
| 473 | if ((*cp1 != '\0' && trimsquid==0) || (trimsquid && slash_count) ) |
| 474 | { |
| 475 | if (verbose) |
| 476 | { |
| 477 | fprintf(stderr,"%s",msg_big_req); |
| 478 | if (debug_mode) fprintf(stderr,": %s\n",cpx); |
| 479 | else fprintf(stderr,"\n"); |
| 480 | } |
| 481 | while (*cp1 != '\0') cp1++; |
| 482 | } |
| 483 | if (cp1 < eob) cp1++; |
| 484 | |
| 485 | *cp2++ = '\"'; |
| 486 | |
| 487 | /* IDENT (authuser) field */ |
| 488 | cpx = cp1; |
| 489 | cp2 = log_rec.ident; |
| 490 | eos = (cp1+MAXIDENT-1); |
| 491 | if (eos >= eob) eos=eob-1; |
| 492 | |
| 493 | while (*cp1 == ' ') cp1++; /* skip white space */ |
| 494 | |
| 495 | while ( (*cp1 != ' ' && *cp1!='\0') && (cp1 < eos) ) *cp2++=*cp1++; |
| 496 | |
| 497 | *cp2--='\0'; |
| 498 | |
| 499 | if (cp1 >= eob) return 0; |
| 500 | |
| 501 | /* strip trailing space(s) */ |
| 502 | while (*cp2==' ') *cp2--='\0'; |
| 503 | |
| 504 | /* we have no interest in the remaining fields */ |
| 505 | return 1; |
| 506 | } |
| 507 | |
| 508 | /*********************************************/ |
| 509 | /* PARSE_RECORD_W3C - w3c log handler */ |
| 510 | /*********************************************/ |
| 511 | |
| 512 | /* field index structure */ |
/*
 * Column positions taken from a "#Fields:" header line.  Each member
 * holds the 1-based position of the named column; a value of 0 means the
 * column was not listed in the header.
 */
struct field_index_struct
{
   int date;                  /* position of "date"            */
   int time;                  /* position of "time"            */
   int ip;                    /* position of "c-ip"            */
   int username;              /* position of "cs-username"     */
   int method;                /* position of "cs-method"       */
   int url;                   /* position of "cs-uri-stem"     */
   int query;                 /* position of "cs-uri-query"    */
   int status;                /* position of "sc-status"       */
   int size;                  /* position of "sc-bytes"        */
   int referer;               /* position of "cs(Referer)"     */
   int agent;                 /* position of "cs(User-Agent)"  */
   int fields;                /* total number of columns       */
};
| 528 | |
| 529 | /* field structure */ |
/*
 * Pointers to the individual column values of one data line.  Each
 * member points at the start of the corresponding '\0'-terminated field
 * inside the record buffer, or is NULL when the line has no such column.
 */
struct fields_struct
{
   char *date;                /* date text                     */
   char *time;                /* time text                     */
   char *ip;                  /* client address                */
   char *username;            /* authenticated user name       */
   char *method;              /* request method                */
   char *url;                 /* requested URL                 */
   char *query;               /* query string                  */
   char *status;              /* status code text              */
   char *size;                /* transfer size text            */
   char *referer;             /* referrer                      */
   char *agent;               /* user agent                    */
};
| 544 | |
| 545 | int parse_record_w3c(char *buffer) |
| 546 | { |
| 547 | int size; |
| 548 | char *eob; |
| 549 | char *cp; |
| 550 | int index; |
| 551 | static struct field_index_struct field_index; |
| 552 | struct fields_struct fields; |
| 553 | struct tm gm_time, *local_time; |
| 554 | time_t timestamp; |
| 555 | |
| 556 | memset(&gm_time, 0, sizeof(struct tm)); |
| 557 | size = strlen(buffer); /* get length of buffer */ |
| 558 | eob = buffer + size; /* calculate end of buffer */ |
| 559 | |
| 560 | /* remove line end markers, reduce eob accordingly */ |
| 561 | cp = eob; |
| 562 | while(cp>buffer) |
| 563 | { |
| 564 | cp--; |
| 565 | if (*cp == '\r' || *cp=='\n') |
| 566 | { |
| 567 | *cp = '\0'; |
| 568 | eob--; |
| 569 | } |
| 570 | else |
| 571 | break; |
| 572 | } |
| 573 | |
| 574 | fmt_logrec(buffer); /* seperate fields with \0's */ |
| 575 | |
| 576 | cp = buffer; |
| 577 | |
| 578 | /* Check if the line is empty or a line suffers from the IIS |
| 579 | Null-Character bug and abort parsing if found. */ |
| 580 | if (*cp == '\0') return 0; |
| 581 | |
| 582 | /* If it's a header line ignore it or parse the Fields header if found */ |
| 583 | if (*cp == '#') |
| 584 | { |
| 585 | cp++; |
| 586 | if (!strcmp(cp, "Fields:")) |
| 587 | { |
| 588 | /* Reset the field indices */ |
| 589 | memset(&field_index, 0, sizeof(struct field_index_struct)); |
| 590 | while (*cp) cp++; |
| 591 | cp++; |
| 592 | index = 1; |
| 593 | while (cp < eob) |
| 594 | { |
| 595 | /* Set the field index */ |
| 596 | if (!strcmp(cp, "date")) field_index.date = index; |
| 597 | if (!strcmp(cp, "time")) field_index.time = index; |
| 598 | if (!strcmp(cp, "c-ip")) field_index.ip = index; |
| 599 | if (!strcmp(cp, "cs-method")) field_index.method = index; |
| 600 | if (!strcmp(cp, "cs-uri-stem")) field_index.url = index; |
| 601 | if (!strcmp(cp, "cs-uri-query")) field_index.query = index; |
| 602 | if (!strcmp(cp, "sc-status")) field_index.status = index; |
| 603 | if (!strcmp(cp, "cs(Referer)")) field_index.referer = index; |
| 604 | if (!strcmp(cp, "sc-bytes")) field_index.size = index; |
| 605 | if (!strcmp(cp, "cs(User-Agent)")) field_index.agent = index; |
| 606 | if (!strcmp(cp, "cs-username")) field_index.username = index; |
| 607 | |
| 608 | /* Continue with the next field */ |
| 609 | while (*cp) cp++; |
| 610 | cp++; |
| 611 | index++; |
| 612 | } |
| 613 | field_index.fields = index -1; |
| 614 | } |
| 615 | |
| 616 | /* Return because this header line is completely parsed */ |
| 617 | return 0; |
| 618 | } |
| 619 | |
| 620 | /* A data line has been found */ |
| 621 | |
| 622 | /* Check if the number of entries in this line are conform to the |
| 623 | format specified in the header */ |
| 624 | index = 1; |
| 625 | while (cp < eob) |
| 626 | { |
| 627 | while (*cp) cp++; |
| 628 | cp++; |
| 629 | index++; |
| 630 | } |
| 631 | if (index-1 != field_index.fields) return 0; |
| 632 | |
| 633 | /* Reset pointer */ |
| 634 | cp = buffer; |
| 635 | |
| 636 | /* Reset the field pointers and begin parsing the data line */ |
| 637 | memset(&fields, 0, sizeof(struct fields_struct)); |
| 638 | index = 1; |
| 639 | while (cp < eob) |
| 640 | { |
| 641 | /* Set the field pointers */ |
| 642 | if (index == field_index.date) fields.date = cp; |
| 643 | if (index == field_index.time) fields.time = cp; |
| 644 | if (index == field_index.ip) fields.ip = cp; |
| 645 | if (index == field_index.method) fields.method = cp; |
| 646 | if (index == field_index.url) fields.url = cp; |
| 647 | if (index == field_index.query) fields.query = cp; |
| 648 | if (index == field_index.status) fields.status = cp; |
| 649 | if (index == field_index.referer) fields.referer = cp; |
| 650 | if (index == field_index.size) fields.size = cp; |
| 651 | if (index == field_index.agent) fields.agent = cp; |
| 652 | if (index == field_index.username) fields.username = cp; |
| 653 | |
| 654 | /* Continue with the next data field */ |
| 655 | while (*cp) cp++; |
| 656 | cp++; |
| 657 | index++; |
| 658 | } |
| 659 | |
| 660 | /* Save URL */ |
| 661 | if (fields.url) |
| 662 | { |
| 663 | cp = fields.url; |
| 664 | while (*cp) { if (*cp=='+') *cp=' '; cp++; } |
| 665 | |
| 666 | /* If no HTTP Method, force to "NONE" */ |
| 667 | if (fields.method && (fields.method[0]=='-')) |
| 668 | fields.method="NONE"; |
| 669 | |
| 670 | if (fields.query && (fields.query[0]!='-')) |
| 671 | snprintf(log_rec.url, MAXURL, "\"%s %s?%s\"", |
| 672 | fields.method, fields.url, fields.query); |
| 673 | else snprintf(log_rec.url, MAXURL, "\"%s %s\"", |
| 674 | fields.method, fields.url); |
| 675 | } |
| 676 | else return 0; |
| 677 | |
| 678 | /* Save hostname */ |
| 679 | if (fields.ip) strncpy(log_rec.hostname, fields.ip, MAXHOST - 1); |
| 680 | |
| 681 | /* Save response code */ |
| 682 | if (fields.status) log_rec.resp_code = atoi(fields.status); |
| 683 | |
| 684 | /* Save referer */ |
| 685 | if (fields.referer) strncpy(log_rec.refer, fields.referer, MAXREF - 1); |
| 686 | |
| 687 | /* Save transfer size */ |
| 688 | if (fields.size) log_rec.xfer_size = strtoul(fields.size, NULL, 10); |
| 689 | |
| 690 | /* Save user agent */ |
| 691 | if (fields.agent) |
| 692 | { |
| 693 | cp = fields.agent; |
| 694 | while (*cp) { if (*cp=='+') *cp=' '; cp++; } |
| 695 | strncpy(log_rec.agent, fields.agent, MAXAGENT - 1); |
| 696 | } |
| 697 | |
| 698 | /* Save auth username */ |
| 699 | if (fields.username) strncpy(log_rec.ident, fields.username, MAXIDENT - 1); |
| 700 | |
| 701 | /* Parse date and time and save it */ |
| 702 | if (fields.date) |
| 703 | { |
| 704 | gm_time.tm_year = atoi(fields.date); |
| 705 | if (gm_time.tm_year > 1900) gm_time.tm_year-=1900; |
| 706 | while ((fields.date[0] != '\0') && (fields.date[0] != '-')) fields.date++; |
| 707 | if (fields.date[0] == '\0') return 0; |
| 708 | fields.date++; |
| 709 | gm_time.tm_mon = atoi(fields.date) - 1; |
| 710 | while ((fields.date[0] != '\0') && (fields.date[0] != '-')) fields.date++; |
| 711 | if (fields.date[0] == '\0') return 0; |
| 712 | fields.date++; |
| 713 | gm_time.tm_mday = atoi(fields.date); |
| 714 | } |
| 715 | if (fields.time) |
| 716 | { |
| 717 | gm_time.tm_hour = atoi(fields.time); |
| 718 | while ((fields.time[0] != '\0') && (fields.time[0] != ':')) fields.time++; |
| 719 | if (fields.time[0] == '\0') return 0; |
| 720 | fields.time++; |
| 721 | gm_time.tm_min = atoi(fields.time); |
| 722 | while ((fields.time[0] != '\0') && (fields.time[0] != ':')) fields.time++; |
| 723 | if (fields.time[0] == '\0') return 0; |
| 724 | fields.time++; |
| 725 | gm_time.tm_sec = atoi(fields.time); |
| 726 | } |
| 727 | |
| 728 | /* Convert GMT to localtime */ |
| 729 | gm_time.tm_isdst = -1; /* force dst check */ |
| 730 | timestamp = mktime(&gm_time); /* get time in sec */ |
| 731 | #ifdef HAVE_ALTZONE |
| 732 | timestamp-=(gm_time.tm_isdst)?altzone:timezone; /* solaris & friends */ |
| 733 | #else |
| 734 | timestamp = mktime(&gm_time)+gm_time.tm_gmtoff; /* glibc systems */ |
| 735 | #endif |
| 736 | local_time = localtime(×tamp); /* update tm struct */ |
| 737 | strftime(log_rec.datetime, sizeof(log_rec.datetime),/* and format sting */ |
| 738 | "[%d/%b/%Y:%H:%M:%S -0000]", local_time); /* for log_rec field */ |
| 739 | return 1; |
| 740 | } |