Imported Upstream version 2.23.05
[hcoop/zz_old/debian/webalizer.git] / webalizer.c
CommitLineData
e015f748
CE
1/*
2 webalizer - a web server log analysis program
3
4 Copyright (C) 1997-2011 Bradford L. Barrett
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version, and provided that the above
10 copyright and permission notice is included with all distributed
11 copies of this or derived software.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
21
22*/
23
24/*********************************************/
25/* STANDARD INCLUDES */
26/*********************************************/
27
28/* Fix broken Zlib 64 bitness */
29#if _FILE_OFFSET_BITS == 64
30#ifndef _LARGEFILE64_SOURCE
31#define _LARGEFILE64_SOURCE 1
32#endif
33#endif
34
35#include <time.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <string.h>
39#include <errno.h>
40#include <unistd.h> /* normal stuff */
41#include <locale.h>
42#include <ctype.h>
43#include <sys/utsname.h>
44#include <zlib.h>
45#include <sys/stat.h>
46
47/* ensure getopt */
48#ifdef HAVE_GETOPT_H
49#include <getopt.h>
50#endif
51
52/* ensure sys/types */
53#ifndef _SYS_TYPES_H
54#include <sys/types.h>
55#endif
56
57/* Need socket header? */
58#ifdef HAVE_SYS_SOCKET_H
59#include <sys/socket.h>
60#endif
61
62/* some systems need this */
63#ifdef HAVE_MATH_H
64#include <math.h>
65#endif
66
67#ifdef USE_DNS
68#include <netdb.h>
69#include <netinet/in.h>
70#include <arpa/inet.h>
71#include <db.h>
72#endif /* USE_DNS */
73
74#ifdef USE_GEOIP
75#include <GeoIP.h>
76#endif
77
78#ifdef USE_BZIP
79#include <bzlib.h>
80int bz2_rewind(void **, char *, char *);
81#endif
82
83#include "webalizer.h" /* main header */
84#include "output.h"
85#include "parser.h"
86#include "preserve.h"
87#include "hashtab.h"
88#include "linklist.h"
89#include "webalizer_lang.h" /* lang. support */
90#ifdef USE_DNS
91#include "dns_resolv.h"
92#endif
93
94/* internal function prototypes */
95
/* Forward declarations for helpers used by main() in this file. */
96void clear_month(); /* clear monthly data (called on month change) */
97char *unescape(char *); /* unescape URL, referrer and ident fields */
98void print_opts(char *); /* print help/options (-h) */
99void print_version(); /* print version info (-V) */
100int isurlchar(unsigned char, int); /* valid URL char test */
101void get_config(char *); /* read a config file */
102static char *save_opt(char *); /* save conf option */
103void srch_string(char *); /* search string analysis */
104char *get_domain(char *); /* return domain name */
105void agent_mangle(char *); /* reformat user agent */
106char *our_gzgets(void *, char *, int); /* our gzgets (reads compressed logs) */
107int ouricmp(char *, char *); /* case insensitive compare */
108int isipaddr(char *); /* is IP address test */
109
110/*********************************************/
111/* GLOBAL VARIABLES */
112/*********************************************/
113
/* Program identification; version and editlvl are printed in the startup banner. */
114char *version = "2.23"; /* program version */
115char *editlvl = "05"; /* edit level */
116char *moddate = "14-Apr-2011"; /* modification date */
117char *copyright = "Copyright 1997-2011 by Bradford L. Barrett"; /* copyright notice */
118
/* Run-time configuration defaults; main() overrides many of these from the command line. */
119int verbose = 2; /* 2=verbose (-v), 1=errors only (-q), 0=none (-Q) */
120int debug_mode = 0; /* debug mode flag (-d, also set by -v) */
121int time_me = 0; /* timing display flag (-T) */
122int local_time = 1; /* 1=localtime 0=GMT (UTC) */
123int hist_gap = 0; /* 1=error w/hist, save bkp */
124int ignore_hist = 0; /* history flag (1=skip, -i) */
125int ignore_state = 0; /* state flag (1=skip, -b) */
126int default_index= 1; /* add default "index." alias (1=yes) */
127int hourly_graph = 1; /* hourly graph display (-G disables) */
128int hourly_stats = 1; /* hourly stats table (-H disables) */
129int daily_graph = 1; /* daily graph display */
130int daily_stats = 1; /* daily stats table */
131int ctry_graph = 1; /* country graph display (-Y disables) */
132int shade_groups = 1; /* Group shading 0=no 1=yes */
133int hlite_groups = 1; /* Group hlite 0=no 1=yes */
134int mangle_agent = 0; /* mangle user agents (-M, 0=no) */
135int incremental = 0; /* incremental mode 1=yes (-p) */
136int use_https = 0; /* use 'https://' on URLs */
137int htaccess = 0; /* create .htaccess? (0=no) */
138int stripcgi = 1; /* strip url cgi (0=no) */
139int normalize = 1; /* normalize CLF URL (0=no, -Z disables) */
140int trimsquid = 0; /* trim squid urls (0=no) */
141int searchcasei = 1; /* case insensitive search */
142int visit_timeout= 1800; /* visit timeout (seconds, -m) */
143int graph_legend = 1; /* graph legend (1=yes, -L disables) */
144int graph_lines = 2; /* graph lines (0=none, capped at 20 in main) */
145int fold_seq_err = 0; /* fold out-of-sequence records (0=no, -f) */
146int log_type = LOG_CLF; /* log type (default=CLF, -F selects) */
147int group_domains= 0; /* Group domains 0=none (-g) */
148int hide_sites = 0; /* Hide ind. sites (0=no, -X) */
149int link_referrer= 0; /* Link referrers (0=no) */
150char *hname = NULL; /* hostname for reports (-n; NULL=use uname) */
151char *state_fname = "webalizer.current"; /* run state file name */
152char *hist_fname = "webalizer.hist"; /* name of history file */
153char *html_ext = "html"; /* HTML file suffix (-x) */
154char *dump_ext = "tab"; /* Dump file suffix */
155char *conf_fname = NULL; /* name of config file */
156char *log_fname = NULL; /* log file name (NULL=read STDIN) */
157char *out_dir = NULL; /* output directory (-o) */
158char *blank_str = ""; /* blank string */
159char *geodb_fname = NULL; /* GeoDB database filename (-J) */
160char *dns_cache = NULL; /* DNS cache file name (-D) */
161int dns_children = 0; /* DNS children (0=don't do)*/
162int cache_ips = 0; /* CacheIPs in DB (0=no) */
163int cache_ttl = 7; /* DNS Cache TTL (days, forced to 1..100 with USE_DNS) */
164int geodb = 0; /* Use GeoDB (0=no, -j) */
165int graph_mths = 12; /* # months in index graph (12..GRAPHMAX) */
166int index_mths = 12; /* # months in index table (12..HISTSIZE) */
167int year_hdrs = 1; /* index year separators */
168int year_totals = 1; /* index year subtotals */
169int use_flags = 0; /* Show flags in ctry table (set by -z) */
170char *flag_dir = "flags"; /* location of flag icons (-z) */
171
/* GeoIP lookup settings, only present when built with USE_GEOIP. */
172#ifdef USE_GEOIP
173int geoip = 0; /* Use GeoIP (0=no, -w); main clears it when GeoDB is enabled */
174char *geoip_db = NULL; /* GeoIP database filename (-W; NULL=library default) */
175GeoIP *geo_fp = NULL; /* GeoIP database handle */
176#endif
177
/* Sizes of the "top N" report tables. */
178int ntop_sites = 30; /* top n sites to display (-S) */
179int ntop_sitesK = 10; /* top n sites (by kbytes) */
180int ntop_urls = 30; /* top n url's to display (-U) */
181int ntop_urlsK = 10; /* top n url's (by kbytes) */
182int ntop_entry = 10; /* top n entry url's (-e; 0 for FTP logs) */
183int ntop_exit = 10; /* top n exit url's (-E; 0 for FTP logs) */
184int ntop_refs = 30; /* top n referrers to display (-R) */
185int ntop_agents = 15; /* top n user agents to display (-A) */
186int ntop_ctrys = 30; /* top n countries to display (-C; capped at # defined) */
187int ntop_search = 20; /* top n search strings (0 for FTP logs) */
188int ntop_users = 20; /* top n users to display */
189
/* "List all" flags (0=no). */
190int all_sites = 0; /* List All sites (0=no) */
191int all_urls = 0; /* List All URLs (0=no) */
192int all_refs = 0; /* List All Referrers */
193int all_agents = 0; /* List All User Agents */
194int all_search = 0; /* List All Search Strings */
195int all_users = 0; /* List All Usernames */
196
/* Tab delimited dump file flags and location. */
197int dump_sites = 0; /* Dump tab delimited sites */
198int dump_urls = 0; /* Dump tab delimited URLs */
199int dump_refs = 0; /* Dump tab delimited Referrers */
200int dump_agents = 0; /* Dump tab delimited User Agents */
201int dump_users = 0; /* Dump tab delimited Usernames */
202int dump_search = 0; /* Dump tab delimited Search strings */
203int dump_header = 0; /* Dump header as first rec */
204char *dump_path = NULL; /* Path for dump files */
205
/* Date/time of the last record accepted by the main loop (cur_month==0 means none yet). */
206int cur_year=0, cur_month=0, /* year/month/day/hour */
207 cur_day=0, cur_hour=0, /* tracking variables */
208 cur_min=0, cur_sec=0;
209
210u_int64_t cur_tstamp=0; /* timestamp of last record processed (secs) */
211u_int64_t rec_tstamp=0; /* timestamp of record being processed (secs) */
212u_int64_t req_tstamp=0; /* cur_tstamp as it was before this record */
213u_int64_t epoch; /* jdate(1,1,1970), used for timestamp adj. */
214
215int check_dup=0; /* check for duplicate records flag */
216int gz_log=COMP_NONE; /* log compression type (COMP_NONE=none) */
217
218double t_xfer=0.0; /* monthly total xfer value */
219u_int64_t t_hit=0,t_file=0,t_site=0, /* monthly total vars */
220 t_url=0,t_ref=0,t_agent=0,
221 t_page=0, t_visit=0, t_user=0;
222
223double tm_xfer[31]; /* daily transfer totals */
224
225u_int64_t tm_hit[31], tm_file[31], /* daily total arrays */
226 tm_site[31], tm_page[31],
227 tm_visit[31];
228
229u_int64_t dt_site; /* daily 'sites' total */
230
231u_int64_t ht_hit=0, mh_hit=0; /* hourly hits totals */
232
233u_int64_t th_hit[24], th_file[24], /* hourly total arrays */
234 th_page[24];
235
236double th_xfer[24]; /* presumably hourly transfer totals (mirrors tm_xfer) */
237
238int f_day,l_day; /* first/last day vars */
239
240struct utsname system_info; /* system info structure (filled by uname) */
241
242u_int64_t ul_bogus =0; /* Dummy counter for groups */
243
244struct log_struct log_rec; /* expanded log storage */
245
246void *zlog_fp; /* compressed logfile ptr (gzopen/BZ2_bzopen) */
247FILE *log_fp; /* regular logfile pointer */
248
249char buffer[BUFSIZE]; /* log file record buffer */
250char tmp_buf[BUFSIZE]; /* saved copy of buffer for error output; also scratch */
251
252CLISTPTR *top_ctrys = NULL; /* Top countries table (calloc'ed in main) */
253
/* NOTE(review): "our_getfs" below presumably refers to our_gzgets -- confirm. */
254#define GZ_BUFSIZE 16384 /* our_getfs buffer size */
255char f_buf[GZ_BUFSIZE]; /* our_getfs buffer */
256char *f_cp=f_buf+GZ_BUFSIZE; /* pointer into the buffer (starts at its end) */
257int f_end=0; /* count to end of buffer */
258
/* Default graph colors as "#rrggbb" strings. */
259char hit_color[] = "#00805c"; /* graph hit color */
260char file_color[] = "#0040ff"; /* graph file color */
261char site_color[] = "#ff8000"; /* graph site color */
262char kbyte_color[] = "#ff0000"; /* graph kbyte color */
263char page_color[] = "#00e0ff"; /* graph page color */
264char visit_color[] = "#ffff00"; /* graph visit color */
265char misc_color[] = "#00e0ff"; /* graph misc color */
266char pie_color1[] = "#800080"; /* pie additional color 1 */
267char pie_color2[] = "#80ffc0"; /* pie additional color 2 */
268char pie_color3[] = "#ff00ff"; /* pie additional color 3 */
269char pie_color4[] = "#ffc080"; /* pie additional color 4 */
270
271/*********************************************/
272/* MAIN - start here */
273/*********************************************/
274
275int main(int argc, char *argv[])
276{
277 int i; /* generic counter */
278 char *cp1, *cp2, *cp3; /* generic char pointers */
279 char host_buf[MAXHOST+1]; /* used to save hostname */
280
281 NLISTPTR lptr; /* generic list pointer */
282
283 extern char *optarg; /* used for command line */
284 extern int optind; /* parsing routine 'getopt' */
285 extern int opterr;
286
287 time_t start_time, end_time; /* program timers */
288 float temp_time; /* temporary time storage */
289
290 int rec_year,rec_month=1,rec_day,rec_hour,rec_min,rec_sec;
291
292 int good_rec =0; /* 1 if we had a good record */
293 u_int64_t total_rec =0; /* Total Records Processed */
294 u_int64_t total_ignore=0; /* Total Records Ignored */
295 u_int64_t total_bad =0; /* Total Bad Records */
296
297 int max_ctry; /* max countries defined */
298
299 /* month names used for parsing logfile (shouldn't be lang specific) */
300 char *log_month[12]={ "jan", "feb", "mar",
301 "apr", "may", "jun",
302 "jul", "aug", "sep",
303 "oct", "nov", "dec"};
304
305 /* stat struct for files */
306 struct stat log_stat;
307
308 /* Assume that LC_CTYPE is what the user wants for non-ASCII chars */
309 setlocale(LC_CTYPE,"");
310
311 /* initalize epoch */
312 epoch=jdate(1,1,1970); /* used for timestamp adj. */
313
314 sprintf(tmp_buf,"%s/webalizer.conf",ETCDIR);
315 /* check for default config file */
316 if (!access("webalizer.conf",F_OK))
317 get_config("webalizer.conf");
318 else if (!access(tmp_buf,F_OK))
319 get_config(tmp_buf);
320
321 /* get command line options */
322 opterr = 0; /* disable parser errors */
323 while ((i=getopt(argc,argv,"a:A:bc:C:dD:e:E:fF:g:GhHiI:jJ:k:K:l:Lm:M:n:N:o:O:pP:qQr:R:s:S:t:Tu:U:vVwW:x:XYz:Z"))!=EOF)
324 {
325 switch (i)
326 {
327 case 'a': add_nlist(optarg,&hidden_agents); break; /* Hide agents */
328 case 'A': ntop_agents=atoi(optarg); break; /* Top agents */
329 case 'b': ignore_state=1; break; /* Ignore state file */
330 case 'c': get_config(optarg); break; /* Config file */
331 case 'C': ntop_ctrys=atoi(optarg); break; /* Top countries */
332 case 'd': debug_mode=1; break; /* Debug */
333 case 'D': dns_cache=optarg; break; /* DNS Cache filename */
334 case 'e': ntop_entry=atoi(optarg); break; /* Top entry pages */
335 case 'E': ntop_exit=atoi(optarg); break; /* Top exit pages */
336 case 'f': fold_seq_err=1; break; /* Fold sequence errs */
337 case 'F': log_type=(tolower(optarg[0])=='f')?
338 LOG_FTP:(tolower(optarg[0])=='s')?
339 LOG_SQUID:(tolower(optarg[0])=='w')?
340 LOG_W3C:LOG_CLF; break; /* define log type */
341 case 'g': group_domains=atoi(optarg); break; /* GroupDomains (0=no) */
342 case 'G': hourly_graph=0; break; /* no hourly graph */
343 case 'h': print_opts(argv[0]); break; /* help */
344 case 'H': hourly_stats=0; break; /* no hourly stats */
345 case 'i': ignore_hist=1; break; /* Ignore history */
346 case 'I': add_nlist(optarg,&index_alias); break; /* Index alias */
347 case 'j': geodb=1; break; /* Enable GeoDB */
348 case 'J': geodb_fname=optarg; break; /* GeoDB db filename */
349 case 'k': graph_mths=atoi(optarg); break; /* # months idx graph */
350 case 'K': index_mths=atoi(optarg); break; /* # months idx table */
351 case 'l': graph_lines=atoi(optarg); break; /* Graph Lines */
352 case 'L': graph_legend=0; break; /* Graph Legends */
353 case 'm': visit_timeout=atoi(optarg); break; /* Visit Timeout */
354 case 'M': mangle_agent=atoi(optarg); break; /* mangle user agents */
355 case 'n': hname=optarg; break; /* Hostname */
356 case 'N': dns_children=atoi(optarg); break; /* # of DNS children */
357 case 'o': out_dir=optarg; break; /* Output directory */
358 case 'O': add_nlist(optarg,&omit_page); break; /* pages not counted */
359 case 'p': incremental=1; break; /* Incremental run */
360 case 'P': add_nlist(optarg,&page_type); break; /* page view types */
361 case 'q': verbose=1; break; /* Quiet (verbose=1) */
362 case 'Q': verbose=0; break; /* Really Quiet */
363 case 'r': add_nlist(optarg,&hidden_refs); break; /* Hide referrer */
364 case 'R': ntop_refs=atoi(optarg); break; /* Top referrers */
365 case 's': add_nlist(optarg,&hidden_sites); break; /* Hide site */
366 case 'S': ntop_sites=atoi(optarg); break; /* Top sites */
367 case 't': msg_title=optarg; break; /* Report title */
368 case 'T': time_me=1; break; /* TimeMe */
369 case 'u': add_nlist(optarg,&hidden_urls); break; /* hide URL */
370 case 'U': ntop_urls=atoi(optarg); break; /* Top urls */
371 case 'v': verbose=2; debug_mode=1; break; /* Verbose */
372 case 'V': print_version(); break; /* Version */
373#ifdef USE_GEOIP
374 case 'w': geoip=1; break; /* Enable GeoIP */
375 case 'W': geoip_db=optarg; break; /* GeoIP database name */
376#endif
377 case 'x': html_ext=optarg; break; /* HTML file extension */
378 case 'X': hide_sites=1; break; /* Hide ind. sites */
379 case 'Y': ctry_graph=0; break; /* Supress ctry graph */
380 case 'Z': normalize=0; break; /* Dont normalize URLs */
381 case 'z': use_flags=1; flag_dir=optarg; break; /* Ctry flag dir */
382 }
383 }
384
385 if (argc - optind != 0) log_fname = argv[optind];
386 if ( log_fname && (log_fname[0]=='-')) log_fname=NULL; /* force STDIN? */
387
388 /* check for gzipped file - .gz */
389 if (log_fname) if (!strcmp((log_fname+strlen(log_fname)-3),".gz"))
390 gz_log=COMP_GZIP;
391
392#ifdef USE_BZIP
393 /* check for bzip file - .bz2 */
394 if (log_fname) if (!strcmp((log_fname+strlen(log_fname)-4),".bz2"))
395 gz_log=COMP_BZIP;
396#endif
397
398 /* setup our internal variables */
399 init_counters(); /* initalize (zero) main counters */
400 memset(hist, 0, sizeof(hist)); /* initalize (zero) history array */
401
402 /* add default index. alias if needed */
403 if (default_index) add_nlist("index.",&index_alias);
404
405 if (page_type==NULL) /* check if page types present */
406 {
407 if ((log_type==LOG_CLF)||(log_type==LOG_SQUID)||(log_type==LOG_W3C))
408 {
409 add_nlist("htm*" ,&page_type); /* if no page types specified, we */
410 add_nlist("cgi" ,&page_type); /* use the default ones here... */
411 if (!isinlist(page_type,html_ext)) add_nlist(html_ext,&page_type);
412 }
413 else add_nlist("txt" ,&page_type); /* FTP logs default to .txt */
414 }
415
416 for (max_ctry=0;ctry[max_ctry].desc;max_ctry++);
417 if (ntop_ctrys > max_ctry) ntop_ctrys = max_ctry; /* force upper limit */
418 if (graph_lines> 20) graph_lines= 20; /* keep graphs sane! */
419 if (graph_mths<12) graph_mths=12;
420 if (graph_mths>GRAPHMAX) graph_mths=GRAPHMAX;
421 if (index_mths<12) index_mths=12;
422 if (index_mths>HISTSIZE) index_mths=HISTSIZE;
423
424 if (log_type == LOG_FTP)
425 {
426 /* disable stuff for ftp logs */
427 ntop_entry=ntop_exit=0;
428 ntop_search=0;
429 }
430 else
431 {
432 if (search_list==NULL)
433 {
434 /* If no search engines defined, define some :) */
435 add_glist(".google. q=" ,&search_list);
436 add_glist("yahoo.com p=" ,&search_list);
437 add_glist("altavista.com q=" ,&search_list);
438 add_glist("aolsearch. query=" ,&search_list);
439 add_glist("ask.co q=" ,&search_list);
440 add_glist("eureka.com q=" ,&search_list);
441 add_glist("lycos.com query=" ,&search_list);
442 add_glist("hotbot.com MT=" ,&search_list);
443 add_glist("msn.com q=" ,&search_list);
444 add_glist("infoseek.com qt=" ,&search_list);
445 add_glist("webcrawler searchText=" ,&search_list);
446 add_glist("excite search=" ,&search_list);
447 add_glist("netscape.com query=" ,&search_list);
448 add_glist("mamma.com query=" ,&search_list);
449 add_glist("alltheweb.com q=" ,&search_list);
450 add_glist("northernlight.com qr=" ,&search_list);
451 }
452 }
453
454 /* ensure entry/exits don't exceed urls */
455 i=(ntop_urls>ntop_urlsK)?ntop_urls:ntop_urlsK;
456 if (ntop_entry>i) ntop_entry=i;
457 if (ntop_exit>i) ntop_exit=i;
458
459 for (i=0;i<MAXHASH;i++)
460 {
461 sm_htab[i]=sd_htab[i]=NULL; /* initalize hash tables */
462 um_htab[i]=NULL;
463 rm_htab[i]=NULL;
464 am_htab[i]=NULL;
465 sr_htab[i]=NULL;
466 }
467
468 /* Be polite and announce yourself... */
469 if (verbose>1)
470 {
471 uname(&system_info);
472 printf("Webalizer V%s-%s (%s %s %s) %s\n", version,editlvl,
473 system_info.sysname, system_info.release,
474 system_info.machine,language);
475 }
476
477#ifndef USE_DNS
478 if (strstr(argv[0],"webazolver")!=0)
479 /* DNS support not present, aborting... */
480 { printf("%s\n",msg_dns_abrt); exit(1); }
481#else
482 /* Force sane values for cache TTL */
483 if (cache_ttl<1) cache_ttl=1;
484 if (cache_ttl>100) cache_ttl=100;
485#endif /* USE_DNS */
486
487 /* open log file */
488 if (log_fname)
489 {
490 /* stat the file */
491 if ( !(lstat(log_fname, &log_stat)) )
492 {
493 /* check if the file a symlink */
494 if ( S_ISLNK(log_stat.st_mode) )
495 {
496 if (verbose)
497 fprintf(stderr,"%s %s (symlink)\n",msg_log_err,log_fname);
498 exit(EBADF);
499 }
500 }
501
502 if (gz_log)
503 {
504 /* open compressed file */
505#ifdef USE_BZIP
506 if (gz_log==COMP_BZIP)
507 zlog_fp = BZ2_bzopen(log_fname,"rb");
508 else
509#endif
510 zlog_fp = gzopen(log_fname, "rb");
511 if (zlog_fp==Z_NULL)
512 {
513 /* Error: Can't open log file ... */
514 fprintf(stderr, "%s %s (%d)\n",msg_log_err,log_fname,ENOENT);
515 exit(ENOENT);
516 }
517 }
518 else
519 {
520 /* open regular file */
521 log_fp = fopen(log_fname,"r");
522 if (log_fp==NULL)
523 {
524 /* Error: Can't open log file ... */
525 fprintf(stderr, "%s %s\n",msg_log_err,log_fname);
526 exit(1);
527 }
528 }
529 }
530
531 /* Using logfile ... */
532 if (verbose>1)
533 {
534 printf("%s %s (",msg_log_use,log_fname?log_fname:"STDIN");
535 if (gz_log==COMP_GZIP) printf("gzip-");
536#ifdef USE_BZIP
537 if (gz_log==COMP_BZIP) printf("bzip-");
538#endif
539 switch (log_type)
540 {
541 /* display log file type hint */
542 case LOG_CLF: printf("clf)\n"); break;
543 case LOG_FTP: printf("ftp)\n"); break;
544 case LOG_SQUID: printf("squid)\n"); break;
545 case LOG_W3C: printf("w3c)\n"); break;
546 }
547 }
548
549 /* switch directories if needed */
550 if (out_dir)
551 {
552 if (chdir(out_dir) != 0)
553 {
554 /* Error: Can't change directory to ... */
555 fprintf(stderr, "%s %s\n",msg_dir_err,out_dir);
556 exit(1);
557 }
558 }
559
560#ifdef USE_DNS
561 if (strstr(argv[0],"webazolver")!=0)
562 {
563 if (!dns_children) dns_children=5; /* default dns children if needed */
564 if (!dns_cache)
565 {
566 /* No cache file specified, aborting... */
567 fprintf(stderr,"%s\n",msg_dns_nocf); /* Must have a cache file */
568 exit(1);
569 }
570 }
571
572 if (dns_cache && dns_children) /* run-time resolution */
573 {
574 if (dns_children > MAXCHILD) dns_children=MAXCHILD;
575 /* DNS Lookup (#children): */
576 if (verbose>1) printf("%s (%d): ",msg_dns_rslv,dns_children);
577 fflush(stdout);
578 (gz_log)?dns_resolver(zlog_fp):dns_resolver(log_fp);
579#ifdef USE_BZIP
580 (gz_log==COMP_BZIP)?bz2_rewind(&zlog_fp, log_fname, "rb"):
581#endif
582 (gz_log==COMP_GZIP)?gzrewind(zlog_fp):
583 (log_fname)?rewind(log_fp):exit(0);
584 }
585
586 if (strstr(argv[0],"webazolver")!=0) exit(0); /* webazolver exits here */
587
588 if (dns_cache)
589 {
590 if (!open_cache()) { dns_cache=NULL; dns_db=NULL; }
591 else
592 {
593 /* Using DNS cache file <filaneme> */
594 if (verbose>1) printf("%s %s\n",msg_dns_usec,dns_cache);
595 }
596 }
597
598 /* Open GeoDB? */
599 if (geodb)
600 {
601 geo_db=geodb_open(geodb_fname);
602 if (geo_db==NULL)
603 {
604 if (verbose) printf("%s: %s\n",msg_geo_open,
605 (geodb_fname)?geodb_fname:msg_geo_dflt);
606 if (verbose) printf("GeoDB %s\n",msg_geo_nolu);
607 geodb=0;
608 }
609 else if (verbose>1) printf("%s %s\n",
610 msg_geo_use,geodb_ver(geo_db,buffer));
611#ifdef USE_GEOIP
612 if (geoip) geoip=0; /* Disable GeoIP if using GeoDB */
613#endif
614 }
615#endif /* USE_DNS */
616
617#ifdef USE_GEOIP
618 /* open GeoIP database */
619 if (geoip)
620 {
621 if (geoip_db!=NULL)
622 geo_fp=GeoIP_open(geoip_db, GEOIP_MEMORY_CACHE);
623 else
624 geo_fp=GeoIP_new(GEOIP_MEMORY_CACHE);
625
626 /* Did we open one? */
627 if (geo_fp==NULL)
628 {
629 /* couldn't open.. warn user */
630 if (verbose) printf("GeoIP %s\n",msg_geo_nolu);
631 geoip=0;
632 }
633 else if (verbose>1) printf("%s %s (%s)\n",msg_geo_use,
634 GeoIPDBDescription[(int)geo_fp->databaseType],
635 (geoip_db==NULL)?msg_geo_dflt:geo_fp->file_path);
636 }
637#endif /* USE_GEOIP */
638
639 /* Creating output in ... */
640 if (verbose>1)
641 printf("%s %s\n",msg_dir_use,out_dir?out_dir:msg_cur_dir);
642
643 /* prep hostname */
644 if (!hname)
645 {
646 if (uname(&system_info)) hname="localhost";
647 else hname=system_info.nodename;
648 }
649
650 /* Hostname for reports is ... */
651 if (strlen(hname)) if (verbose>1) printf("%s '%s'\n",msg_hostname,hname);
652
653 /* get past history */
654 if (ignore_hist) { if (verbose>1) printf("%s\n",msg_ign_hist); }
655 else get_history();
656
657 if (incremental) /* incremental processing? */
658 {
659 if ((i=restore_state())) /* restore internal data structs */
660 {
661 /* Error: Unable to restore run data (error num) */
662 /* if (verbose) fprintf(stderr,"%s (%d)\n",msg_bad_data,i); */
663 fprintf(stderr,"%s (%d)\n",msg_bad_data,i);
664 exit(1);
665 }
666 }
667
668 /* Allocate memory for our TOP countries array */
669 if (ntop_ctrys != 0)
670 { if ( (top_ctrys=calloc(ntop_ctrys,sizeof(CLISTPTR))) == NULL)
671 /* Can't get memory, Top Countries disabled! */
672 {if (verbose) fprintf(stderr,"%s\n",msg_nomem_tc); ntop_ctrys=0;}}
673
674 /* get processing start time */
675 start_time = time(NULL);
676
677 /*********************************************/
678 /* MAIN PROCESS LOOP - read through log file */
679 /*********************************************/
680
681 while ( (gz_log)?(our_gzgets(zlog_fp,buffer,BUFSIZE) != Z_NULL):
682 (fgets(buffer,BUFSIZE,log_fname?log_fp:stdin) != NULL))
683 {
684 total_rec++;
685 if (strlen(buffer) == (BUFSIZE-1))
686 {
687 if (verbose)
688 {
689 fprintf(stderr,"%s",msg_big_rec);
690 if (debug_mode) fprintf(stderr,":\n%s",buffer);
691 else fprintf(stderr,"\n");
692 }
693
694 total_bad++; /* bump bad record counter */
695
696 /* get the rest of the record */
697 while ( (gz_log)?(our_gzgets(zlog_fp,buffer,BUFSIZE)!=Z_NULL):
698 (fgets(buffer,BUFSIZE,log_fname?log_fp:stdin)!=NULL))
699 {
700 if (strlen(buffer) < BUFSIZE-1)
701 {
702 if (debug_mode && verbose) fprintf(stderr,"%s\n",buffer);
703 break;
704 }
705 if (debug_mode && verbose) fprintf(stderr,"%s",buffer);
706 }
707 continue; /* go get next record if any */
708 }
709
710 /* got a record... */
711 strcpy(tmp_buf, buffer); /* save buffer in case of error */
712 if (parse_record(buffer)) /* parse the record */
713 {
714 /*********************************************/
715 /* PASSED MINIMAL CHECKS, DO A LITTLE MORE */
716 /*********************************************/
717
718 /* convert month name to lowercase */
719 for (i=4;i<7;i++)
720 log_rec.datetime[i]=tolower(log_rec.datetime[i]);
721
722 /* lowercase sitename/IPv6 addresses */
723 cp1=log_rec.hostname;
724 while (*cp1++!='\0') *cp1=tolower(*cp1);
725
726 /* get year/month/day/hour/min/sec values */
727 for (i=0;i<12;i++)
728 {
729 if (strncmp(log_month[i],&log_rec.datetime[4],3)==0)
730 { rec_month = i+1; break; }
731 }
732
733 rec_year=atoi(&log_rec.datetime[8]); /* get year number (int) */
734 rec_day =atoi(&log_rec.datetime[1]); /* get day number */
735 rec_hour=atoi(&log_rec.datetime[13]); /* get hour number */
736 rec_min =atoi(&log_rec.datetime[16]); /* get minute number */
737 rec_sec =atoi(&log_rec.datetime[19]); /* get second number */
738
739 /* Kludge for Netscape server time (0-24?) error */
740 if (rec_hour>23) rec_hour=0;
741
742 /* minimal sanity check on date */
743 if ((i>=12)||(rec_min>59)||(rec_sec>60)||(rec_year<1990))
744 {
745 total_bad++; /* if a bad date, bump counter */
746 if (verbose)
747 {
748 fprintf(stderr,"%s: %s [%llu]",
749 msg_bad_date,log_rec.datetime,total_rec);
750 if (debug_mode) fprintf(stderr,":\n%s\n",tmp_buf);
751 else fprintf(stderr,"\n");
752 }
753 continue; /* and ignore this record */
754 }
755
756 /*********************************************/
757 /* GOOD RECORD, CHECK INCREMENTAL/TIMESTAMPS */
758 /*********************************************/
759
760 /* Flag as a good one */
761 good_rec = 1;
762
763 /* get current records timestamp (seconds since epoch) */
764 req_tstamp=cur_tstamp;
765 rec_tstamp=((jdate(rec_day,rec_month,rec_year)-epoch)*86400)+
766 (rec_hour*3600)+(rec_min*60)+rec_sec;
767
768 /* Do we need to check for duplicate records? (incremental mode) */
769 if (check_dup)
770 {
771 /* check if less than/equal to last record processed */
772 if ( rec_tstamp <= cur_tstamp )
773 {
774 /* if it is, assume we have already processed and ignore it */
775 total_ignore++;
776 continue;
777 }
778 else
779 {
780 /* if it isn't.. disable any more checks this run */
781 check_dup=0;
782 /* now check if it's a new month */
783 if ( (cur_month != rec_month) || (cur_year != rec_year) )
784 {
785 clear_month();
786 cur_sec = rec_sec; /* set current counters */
787 cur_min = rec_min;
788 cur_hour = rec_hour;
789 cur_day = rec_day;
790 cur_month = rec_month;
791 cur_year = rec_year;
792 cur_tstamp= rec_tstamp;
793 f_day=l_day=rec_day; /* reset first and last day */
794 }
795 }
796 }
797
798 /* check for out of sequence records */
799 if (rec_tstamp/3600 < cur_tstamp/3600)
800 {
801 if (!fold_seq_err && ((rec_tstamp+SLOP_VAL)/3600<cur_tstamp/3600) )
802 { total_ignore++; continue; }
803 else
804 {
805 rec_sec = cur_sec; /* if folding sequence */
806 rec_min = cur_min; /* errors, just make it */
807 rec_hour = cur_hour; /* look like the last */
808 rec_day = cur_day; /* good records timestamp */
809 rec_month = cur_month;
810 rec_year = cur_year;
811 rec_tstamp= cur_tstamp;
812 }
813 }
814 cur_tstamp=rec_tstamp; /* update current timestamp */
815
816 /*********************************************/
817 /* DO SOME PRE-PROCESS FORMATTING */
818 /*********************************************/
819
820 /* un-escape URL */
821 unescape(log_rec.url);
822
823 /* fix URL field */
824 cp1 = cp2 = log_rec.url;
825 /* handle null '-' case here... */
826 if (*++cp1 == '-') strcpy(log_rec.url,"/INVALID-URL");
827 else
828 {
829 /* strip actual URL out of request */
830 while ( (*cp1 != ' ') && (*cp1 != '\0') ) cp1++;
831 if (*cp1 != '\0')
832 {
833 /* scan to begin of actual URL field */
834 while ((*cp1 == ' ') && (*cp1 != '\0')) cp1++;
835 /* remove duplicate / if needed */
836 while (( *cp1=='/') && (*(cp1+1)=='/')) cp1++;
837 while (( *cp1!='\0')&&(*cp1!='"')) *cp2++=*cp1++;
838 *cp2='\0';
839 }
840 }
841
842 /* strip query portion of cgi scripts */
843 cp1 = log_rec.url;
844 while (*cp1 != '\0')
845 if (!isurlchar(*cp1, stripcgi)) { *cp1 = '\0'; break; }
846 else cp1++;
847 if (log_rec.url[0]=='\0')
848 { log_rec.url[0]='/'; log_rec.url[1]='\0'; }
849
850 /* Normalize URL */
851 if (log_type==LOG_CLF && log_rec.resp_code!=RC_NOTFOUND && normalize)
852 {
853 if ( ((cp2=strstr(log_rec.url,"://"))!=NULL)&&(cp2<log_rec.url+6) )
854 {
855 cp1=cp2+3;
856 /* see if a '/' is present after it */
857 if ( (cp2=strchr(cp1,(int)'/'))==NULL) cp1--;
858 else cp1=cp2;
859 /* Ok, now shift url string */
860 cp2=log_rec.url; while (*cp1!='\0') *cp2++=*cp1++; *cp2='\0';
861 }
862 /* extra sanity checks on URL string */
863 while ((cp2=strstr(log_rec.url,"/./")))
864 { cp1=cp2+2; while (*cp1!='\0') *cp2++=*cp1++; *cp2='\0'; }
865 if (log_rec.url[0]!='/')
866 {
867 if ( log_rec.resp_code==RC_OK ||
868 log_rec.resp_code==RC_PARTIALCONTENT ||
869 log_rec.resp_code==RC_NOMOD)
870 {
871 if (debug_mode)
872 fprintf(stderr,"Converted URL '%s' to '/'\n",log_rec.url);
873 log_rec.url[0]='/';
874 log_rec.url[1]='\0';
875 }
876 else
877 {
878 if (debug_mode)
879 fprintf(stderr,"Invalid URL: '%s'\n",log_rec.url);
880 strcpy(log_rec.url,"/INVALID-URL");
881 }
882 }
883 while ( log_rec.url[ (i=strlen(log_rec.url)-1) ] == '?' )
884 log_rec.url[i]='\0'; /* drop trailing ?s if any */
885 }
886 else
887 {
888 /* check for service (ie: http://) and lowercase if found */
889 if (((cp2=strstr(log_rec.url,"://"))!= NULL)&&(cp2<log_rec.url+6))
890 {
891 cp1=log_rec.url;
892 while (cp1!=cp2)
893 {
894 if ( (*cp1>='A') && (*cp1<='Z')) *cp1 += 'a'-'A';
895 cp1++;
896 }
897 }
898 }
899
900 /* strip off index.html (or any aliases) */
901 lptr=index_alias;
902 while (lptr!=NULL)
903 {
904 if ((cp1=strstr(log_rec.url,lptr->string))!=NULL)
905 {
906 if (*(cp1-1)=='/')
907 {
908 if ( !stripcgi && (cp2=strchr(cp1,'?'))!=NULL )
909 { while(*cp2) *cp1++=*cp2++; *cp1='\0'; }
910 else *cp1='\0';
911 break;
912 }
913 }
914 lptr=lptr->next;
915 }
916
917 /* unescape referrer */
918 unescape(log_rec.refer);
919
920 /* fix referrer field */
921 cp1 = log_rec.refer;
922 cp3 = cp2 = cp1++;
923 if ( (*cp2 != '\0') && (*cp2 == '"') )
924 {
925 while ( *cp1 != '\0' )
926 {
927 cp3=cp2;
928 if (((unsigned char)*cp1<32&&(unsigned char)*cp1>0) ||
929 *cp1==127 || (unsigned char)*cp1=='<') *cp1=0;
930 else *cp2++=*cp1++;
931 }
932 *cp3 = '\0';
933 }
934
935 /* get query portion of cgi referrals */
936 cp1 = log_rec.refer;
937 if (*cp1 != '\0')
938 {
939 while (*cp1 != '\0')
940 {
941 if (!isurlchar(*cp1, 1))
942 {
943 /* Save query portion in log.rec.srchstr */
944 strncpy(log_rec.srchstr,(char *)cp1,MAXSRCH);
945 *cp1++='\0';
946 break;
947 }
948 else cp1++;
949 }
950 /* handle null referrer */
951 if (log_rec.refer[0]=='\0')
952 { log_rec.refer[0]='-'; log_rec.refer[1]='\0'; }
953 }
954
955 /* if HTTP request, lowercase http://sitename/ portion */
956 cp1 = log_rec.refer;
957 if ( (*cp1=='h') || (*cp1=='H'))
958 {
959 while ( (*cp1!='/') && (*cp1!='\0'))
960 {
961 if ( (*cp1>='A') && (*cp1<='Z')) *cp1 += 'a'-'A';
962 cp1++;
963 }
964 /* now do hostname */
965 if ( (*cp1=='/') && ( *(cp1+1)=='/')) {cp1++; cp1++;}
966 while ( (*cp1!='/') && (*cp1!='\0'))
967 {
968 if ( (*cp1>='A') && (*cp1<='Z')) *cp1 += 'a'-'A';
969 cp1++;
970 }
971 }
972
973 /* Do we need to mangle? */
974 if (mangle_agent) agent_mangle(log_rec.agent);
975
976 /* if necessary, shrink referrer to fit storage */
977 if (strlen(log_rec.refer)>=MAXREFH)
978 {
979 if (verbose) fprintf(stderr,"%s [%llu]\n",
980 msg_big_ref,total_rec);
981 log_rec.refer[MAXREFH-1]='\0';
982 }
983
984 /* if necessary, shrink URL to fit storage */
985 if (strlen(log_rec.url)>=MAXURLH)
986 {
987 if (verbose) fprintf(stderr,"%s [%llu]\n",
988 msg_big_req,total_rec);
989 log_rec.url[MAXURLH-1]='\0';
990 }
991
992 /* fix user agent field */
993 cp1 = log_rec.agent;
994 cp3 = cp2 = cp1++;
995 if ( (*cp2 != '\0') && ((*cp2 == '"')||(*cp2 == '(')) )
996 {
997 while (*cp1 != '\0') { cp3 = cp2; *cp2++ = *cp1++; }
998 *cp3 = '\0';
999 }
1000 cp1 = log_rec.agent; /* CHANGE !!! */
1001 while (*cp1 != 0) /* get rid of more common _bad_ chars ;) */
1002 {
1003 if ( ((unsigned char)*cp1 < 32) ||
1004 ((unsigned char)*cp1==127) ||
1005 (*cp1=='<') || (*cp1=='>') )
1006 { *cp1='\0'; break; }
1007 else cp1++;
1008 }
1009
1010 /* fix username if needed */
1011 if (log_rec.ident[0]==0)
1012 { log_rec.ident[0]='-'; log_rec.ident[1]='\0'; }
1013 else
1014 {
1015 cp3=log_rec.ident;
1016 while ((unsigned char)*cp3>=32 && *cp3!='"') cp3++;
1017 *cp3='\0';
1018 }
1019 /* unescape user name */
1020 unescape(log_rec.ident);
1021
1022 /********************************************/
1023 /* PROCESS RECORD */
1024 /********************************************/
1025
1026 /* first time through? */
1027 if (cur_month == 0)
1028 {
1029 /* if yes, init our date vars */
1030 cur_month=rec_month; cur_year=rec_year;
1031 cur_day=rec_day; cur_hour=rec_hour;
1032 cur_min=rec_min; cur_sec=rec_sec;
1033 f_day=rec_day;
1034 }
1035
1036 /* adjust last day processed if different */
1037 if (rec_day > l_day) l_day = rec_day;
1038
1039 /* update min/sec stuff */
1040 if (cur_sec != rec_sec) cur_sec = rec_sec;
1041 if (cur_min != rec_min) cur_min = rec_min;
1042
1043 /* check for hour change */
1044 if (cur_hour != rec_hour)
1045 {
1046 /* if yes, init hourly stuff */
1047 if (ht_hit > mh_hit) mh_hit = ht_hit;
1048 ht_hit = 0;
1049 cur_hour = rec_hour;
1050 }
1051
1052 /* check for day change */
1053 if (cur_day != rec_day)
1054 {
1055 /* if yes, init daily stuff */
1056 tm_site[cur_day-1]=dt_site; dt_site=0;
1057 tm_visit[cur_day-1]=tot_visit(sd_htab);
1058 del_hlist(sd_htab);
1059 cur_day = rec_day;
1060 }
1061
1062 /* check for month change */
1063 if ( (cur_month != rec_month) || (cur_year != rec_year) )
1064 {
1065 /* if yes, do monthly stuff */
1066 t_visit=tot_visit(sm_htab);
1067 month_update_exit(req_tstamp); /* process exit pages */
1068 update_history();
1069 write_month_html(); /* generate HTML for month */
1070 clear_month();
1071 cur_month = rec_month; /* update our flags */
1072 cur_year = rec_year;
1073 f_day=l_day=rec_day;
1074 }
1075
1076 /* save hostname for later */
1077 strncpy(host_buf, log_rec.hostname, sizeof(log_rec.hostname));
1078
1079#ifdef USE_DNS
1080 /* Resolve IP address if needed */
1081 if (dns_db)
1082 {
1083 struct addrinfo hints, *ares;
1084 memset(&hints, 0, sizeof(hints));
1085 hints.ai_family = AF_UNSPEC;
1086 hints.ai_socktype = SOCK_STREAM;
1087 hints.ai_flags = AI_NUMERICHOST;
1088 if (0 == getaddrinfo(log_rec.hostname, "0", &hints, &ares))
1089 {
1090 freeaddrinfo(ares);
1091 resolve_dns(&log_rec);
1092 }
1093 }
1094#endif
1095 /* lowercase hostname and validity check */
1096 cp1 = log_rec.hostname; i=0;
1097
1098 if ( (!isalnum((unsigned char)*cp1)) && (*cp1!=':') )
1099 strncpy(log_rec.hostname, "Invalid", 8);
1100 else
1101 {
1102 while (*cp1 != '\0') /* loop through string */
1103 {
1104 if ( (*cp1>='A') && (*cp1<='Z') )
1105 { *cp1++ += 'a'-'A'; continue; }
1106 if ( *cp1=='.' ) i++;
1107 if ( (isalnum((unsigned char)*cp1)) ||
1108 (*cp1=='.')||(*cp1=='-') ||
1109 (*cp1==':')||((*cp1=='_')&&(i==0)) ) cp1++;
1110 else
1111 {
1112 /* Invalid hostname found! */
1113 if (strcmp(log_rec.hostname, host_buf))
1114 strcpy(log_rec.hostname, host_buf);
1115 else strncpy(log_rec.hostname,"Invalid",8);
1116 break;
1117 }
1118 }
1119 if (*cp1 == '\0') /* did we make it to the end? */
1120 {
1121 if (!isalnum((unsigned char)*(cp1-1)))
1122 strncpy(log_rec.hostname,"Invalid",8);
1123 }
1124 }
1125
1126 /* Catch blank hostnames here */
1127 if (log_rec.hostname[0]=='\0')
1128 strncpy(log_rec.hostname,"Unknown",8);
1129
1130 /* Ignore/Include check */
1131 if ( (isinlist(include_sites,log_rec.hostname)==NULL) &&
1132 (isinlist(include_urls,log_rec.url)==NULL) &&
1133 (isinlist(include_refs,log_rec.refer)==NULL) &&
1134 (isinlist(include_agents,log_rec.agent)==NULL) &&
1135 (isinlist(include_users,log_rec.ident)==NULL) )
1136 {
1137 if (isinlist(ignored_sites,log_rec.hostname)!=NULL)
1138 { total_ignore++; continue; }
1139 if (isinlist(ignored_urls,log_rec.url)!=NULL)
1140 { total_ignore++; continue; }
1141 if (isinlist(ignored_agents,log_rec.agent)!=NULL)
1142 { total_ignore++; continue; }
1143 if (isinlist(ignored_refs,log_rec.refer)!=NULL)
1144 { total_ignore++; continue; }
1145 if (isinlist(ignored_users,log_rec.ident)!=NULL)
1146 { total_ignore++; continue; }
1147 }
1148
1149 /* Bump response code totals */
1150 switch (log_rec.resp_code) {
1151 case RC_CONTINUE: i=IDX_CONTINUE; break;
1152 case RC_SWITCHPROTO: i=IDX_SWITCHPROTO; break;
1153 case RC_OK: i=IDX_OK; break;
1154 case RC_CREATED: i=IDX_CREATED; break;
1155 case RC_ACCEPTED: i=IDX_ACCEPTED; break;
1156 case RC_NONAUTHINFO: i=IDX_NONAUTHINFO; break;
1157 case RC_NOCONTENT: i=IDX_NOCONTENT; break;
1158 case RC_RESETCONTENT: i=IDX_RESETCONTENT; break;
1159 case RC_PARTIALCONTENT: i=IDX_PARTIALCONTENT; break;
1160 case RC_MULTIPLECHOICES: i=IDX_MULTIPLECHOICES; break;
1161 case RC_MOVEDPERM: i=IDX_MOVEDPERM; break;
1162 case RC_MOVEDTEMP: i=IDX_MOVEDTEMP; break;
1163 case RC_SEEOTHER: i=IDX_SEEOTHER; break;
1164 case RC_NOMOD: i=IDX_NOMOD; break;
1165 case RC_USEPROXY: i=IDX_USEPROXY; break;
1166 case RC_MOVEDTEMPORARILY: i=IDX_MOVEDTEMPORARILY; break;
1167 case RC_BAD: i=IDX_BAD; break;
1168 case RC_UNAUTH: i=IDX_UNAUTH; break;
1169 case RC_PAYMENTREQ: i=IDX_PAYMENTREQ; break;
1170 case RC_FORBIDDEN: i=IDX_FORBIDDEN; break;
1171 case RC_NOTFOUND: i=IDX_NOTFOUND; break;
1172 case RC_METHODNOTALLOWED: i=IDX_METHODNOTALLOWED; break;
1173 case RC_NOTACCEPTABLE: i=IDX_NOTACCEPTABLE; break;
1174 case RC_PROXYAUTHREQ: i=IDX_PROXYAUTHREQ; break;
1175 case RC_TIMEOUT: i=IDX_TIMEOUT; break;
1176 case RC_CONFLICT: i=IDX_CONFLICT; break;
1177 case RC_GONE: i=IDX_GONE; break;
1178 case RC_LENGTHREQ: i=IDX_LENGTHREQ; break;
1179 case RC_PREFAILED: i=IDX_PREFAILED; break;
1180 case RC_REQENTTOOLARGE: i=IDX_REQENTTOOLARGE; break;
1181 case RC_REQURITOOLARGE: i=IDX_REQURITOOLARGE; break;
1182 case RC_UNSUPMEDIATYPE: i=IDX_UNSUPMEDIATYPE; break;
1183 case RC_RNGNOTSATISFIABLE:i=IDX_RNGNOTSATISFIABLE;break;
1184 case RC_EXPECTATIONFAILED:i=IDX_EXPECTATIONFAILED;break;
1185 case RC_SERVERERR: i=IDX_SERVERERR; break;
1186 case RC_NOTIMPLEMENTED: i=IDX_NOTIMPLEMENTED; break;
1187 case RC_BADGATEWAY: i=IDX_BADGATEWAY; break;
1188 case RC_UNAVAIL: i=IDX_UNAVAIL; break;
1189 case RC_GATEWAYTIMEOUT: i=IDX_GATEWAYTIMEOUT; break;
1190 case RC_BADHTTPVER: i=IDX_BADHTTPVER; break;
1191 default: i=IDX_UNDEFINED; break;
1192 }
1193 response[i].count++;
1194
1195 /* now save in the various hash tables... */
1196 if (log_rec.resp_code==RC_OK || log_rec.resp_code==RC_PARTIALCONTENT)
1197 i=1; else i=0;
1198
1199 /* URL/ident hash table (only if valid response code) */
1200 if ((log_rec.resp_code==RC_OK)||(log_rec.resp_code==RC_NOMOD)||
1201 (log_rec.resp_code==RC_PARTIALCONTENT))
1202 {
1203 /* URL hash table */
1204 if (put_unode(log_rec.url,OBJ_REG,(u_int64_t)1,
1205 log_rec.xfer_size,&t_url,(u_int64_t)0,(u_int64_t)0,um_htab))
1206 {
1207 if (verbose)
1208 /* Error adding URL node, skipping ... */
1209 fprintf(stderr,"%s %s\n", msg_nomem_u, log_rec.url);
1210 }
1211
1212 /* ident (username) hash table */
1213 if (put_inode(log_rec.ident,OBJ_REG,
1214 1,(u_int64_t)i,log_rec.xfer_size,&t_user,
1215 0,rec_tstamp,im_htab))
1216 {
1217 if (verbose)
1218 /* Error adding ident node, skipping .... */
1219 fprintf(stderr,"%s %s\n", msg_nomem_i, log_rec.ident);
1220 }
1221 }
1222
1223 /* referrer hash table */
1224 if (ntop_refs)
1225 {
1226 if (log_rec.refer[0]!='\0')
1227 if (put_rnode(log_rec.refer,OBJ_REG,(u_int64_t)1,&t_ref,rm_htab))
1228 {
1229 if (verbose)
1230 fprintf(stderr,"%s %s\n", msg_nomem_r, log_rec.refer);
1231 }
1232 }
1233
1234 /* hostname (site) hash table - daily */
1235 if (put_hnode(log_rec.hostname,OBJ_REG,
1236 1,(u_int64_t)i,log_rec.xfer_size,&dt_site,
1237 0,rec_tstamp,"",sd_htab))
1238 {
1239 if (verbose)
1240 /* Error adding host node (daily), skipping .... */
1241 fprintf(stderr,"%s %s\n",msg_nomem_dh, log_rec.hostname);
1242 }
1243
1244 /* hostname (site) hash table - monthly */
1245 if (put_hnode(log_rec.hostname,OBJ_REG,
1246 1,(u_int64_t)i,log_rec.xfer_size,&t_site,
1247 0,rec_tstamp,"",sm_htab))
1248 {
1249 if (verbose)
1250 /* Error adding host node (monthly), skipping .... */
1251 fprintf(stderr,"%s %s\n", msg_nomem_mh, log_rec.hostname);
1252 }
1253
1254 /* user agent hash table */
1255 if (ntop_agents)
1256 {
1257 if (log_rec.agent[0]!='\0')
1258 if (put_anode(log_rec.agent,OBJ_REG,(u_int64_t)1,&t_agent,am_htab))
1259 {
1260 if (verbose)
1261 fprintf(stderr,"%s %s\n", msg_nomem_a, log_rec.agent);
1262 }
1263 }
1264
1265 /* bump monthly/daily/hourly totals */
1266 t_hit++; ht_hit++; /* daily/hourly hits */
1267 t_xfer += log_rec.xfer_size; /* total xfer size */
1268 tm_xfer[rec_day-1] += log_rec.xfer_size; /* daily xfer total */
1269 tm_hit[rec_day-1]++; /* daily hits total */
1270 th_xfer[rec_hour] += log_rec.xfer_size; /* hourly xfer total */
1271 th_hit[rec_hour]++; /* hourly hits total */
1272
1273 /* if RC_OK, increase file counters */
1274 if (log_rec.resp_code == RC_OK)
1275 {
1276 t_file++;
1277 tm_file[rec_day-1]++;
1278 th_file[rec_hour]++;
1279 }
1280
1281 /* Pages (pageview) calculation */
1282 if (ispage(log_rec.url))
1283 {
1284 t_page++;
1285 tm_page[rec_day-1]++;
1286 th_page[rec_hour]++;
1287
1288 /* do search string stuff if needed */
1289 if (ntop_search) srch_string(log_rec.srchstr);
1290 }
1291
1292 /*********************************************/
1293 /* RECORD PROCESSED - DO GROUPS HERE */
1294 /*********************************************/
1295
1296 /* URL Grouping */
1297 if ( (cp1=isinglist(group_urls,log_rec.url))!=NULL)
1298 {
1299 if (put_unode(cp1,OBJ_GRP,(u_int64_t)1,log_rec.xfer_size,
1300 &ul_bogus,(u_int64_t)0,(u_int64_t)0,um_htab))
1301 {
1302 if (verbose)
1303 /* Error adding URL node, skipping ... */
1304 fprintf(stderr,"%s %s\n", msg_nomem_u, cp1);
1305 }
1306 }
1307
1308 /* Site Grouping */
1309 if ( (cp1=isinglist(group_sites,log_rec.hostname))!=NULL)
1310 {
1311 if (put_hnode(cp1,OBJ_GRP,1,
1312 (u_int64_t)(log_rec.resp_code==RC_OK)?1:0,
1313 log_rec.xfer_size,&ul_bogus,
1314 0,rec_tstamp,"",sm_htab))
1315 {
1316 if (verbose)
1317 /* Error adding Site node, skipping ... */
1318 fprintf(stderr,"%s %s\n", msg_nomem_mh, cp1);
1319 }
1320 }
1321 else
1322 {
1323 /* Domain Grouping */
1324 if (group_domains)
1325 {
1326 cp1 = get_domain(log_rec.hostname);
1327 if (cp1 != NULL)
1328 {
1329 if (put_hnode(cp1,OBJ_GRP,1,
1330 (u_int64_t)(log_rec.resp_code==RC_OK)?1:0,
1331 log_rec.xfer_size,&ul_bogus,
1332 0,rec_tstamp,"",sm_htab))
1333 {
1334 if (verbose)
1335 /* Error adding Site node, skipping ... */
1336 fprintf(stderr,"%s %s\n", msg_nomem_mh, cp1);
1337 }
1338 }
1339 }
1340 }
1341
1342 /* Referrer Grouping */
1343 if ( (cp1=isinglist(group_refs,log_rec.refer))!=NULL)
1344 {
1345 if (put_rnode(cp1,OBJ_GRP,(u_int64_t)1,&ul_bogus,rm_htab))
1346 {
1347 if (verbose)
1348 /* Error adding Referrer node, skipping ... */
1349 fprintf(stderr,"%s %s\n", msg_nomem_r, cp1);
1350 }
1351 }
1352
1353 /* User Agent Grouping */
1354 if ( (cp1=isinglist(group_agents,log_rec.agent))!=NULL)
1355 {
1356 if (put_anode(cp1,OBJ_GRP,(u_int64_t)1,&ul_bogus,am_htab))
1357 {
1358 if (verbose)
1359 /* Error adding User Agent node, skipping ... */
1360 fprintf(stderr,"%s %s\n", msg_nomem_a, cp1);
1361 }
1362 }
1363
1364 /* Ident (username) Grouping */
1365 if ( (cp1=isinglist(group_users,log_rec.ident))!=NULL)
1366 {
1367 if (put_inode(cp1,OBJ_GRP,1,
1368 (u_int64_t)(log_rec.resp_code==RC_OK)?1:0,
1369 log_rec.xfer_size,&ul_bogus,
1370 0,rec_tstamp,im_htab))
1371 {
1372 if (verbose)
1373 /* Error adding Username node, skipping ... */
1374 fprintf(stderr,"%s %s\n", msg_nomem_i, cp1);
1375 }
1376 }
1377 }
1378
1379 /*********************************************/
1380 /* BAD RECORD */
1381 /*********************************************/
1382
1383 else
1384 {
1385 /* If first record, check if stupid Netscape header stuff */
1386 if ( (total_rec==1) && (strncmp(buffer,"format=",7)==0) )
1387 {
1388 /* Skipping Netscape header record */
1389 if (verbose>1) printf("%s\n",msg_ign_nscp);
1390 /* count it as ignored... */
1391 total_ignore++;
1392 }
1393 else
1394 {
1395 /* Check if it's a W3C header or IIS Null-Character line */
1396 if ((buffer[0]=='\0') || (buffer[0]=='#'))
1397 {
1398 total_ignore++;
1399 }
1400 else
1401 {
1402 /* really bad record... */
1403 total_bad++;
1404 if (verbose)
1405 {
1406 fprintf(stderr,"%s (%llu)",msg_bad_rec,total_rec);
1407 if (debug_mode) fprintf(stderr,":\n%s\n",tmp_buf);
1408 else fprintf(stderr,"\n");
1409 }
1410 }
1411 }
1412 }
1413 }
1414
1415 /*********************************************/
1416 /* DONE READING LOG FILE - final processing */
1417 /*********************************************/
1418
1419 /* close log file if needed */
1420#ifdef USE_BZIP
1421 if (gz_log) (gz_log==COMP_BZIP)?BZ2_bzclose(zlog_fp):gzclose(zlog_fp);
1422#else
1423 if (gz_log) gzclose(zlog_fp);
1424#endif
1425 else if (log_fname) fclose(log_fp);
1426
1427 if (good_rec) /* were any good records? */
1428 {
1429 tm_site[cur_day-1]=dt_site; /* If yes, clean up a bit */
1430 tm_visit[cur_day-1]=tot_visit(sd_htab);
1431 t_visit=tot_visit(sm_htab);
1432 if (ht_hit > mh_hit) mh_hit = ht_hit;
1433
1434 if (total_rec > (total_ignore+total_bad)) /* did we process any? */
1435 {
1436 if (incremental)
1437 {
1438 if (save_state()) /* incremental stuff */
1439 {
1440 /* Error: Unable to save current run data */
1441 if (verbose) fprintf(stderr,"%s\n",msg_data_err);
1442 unlink(state_fname);
1443 }
1444 }
1445 month_update_exit(rec_tstamp); /* calculate exit pages */
1446 update_history();
1447 write_month_html(); /* write monthly HTML file */
1448 put_history(); /* write history */
1449 }
1450 if (hist[0].month!=0) write_main_index(); /* write main HTML file */
1451
1452 /* get processing end time */
1453 end_time = time(NULL);
1454
1455 /* display end of processing statistics */
1456 if (time_me || (verbose>1))
1457 {
1458 printf("%llu %s ",total_rec, msg_records);
1459 if (total_ignore)
1460 {
1461 printf("(%llu %s",total_ignore,msg_ignored);
1462 if (total_bad) printf(", %llu %s) ",total_bad,msg_bad);
1463 else printf(") ");
1464 }
1465 else if (total_bad) printf("(%llu %s) ",total_bad,msg_bad);
1466
1467 /* totoal processing time in seconds */
1468 temp_time = difftime(end_time, start_time);
1469 if (temp_time==0) temp_time=1;
1470 printf("%s %.0f %s", msg_in, temp_time, msg_seconds);
1471
1472 /* calculate records per second */
1473 if (temp_time)
1474 i=( (int)( (float)total_rec/temp_time ) );
1475 else i=0;
1476
1477 if ( (i>0) && (i<=total_rec) ) printf(", %d/sec\n", i);
1478 else printf("\n");
1479 }
1480
1481#ifdef USE_DNS
1482 /* Close DNS cache file */
1483 if (dns_db) close_cache();
1484 /* Close GeoDB database */
1485 if (geo_db) geodb_close(geo_db);
1486#endif
1487
1488#ifdef USE_GEOIP
1489 /* Close GeoIP database */
1490 if (geo_fp) GeoIP_delete(geo_fp);
1491#endif
1492
1493 /* Whew, all done! Exit with completion status (0) */
1494 exit(0);
1495 }
1496 else
1497 {
1498 /* No valid records found... exit with error (1) */
1499 if (verbose) printf("%s\n",msg_no_vrec);
1500 if (hist[0].month!=0) write_main_index(); /* write main HTML file */
1501 exit(1);
1502 }
1503}
1504
1505/*********************************************/
1506/* GET_CONFIG - get configuration file info */
1507/*********************************************/
1508
1509void get_config(char *fname)
1510{
1511 char *kwords[]= { "Undefined", /* 0 = undefined keyword 0 */
1512 "OutputDir", /* Output directory 1 */
1513 "LogFile", /* Log file to use for input 2 */
1514 "ReportTitle", /* Title for reports 3 */
1515 "HostName", /* Hostname to use 4 */
1516 "IgnoreHist", /* Ignore history file 5 */
1517 "Quiet", /* Run in quiet mode 6 */
1518 "TimeMe", /* Produce timing results 7 */
1519 "Debug", /* Produce debug information 8 */
1520 "HourlyGraph", /* Hourly stats graph 9 */
1521 "HourlyStats", /* Hourly stats table 10 */
1522 "TopSites", /* Top sites 11 */
1523 "TopURLs", /* Top URLs 12 */
1524 "TopReferrers", /* Top Referrers 13 */
1525 "TopAgents", /* Top User Agents 14 */
1526 "TopCountries", /* Top Countries 15 */
1527 "HideSite", /* Sites to hide 16 */
1528 "HideURL", /* URLs to hide 17 */
1529 "HideReferrer", /* Referrers to hide 18 */
1530 "HideAgent", /* User Agents to hide 19 */
1531 "IndexAlias", /* Aliases for index.html 20 */
1532 "HTMLHead", /* HTML Top1 code 21 */
1533 "HTMLPost", /* HTML Top2 code 22 */
1534 "HTMLTail", /* HTML Tail code 23 */
1535 "MangleAgents", /* Mangle User Agents 24 */
1536 "IgnoreSite", /* Sites to ignore 25 */
1537 "IgnoreURL", /* Url's to ignore 26 */
1538 "IgnoreReferrer", /* Referrers to ignore 27 */
1539 "IgnoreAgent", /* User Agents to ignore 28 */
1540 "ReallyQuiet", /* Dont display ANY messages 29 */
1541 "GMTTime", /* Local or UTC time? 30 */
1542 "GroupURL", /* Group URLs 31 */
1543 "GroupSite", /* Group Sites 32 */
1544 "GroupReferrer", /* Group Referrers 33 */
1545 "GroupAgent", /* Group Agents 34 */
1546 "GroupShading", /* Shade Grouped entries 35 */
1547 "GroupHighlight", /* BOLD Grouped entries 36 */
1548 "Incremental", /* Incremental runs 37 */
1549 "IncrementalName", /* Filename for state data 38 */
1550 "HistoryName", /* Filename for history data 39 */
1551 "HTMLExtension", /* HTML filename extension 40 */
1552 "HTMLPre", /* HTML code at beginning 41 */
1553 "HTMLBody", /* HTML body code 42 */
1554 "HTMLEnd", /* HTML code at end 43 */
1555 "UseHTTPS", /* Use https:// on URLs 44 */
1556 "IncludeSite", /* Sites to always include 45 */
1557 "IncludeURL", /* URLs to always include 46 */
1558 "IncludeReferrer", /* Referrers to include 47 */
1559 "IncludeAgent", /* User Agents to include 48 */
1560 "PageType", /* Page Type (pageview) 49 */
1561 "VisitTimeout", /* Visit timeout (seconds) 50 */
1562 "GraphLegend", /* Graph Legends (yes/no) 51 */
1563 "GraphLines", /* Graph Lines (0=none) 52 */
1564 "FoldSeqErr", /* Fold sequence errors 53 */
1565 "CountryGraph", /* Display ctry graph (0=no) 54 */
1566 "TopKSites", /* Top sites (by KBytes) 55 */
1567 "TopKURLs", /* Top URLs (by KBytes) 56 */
1568 "TopEntry", /* Top Entry Pages 57 */
1569 "TopExit", /* Top Exit Pages 58 */
1570 "TopSearch", /* Top Search Strings 59 */
1571 "LogType", /* Log Type (clf/ftp/squid) 60 */
1572 "SearchEngine", /* SearchEngine strings 61 */
1573 "GroupDomains", /* Group domains (n=level) 62 */
1574 "HideAllSites", /* Hide ind. sites (0=no) 63 */
1575 "AllSites", /* List all sites? 64 */
1576 "AllURLs", /* List all URLs? 65 */
1577 "AllReferrers", /* List all Referrers? 66 */
1578 "AllAgents", /* List all User Agents? 67 */
1579 "AllSearchStr", /* List all Search Strings? 68 */
1580 "AllUsers", /* List all Users? 69 */
1581 "TopUsers", /* Top Usernames to show 70 */
1582 "HideUser", /* Usernames to hide 71 */
1583 "IgnoreUser", /* Usernames to ignore 72 */
1584 "IncludeUser", /* Usernames to include 73 */
1585 "GroupUser", /* Usernames to group 74 */
1586 "DumpPath", /* Path for dump files 75 */
1587 "DumpExtension", /* Dump filename extension 76 */
1588 "DumpHeader", /* Dump header as first rec? 77 */
1589 "DumpSites", /* Dump sites tab file 78 */
1590 "DumpURLs", /* Dump urls tab file 79 */
1591 "DumpReferrers", /* Dump referrers tab file 80 */
1592 "DumpAgents", /* Dump user agents tab file 81 */
1593 "DumpUsers", /* Dump usernames tab file 82 */
1594 "DumpSearchStr", /* Dump search str tab file 83 */
1595 "DNSCache", /* DNS Cache file name 84 */
1596 "DNSChildren", /* DNS Children (0=no DNS) 85 */
1597 "DailyGraph", /* Daily Graph (0=no) 86 */
1598 "DailyStats", /* Daily Stats (0=no) 87 */
1599 "LinkReferrer", /* Link referrer (0=no) 88 */
1600 "PagePrefix", /* PagePrefix - treat as page 89 */
1601 "ColorHit", /* Hit Color (def=00805c) 90 */
1602 "ColorFile", /* File Color (def=0040ff) 91 */
1603 "ColorSite", /* Site Color (def=ff8000) 92 */
1604 "ColorKbyte", /* Kbyte Color (def=ff0000) 93 */
1605 "ColorPage", /* Page Color (def=00e0ff) 94 */
1606 "ColorVisit", /* Visit Color (def=ffff00) 95 */
1607 "ColorMisc", /* Misc Color (def=00e0ff) 96 */
1608 "PieColor1", /* Pie Color 1 (def=800080) 97 */
1609 "PieColor2", /* Pie Color 2 (def=80ffc0) 98 */
1610 "PieColor3", /* Pie Color 3 (def=ff00ff) 99 */
1611 "PieColor4", /* Pie Color 4 (def=ffc080) 100 */
1612 "CacheIPs", /* Cache IPs in DNS DB (0=no) 101 */
1613 "CacheTTL", /* DNS Cache entry TTL (days) 102 */
1614 "GeoDB", /* GeoDB lookups (0=no) 103 */
1615 "GeoDBDatabase", /* GeoDB database filename 104 */
1616 "StripCGI", /* Strip CGI in URLS (0=no) 105 */
1617 "TrimSquidURL", /* Trim squid URLs (0=none) 106 */
1618 "OmitPage", /* URLs not counted as pages 107 */
1619 "HTAccess", /* Write .httaccess files? 108 */
1620 "IgnoreState", /* Ignore state file (0=no) 109 */
1621 "DefaultIndex", /* Default index.* (1=yes) 110 */
1622 "GeoIP", /* Use GeoIP? (1=yes) 111 */
1623 "GeoIPDatabase", /* Database to use for GeoIP 112 */
1624 "NormalizeURL", /* Normalize CLF URLs (1=yes) 113 */
1625 "IndexMonths", /* # months for main page 114 */
1626 "GraphMonths", /* # months for yearly graph 115 */
1627 "YearHeaders", /* use year headers? (1=yes) 116 */
1628 "YearTotals", /* show year subtotals (0=no) 117 */
1629 "CountryFlags", /* show country flags? (0-no) 118 */
1630 "FlagDir", /* directory w/flag images 119 */
1631 "SearchCaseI" /* srch str case insensitive 120 */
1632 };
1633
1634 FILE *fp;
1635
1636 char buffer[BUFSIZE];
1637 char keyword[MAXKWORD];
1638 char value[MAXKVAL];
1639 char *cp1, *cp2;
1640 int i,key,count;
1641 int num_kwords=sizeof(kwords)/sizeof(char *);
1642
1643 if ( (fp=fopen(fname,"r")) == NULL)
1644 {
1645 if (verbose)
1646 fprintf(stderr,"%s %s\n",msg_bad_conf,fname);
1647 return;
1648 }
1649
1650 while ( (fgets(buffer,BUFSIZE,fp)) != NULL)
1651 {
1652 /* skip comments and blank lines */
1653 if ( (buffer[0]=='#') || isspace((unsigned char)buffer[0]) ) continue;
1654
1655 /* Get keyword */
1656 cp1=buffer;cp2=keyword;count=MAXKWORD-1;
1657 while ( (isalnum((unsigned char)*cp1)) && count )
1658 { *cp2++ = *cp1++; count--; }
1659 *cp2='\0';
1660
1661 /* Get value */
1662 cp2=value; count=MAXKVAL-1;
1663 while ((*cp1!='\n')&&(*cp1!='\0')&&(isspace((unsigned char)*cp1))) cp1++;
1664 while ((*cp1!='\n')&&(*cp1!='\0')&&count ) { *cp2++ = *cp1++; count--; }
1665 *cp2--='\0';
1666 while ((isspace((unsigned char)*cp2)) && (cp2 != value) ) *cp2--='\0';
1667
1668 /* check if blank keyword/value */
1669 if ( (keyword[0]=='\0') || (value[0]=='\0') ) continue;
1670
1671 key=0;
1672 for (i=0;i<num_kwords;i++)
1673 if (!ouricmp(keyword,kwords[i])) { key=i; break; }
1674
1675 if (key==0) { printf("%s '%s' (%s)\n", /* Invalid keyword */
1676 msg_bad_key,keyword,fname);
1677 continue;
1678 }
1679
1680 switch (key)
1681 {
1682 case 1: out_dir=save_opt(value); break; /* OutputDir */
1683 case 2: log_fname=save_opt(value); break; /* LogFile */
1684 case 3: msg_title=save_opt(value); break; /* ReportTitle */
1685 case 4: hname=save_opt(value); break; /* HostName */
1686 case 5: ignore_hist=
1687 (tolower(value[0])=='y')?1:0; break; /* IgnoreHist */
1688 case 6: verbose=
1689 (tolower(value[0])=='y')?1:2; break; /* Quiet */
1690 case 7: time_me=
1691 (tolower(value[0])=='n')?0:1; break; /* TimeMe */
1692 case 8: debug_mode=
1693 (tolower(value[0])=='y')?1:0; break; /* Debug */
1694 case 9: hourly_graph=
1695 (tolower(value[0])=='n')?0:1; break; /* HourlyGraph */
1696 case 10: hourly_stats=
1697 (tolower(value[0])=='n')?0:1; break; /* HourlyStats */
1698 case 11: ntop_sites = atoi(value); break; /* TopSites */
1699 case 12: ntop_urls = atoi(value); break; /* TopURLs */
1700 case 13: ntop_refs = atoi(value); break; /* TopRefs */
1701 case 14: ntop_agents = atoi(value); break; /* TopAgents */
1702 case 15: ntop_ctrys = atoi(value); break; /* TopCountries */
1703 case 16: add_nlist(value,&hidden_sites); break; /* HideSite */
1704 case 17: add_nlist(value,&hidden_urls); break; /* HideURL */
1705 case 18: add_nlist(value,&hidden_refs); break; /* HideReferrer */
1706 case 19: add_nlist(value,&hidden_agents); break; /* HideAgent */
1707 case 20: add_nlist(value,&index_alias); break; /* IndexAlias */
1708 case 21: add_nlist(value,&html_head); break; /* HTMLHead */
1709 case 22: add_nlist(value,&html_post); break; /* HTMLPost */
1710 case 23: add_nlist(value,&html_tail); break; /* HTMLTail */
1711 case 24: mangle_agent=atoi(value); break; /* MangleAgents */
1712 case 25: add_nlist(value,&ignored_sites); break; /* IgnoreSite */
1713 case 26: add_nlist(value,&ignored_urls); break; /* IgnoreURL */
1714 case 27: add_nlist(value,&ignored_refs); break; /* IgnoreReferrer */
1715 case 28: add_nlist(value,&ignored_agents); break; /* IgnoreAgent */
1716 case 29: if (tolower(value[0])=='y')
1717 verbose=0; break; /* ReallyQuiet */
1718 case 30: local_time=
1719 (tolower(value[0])=='y')?0:1; break; /* GMTTime */
1720 case 31: add_glist(value,&group_urls); break; /* GroupURL */
1721 case 32: add_glist(value,&group_sites); break; /* GroupSite */
1722 case 33: add_glist(value,&group_refs); break; /* GroupReferrer */
1723 case 34: add_glist(value,&group_agents); break; /* GroupAgent */
1724 case 35: shade_groups=
1725 (tolower(value[0])=='n')?0:1; break; /* GroupShading */
1726 case 36: hlite_groups=
1727 (tolower(value[0])=='n')?0:1; break; /* GroupHighlight */
1728 case 37: incremental=
1729 (tolower(value[0])=='y')?1:0; break; /* Incremental */
1730 case 38: state_fname=save_opt(value); break; /* State FName */
1731 case 39: hist_fname=save_opt(value); break; /* History FName */
1732 case 40: html_ext=save_opt(value); break; /* HTML extension */
1733 case 41: add_nlist(value,&html_pre); break; /* HTML Pre code */
1734 case 42: add_nlist(value,&html_body); break; /* HTML Body code */
1735 case 43: add_nlist(value,&html_end); break; /* HTML End code */
1736 case 44: use_https=
1737 (tolower(value[0])=='y')?1:0; break; /* Use https:// */
1738 case 45: add_nlist(value,&include_sites); break; /* IncludeSite */
1739 case 46: add_nlist(value,&include_urls); break; /* IncludeURL */
1740 case 47: add_nlist(value,&include_refs); break; /* IncludeReferrer*/
1741 case 48: add_nlist(value,&include_agents); break; /* IncludeAgent */
1742 case 49: add_nlist(value,&page_type); break; /* PageType */
1743 case 50: visit_timeout=atoi(value); break; /* VisitTimeout */
1744 case 51: graph_legend=
1745 (tolower(value[0])=='n')?0:1; break; /* GraphLegend */
1746 case 52: graph_lines = atoi(value); break; /* GraphLines */
1747 case 53: fold_seq_err=
1748 (tolower(value[0])=='y')?1:0; break; /* FoldSeqErr */
1749 case 54: ctry_graph=
1750 (tolower(value[0])=='n')?0:1; break; /* CountryGraph */
1751 case 55: ntop_sitesK = atoi(value); break; /* TopKSites (KB) */
1752 case 56: ntop_urlsK = atoi(value); break; /* TopKUrls (KB) */
1753 case 57: ntop_entry = atoi(value); break; /* Top Entry pgs */
1754 case 58: ntop_exit = atoi(value); break; /* Top Exit pages */
1755 case 59: ntop_search = atoi(value); break; /* Top Search pgs */
1756 case 60: log_type=(tolower(value[0])=='f')?
1757 LOG_FTP:((tolower(value[0])=='s')?
1758 LOG_SQUID:((tolower(value[0])=='w')?
1759 LOG_W3C:LOG_CLF)); break; /* LogType */
1760 case 61: add_glist(value,&search_list); break; /* SearchEngine */
1761 case 62: group_domains=atoi(value); break; /* GroupDomains */
1762 case 63: hide_sites=
1763 (tolower(value[0])=='y')?1:0; break; /* HideAllSites */
1764 case 64: all_sites=
1765 (tolower(value[0])=='y')?1:0; break; /* All Sites? */
1766 case 65: all_urls=
1767 (tolower(value[0])=='y')?1:0; break; /* All URLs? */
1768 case 66: all_refs=
1769 (tolower(value[0])=='y')?1:0; break; /* All Refs */
1770 case 67: all_agents=
1771 (tolower(value[0])=='y')?1:0; break; /* All Agents? */
1772 case 68: all_search=
1773 (tolower(value[0])=='y')?1:0; break; /* All Srch str */
1774 case 69: all_users=
1775 (tolower(value[0])=='y')?1:0; break; /* All Users? */
1776 case 70: ntop_users=atoi(value); break; /* TopUsers */
1777 case 71: add_nlist(value,&hidden_users); break; /* HideUser */
1778 case 72: add_nlist(value,&ignored_users); break; /* IgnoreUser */
1779 case 73: add_nlist(value,&include_users); break; /* IncludeUser */
1780 case 74: add_glist(value,&group_users); break; /* GroupUser */
1781 case 75: dump_path=save_opt(value); break; /* DumpPath */
1782 case 76: dump_ext=save_opt(value); break; /* Dumpfile ext */
1783 case 77: dump_header=
1784 (tolower(value[0])=='y')?1:0; break; /* DumpHeader? */
1785 case 78: dump_sites=
1786 (tolower(value[0])=='y')?1:0; break; /* DumpSites? */
1787 case 79: dump_urls=
1788 (tolower(value[0])=='y')?1:0; break; /* DumpURLs? */
1789 case 80: dump_refs=
1790 (tolower(value[0])=='y')?1:0; break; /* DumpReferrers? */
1791 case 81: dump_agents=
1792 (tolower(value[0])=='y')?1:0; break; /* DumpAgents? */
1793 case 82: dump_users=
1794 (tolower(value[0])=='y')?1:0; break; /* DumpUsers? */
1795 case 83: dump_search=
1796 (tolower(value[0])=='y')?1:0; break; /* DumpSrchStrs? */
1797#ifdef USE_DNS
1798 case 84: dns_cache=save_opt(value); break; /* DNSCache fname */
1799 case 85: dns_children=atoi(value); break; /* DNSChildren */
1800#else
1801 case 84: /* Disable DNSCache and DNSChildren if DNS is not enabled */
1802 case 85: printf("%s '%s' (%s)\n",msg_bad_key,keyword,fname); break;
1803#endif /* USE_DNS */
1804 case 86: daily_graph=
1805 (tolower(value[0])=='n')?0:1; break; /* HourlyGraph */
1806 case 87: daily_stats=
1807 (tolower(value[0])=='n')?0:1; break; /* HourlyStats */
1808 case 88: link_referrer=
1809 (tolower(value[0])=='y')?1:0; break; /* LinkReferrer */
1810 case 89: add_nlist(value,&page_prefix); break; /* PagePrefix */
1811 case 90: strncpy(hit_color+1, value, 6); break; /* ColorHit */
1812 case 91: strncpy(file_color+1, value, 6); break; /* ColorFile */
1813 case 92: strncpy(site_color+1, value, 6); break; /* ColorSite */
1814 case 93: strncpy(kbyte_color+1,value, 6); break; /* ColorKbyte */
1815 case 94: strncpy(page_color+1, value, 6); break; /* ColorPage */
1816 case 95: strncpy(visit_color+1,value, 6); break; /* ColorVisit */
1817 case 96: strncpy(misc_color+1, value, 6); break; /* ColorMisc */
1818 case 97: strncpy(pie_color1+1, value, 6); break; /* PieColor1 */
1819 case 98: strncpy(pie_color2+1, value, 6); break; /* PieColor2 */
1820 case 99: strncpy(pie_color3+1, value, 6); break; /* PieColor3 */
1821 case 100:strncpy(pie_color4+1, value, 6); break; /* PieColor4 */
1822#ifdef USE_DNS
1823 case 101: cache_ips=
1824 (tolower(value[0])=='y')?1:0; break; /* CacheIPs */
1825 case 102: cache_ttl=atoi(value); break; /* CacheTTL days */
1826 case 103: geodb=
1827 (tolower(value[0])=='y')?1:0; break; /* GeoDB */
1828 case 104: geodb_fname=save_opt(value); break; /* GeoDBDatabase */
1829#else
1830 case 101: /* Disable CacheIPs/CacheTTL/GeoDB/GeoDBDatabase if none */
1831 case 102:
1832 case 103:
1833 case 104: printf("%s '%s' (%s)\n",msg_bad_key,keyword,fname); break;
1834#endif /* USE_DNS */
1835 case 105: stripcgi=
1836 (tolower(value[0])=='n')?0:1; break; /* StripCGI */
1837 case 106: trimsquid=atoi(value); break; /* TrimSquidURL */
1838 case 107: add_nlist(value,&omit_page); break; /* OmitPage */
1839 case 108: htaccess=
1840 (tolower(value[0])=='y')?1:0; break; /* HTAccess */
1841 case 109: ignore_state=
1842 (tolower(value[0])=='y')?1:0; break; /* IgnoreState */
1843 case 110: default_index=
1844 (tolower(value[0])=='n')?0:1; break; /* DefaultIndex */
1845#ifdef USE_GEOIP
1846 case 111: geoip=
1847 (tolower(value[0])=='y')?1:0; break; /* GeoIP */
1848 case 112: geoip_db=save_opt(value); break; /* GeoIPDatabase */
1849#else
1850 case 111: /* Disable GeoIP and GeoIPDatabase if not enabled */
1851 case 112: printf("%s '%s' (%s)\n",msg_bad_key,keyword,fname); break;
1852#endif
1853 case 113: normalize=
1854 (tolower(value[0])=='n')?0:1; break; /* NormalizeURL */
1855 case 114: index_mths=atoi(value); break; /* IndexMonths */
1856 case 115: graph_mths=atoi(value); break; /* GraphMonths */
1857 case 116: year_hdrs=
1858 (tolower(value[0])=='n')?0:1; break; /* YearHeaders */
1859 case 117: year_totals=
1860 (tolower(value[0])=='n')?0:1; break; /* YearTotals */
1861 case 118: use_flags=
1862 (tolower(value[0])=='y')?1:0; break; /* CountryFlags */
1863 case 119: use_flags=1; flag_dir=save_opt(value); break; /* FlagDir */
1864 case 120: searchcasei=
1865 (tolower(value[0])=='n')?0:1; break; /* SearchCaseI */
1866 }
1867 }
1868 fclose(fp);
1869}
1870
1871/*********************************************/
1872/* SAVE_OPT - save option from config file */
1873/*********************************************/
1874
/*
 * Duplicate an option string into newly malloc'd storage.
 * Returns the copy, or NULL if the allocation fails.
 */
static char *save_opt(char *str)
{
   size_t nbytes=strlen(str)+1;            /* room for the terminator too */
   char   *copy=malloc(nbytes);

   if (copy!=NULL) memcpy(copy,str,nbytes);
   return copy;
}
1884
1885/*********************************************/
/* CLEAR_MONTH - initialize monthly stuff    */
1887/*********************************************/
1888
1889void clear_month()
1890{
1891 int i;
1892
1893 init_counters(); /* reset monthly counters */
1894 del_htabs(); /* clear hash tables */
1895 if (ntop_ctrys!=0 ) for (i=0;i<ntop_ctrys;i++) top_ctrys[i]=NULL;
1896}
1897
1898/*********************************************/
1899/* INIT_COUNTERS - prep counters for use */
1900/*********************************************/
1901
1902void init_counters()
1903{
1904 int i;
1905 for (i=0;i<TOTAL_RC;i++) response[i].count = 0;
1906 for (i=0;i<31;i++) /* monthly totals */
1907 {
1908 tm_xfer[i]=0.0;
1909 tm_hit[i]=tm_file[i]=tm_site[i]=tm_page[i]=tm_visit[i]=0;
1910 }
1911 for (i=0;i<24;i++) /* hourly totals */
1912 {
1913 th_hit[i]=th_file[i]=th_page[i]=0;
1914 th_xfer[i]=0.0;
1915 }
1916 for (i=0;ctry[i].desc;i++) /* country totals */
1917 {
1918 ctry[i].count=0;
1919 ctry[i].files=0;
1920 ctry[i].xfer=0;
1921 }
1922 t_hit=t_file=t_site=t_url=t_ref=t_agent=t_page=t_visit=t_user=0;
1923 t_xfer=0.0;
1924 mh_hit = dt_site = 0;
1925 f_day=l_day=1;
1926}
1927
1928/*********************************************/
1929/* PRINT_OPTS - print command line options */
1930/*********************************************/
1931
1932void print_opts(char *pname)
1933{
1934 int i;
1935
1936 printf("%s: %s %s\n",h_usage1,pname,h_usage2);
1937 for (i=0;h_msg[i];i++) printf("%s\n",h_msg[i]);
1938 exit(1);
1939}
1940
1941/*********************************************/
1942/* PRINT_VERSION */
1943/*********************************************/
1944
1945void print_version()
1946{
1947 char buf[128]="";
1948 uname(&system_info);
1949
1950 printf("Webalizer V%s-%s (%s %s %s) %s\n%s\n",
1951 version,editlvl,
1952 system_info.sysname,system_info.release,system_info.machine,
1953 language,copyright);
1954
1955#ifdef USE_DNS
1956 strncpy(&buf[strlen(buf)],"DNS/GeoDB ",11);
1957#endif
1958#ifdef USE_BZIP
1959 strncpy(&buf[strlen(buf)],"BZip2 ",7);
1960#endif
1961#ifdef USE_GEOIP
1962 strncpy(&buf[strlen(buf)],"GeoIP ",7);
1963#endif
1964
1965 if (debug_mode)
1966 {
1967 printf("Mod date: %s Options: ",moddate);
1968 if (buf[0]!=0) printf("%s",buf);
1969 else printf("none");
1970 printf("\n");
1971#if USE_DNS
1972 printf("Default GeoDB dir : %s\n",GEODB_LOC);
1973#endif
1974 printf("Default config dir: %s\n",ETCDIR);
1975 printf("\n");
1976 }
1977 else printf("\n");
1978 exit(1);
1979}
1980
1981/*********************************************/
1982/* CUR_TIME - return date/time as a string */
1983/*********************************************/
1984
1985char *cur_time()
1986{
1987 time_t now;
1988 static char timestamp[48];
1989
1990 /* get system time */
1991 now = time(NULL);
1992 /* convert to timestamp string */
1993 if (local_time)
1994 strftime(timestamp,sizeof(timestamp),"%d-%b-%Y %H:%M %Z",
1995 localtime(&now));
1996 else
1997 strftime(timestamp,sizeof(timestamp),"%d-%b-%Y %H:%M GMT",
1998 gmtime(&now));
1999
2000 return timestamp;
2001}
2002
2003/*********************************************/
2004/* ISPAGE - determine if an HTML page or not */
2005/*********************************************/
2006
2007int ispage(char *str)
2008{
2009 NLISTPTR t;
2010 char *cp1, *cp2;
2011
2012 if (isinlist(omit_page,str)!=NULL) return 0;
2013
2014 cp1=cp2=str;
2015 while (*cp1!='\0') { if (*cp1=='.') cp2=cp1; cp1++; }
2016 if ((cp2++==str)||(*(--cp1)=='/')) return 1;
2017 t=page_prefix;
2018 while(t!=NULL)
2019 {
2020 /* Check if a PagePrefix matches */
2021 if(strncmp(str,t->string,strlen(t->string))==0) return 1;
2022 t=t->next;
2023 }
2024 return (isinlist(page_type,cp2)!=NULL);
2025}
2026
2027/*********************************************/
2028/* ISURLCHAR - checks for valid URL chars */
2029/*********************************************/
2030
/*
 * Test whether ch may appear in a URL.
 *
 * Letters, digits and bytes above 127 are always accepted, as are the
 * punctuation characters in url_punct.  The CGI delimiters ; ? & = are
 * accepted only when flag is zero (keep cgi vars); a non-zero flag
 * rejects them (strip cgi vars).
 *
 * A NUL byte is never a URL character.  It is rejected up front since
 * strchr() would otherwise match the terminator of the search set.
 *
 * Returns 1 if the character is acceptable, 0 if not.
 */
int isurlchar(unsigned char ch, int flag)
{
   static const char url_punct[]=":/\\.,' *!-+_@~()[]";
   static const char cgi_punct[]=";?&=";

   if (ch=='\0') return 0;               /* terminator is not a URL char  */
   if (isalnum(ch)) return 1;            /* allow letters, numbers...     */
   if (ch > 127) return 1;               /* allow extended chars...       */
   if (strchr(url_punct,ch)!=NULL) return 1;
   if (flag) return 0;                   /* strip cgi vars                */
   return (strchr(cgi_punct,ch)!=NULL);  /* keep cgi vars                 */
}
2040
2041/*********************************************/
2042/* CTRY_IDX - create unique # from TLD */
2043/*********************************************/
2044
/*
 * Pack a TLD string into a number.
 *
 * Characters are taken from last to first.  Each one contributes
 * (c - 'a' + 1) shifted left by 0, 7, 12, 17, ... bits (the first step
 * is 7 bits, every later one 5).  The per character term is computed
 * in int and only then added to the 64 bit sum, so a character below
 * 'a' (such as a digit) subtracts from the total.
 * An empty string gives 0.
 */
u_int64_t ctry_idx(char *str)
{
   u_int64_t packed=0;
   int       shift=0;
   int       pos;

   for (pos=(int)strlen(str)-1; pos>=0; pos--)
   {
      packed+=((str[pos]-'a'+1)<<shift);
      shift +=(shift==0)?7:5;
   }
   return packed;
}
2054
2055/*********************************************/
2056/* UN_IDX - get TLD from index # */
2057/*********************************************/
2058
/*
 * Turn an index number back into its TLD string.
 *
 * The text is built in a static buffer that the next call overwrites.
 * An index of 0 gives an empty string.  When the low 7 bits are above
 * 32 the index is decoded as a letter followed by one extra character
 * (only for a1, a2 and o1).  Otherwise the low 7 bits give the last
 * letter and each 5 bit group above them gives one more letter, with
 * unused leading positions removed.
 */
char *un_idx(u_int64_t idx)
{
   static char buf[8];
   int         low, field, pos;

   memset(buf, 0, sizeof(buf));
   if (idx==0) return buf;

   low=(int)(idx&0x7f);
   if (low>32)                           /* only for a1, a2 and o1 */
   {
      buf[0]=(char)((idx>>7)+'a');
      buf[1]=(char)(low-32);
      return buf;
   }

   /* right-justify up to six letters, blanks where a group is empty */
   buf[5]=(char)(low+'a'-1);
   for (pos=4; pos>=0; pos--)
   {
      field=(int)((idx>>(((5-pos)*5)+2))&0x1f);
      if (field) buf[pos]=(char)(field+'a'-1);
      else       buf[pos]=' ';
   }

   /* slide the text left until no leading blank is left; buf[6] is  */
   /* zero, so every shift also pulls a terminator in behind the text */
   while (buf[0]==' ')
   {
      memmove(buf, buf+1, 6);
   }
   return buf;
}
2074
2075/*********************************************/
2076/* FROM_HEX - convert hex char to decimal */
2077/*********************************************/
2078
/*
 * Convert one hexadecimal digit character to its value (0-15).
 * Upper and lower case letters are accepted.  Anything that is not a
 * hex digit gives 0.
 *
 * Each range is tested explicitly; computing c - 'a' + 10 for every
 * remaining character would map 'X' through '`' onto 1..9.
 */
char from_hex(char c)                       /* convert hex to dec      */
{
   if (c>='0'&&c<='9') return c-'0';        /* 0-9?                    */
   if (c>='A'&&c<='F') return c-'A'+10;     /* A-F?                    */
   if (c>='a'&&c<='f') return c-'a'+10;     /* a-f?                    */
   return 0;                                /* return 0 if bad...      */
}
2086
2087/*********************************************/
2088/* UNESCAPE - convert escape seqs to chars */
2089/*********************************************/
2090
/*
 * Decode %xx escape sequences in place and return str (NULL is passed
 * straight back).
 *
 * A '%' that is not followed by a hex digit is kept as a literal '%'.
 * Decoded values below 32 and the value 127 are replaced by '_'.  If
 * the string ends right after the first hex digit the incomplete
 * escape is dropped.
 */
char *unescape(char *str)
{
   unsigned char *src, *dst;               /* unsigned so we can do > 127 */

   if (str==NULL) return NULL;             /* make sure strings valid     */
   src=dst=(unsigned char *)str;

   while (*src!='\0')
   {
      if (*src!='%')                       /* ordinary character          */
      {
         *dst++ = *src++;
         continue;
      }

      src++;                               /* step over the '%'           */
      if (!isxdigit(*src))                 /* not an escape after all     */
      {
         *dst++='%';
         continue;
      }

      *dst=from_hex(*src++)*16;            /* high nibble                 */
      if (*src) *dst+=from_hex(*src);      /* low nibble, if present      */
      if ((*dst<32)||(*dst==127)) *dst='_';   /* make '_' if its bad      */
      if (*src) { dst++; src++; }          /* keep it only when complete  */
   }
   *dst='\0';                              /* don't forget terminator     */
   return str;
}
2117
2118/*********************************************/
2119/* OURICMP - Case insensitive string compare */
2120/*********************************************/
2121
/*
 * Case insensitive comparison that is driven by str1 only.
 * Returns 0 when every character of str1 matches the character at the
 * same position in str2 (so str1 may be a prefix of str2, and an empty
 * str1 always matches).  Returns 1 on the first difference.
 */
int ouricmp(char *str1, char *str2)
{
   const unsigned char *lhs=(const unsigned char *)str1;
   const unsigned char *rhs=(const unsigned char *)str2;

   for ( ; *lhs!=0; lhs++, rhs++)
   {
      if (tolower(*lhs)!=tolower(*rhs)) return 1;
   }
   return 0;
}
2129
2130/*********************************************/
2131/* SRCH_STRING - get search strings from ref */
2132/*********************************************/
2133
2134void srch_string(char *ptr)
2135{
2136 /* ptr should point to unescaped query string */
2137 char tmpbuf[BUFSIZE];
2138 char srch[80]="";
2139 unsigned char *cp1, *cp2, *cps;
2140 int sp_flg=0;
2141
2142 /* Check if search engine referrer or return */
2143 if ( (cps=(unsigned char *)isinglist(search_list,log_rec.refer))==NULL)
2144 return;
2145
2146 /* Try to find query variable */
2147 srch[0]='?'; srch[sizeof(srch)-1] = '\0';
2148 strncpy(&srch[1],(char *)cps,sizeof(srch)-2); /* First, try "?..." */
2149 if ((cp1=(unsigned char *)strstr(ptr,srch))==NULL)
2150 {
2151 srch[0]='&'; /* Next, try "&..." */
2152 if ((cp1=(unsigned char *)strstr(ptr,srch))==NULL) return;
2153 }
2154 cp2=(unsigned char *)tmpbuf;
2155 while (*cp1!='=' && *cp1!=0) cp1++; if (*cp1!=0) cp1++;
2156 while (*cp1!='&' && *cp1!=0)
2157 {
2158 if (*cp1=='"' || *cp1==',' || *cp1=='?')
2159 { cp1++; continue; } /* skip bad ones.. */
2160 else
2161 {
2162 if (*cp1=='+') *cp1=' '; /* change + to space */
2163 if (sp_flg && *cp1==' ') { cp1++; continue; } /* compress spaces */
2164 if (*cp1==' ') sp_flg=1; else sp_flg=0; /* (flag spaces here) */
2165 if (searchcasei)
2166 *cp2++=tolower(*cp1++); /* normal character */
2167 else *cp2++=*cp1++;
2168 }
2169 }
2170 *cp2=0; cp2=(unsigned char *)tmpbuf;
2171 if (tmpbuf[0]=='?') tmpbuf[0]=' '; /* format fix ? */
2172 while( *cp2!=0 && isspace((unsigned char)*cp2) ) cp2++; /* skip sps. */
2173 if (*cp2==0) return;
2174
2175 /* any trailing spaces? */
2176 cp1=cp2+strlen((char *)cp2)-1;
2177 while (cp1!=cp2) if (isspace((unsigned char)*cp1)) *cp1--='\0'; else break;
2178
2179 /* strip invalid chars */
2180 cp1=cp2;
2181 while (*cp1!=0) { if ((*cp1<32)||(*cp1==127)) *cp1='_'; cp1++; }
2182
2183 if (put_snode((char *)cp2,(u_int64_t)1,sr_htab))
2184 {
2185 if (verbose)
2186 /* Error adding search string node, skipping .... */
2187 fprintf(stderr,"%s %s\n", msg_nomem_sc, tmpbuf);
2188 }
2189 return;
2190}
2191
2192/*********************************************/
2193/* GET_DOMAIN - Get domain portion of host */
2194/*********************************************/
2195
2196char *get_domain(char *str)
2197{
2198 char *cp;
2199 int i=group_domains+1;
2200
2201 if (isipaddr(str)) return NULL;
2202 cp = str+strlen(str)-1;
2203
2204 while (cp!=str)
2205 {
2206 if (*cp=='.')
2207 if (!(--i)) return ++cp;
2208 cp--;
2209 }
2210 return cp;
2211}
2212
2213/*********************************************/
2214/* AGENT_MANGLE - Re-format user agent */
2215/*********************************************/
2216
2217void agent_mangle(char *str)
2218{
2219 char *cp1, *cp2, *cp3;
2220
2221 str=cp2=log_rec.agent;
2222 cp1=strstr(str,"ompatible"); /* check known fakers */
2223 if (cp1!=NULL)
2224 {
2225 while (*cp1!=';'&&*cp1!='\0') cp1++;
2226 /* kludge for Mozilla/3.01 (compatible;) */
2227 if (*cp1++==';' && strcmp(cp1,")\"")) /* success! */
2228 {
2229 /* Opera can hide as MSIE */
2230 cp3=strstr(str,"Opera");
2231 if (cp3!=NULL)
2232 {
2233 while (*cp3!='.'&&*cp3!='\0')
2234 {
2235 if(*cp3=='/') *cp2++=' ';
2236 else *cp2++=*cp3;
2237 cp3++;
2238 }
2239 cp1=cp3;
2240 }
2241 else
2242 {
2243 while (*cp1 == ' ') cp1++; /* eat spaces */
2244 while (*cp1!='.'&&*cp1!='\0'&&*cp1!=';') *cp2++=*cp1++;
2245 }
2246 if (mangle_agent<5)
2247 {
2248 while (*cp1!='.'&&*cp1!=';'&&*cp1!='\0') *cp2++=*cp1++;
2249 if (*cp1!=';'&&*cp1!='\0') { *cp2++=*cp1++; *cp2++=*cp1++; }
2250 }
2251 if (mangle_agent<4)
2252 if (*cp1>='0'&&*cp1<='9') *cp2++=*cp1++;
2253 if (mangle_agent<3)
2254 while (*cp1!=';'&&*cp1!='\0'&&*cp1!='('&&*cp1!=' ') *cp2++=*cp1++;
2255 if (mangle_agent<2)
2256 {
2257 /* Level 1 - try to get OS */
2258 cp1=strstr(cp1,")");
2259 if (cp1!=NULL)
2260 {
2261 *cp2++=' ';
2262 *cp2++='(';
2263 while (*cp1!=';'&&*cp1!='('&&cp1!=str) cp1--;
2264 if (cp1!=str&&*cp1!='\0') cp1++;
2265 while (*cp1==' '&&*cp1!='\0') cp1++;
2266 while (*cp1!=')'&&*cp1!='\0') *cp2++=*cp1++;
2267 *cp2++=')';
2268 }
2269 }
2270 *cp2='\0';
2271 }
2272 else
2273 {
2274 /* nothing after "compatible", should we mangle? */
2275 /* not for now */
2276 }
2277 }
2278 else
2279 {
2280 cp1=strstr(str,"Opera"); /* Opera flavor */
2281 if (cp1!=NULL)
2282 {
2283 while (*cp1!='/'&&*cp1!=' '&&*cp1!='\0') *cp2++=*cp1++;
2284 while (*cp1!='.'&&*cp1!='\0')
2285 {
2286 if(*cp1=='/') *cp2++=' ';
2287 else *cp2++=*cp1;
2288 cp1++;
2289 }
2290 if (mangle_agent<5)
2291 {
2292 while (*cp1!='.'&&*cp1!='\0') *cp2++=*cp1++;
2293 *cp2++=*cp1++;
2294 *cp2++=*cp1++;
2295 }
2296 if (mangle_agent<4)
2297 if (*cp1>='0'&&*cp1<='9') *cp2++=*cp1++;
2298 if (mangle_agent<3)
2299 while (*cp1!=' '&&*cp1!='\0'&&*cp1!='(') *cp2++=*cp1++;
2300 if (mangle_agent<2)
2301 {
2302 cp1=strstr(cp1,"(");
2303 if (cp1!=NULL)
2304 {
2305 cp1++;
2306 *cp2++=' ';
2307 *cp2++='(';
2308 while (*cp1!=';'&&*cp1!=')'&&*cp1!='\0') *cp2++=*cp1++;
2309 *cp2++=')';
2310 }
2311 }
2312 *cp2='\0';
2313 }
2314 else
2315 {
2316 cp1=strstr(str,"Mozilla"); /* Netscape flavor */
2317 if (cp1!=NULL)
2318 {
2319 while (*cp1!='/'&&*cp1!=' '&&*cp1!='\0') *cp2++=*cp1++;
2320 if (*cp1==' ') *cp1='/';
2321 while (*cp1!='.'&&*cp1!='\0') *cp2++=*cp1++;
2322 if (mangle_agent<5)
2323 {
2324 while (*cp1!='.'&&*cp1!='\0') *cp2++=*cp1++;
2325 *cp2++=*cp1++;
2326 *cp2++=*cp1++;
2327 }
2328 if (mangle_agent<4)
2329 if (*cp1>='0'&&*cp1<='9') *cp2++=*cp1++;
2330 if (mangle_agent<3)
2331 while (*cp1!=' '&&*cp1!='\0'&&*cp1!='(') *cp2++=*cp1++;
2332 if (mangle_agent<2)
2333 {
2334 /* Level 1 - Try to get OS */
2335 cp1=strstr(cp1,"(");
2336 if (cp1!=NULL)
2337 {
2338 cp1++;
2339 *cp2++=' ';
2340 *cp2++='(';
2341 while (*cp1!=';'&&*cp1!=')'&&*cp1!='\0') *cp2++=*cp1++;
2342 *cp2++=')';
2343 }
2344 }
2345 *cp2='\0';
2346 }
2347 }
2348 }
2349}
2350
2351/*********************************************/
2352/* OUR_GZGETS - enhanced gzgets for log only */
2353/*********************************************/
2354
2355char *our_gzgets(void *fp, char *buf, int size)
2356{
2357 char *out_cp=buf; /* point to output */
2358 while (1)
2359 {
2360 if (f_cp>(f_buf+f_end-1)) /* load? */
2361 {
2362#ifdef USE_BZIP
2363 f_end=(gz_log==COMP_BZIP)?
2364 BZ2_bzread(fp, f_buf, GZ_BUFSIZE):
2365 gzread(fp, f_buf, GZ_BUFSIZE);
2366#else
2367 f_end=gzread(fp, f_buf, GZ_BUFSIZE);
2368#endif
2369 if (f_end<=0) return Z_NULL;
2370 f_cp=f_buf;
2371 }
2372
2373 if (--size) /* more? */
2374 {
2375 *out_cp++ = *f_cp;
2376 if (*f_cp++ == '\n') { *out_cp='\0'; return buf; }
2377 }
2378 else { *out_cp='\0'; return buf; }
2379 }
2380}
2381
2382#ifdef USE_BZIP
2383/*********************************************/
2384/* bz2_rewind - our 'rewind' for bz2 files */
2385/*********************************************/
2386
2387int bz2_rewind( void **fp, char *fname, char *mode )
2388{
2389 BZ2_bzclose( *fp );
2390 *fp = BZ2_bzopen( fname, "rb");
2391 f_cp=f_buf+GZ_BUFSIZE; f_end=0; /* reset buffer counters */
2392 memset(f_buf, 0, sizeof(f_buf));
2393 if (*fp == Z_NULL) return -1;
2394 else return 0;
2395}
2396#endif /* USE_BZIP */
2397
2398/*********************************************/
2399/* ISIPADDR - Determine if str is IP address */
2400/*********************************************/
2401
/*
 * Classify a host string.
 *
 * Returns:  0  hostname (no ':' and some character other than a digit
 *              or '.')
 *           1  IPv4 (digits and exactly three dots)
 *           2  IPv4/IPv6 (contains ':' and at least one '.')
 *           3  IPv6 (contains ':' but no '.')
 *          -1  bad hostname: a ':' together with characters outside
 *              ":.abcdef0123456789", or only digits and dots but not
 *              exactly three dots
 */
int isipaddr(char *str)
{
   const char *scan;
   int        colons=0, dots=0;

   if (strchr(str,':')!=NULL)
   {
      /* Possible IPv6 Address */
      if (str[strspn(str,":.abcdef0123456789")]!='\0')
         return -1;                      /* bad hostname (has ':')      */

      for (scan=str; *scan!='\0'; scan++)
      {
         if      (*scan=='.') dots++;
         else if (*scan==':') colons++;
      }
      if (colons>0 && dots>0) return 2;  /* IPv4/IPv6                   */
      return 3;                          /* IPv6                        */
   }

   /* Not an IPv6 address, check for IPv4 */
   if (str[strspn(str,".0123456789")]!='\0')
      return 0;                          /* hostname                    */

   for (scan=str; *scan!='\0'; scan++)
   {
      if (*scan=='.') dots++;
   }
   if (dots!=3) return -1;               /* bad hostname                */
   return 1;                             /* IPv4                        */
}
2434
2435/*****************************************************************/
2436/* */
2437/* JDATE - Julian date calculator */
2438/* */
2439/* Calculates the number of days since Jan 1, 0000. */
2440/* */
2441/* Originally written by Bradford L. Barrett (03/17/1988) */
2442/* Returns an unsigned long value representing the number of */
2443/* days since January 1, 0000. */
2444/* */
2445/* Note: Due to the changes made by Pope Gregory XIII in the */
/*       16th Century (Feb 24, 1582), dates before 1583 will     */
/*       not return a truly accurate number (will be at least    */
2448/* 10 days off). Somehow, I don't think this will */
2449/* present much of a problem for most situations :) */
2450/* */
2451/* Usage: days = jdate(day, month, year) */
2452/* */
2453/* The number returned is adjusted by 5 to facilitate day of */
2454/* week calculations. The mod of the returned value gives the */
2455/* day of the week the date is. (ie: dow = days % 7 ) where */
2456/* dow will return 0=Sunday, 1=Monday, 2=Tuesday, etc... */
2457/* */
2458/*****************************************************************/
2459
/*
 * Compute the day number for a calendar date.
 *
 * The sum is year*365 + day + the days before the given month, plus
 * the leap day correction (year+4)/4 - (year/100 - year/400).  One day
 * is taken off for January and February of a leap year, and 5 is added
 * to the final value.
 *
 * month is used as a 1-based index into a 12 entry table and is not
 * range checked here.
 */
u_int64_t jdate( int day, int month, int year )
{
   static const int days_before[] =
      {0,31,59,90,120,151,181,212,243,273,304,334};
   int leap_year = (year % 4 == 0) &&
                   ((year % 100 != 0) || (year % 400 == 0));
   u_int64_t total;

   /* base number including leap and centennial year stuff */
   total  = (u_int64_t)year*365;
   total += day;
   total += days_before[month-1];
   total += (year+4)/4;
   total -= (year/100)-(year/400);

   /* now adjust for leap year before March 1st */
   if (leap_year && (month < 3)) total--;

   return total+5;
}