2 webalizer - a web server log analysis program
4 Copyright (C) 1997-2011 Bradford L. Barrett
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version, and provided that the above
10 copyright and permission notice is included with all distributed
11 copies of this or derived software.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
24 /*********************************************/
25 /* STANDARD INCLUDES */
26 /*********************************************/
28 /* Fix broken Zlib 64 bitness */
29 #if _FILE_OFFSET_BITS == 64
30 #ifndef _LARGEFILE64_SOURCE
31 #define _LARGEFILE64_SOURCE 1
40 #include <unistd.h> /* normal stuff */
43 #include <sys/utsname.h>
52 /* ensure sys/types */
54 #include <sys/types.h>
57 /* Need socket header? */
58 #ifdef HAVE_SYS_SOCKET_H
59 #include <sys/socket.h>
62 /* some systems need this */
69 #include <netinet/in.h>
70 #include <arpa/inet.h>
80 int bz2_rewind(void **, char *, char *);
83 #include "webalizer.h" /* main header */
89 #include "webalizer_lang.h" /* lang. support */
91 #include "dns_resolv.h"
94 /* Internal function prototypes.  Usage notes below are limited to what
      main() in this file visibly does with each routine. */
96 void clear_month(); /* clear monthly stuff */
97 char *unescape(char *); /* unescape URLs; main() also runs it on the referrer and user name */
98 void print_opts(char *); /* print options; main() calls it with argv[0] for -h */
99 void print_version(); /* print version info; main() calls it for -V */
100 int isurlchar(unsigned char, int); /* valid URL char test; main() truncates a string at the first char where this returns 0 */
101 void get_config(char *); /* read a config file (argument is the file name) */
102 static char *save_opt(char *); /* save conf option */
103 void srch_string(char *); /* search string analysis */
104 char *get_domain(char *); /* return domain name */
105 void agent_mangle(char *); /* reformat user agent; main() calls it when mangle_agent is set */
106 char *our_gzgets(void *, char *, int); /* line reader main() uses in place of fgets when gz_log is set */
107 int ouricmp(char *, char *); /* case-insensitive compare */
108 int isipaddr(char *); /* is IP address test */
110 /*********************************************/
111 /* GLOBAL VARIABLES */
112 /*********************************************/
114 char *version
= "2.23"; /* program version */
115 char *editlvl
= "05"; /* edit level */
116 char *moddate
= "14-Apr-2011"; /* modification date */
117 char *copyright
= "Copyright 1997-2011 by Bradford L. Barrett";
119 int verbose
= 2; /* 2=verbose,1=err, 0=none */
120 int debug_mode
= 0; /* debug mode flag */
121 int time_me
= 0; /* timing display flag */
122 int local_time
= 1; /* 1=localtime 0=GMT (UTC) */
123 int hist_gap
= 0; /* 1=error w/hist, save bkp */
124 int ignore_hist
= 0; /* history flag (1=skip) */
125 int ignore_state
= 0; /* state flag (1=skip) */
126 int default_index
= 1; /* default index. (1=yes) */
127 int hourly_graph
= 1; /* hourly graph display */
128 int hourly_stats
= 1; /* hourly stats table */
129 int daily_graph
= 1; /* daily graph display */
130 int daily_stats
= 1; /* daily stats table */
131 int ctry_graph
= 1; /* country graph display */
132 int shade_groups
= 1; /* Group shading 0=no 1=yes */
133 int hlite_groups
= 1; /* Group hlite 0=no 1=yes */
134 int mangle_agent
= 0; /* mangle user agents */
135 int incremental
= 0; /* incremental mode 1=yes */
136 int use_https
= 0; /* use 'https://' on URLs */
137 int htaccess
= 0; /* create .htaccess? (0=no) */
138 int stripcgi
= 1; /* strip url cgi (0=no) */
139 int normalize
= 1; /* normalize CLF URL (0=no) */
140 int trimsquid
= 0; /* trim squid urls (0=no) */
141 int searchcasei
= 1; /* case insensitive search */
142 int visit_timeout
= 1800; /* visit timeout (seconds) */
143 int graph_legend
= 1; /* graph legend (1=yes) */
144 int graph_lines
= 2; /* graph lines (0=none) */
145 int fold_seq_err
= 0; /* fold seq err (0=no) */
146 int log_type
= LOG_CLF
; /* log type (default=CLF) */
147 int group_domains
= 0; /* Group domains 0=none */
148 int hide_sites
= 0; /* Hide ind. sites (0=no) */
149 int link_referrer
= 0; /* Link referrers (0=no) */
150 char *hname
= NULL
; /* hostname for reports */
151 char *state_fname
= "webalizer.current"; /* run state file name */
152 char *hist_fname
= "webalizer.hist"; /* name of history file */
153 char *html_ext
= "html"; /* HTML file suffix */
154 char *dump_ext
= "tab"; /* Dump file suffix */
155 char *conf_fname
= NULL
; /* name of config file */
156 char *log_fname
= NULL
; /* log file pointer */
157 char *out_dir
= NULL
; /* output directory */
158 char *blank_str
= ""; /* blank string */
/* GeoDB / DNS lookup settings.  Option letters refer to the getopt
   switch in main(). */
159 char *geodb_fname
= NULL
; /* GeoDB database filename (-J); when NULL main() prints msg_geo_dflt instead of a name */
160 char *dns_cache
= NULL
; /* DNS cache file name (-D); main() resets it to NULL when open_cache() returns 0 */
161 int dns_children
= 0; /* DNS children (-N, 0=don't do); main() caps it at MAXCHILD and defaults it to 5 when run as webazolver */
162 int cache_ips
= 0; /* CacheIPs in DB (0=no) */
163 int cache_ttl
= 7; /* DNS Cache TTL (days); main() forces it into the range 1..100 */
164 int geodb
= 0; /* Use GeoDB (-j, 0=no); main() turns geoip off when GeoDB is used */
/* Index page layout settings.  Option letters refer to the getopt
   switch in main(). */
165 int graph_mths
= 12; /* # months in index graph (-k); main() clamps it to 12..GRAPHMAX */
166 int index_mths
= 12; /* # months in index table (-K); main() clamps it to 12..HISTSIZE */
167 int year_hdrs
= 1; /* index year separators */
168 int year_totals
= 1; /* index year subtotals */
169 int use_flags
= 0; /* Show flags in ctry table; set to 1 by -z */
170 char *flag_dir
= "flags"; /* location of flag icons; replaced by the -z argument */
173 int geoip
= 0; /* Use GeoIP (0=no) */
174 char *geoip_db
= NULL
; /* GeoIP database filename */
175 GeoIP
*geo_fp
= NULL
; /* GeoIP database handle */
/* "Top N" table sizes.  Option letters refer to the getopt switch in
   main(); values given on the command line are converted with atoi(). */
178 int ntop_sites
= 30; /* top n sites to display (-S) */
179 int ntop_sitesK
= 10; /* top n sites (by kbytes) */
180 int ntop_urls
= 30; /* top n url's to display (-U) */
181 int ntop_urlsK
= 10; /* top n url's (by kbytes) */
182 int ntop_entry
= 10; /* top n entry url's (-e); main() zeroes it for FTP logs and caps it at the larger of ntop_urls/ntop_urlsK */
183 int ntop_exit
= 10; /* top n exit url's (-E); same FTP and cap handling as ntop_entry */
184 int ntop_refs
= 30; /* top n referrers (-R) */
185 int ntop_agents
= 15; /* top n user agents (-A) */
186 int ntop_ctrys
= 30; /* top n countries (-C); main() caps it at the number of ctry[] entries and sets it to 0 if the top_ctrys allocation fails */
187 int ntop_search
= 20; /* top n search strings */
188 int ntop_users
= 20; /* top n users to display */
190 int all_sites
= 0; /* List All sites (0=no) */
191 int all_urls
= 0; /* List All URLs (0=no) */
192 int all_refs
= 0; /* List All Referrers */
193 int all_agents
= 0; /* List All User Agents */
194 int all_search
= 0; /* List All Search Strings */
195 int all_users
= 0; /* List All Usernames */
197 int dump_sites
= 0; /* Dump tab delimited sites */
198 int dump_urls
= 0; /* URLs */
199 int dump_refs
= 0; /* Referrers */
200 int dump_agents
= 0; /* User Agents */
201 int dump_users
= 0; /* Usernames */
202 int dump_search
= 0; /* Search strings */
203 int dump_header
= 0; /* Dump header as first rec */
204 char *dump_path
= NULL
; /* Path for dump files */
206 int cur_year
=0, cur_month
=0, /* year/month/day/hour */
207 cur_day
=0, cur_hour
=0, /* tracking variables */
208 cur_min
=0, cur_sec
=0;
210 u_int64_t cur_tstamp
=0; /* Timestamp... */
211 u_int64_t rec_tstamp
=0;
212 u_int64_t req_tstamp
=0;
213 u_int64_t epoch
; /* used for timestamp adj. */
215 int check_dup
=0; /* check for dup flag */
216 int gz_log
=COMP_NONE
; /* gziped log? (0=no) */
218 double t_xfer
=0.0; /* monthly total xfer value */
219 u_int64_t t_hit
=0,t_file
=0,t_site
=0, /* monthly total vars */
220 t_url
=0,t_ref
=0,t_agent
=0,
221 t_page
=0, t_visit
=0, t_user
=0;
223 double tm_xfer
[31]; /* daily transfer totals */
225 u_int64_t tm_hit
[31], tm_file
[31], /* daily total arrays */
226 tm_site
[31], tm_page
[31],
229 u_int64_t dt_site
; /* daily 'sites' total */
231 u_int64_t ht_hit
=0, mh_hit
=0; /* hourly hits totals */
233 u_int64_t th_hit
[24], th_file
[24], /* hourly total arrays */
238 int f_day
,l_day
; /* first/last day vars */
240 struct utsname system_info
; /* system info structure */
242 u_int64_t ul_bogus
=0; /* Dummy counter for groups */
244 struct log_struct log_rec
; /* expanded log storage */
246 void *zlog_fp
; /* compressed logfile ptr */
247 FILE *log_fp
; /* regular logfile pointer */
249 char buffer
[BUFSIZE
]; /* log file record buffer */
250 char tmp_buf
[BUFSIZE
]; /* used to temp save above */
252 CLISTPTR
*top_ctrys
= NULL
; /* Top countries table */
/* Read buffer state.  NOTE(review): the original comments name
   "our_getfs", but no such routine is declared in the visible part of
   this file; presumably they mean our_gzgets() -- confirm. */
254 #define GZ_BUFSIZE 16384 /* size of f_buf, in bytes */
255 char f_buf
[GZ_BUFSIZE
]; /* the read buffer itself */
256 char *f_cp
=f_buf
+GZ_BUFSIZE
; /* pointer into the buffer; starts one past the end of f_buf (presumably so the first read triggers a refill -- confirm) */
257 int f_end
=0; /* count to end of buffer */
/* Initial graph colors, written as "#" followed by six hex digits.
   Each array is sized by its initializer (7 characters plus the
   terminating NUL), so nothing longer may be copied into it. */
259 char hit_color
[] = "#00805c"; /* graph hit color */
260 char file_color
[] = "#0040ff"; /* graph file color */
261 char site_color
[] = "#ff8000"; /* graph site color */
262 char kbyte_color
[] = "#ff0000"; /* graph kbyte color */
263 char page_color
[] = "#00e0ff"; /* graph page color */
264 char visit_color
[] = "#ffff00"; /* graph visit color */
265 char misc_color
[] = "#00e0ff"; /* graph misc color (same initial value as page_color) */
266 char pie_color1
[] = "#800080"; /* pie additional color 1 */
267 char pie_color2
[] = "#80ffc0"; /* pie additional color 2 */
268 char pie_color3
[] = "#ff00ff"; /* pie additional color 3 */
269 char pie_color4
[] = "#ffc080"; /* pie additional color 4 */
271 /*********************************************/
272 /* MAIN - start here */
273 /*********************************************/
275 int main(int argc
, char *argv
[])
277 int i
; /* generic counter */
278 char *cp1
, *cp2
, *cp3
; /* generic char pointers */
279 char host_buf
[MAXHOST
+1]; /* used to save hostname */
281 NLISTPTR lptr
; /* generic list pointer */
283 extern char *optarg
; /* used for command line */
284 extern int optind
; /* parsing routine 'getopt' */
287 time_t start_time
, end_time
; /* program timers */
288 float temp_time
; /* temporary time storage */
290 int rec_year
,rec_month
=1,rec_day
,rec_hour
,rec_min
,rec_sec
;
292 int good_rec
=0; /* 1 if we had a good record */
293 u_int64_t total_rec
=0; /* Total Records Processed */
294 u_int64_t total_ignore
=0; /* Total Records Ignored */
295 u_int64_t total_bad
=0; /* Total Bad Records */
297 int max_ctry
; /* max countries defined */
299 /* month names used for parsing logfile (shouldn't be lang specific) */
300 char *log_month
[12]={ "jan", "feb", "mar",
303 "oct", "nov", "dec"};
305 /* stat struct for files */
306 struct stat log_stat
;
308 /* Assume that LC_CTYPE is what the user wants for non-ASCII chars */
309 setlocale(LC_CTYPE
,"");
311 /* initialize epoch */
312 epoch
=jdate(1,1,1970); /* used for timestamp adj. */
314 sprintf(tmp_buf
,"%s/webalizer.conf",ETCDIR
);
315 /* check for default config file */
316 if (!access("webalizer.conf",F_OK
))
317 get_config("webalizer.conf");
318 else if (!access(tmp_buf
,F_OK
))
321 /* get command line options */
322 opterr
= 0; /* disable parser errors */
323 while ((i
=getopt(argc
,argv
,"a:A:bc:C:dD:e:E:fF:g:GhHiI:jJ:k:K:l:Lm:M:n:N:o:O:pP:qQr:R:s:S:t:Tu:U:vVwW:x:XYz:Z"))!=EOF
)
327 case 'a': add_nlist(optarg
,&hidden_agents
); break; /* Hide agents */
328 case 'A': ntop_agents
=atoi(optarg
); break; /* Top agents */
329 case 'b': ignore_state
=1; break; /* Ignore state file */
330 case 'c': get_config(optarg
); break; /* Config file */
331 case 'C': ntop_ctrys
=atoi(optarg
); break; /* Top countries */
332 case 'd': debug_mode
=1; break; /* Debug */
333 case 'D': dns_cache
=optarg
; break; /* DNS Cache filename */
334 case 'e': ntop_entry
=atoi(optarg
); break; /* Top entry pages */
335 case 'E': ntop_exit
=atoi(optarg
); break; /* Top exit pages */
336 case 'f': fold_seq_err
=1; break; /* Fold sequence errs */
337 case 'F': log_type
=(tolower(optarg
[0])=='f')?
338 LOG_FTP
:(tolower(optarg
[0])=='s')?
339 LOG_SQUID
:(tolower(optarg
[0])=='w')?
340 LOG_W3C
:LOG_CLF
; break; /* define log type */
341 case 'g': group_domains
=atoi(optarg
); break; /* GroupDomains (0=no) */
342 case 'G': hourly_graph
=0; break; /* no hourly graph */
343 case 'h': print_opts(argv
[0]); break; /* help */
344 case 'H': hourly_stats
=0; break; /* no hourly stats */
345 case 'i': ignore_hist
=1; break; /* Ignore history */
346 case 'I': add_nlist(optarg
,&index_alias
); break; /* Index alias */
347 case 'j': geodb
=1; break; /* Enable GeoDB */
348 case 'J': geodb_fname
=optarg
; break; /* GeoDB db filename */
349 case 'k': graph_mths
=atoi(optarg
); break; /* # months idx graph */
350 case 'K': index_mths
=atoi(optarg
); break; /* # months idx table */
351 case 'l': graph_lines
=atoi(optarg
); break; /* Graph Lines */
352 case 'L': graph_legend
=0; break; /* Graph Legends */
353 case 'm': visit_timeout
=atoi(optarg
); break; /* Visit Timeout */
354 case 'M': mangle_agent
=atoi(optarg
); break; /* mangle user agents */
355 case 'n': hname
=optarg
; break; /* Hostname */
356 case 'N': dns_children
=atoi(optarg
); break; /* # of DNS children */
357 case 'o': out_dir
=optarg
; break; /* Output directory */
358 case 'O': add_nlist(optarg
,&omit_page
); break; /* pages not counted */
359 case 'p': incremental
=1; break; /* Incremental run */
360 case 'P': add_nlist(optarg
,&page_type
); break; /* page view types */
361 case 'q': verbose
=1; break; /* Quiet (verbose=1) */
362 case 'Q': verbose
=0; break; /* Really Quiet */
363 case 'r': add_nlist(optarg
,&hidden_refs
); break; /* Hide referrer */
364 case 'R': ntop_refs
=atoi(optarg
); break; /* Top referrers */
365 case 's': add_nlist(optarg
,&hidden_sites
); break; /* Hide site */
366 case 'S': ntop_sites
=atoi(optarg
); break; /* Top sites */
367 case 't': msg_title
=optarg
; break; /* Report title */
368 case 'T': time_me
=1; break; /* TimeMe */
369 case 'u': add_nlist(optarg
,&hidden_urls
); break; /* hide URL */
370 case 'U': ntop_urls
=atoi(optarg
); break; /* Top urls */
371 case 'v': verbose
=2; debug_mode
=1; break; /* Verbose */
372 case 'V': print_version(); break; /* Version */
374 case 'w': geoip
=1; break; /* Enable GeoIP */
375 case 'W': geoip_db
=optarg
; break; /* GeoIP database name */
377 case 'x': html_ext
=optarg
; break; /* HTML file extension */
378 case 'X': hide_sites
=1; break; /* Hide ind. sites */
379 case 'Y': ctry_graph
=0; break; /* Supress ctry graph */
380 case 'Z': normalize
=0; break; /* Dont normalize URLs */
381 case 'z': use_flags
=1; flag_dir
=optarg
; break; /* Ctry flag dir */
385 if (argc
- optind
!= 0) log_fname
= argv
[optind
];
386 if ( log_fname
&& (log_fname
[0]=='-')) log_fname
=NULL
; /* force STDIN? */
388 /* check for gzipped file - .gz */
389 if (log_fname
) if (!strcmp((log_fname
+strlen(log_fname
)-3),".gz"))
393 /* check for bzip file - .bz2 */
394 if (log_fname
) if (!strcmp((log_fname
+strlen(log_fname
)-4),".bz2"))
398 /* setup our internal variables */
399 init_counters(); /* initalize (zero) main counters */
400 memset(hist
, 0, sizeof(hist
)); /* initalize (zero) history array */
402 /* add default index. alias if needed */
403 if (default_index
) add_nlist("index.",&index_alias
);
405 if (page_type
==NULL
) /* check if page types present */
407 if ((log_type
==LOG_CLF
)||(log_type
==LOG_SQUID
)||(log_type
==LOG_W3C
))
409 add_nlist("htm*" ,&page_type
); /* if no page types specified, we */
410 add_nlist("cgi" ,&page_type
); /* use the default ones here... */
411 if (!isinlist(page_type
,html_ext
)) add_nlist(html_ext
,&page_type
);
413 else add_nlist("txt" ,&page_type
); /* FTP logs default to .txt */
416 for (max_ctry
=0;ctry
[max_ctry
].desc
;max_ctry
++);
417 if (ntop_ctrys
> max_ctry
) ntop_ctrys
= max_ctry
; /* force upper limit */
418 if (graph_lines
> 20) graph_lines
= 20; /* keep graphs sane! */
419 if (graph_mths
<12) graph_mths
=12;
420 if (graph_mths
>GRAPHMAX
) graph_mths
=GRAPHMAX
;
421 if (index_mths
<12) index_mths
=12;
422 if (index_mths
>HISTSIZE
) index_mths
=HISTSIZE
;
424 if (log_type
== LOG_FTP
)
426 /* disable stuff for ftp logs */
427 ntop_entry
=ntop_exit
=0;
432 if (search_list
==NULL
)
434 /* If no search engines defined, define some :) */
435 add_glist(".google. q=" ,&search_list
);
436 add_glist("yahoo.com p=" ,&search_list
);
437 add_glist("altavista.com q=" ,&search_list
);
438 add_glist("aolsearch. query=" ,&search_list
);
439 add_glist("ask.co q=" ,&search_list
);
440 add_glist("eureka.com q=" ,&search_list
);
441 add_glist("lycos.com query=" ,&search_list
);
442 add_glist("hotbot.com MT=" ,&search_list
);
443 add_glist("msn.com q=" ,&search_list
);
444 add_glist("infoseek.com qt=" ,&search_list
);
445 add_glist("webcrawler searchText=" ,&search_list
);
446 add_glist("excite search=" ,&search_list
);
447 add_glist("netscape.com query=" ,&search_list
);
448 add_glist("mamma.com query=" ,&search_list
);
449 add_glist("alltheweb.com q=" ,&search_list
);
450 add_glist("northernlight.com qr=" ,&search_list
);
454 /* ensure entry/exits don't exceed urls */
455 i
=(ntop_urls
>ntop_urlsK
)?ntop_urls
:ntop_urlsK
;
456 if (ntop_entry
>i
) ntop_entry
=i
;
457 if (ntop_exit
>i
) ntop_exit
=i
;
459 for (i
=0;i
<MAXHASH
;i
++)
461 sm_htab
[i
]=sd_htab
[i
]=NULL
; /* initalize hash tables */
468 /* Be polite and announce yourself... */
472 printf("Webalizer V%s-%s (%s %s %s) %s\n", version
,editlvl
,
473 system_info
.sysname
, system_info
.release
,
474 system_info
.machine
,language
);
478 if (strstr(argv
[0],"webazolver")!=0)
479 /* DNS support not present, aborting... */
480 { printf("%s\n",msg_dns_abrt
); exit(1); }
482 /* Force sane values for cache TTL */
483 if (cache_ttl
<1) cache_ttl
=1;
484 if (cache_ttl
>100) cache_ttl
=100;
491 if ( !(lstat(log_fname
, &log_stat
)) )
493 /* check if the file is a symlink */
494 if ( S_ISLNK(log_stat
.st_mode
) )
497 fprintf(stderr
,"%s %s (symlink)\n",msg_log_err
,log_fname
);
504 /* open compressed file */
506 if (gz_log
==COMP_BZIP
)
507 zlog_fp
= BZ2_bzopen(log_fname
,"rb");
510 zlog_fp
= gzopen(log_fname
, "rb");
513 /* Error: Can't open log file ... */
514 fprintf(stderr
, "%s %s (%d)\n",msg_log_err
,log_fname
,ENOENT
);
520 /* open regular file */
521 log_fp
= fopen(log_fname
,"r");
524 /* Error: Can't open log file ... */
525 fprintf(stderr
, "%s %s\n",msg_log_err
,log_fname
);
531 /* Using logfile ... */
534 printf("%s %s (",msg_log_use
,log_fname
?log_fname
:"STDIN");
535 if (gz_log
==COMP_GZIP
) printf("gzip-");
537 if (gz_log
==COMP_BZIP
) printf("bzip-");
541 /* display log file type hint */
542 case LOG_CLF
: printf("clf)\n"); break;
543 case LOG_FTP
: printf("ftp)\n"); break;
544 case LOG_SQUID
: printf("squid)\n"); break;
545 case LOG_W3C
: printf("w3c)\n"); break;
549 /* switch directories if needed */
552 if (chdir(out_dir
) != 0)
554 /* Error: Can't change directory to ... */
555 fprintf(stderr
, "%s %s\n",msg_dir_err
,out_dir
);
561 if (strstr(argv
[0],"webazolver")!=0)
563 if (!dns_children
) dns_children
=5; /* default dns children if needed */
566 /* No cache file specified, aborting... */
567 fprintf(stderr
,"%s\n",msg_dns_nocf
); /* Must have a cache file */
572 if (dns_cache
&& dns_children
) /* run-time resolution */
574 if (dns_children
> MAXCHILD
) dns_children
=MAXCHILD
;
575 /* DNS Lookup (#children): */
576 if (verbose
>1) printf("%s (%d): ",msg_dns_rslv
,dns_children
);
578 (gz_log
)?dns_resolver(zlog_fp
):dns_resolver(log_fp
);
580 (gz_log
==COMP_BZIP
)?bz2_rewind(&zlog_fp
, log_fname
, "rb"):
582 (gz_log
==COMP_GZIP
)?gzrewind(zlog_fp
):
583 (log_fname
)?rewind(log_fp
):exit(0);
586 if (strstr(argv
[0],"webazolver")!=0) exit(0); /* webazolver exits here */
590 if (!open_cache()) { dns_cache
=NULL
; dns_db
=NULL
; }
593 /* Using DNS cache file <filename> */
594 if (verbose
>1) printf("%s %s\n",msg_dns_usec
,dns_cache
);
601 geo_db
=geodb_open(geodb_fname
);
604 if (verbose
) printf("%s: %s\n",msg_geo_open
,
605 (geodb_fname
)?geodb_fname
:msg_geo_dflt
);
606 if (verbose
) printf("GeoDB %s\n",msg_geo_nolu
);
609 else if (verbose
>1) printf("%s %s\n",
610 msg_geo_use
,geodb_ver(geo_db
,buffer
));
612 if (geoip
) geoip
=0; /* Disable GeoIP if using GeoDB */
618 /* open GeoIP database */
622 geo_fp
=GeoIP_open(geoip_db
, GEOIP_MEMORY_CACHE
);
624 geo_fp
=GeoIP_new(GEOIP_MEMORY_CACHE
);
626 /* Did we open one? */
629 /* couldn't open.. warn user */
630 if (verbose
) printf("GeoIP %s\n",msg_geo_nolu
);
633 else if (verbose
>1) printf("%s %s (%s)\n",msg_geo_use
,
634 GeoIPDBDescription
[(int)geo_fp
->databaseType
],
635 (geoip_db
==NULL
)?msg_geo_dflt
:geo_fp
->file_path
);
637 #endif /* USE_GEOIP */
639 /* Creating output in ... */
641 printf("%s %s\n",msg_dir_use
,out_dir
?out_dir
:msg_cur_dir
);
646 if (uname(&system_info
)) hname
="localhost";
647 else hname
=system_info
.nodename
;
650 /* Hostname for reports is ... */
651 if (strlen(hname
)) if (verbose
>1) printf("%s '%s'\n",msg_hostname
,hname
);
653 /* get past history */
654 if (ignore_hist
) { if (verbose
>1) printf("%s\n",msg_ign_hist
); }
657 if (incremental
) /* incremental processing? */
659 if ((i
=restore_state())) /* restore internal data structs */
661 /* Error: Unable to restore run data (error num) */
662 /* if (verbose) fprintf(stderr,"%s (%d)\n",msg_bad_data,i); */
663 fprintf(stderr
,"%s (%d)\n",msg_bad_data
,i
);
668 /* Allocate memory for our TOP countries array */
670 { if ( (top_ctrys
=calloc(ntop_ctrys
,sizeof(CLISTPTR
))) == NULL
)
671 /* Can't get memory, Top Countries disabled! */
672 {if (verbose
) fprintf(stderr
,"%s\n",msg_nomem_tc
); ntop_ctrys
=0;}}
674 /* get processing start time */
675 start_time
= time(NULL
);
677 /*********************************************/
678 /* MAIN PROCESS LOOP - read through log file */
679 /*********************************************/
681 while ( (gz_log
)?(our_gzgets(zlog_fp
,buffer
,BUFSIZE
) != Z_NULL
):
682 (fgets(buffer
,BUFSIZE
,log_fname
?log_fp
:stdin
) != NULL
))
685 if (strlen(buffer
) == (BUFSIZE
-1))
689 fprintf(stderr
,"%s",msg_big_rec
);
690 if (debug_mode
) fprintf(stderr
,":\n%s",buffer
);
691 else fprintf(stderr
,"\n");
694 total_bad
++; /* bump bad record counter */
696 /* get the rest of the record */
697 while ( (gz_log
)?(our_gzgets(zlog_fp
,buffer
,BUFSIZE
)!=Z_NULL
):
698 (fgets(buffer
,BUFSIZE
,log_fname
?log_fp
:stdin
)!=NULL
))
700 if (strlen(buffer
) < BUFSIZE
-1)
702 if (debug_mode
&& verbose
) fprintf(stderr
,"%s\n",buffer
);
705 if (debug_mode
&& verbose
) fprintf(stderr
,"%s",buffer
);
707 continue; /* go get next record if any */
710 /* got a record... */
711 strcpy(tmp_buf
, buffer
); /* save buffer in case of error */
712 if (parse_record(buffer
)) /* parse the record */
714 /*********************************************/
715 /* PASSED MINIMAL CHECKS, DO A LITTLE MORE */
716 /*********************************************/
718 /* convert month name to lowercase */
720 log_rec
.datetime
[i
]=tolower(log_rec
.datetime
[i
]);
722 /* lowercase sitename/IPv6 addresses */
723 cp1
=log_rec
.hostname
;
724 while (*cp1
++!='\0') *cp1
=tolower(*cp1
);
726 /* get year/month/day/hour/min/sec values */
729 if (strncmp(log_month
[i
],&log_rec
.datetime
[4],3)==0)
730 { rec_month
= i
+1; break; }
733 rec_year
=atoi(&log_rec
.datetime
[8]); /* get year number (int) */
734 rec_day
=atoi(&log_rec
.datetime
[1]); /* get day number */
735 rec_hour
=atoi(&log_rec
.datetime
[13]); /* get hour number */
736 rec_min
=atoi(&log_rec
.datetime
[16]); /* get minute number */
737 rec_sec
=atoi(&log_rec
.datetime
[19]); /* get second number */
739 /* Kludge for Netscape server time (0-24?) error */
740 if (rec_hour
>23) rec_hour
=0;
742 /* minimal sanity check on date */
743 if ((i
>=12)||(rec_min
>59)||(rec_sec
>60)||(rec_year
<1990))
745 total_bad
++; /* if a bad date, bump counter */
748 fprintf(stderr
,"%s: %s [%llu]",
749 msg_bad_date
,log_rec
.datetime
,total_rec
);
750 if (debug_mode
) fprintf(stderr
,":\n%s\n",tmp_buf
);
751 else fprintf(stderr
,"\n");
753 continue; /* and ignore this record */
756 /*********************************************/
757 /* GOOD RECORD, CHECK INCREMENTAL/TIMESTAMPS */
758 /*********************************************/
760 /* Flag as a good one */
763 /* get current records timestamp (seconds since epoch) */
764 req_tstamp
=cur_tstamp
;
765 rec_tstamp
=((jdate(rec_day
,rec_month
,rec_year
)-epoch
)*86400)+
766 (rec_hour
*3600)+(rec_min
*60)+rec_sec
;
768 /* Do we need to check for duplicate records? (incremental mode) */
771 /* check if less than/equal to last record processed */
772 if ( rec_tstamp
<= cur_tstamp
)
774 /* if it is, assume we have already processed and ignore it */
780 /* if it isn't.. disable any more checks this run */
782 /* now check if it's a new month */
783 if ( (cur_month
!= rec_month
) || (cur_year
!= rec_year
) )
786 cur_sec
= rec_sec
; /* set current counters */
790 cur_month
= rec_month
;
792 cur_tstamp
= rec_tstamp
;
793 f_day
=l_day
=rec_day
; /* reset first and last day */
798 /* check for out of sequence records */
799 if (rec_tstamp
/3600 < cur_tstamp
/3600)
801 if (!fold_seq_err
&& ((rec_tstamp
+SLOP_VAL
)/3600<cur_tstamp
/3600) )
802 { total_ignore
++; continue; }
805 rec_sec
= cur_sec
; /* if folding sequence */
806 rec_min
= cur_min
; /* errors, just make it */
807 rec_hour
= cur_hour
; /* look like the last */
808 rec_day
= cur_day
; /* good records timestamp */
809 rec_month
= cur_month
;
811 rec_tstamp
= cur_tstamp
;
814 cur_tstamp
=rec_tstamp
; /* update current timestamp */
816 /*********************************************/
817 /* DO SOME PRE-PROCESS FORMATTING */
818 /*********************************************/
821 unescape(log_rec
.url
);
824 cp1
= cp2
= log_rec
.url
;
825 /* handle null '-' case here... */
826 if (*++cp1
== '-') strcpy(log_rec
.url
,"/INVALID-URL");
829 /* strip actual URL out of request */
830 while ( (*cp1
!= ' ') && (*cp1
!= '\0') ) cp1
++;
833 /* scan to begin of actual URL field */
834 while ((*cp1
== ' ') && (*cp1
!= '\0')) cp1
++;
835 /* remove duplicate / if needed */
836 while (( *cp1
=='/') && (*(cp1
+1)=='/')) cp1
++;
837 while (( *cp1
!='\0')&&(*cp1
!='"')) *cp2
++=*cp1
++;
842 /* strip query portion of cgi scripts */
845 if (!isurlchar(*cp1
, stripcgi
)) { *cp1
= '\0'; break; }
847 if (log_rec
.url
[0]=='\0')
848 { log_rec
.url
[0]='/'; log_rec
.url
[1]='\0'; }
851 if (log_type
==LOG_CLF
&& log_rec
.resp_code
!=RC_NOTFOUND
&& normalize
)
853 if ( ((cp2
=strstr(log_rec
.url
,"://"))!=NULL
)&&(cp2
<log_rec
.url
+6) )
856 /* see if a '/' is present after it */
857 if ( (cp2
=strchr(cp1
,(int)'/'))==NULL
) cp1
--;
859 /* Ok, now shift url string */
860 cp2
=log_rec
.url
; while (*cp1
!='\0') *cp2
++=*cp1
++; *cp2
='\0';
862 /* extra sanity checks on URL string */
863 while ((cp2
=strstr(log_rec
.url
,"/./")))
864 { cp1
=cp2
+2; while (*cp1
!='\0') *cp2
++=*cp1
++; *cp2
='\0'; }
865 if (log_rec
.url
[0]!='/')
867 if ( log_rec
.resp_code
==RC_OK
||
868 log_rec
.resp_code
==RC_PARTIALCONTENT
||
869 log_rec
.resp_code
==RC_NOMOD
)
872 fprintf(stderr
,"Converted URL '%s' to '/'\n",log_rec
.url
);
879 fprintf(stderr
,"Invalid URL: '%s'\n",log_rec
.url
);
880 strcpy(log_rec
.url
,"/INVALID-URL");
883 while ( log_rec
.url
[ (i
=strlen(log_rec
.url
)-1) ] == '?' )
884 log_rec
.url
[i
]='\0'; /* drop trailing ?s if any */
888 /* check for service (ie: http://) and lowercase if found */
889 if (((cp2
=strstr(log_rec
.url
,"://"))!= NULL
)&&(cp2
<log_rec
.url
+6))
894 if ( (*cp1
>='A') && (*cp1
<='Z')) *cp1
+= 'a'-'A';
900 /* strip off index.html (or any aliases) */
904 if ((cp1
=strstr(log_rec
.url
,lptr
->string
))!=NULL
)
908 if ( !stripcgi
&& (cp2
=strchr(cp1
,'?'))!=NULL
)
909 { while(*cp2
) *cp1
++=*cp2
++; *cp1
='\0'; }
917 /* unescape referrer */
918 unescape(log_rec
.refer
);
920 /* fix referrer field */
923 if ( (*cp2
!= '\0') && (*cp2
== '"') )
925 while ( *cp1
!= '\0' )
928 if (((unsigned char)*cp1
<32&&(unsigned char)*cp1
>0) ||
929 *cp1
==127 || (unsigned char)*cp1
=='<') *cp1
=0;
935 /* get query portion of cgi referrals */
941 if (!isurlchar(*cp1
, 1))
943 /* Save query portion in log.rec.srchstr */
944 strncpy(log_rec
.srchstr
,(char *)cp1
,MAXSRCH
);
950 /* handle null referrer */
951 if (log_rec
.refer
[0]=='\0')
952 { log_rec
.refer
[0]='-'; log_rec
.refer
[1]='\0'; }
955 /* if HTTP request, lowercase http://sitename/ portion */
957 if ( (*cp1
=='h') || (*cp1
=='H'))
959 while ( (*cp1
!='/') && (*cp1
!='\0'))
961 if ( (*cp1
>='A') && (*cp1
<='Z')) *cp1
+= 'a'-'A';
964 /* now do hostname */
965 if ( (*cp1
=='/') && ( *(cp1
+1)=='/')) {cp1
++; cp1
++;}
966 while ( (*cp1
!='/') && (*cp1
!='\0'))
968 if ( (*cp1
>='A') && (*cp1
<='Z')) *cp1
+= 'a'-'A';
973 /* Do we need to mangle? */
974 if (mangle_agent
) agent_mangle(log_rec
.agent
);
976 /* if necessary, shrink referrer to fit storage */
977 if (strlen(log_rec
.refer
)>=MAXREFH
)
979 if (verbose
) fprintf(stderr
,"%s [%llu]\n",
980 msg_big_ref
,total_rec
);
981 log_rec
.refer
[MAXREFH
-1]='\0';
984 /* if necessary, shrink URL to fit storage */
985 if (strlen(log_rec
.url
)>=MAXURLH
)
987 if (verbose
) fprintf(stderr
,"%s [%llu]\n",
988 msg_big_req
,total_rec
);
989 log_rec
.url
[MAXURLH
-1]='\0';
992 /* fix user agent field */
995 if ( (*cp2
!= '\0') && ((*cp2
== '"')||(*cp2
== '(')) )
997 while (*cp1
!= '\0') { cp3
= cp2
; *cp2
++ = *cp1
++; }
1000 cp1
= log_rec
.agent
; /* CHANGE !!! */
1001 while (*cp1
!= 0) /* get rid of more common _bad_ chars ;) */
1003 if ( ((unsigned char)*cp1
< 32) ||
1004 ((unsigned char)*cp1
==127) ||
1005 (*cp1
=='<') || (*cp1
=='>') )
1006 { *cp1
='\0'; break; }
1010 /* fix username if needed */
1011 if (log_rec
.ident
[0]==0)
1012 { log_rec
.ident
[0]='-'; log_rec
.ident
[1]='\0'; }
1016 while ((unsigned char)*cp3
>=32 && *cp3
!='"') cp3
++;
1019 /* unescape user name */
1020 unescape(log_rec
.ident
);
1022 /********************************************/
1023 /* PROCESS RECORD */
1024 /********************************************/
1026 /* first time through? */
1029 /* if yes, init our date vars */
1030 cur_month
=rec_month
; cur_year
=rec_year
;
1031 cur_day
=rec_day
; cur_hour
=rec_hour
;
1032 cur_min
=rec_min
; cur_sec
=rec_sec
;
1036 /* adjust last day processed if different */
1037 if (rec_day
> l_day
) l_day
= rec_day
;
1039 /* update min/sec stuff */
1040 if (cur_sec
!= rec_sec
) cur_sec
= rec_sec
;
1041 if (cur_min
!= rec_min
) cur_min
= rec_min
;
1043 /* check for hour change */
1044 if (cur_hour
!= rec_hour
)
1046 /* if yes, init hourly stuff */
1047 if (ht_hit
> mh_hit
) mh_hit
= ht_hit
;
1049 cur_hour
= rec_hour
;
1052 /* check for day change */
1053 if (cur_day
!= rec_day
)
1055 /* if yes, init daily stuff */
1056 tm_site
[cur_day
-1]=dt_site
; dt_site
=0;
1057 tm_visit
[cur_day
-1]=tot_visit(sd_htab
);
1062 /* check for month change */
1063 if ( (cur_month
!= rec_month
) || (cur_year
!= rec_year
) )
1065 /* if yes, do monthly stuff */
1066 t_visit
=tot_visit(sm_htab
);
1067 month_update_exit(req_tstamp
); /* process exit pages */
1069 write_month_html(); /* generate HTML for month */
1071 cur_month
= rec_month
; /* update our flags */
1072 cur_year
= rec_year
;
1073 f_day
=l_day
=rec_day
;
1076 /* save hostname for later */
1077 strncpy(host_buf
, log_rec
.hostname
, sizeof(log_rec
.hostname
));
1080 /* Resolve IP address if needed */
1083 struct addrinfo hints
, *ares
;
1084 memset(&hints
, 0, sizeof(hints
));
1085 hints
.ai_family
= AF_UNSPEC
;
1086 hints
.ai_socktype
= SOCK_STREAM
;
1087 hints
.ai_flags
= AI_NUMERICHOST
;
1088 if (0 == getaddrinfo(log_rec
.hostname
, "0", &hints
, &ares
))
1091 resolve_dns(&log_rec
);
1095 /* lowercase hostname and validity check */
1096 cp1
= log_rec
.hostname
; i
=0;
1098 if ( (!isalnum((unsigned char)*cp1
)) && (*cp1
!=':') )
1099 strncpy(log_rec
.hostname
, "Invalid", 8);
1102 while (*cp1
!= '\0') /* loop through string */
1104 if ( (*cp1
>='A') && (*cp1
<='Z') )
1105 { *cp1
++ += 'a'-'A'; continue; }
1106 if ( *cp1
=='.' ) i
++;
1107 if ( (isalnum((unsigned char)*cp1
)) ||
1108 (*cp1
=='.')||(*cp1
=='-') ||
1109 (*cp1
==':')||((*cp1
=='_')&&(i
==0)) ) cp1
++;
1112 /* Invalid hostname found! */
1113 if (strcmp(log_rec
.hostname
, host_buf
))
1114 strcpy(log_rec
.hostname
, host_buf
);
1115 else strncpy(log_rec
.hostname
,"Invalid",8);
1119 if (*cp1
== '\0') /* did we make it to the end? */
1121 if (!isalnum((unsigned char)*(cp1
-1)))
1122 strncpy(log_rec
.hostname
,"Invalid",8);
1126 /* Catch blank hostnames here */
1127 if (log_rec
.hostname
[0]=='\0')
1128 strncpy(log_rec
.hostname
,"Unknown",8);
1130 /* Ignore/Include check */
1131 if ( (isinlist(include_sites
,log_rec
.hostname
)==NULL
) &&
1132 (isinlist(include_urls
,log_rec
.url
)==NULL
) &&
1133 (isinlist(include_refs
,log_rec
.refer
)==NULL
) &&
1134 (isinlist(include_agents
,log_rec
.agent
)==NULL
) &&
1135 (isinlist(include_users
,log_rec
.ident
)==NULL
) )
1137 if (isinlist(ignored_sites
,log_rec
.hostname
)!=NULL
)
1138 { total_ignore
++; continue; }
1139 if (isinlist(ignored_urls
,log_rec
.url
)!=NULL
)
1140 { total_ignore
++; continue; }
1141 if (isinlist(ignored_agents
,log_rec
.agent
)!=NULL
)
1142 { total_ignore
++; continue; }
1143 if (isinlist(ignored_refs
,log_rec
.refer
)!=NULL
)
1144 { total_ignore
++; continue; }
1145 if (isinlist(ignored_users
,log_rec
.ident
)!=NULL
)
1146 { total_ignore
++; continue; }
1149 /* Bump response code totals */
1150 switch (log_rec
.resp_code
) {
1151 case RC_CONTINUE
: i
=IDX_CONTINUE
; break;
1152 case RC_SWITCHPROTO
: i
=IDX_SWITCHPROTO
; break;
1153 case RC_OK
: i
=IDX_OK
; break;
1154 case RC_CREATED
: i
=IDX_CREATED
; break;
1155 case RC_ACCEPTED
: i
=IDX_ACCEPTED
; break;
1156 case RC_NONAUTHINFO
: i
=IDX_NONAUTHINFO
; break;
1157 case RC_NOCONTENT
: i
=IDX_NOCONTENT
; break;
1158 case RC_RESETCONTENT
: i
=IDX_RESETCONTENT
; break;
1159 case RC_PARTIALCONTENT
: i
=IDX_PARTIALCONTENT
; break;
1160 case RC_MULTIPLECHOICES
: i
=IDX_MULTIPLECHOICES
; break;
1161 case RC_MOVEDPERM
: i
=IDX_MOVEDPERM
; break;
1162 case RC_MOVEDTEMP
: i
=IDX_MOVEDTEMP
; break;
1163 case RC_SEEOTHER
: i
=IDX_SEEOTHER
; break;
1164 case RC_NOMOD
: i
=IDX_NOMOD
; break;
1165 case RC_USEPROXY
: i
=IDX_USEPROXY
; break;
1166 case RC_MOVEDTEMPORARILY
: i
=IDX_MOVEDTEMPORARILY
; break;
1167 case RC_BAD
: i
=IDX_BAD
; break;
1168 case RC_UNAUTH
: i
=IDX_UNAUTH
; break;
1169 case RC_PAYMENTREQ
: i
=IDX_PAYMENTREQ
; break;
1170 case RC_FORBIDDEN
: i
=IDX_FORBIDDEN
; break;
1171 case RC_NOTFOUND
: i
=IDX_NOTFOUND
; break;
1172 case RC_METHODNOTALLOWED
: i
=IDX_METHODNOTALLOWED
; break;
1173 case RC_NOTACCEPTABLE
: i
=IDX_NOTACCEPTABLE
; break;
1174 case RC_PROXYAUTHREQ
: i
=IDX_PROXYAUTHREQ
; break;
1175 case RC_TIMEOUT
: i
=IDX_TIMEOUT
; break;
1176 case RC_CONFLICT
: i
=IDX_CONFLICT
; break;
1177 case RC_GONE
: i
=IDX_GONE
; break;
1178 case RC_LENGTHREQ
: i
=IDX_LENGTHREQ
; break;
1179 case RC_PREFAILED
: i
=IDX_PREFAILED
; break;
1180 case RC_REQENTTOOLARGE
: i
=IDX_REQENTTOOLARGE
; break;
1181 case RC_REQURITOOLARGE
: i
=IDX_REQURITOOLARGE
; break;
1182 case RC_UNSUPMEDIATYPE
: i
=IDX_UNSUPMEDIATYPE
; break;
1183 case RC_RNGNOTSATISFIABLE
:i
=IDX_RNGNOTSATISFIABLE
;break;
1184 case RC_EXPECTATIONFAILED
:i
=IDX_EXPECTATIONFAILED
;break;
1185 case RC_SERVERERR
: i
=IDX_SERVERERR
; break;
1186 case RC_NOTIMPLEMENTED
: i
=IDX_NOTIMPLEMENTED
; break;
1187 case RC_BADGATEWAY
: i
=IDX_BADGATEWAY
; break;
1188 case RC_UNAVAIL
: i
=IDX_UNAVAIL
; break;
1189 case RC_GATEWAYTIMEOUT
: i
=IDX_GATEWAYTIMEOUT
; break;
1190 case RC_BADHTTPVER
: i
=IDX_BADHTTPVER
; break;
1191 default: i
=IDX_UNDEFINED
; break;
1193 response
[i
].count
++;
1195 /* now save in the various hash tables... */
1196 if (log_rec
.resp_code
==RC_OK
|| log_rec
.resp_code
==RC_PARTIALCONTENT
)
1199 /* URL/ident hash table (only if valid response code) */
1200 if ((log_rec
.resp_code
==RC_OK
)||(log_rec
.resp_code
==RC_NOMOD
)||
1201 (log_rec
.resp_code
==RC_PARTIALCONTENT
))
1203 /* URL hash table */
1204 if (put_unode(log_rec
.url
,OBJ_REG
,(u_int64_t
)1,
1205 log_rec
.xfer_size
,&t_url
,(u_int64_t
)0,(u_int64_t
)0,um_htab
))
1208 /* Error adding URL node, skipping ... */
1209 fprintf(stderr
,"%s %s\n", msg_nomem_u
, log_rec
.url
);
1212 /* ident (username) hash table */
1213 if (put_inode(log_rec
.ident
,OBJ_REG
,
1214 1,(u_int64_t
)i
,log_rec
.xfer_size
,&t_user
,
1215 0,rec_tstamp
,im_htab
))
1218 /* Error adding ident node, skipping .... */
1219 fprintf(stderr
,"%s %s\n", msg_nomem_i
, log_rec
.ident
);
1223 /* referrer hash table */
1226 if (log_rec
.refer
[0]!='\0')
1227 if (put_rnode(log_rec
.refer
,OBJ_REG
,(u_int64_t
)1,&t_ref
,rm_htab
))
1230 fprintf(stderr
,"%s %s\n", msg_nomem_r
, log_rec
.refer
);
1234 /* hostname (site) hash table - daily */
1235 if (put_hnode(log_rec
.hostname
,OBJ_REG
,
1236 1,(u_int64_t
)i
,log_rec
.xfer_size
,&dt_site
,
1237 0,rec_tstamp
,"",sd_htab
))
1240 /* Error adding host node (daily), skipping .... */
1241 fprintf(stderr
,"%s %s\n",msg_nomem_dh
, log_rec
.hostname
);
1244 /* hostname (site) hash table - monthly */
1245 if (put_hnode(log_rec
.hostname
,OBJ_REG
,
1246 1,(u_int64_t
)i
,log_rec
.xfer_size
,&t_site
,
1247 0,rec_tstamp
,"",sm_htab
))
1250 /* Error adding host node (monthly), skipping .... */
1251 fprintf(stderr
,"%s %s\n", msg_nomem_mh
, log_rec
.hostname
);
1254 /* user agent hash table */
1257 if (log_rec
.agent
[0]!='\0')
1258 if (put_anode(log_rec
.agent
,OBJ_REG
,(u_int64_t
)1,&t_agent
,am_htab
))
1261 fprintf(stderr
,"%s %s\n", msg_nomem_a
, log_rec
.agent
);
1265 /* bump monthly/daily/hourly totals */
1266 t_hit
++; ht_hit
++; /* daily/hourly hits */
1267 t_xfer
+= log_rec
.xfer_size
; /* total xfer size */
1268 tm_xfer
[rec_day
-1] += log_rec
.xfer_size
; /* daily xfer total */
1269 tm_hit
[rec_day
-1]++; /* daily hits total */
1270 th_xfer
[rec_hour
] += log_rec
.xfer_size
; /* hourly xfer total */
1271 th_hit
[rec_hour
]++; /* hourly hits total */
1273 /* if RC_OK, increase file counters */
1274 if (log_rec
.resp_code
== RC_OK
)
1277 tm_file
[rec_day
-1]++;
1278 th_file
[rec_hour
]++;
1281 /* Pages (pageview) calculation */
1282 if (ispage(log_rec
.url
))
1285 tm_page
[rec_day
-1]++;
1286 th_page
[rec_hour
]++;
1288 /* do search string stuff if needed */
1289 if (ntop_search
) srch_string(log_rec
.srchstr
);
1292 /*********************************************/
1293 /* RECORD PROCESSED - DO GROUPS HERE */
1294 /*********************************************/
1297 if ( (cp1
=isinglist(group_urls
,log_rec
.url
))!=NULL
)
1299 if (put_unode(cp1
,OBJ_GRP
,(u_int64_t
)1,log_rec
.xfer_size
,
1300 &ul_bogus
,(u_int64_t
)0,(u_int64_t
)0,um_htab
))
1303 /* Error adding URL node, skipping ... */
1304 fprintf(stderr
,"%s %s\n", msg_nomem_u
, cp1
);
1309 if ( (cp1
=isinglist(group_sites
,log_rec
.hostname
))!=NULL
)
1311 if (put_hnode(cp1
,OBJ_GRP
,1,
1312 (u_int64_t
)(log_rec
.resp_code
==RC_OK
)?1:0,
1313 log_rec
.xfer_size
,&ul_bogus
,
1314 0,rec_tstamp
,"",sm_htab
))
1317 /* Error adding Site node, skipping ... */
1318 fprintf(stderr
,"%s %s\n", msg_nomem_mh
, cp1
);
1323 /* Domain Grouping */
1326 cp1
= get_domain(log_rec
.hostname
);
1329 if (put_hnode(cp1
,OBJ_GRP
,1,
1330 (u_int64_t
)(log_rec
.resp_code
==RC_OK
)?1:0,
1331 log_rec
.xfer_size
,&ul_bogus
,
1332 0,rec_tstamp
,"",sm_htab
))
1335 /* Error adding Site node, skipping ... */
1336 fprintf(stderr
,"%s %s\n", msg_nomem_mh
, cp1
);
1342 /* Referrer Grouping */
1343 if ( (cp1
=isinglist(group_refs
,log_rec
.refer
))!=NULL
)
1345 if (put_rnode(cp1
,OBJ_GRP
,(u_int64_t
)1,&ul_bogus
,rm_htab
))
1348 /* Error adding Referrer node, skipping ... */
1349 fprintf(stderr
,"%s %s\n", msg_nomem_r
, cp1
);
1353 /* User Agent Grouping */
1354 if ( (cp1
=isinglist(group_agents
,log_rec
.agent
))!=NULL
)
1356 if (put_anode(cp1
,OBJ_GRP
,(u_int64_t
)1,&ul_bogus
,am_htab
))
1359 /* Error adding User Agent node, skipping ... */
1360 fprintf(stderr
,"%s %s\n", msg_nomem_a
, cp1
);
1364 /* Ident (username) Grouping */
1365 if ( (cp1
=isinglist(group_users
,log_rec
.ident
))!=NULL
)
1367 if (put_inode(cp1
,OBJ_GRP
,1,
1368 (u_int64_t
)(log_rec
.resp_code
==RC_OK
)?1:0,
1369 log_rec
.xfer_size
,&ul_bogus
,
1370 0,rec_tstamp
,im_htab
))
1373 /* Error adding Username node, skipping ... */
1374 fprintf(stderr
,"%s %s\n", msg_nomem_i
, cp1
);
1379 /*********************************************/
1381 /*********************************************/
1385 /* If first record, check if stupid Netscape header stuff */
1386 if ( (total_rec
==1) && (strncmp(buffer
,"format=",7)==0) )
1388 /* Skipping Netscape header record */
1389 if (verbose
>1) printf("%s\n",msg_ign_nscp
);
1390 /* count it as ignored... */
1395 /* Check if it's a W3C header or IIS Null-Character line */
1396 if ((buffer
[0]=='\0') || (buffer
[0]=='#'))
1402 /* really bad record... */
1406 fprintf(stderr
,"%s (%llu)",msg_bad_rec
,total_rec
);
1407 if (debug_mode
) fprintf(stderr
,":\n%s\n",tmp_buf
);
1408 else fprintf(stderr
,"\n");
1415 /*********************************************/
1416 /* DONE READING LOG FILE - final processing */
1417 /*********************************************/
1419 /* close log file if needed */
1421 if (gz_log
) (gz_log
==COMP_BZIP
)?BZ2_bzclose(zlog_fp
):gzclose(zlog_fp
);
1423 if (gz_log
) gzclose(zlog_fp
);
1425 else if (log_fname
) fclose(log_fp
);
1427 if (good_rec
) /* were any good records? */
1429 tm_site
[cur_day
-1]=dt_site
; /* If yes, clean up a bit */
1430 tm_visit
[cur_day
-1]=tot_visit(sd_htab
);
1431 t_visit
=tot_visit(sm_htab
);
1432 if (ht_hit
> mh_hit
) mh_hit
= ht_hit
;
1434 if (total_rec
> (total_ignore
+total_bad
)) /* did we process any? */
1438 if (save_state()) /* incremental stuff */
1440 /* Error: Unable to save current run data */
1441 if (verbose
) fprintf(stderr
,"%s\n",msg_data_err
);
1442 unlink(state_fname
);
1445 month_update_exit(rec_tstamp
); /* calculate exit pages */
1447 write_month_html(); /* write monthly HTML file */
1448 put_history(); /* write history */
1450 if (hist
[0].month
!=0) write_main_index(); /* write main HTML file */
1452 /* get processing end time */
1453 end_time
= time(NULL
);
1455 /* display end of processing statistics */
1456 if (time_me
|| (verbose
>1))
1458 printf("%llu %s ",total_rec
, msg_records
);
1461 printf("(%llu %s",total_ignore
,msg_ignored
);
1462 if (total_bad
) printf(", %llu %s) ",total_bad
,msg_bad
);
1465 else if (total_bad
) printf("(%llu %s) ",total_bad
,msg_bad
);
1467 /* total processing time in seconds */
1468 temp_time
= difftime(end_time
, start_time
);
1469 if (temp_time
==0) temp_time
=1;
1470 printf("%s %.0f %s", msg_in
, temp_time
, msg_seconds
);
1472 /* calculate records per second */
1474 i
=( (int)( (float)total_rec
/temp_time
) );
1477 if ( (i
>0) && (i
<=total_rec
) ) printf(", %d/sec\n", i
);
1482 /* Close DNS cache file */
1483 if (dns_db
) close_cache();
1484 /* Close GeoDB database */
1485 if (geo_db
) geodb_close(geo_db
);
1489 /* Close GeoIP database */
1490 if (geo_fp
) GeoIP_delete(geo_fp
);
1493 /* Whew, all done! Exit with completion status (0) */
1498 /* No valid records found... exit with error (1) */
1499 if (verbose
) printf("%s\n",msg_no_vrec
);
1500 if (hist
[0].month
!=0) write_main_index(); /* write main HTML file */
1505 /*********************************************/
1506 /* GET_CONFIG - get configuration file info */
1507 /*********************************************/
1509 void get_config(char *fname
)
1511 char *kwords
[]= { "Undefined", /* 0 = undefined keyword 0 */
1512 "OutputDir", /* Output directory 1 */
1513 "LogFile", /* Log file to use for input 2 */
1514 "ReportTitle", /* Title for reports 3 */
1515 "HostName", /* Hostname to use 4 */
1516 "IgnoreHist", /* Ignore history file 5 */
1517 "Quiet", /* Run in quiet mode 6 */
1518 "TimeMe", /* Produce timing results 7 */
1519 "Debug", /* Produce debug information 8 */
1520 "HourlyGraph", /* Hourly stats graph 9 */
1521 "HourlyStats", /* Hourly stats table 10 */
1522 "TopSites", /* Top sites 11 */
1523 "TopURLs", /* Top URLs 12 */
1524 "TopReferrers", /* Top Referrers 13 */
1525 "TopAgents", /* Top User Agents 14 */
1526 "TopCountries", /* Top Countries 15 */
1527 "HideSite", /* Sites to hide 16 */
1528 "HideURL", /* URLs to hide 17 */
1529 "HideReferrer", /* Referrers to hide 18 */
1530 "HideAgent", /* User Agents to hide 19 */
1531 "IndexAlias", /* Aliases for index.html 20 */
1532 "HTMLHead", /* HTML Top1 code 21 */
1533 "HTMLPost", /* HTML Top2 code 22 */
1534 "HTMLTail", /* HTML Tail code 23 */
1535 "MangleAgents", /* Mangle User Agents 24 */
1536 "IgnoreSite", /* Sites to ignore 25 */
1537 "IgnoreURL", /* Url's to ignore 26 */
1538 "IgnoreReferrer", /* Referrers to ignore 27 */
1539 "IgnoreAgent", /* User Agents to ignore 28 */
1540 "ReallyQuiet", /* Dont display ANY messages 29 */
1541 "GMTTime", /* Local or UTC time? 30 */
1542 "GroupURL", /* Group URLs 31 */
1543 "GroupSite", /* Group Sites 32 */
1544 "GroupReferrer", /* Group Referrers 33 */
1545 "GroupAgent", /* Group Agents 34 */
1546 "GroupShading", /* Shade Grouped entries 35 */
1547 "GroupHighlight", /* BOLD Grouped entries 36 */
1548 "Incremental", /* Incremental runs 37 */
1549 "IncrementalName", /* Filename for state data 38 */
1550 "HistoryName", /* Filename for history data 39 */
1551 "HTMLExtension", /* HTML filename extension 40 */
1552 "HTMLPre", /* HTML code at beginning 41 */
1553 "HTMLBody", /* HTML body code 42 */
1554 "HTMLEnd", /* HTML code at end 43 */
1555 "UseHTTPS", /* Use https:// on URLs 44 */
1556 "IncludeSite", /* Sites to always include 45 */
1557 "IncludeURL", /* URLs to always include 46 */
1558 "IncludeReferrer", /* Referrers to include 47 */
1559 "IncludeAgent", /* User Agents to include 48 */
1560 "PageType", /* Page Type (pageview) 49 */
1561 "VisitTimeout", /* Visit timeout (seconds) 50 */
1562 "GraphLegend", /* Graph Legends (yes/no) 51 */
1563 "GraphLines", /* Graph Lines (0=none) 52 */
1564 "FoldSeqErr", /* Fold sequence errors 53 */
1565 "CountryGraph", /* Display ctry graph (0=no) 54 */
1566 "TopKSites", /* Top sites (by KBytes) 55 */
1567 "TopKURLs", /* Top URLs (by KBytes) 56 */
1568 "TopEntry", /* Top Entry Pages 57 */
1569 "TopExit", /* Top Exit Pages 58 */
1570 "TopSearch", /* Top Search Strings 59 */
1571 "LogType", /* Log Type (clf/ftp/squid) 60 */
1572 "SearchEngine", /* SearchEngine strings 61 */
1573 "GroupDomains", /* Group domains (n=level) 62 */
1574 "HideAllSites", /* Hide ind. sites (0=no) 63 */
1575 "AllSites", /* List all sites? 64 */
1576 "AllURLs", /* List all URLs? 65 */
1577 "AllReferrers", /* List all Referrers? 66 */
1578 "AllAgents", /* List all User Agents? 67 */
1579 "AllSearchStr", /* List all Search Strings? 68 */
1580 "AllUsers", /* List all Users? 69 */
1581 "TopUsers", /* Top Usernames to show 70 */
1582 "HideUser", /* Usernames to hide 71 */
1583 "IgnoreUser", /* Usernames to ignore 72 */
1584 "IncludeUser", /* Usernames to include 73 */
1585 "GroupUser", /* Usernames to group 74 */
1586 "DumpPath", /* Path for dump files 75 */
1587 "DumpExtension", /* Dump filename extension 76 */
1588 "DumpHeader", /* Dump header as first rec? 77 */
1589 "DumpSites", /* Dump sites tab file 78 */
1590 "DumpURLs", /* Dump urls tab file 79 */
1591 "DumpReferrers", /* Dump referrers tab file 80 */
1592 "DumpAgents", /* Dump user agents tab file 81 */
1593 "DumpUsers", /* Dump usernames tab file 82 */
1594 "DumpSearchStr", /* Dump search str tab file 83 */
1595 "DNSCache", /* DNS Cache file name 84 */
1596 "DNSChildren", /* DNS Children (0=no DNS) 85 */
1597 "DailyGraph", /* Daily Graph (0=no) 86 */
1598 "DailyStats", /* Daily Stats (0=no) 87 */
1599 "LinkReferrer", /* Link referrer (0=no) 88 */
1600 "PagePrefix", /* PagePrefix - treat as page 89 */
1601 "ColorHit", /* Hit Color (def=00805c) 90 */
1602 "ColorFile", /* File Color (def=0040ff) 91 */
1603 "ColorSite", /* Site Color (def=ff8000) 92 */
1604 "ColorKbyte", /* Kbyte Color (def=ff0000) 93 */
1605 "ColorPage", /* Page Color (def=00e0ff) 94 */
1606 "ColorVisit", /* Visit Color (def=ffff00) 95 */
1607 "ColorMisc", /* Misc Color (def=00e0ff) 96 */
1608 "PieColor1", /* Pie Color 1 (def=800080) 97 */
1609 "PieColor2", /* Pie Color 2 (def=80ffc0) 98 */
1610 "PieColor3", /* Pie Color 3 (def=ff00ff) 99 */
1611 "PieColor4", /* Pie Color 4 (def=ffc080) 100 */
1612 "CacheIPs", /* Cache IPs in DNS DB (0=no) 101 */
1613 "CacheTTL", /* DNS Cache entry TTL (days) 102 */
1614 "GeoDB", /* GeoDB lookups (0=no) 103 */
1615 "GeoDBDatabase", /* GeoDB database filename 104 */
1616 "StripCGI", /* Strip CGI in URLS (0=no) 105 */
1617 "TrimSquidURL", /* Trim squid URLs (0=none) 106 */
1618 "OmitPage", /* URLs not counted as pages 107 */
1619 "HTAccess", /* Write .httaccess files? 108 */
1620 "IgnoreState", /* Ignore state file (0=no) 109 */
1621 "DefaultIndex", /* Default index.* (1=yes) 110 */
1622 "GeoIP", /* Use GeoIP? (1=yes) 111 */
1623 "GeoIPDatabase", /* Database to use for GeoIP 112 */
1624 "NormalizeURL", /* Normalize CLF URLs (1=yes) 113 */
1625 "IndexMonths", /* # months for main page 114 */
1626 "GraphMonths", /* # months for yearly graph 115 */
1627 "YearHeaders", /* use year headers? (1=yes) 116 */
1628 "YearTotals", /* show year subtotals (0=no) 117 */
1629 "CountryFlags", /* show country flags? (0-no) 118 */
1630 "FlagDir", /* directory w/flag images 119 */
1631 "SearchCaseI" /* srch str case insensitive 120 */
1636 char buffer
[BUFSIZE
];
1637 char keyword
[MAXKWORD
];
1638 char value
[MAXKVAL
];
1641 int num_kwords
=sizeof(kwords
)/sizeof(char *);
1643 if ( (fp
=fopen(fname
,"r")) == NULL
)
1646 fprintf(stderr
,"%s %s\n",msg_bad_conf
,fname
);
1650 while ( (fgets(buffer
,BUFSIZE
,fp
)) != NULL
)
1652 /* skip comments and blank lines */
1653 if ( (buffer
[0]=='#') || isspace((unsigned char)buffer
[0]) ) continue;
1656 cp1
=buffer
;cp2
=keyword
;count
=MAXKWORD
-1;
1657 while ( (isalnum((unsigned char)*cp1
)) && count
)
1658 { *cp2
++ = *cp1
++; count
--; }
1662 cp2
=value
; count
=MAXKVAL
-1;
1663 while ((*cp1
!='\n')&&(*cp1
!='\0')&&(isspace((unsigned char)*cp1
))) cp1
++;
1664 while ((*cp1
!='\n')&&(*cp1
!='\0')&&count
) { *cp2
++ = *cp1
++; count
--; }
1666 while ((isspace((unsigned char)*cp2
)) && (cp2
!= value
) ) *cp2
--='\0';
1668 /* check if blank keyword/value */
1669 if ( (keyword
[0]=='\0') || (value
[0]=='\0') ) continue;
1672 for (i
=0;i
<num_kwords
;i
++)
1673 if (!ouricmp(keyword
,kwords
[i
])) { key
=i
; break; }
1675 if (key
==0) { printf("%s '%s' (%s)\n", /* Invalid keyword */
1676 msg_bad_key
,keyword
,fname
);
1682 case 1: out_dir
=save_opt(value
); break; /* OutputDir */
1683 case 2: log_fname
=save_opt(value
); break; /* LogFile */
1684 case 3: msg_title
=save_opt(value
); break; /* ReportTitle */
1685 case 4: hname
=save_opt(value
); break; /* HostName */
1686 case 5: ignore_hist
=
1687 (tolower(value
[0])=='y')?1:0; break; /* IgnoreHist */
1689 (tolower(value
[0])=='y')?1:2; break; /* Quiet */
1691 (tolower(value
[0])=='n')?0:1; break; /* TimeMe */
1693 (tolower(value
[0])=='y')?1:0; break; /* Debug */
1694 case 9: hourly_graph
=
1695 (tolower(value
[0])=='n')?0:1; break; /* HourlyGraph */
1696 case 10: hourly_stats
=
1697 (tolower(value
[0])=='n')?0:1; break; /* HourlyStats */
1698 case 11: ntop_sites
= atoi(value
); break; /* TopSites */
1699 case 12: ntop_urls
= atoi(value
); break; /* TopURLs */
1700 case 13: ntop_refs
= atoi(value
); break; /* TopRefs */
1701 case 14: ntop_agents
= atoi(value
); break; /* TopAgents */
1702 case 15: ntop_ctrys
= atoi(value
); break; /* TopCountries */
1703 case 16: add_nlist(value
,&hidden_sites
); break; /* HideSite */
1704 case 17: add_nlist(value
,&hidden_urls
); break; /* HideURL */
1705 case 18: add_nlist(value
,&hidden_refs
); break; /* HideReferrer */
1706 case 19: add_nlist(value
,&hidden_agents
); break; /* HideAgent */
1707 case 20: add_nlist(value
,&index_alias
); break; /* IndexAlias */
1708 case 21: add_nlist(value
,&html_head
); break; /* HTMLHead */
1709 case 22: add_nlist(value
,&html_post
); break; /* HTMLPost */
1710 case 23: add_nlist(value
,&html_tail
); break; /* HTMLTail */
1711 case 24: mangle_agent
=atoi(value
); break; /* MangleAgents */
1712 case 25: add_nlist(value
,&ignored_sites
); break; /* IgnoreSite */
1713 case 26: add_nlist(value
,&ignored_urls
); break; /* IgnoreURL */
1714 case 27: add_nlist(value
,&ignored_refs
); break; /* IgnoreReferrer */
1715 case 28: add_nlist(value
,&ignored_agents
); break; /* IgnoreAgent */
1716 case 29: if (tolower(value
[0])=='y')
1717 verbose
=0; break; /* ReallyQuiet */
1718 case 30: local_time
=
1719 (tolower(value
[0])=='y')?0:1; break; /* GMTTime */
1720 case 31: add_glist(value
,&group_urls
); break; /* GroupURL */
1721 case 32: add_glist(value
,&group_sites
); break; /* GroupSite */
1722 case 33: add_glist(value
,&group_refs
); break; /* GroupReferrer */
1723 case 34: add_glist(value
,&group_agents
); break; /* GroupAgent */
1724 case 35: shade_groups
=
1725 (tolower(value
[0])=='n')?0:1; break; /* GroupShading */
1726 case 36: hlite_groups
=
1727 (tolower(value
[0])=='n')?0:1; break; /* GroupHighlight */
1728 case 37: incremental
=
1729 (tolower(value
[0])=='y')?1:0; break; /* Incremental */
1730 case 38: state_fname
=save_opt(value
); break; /* State FName */
1731 case 39: hist_fname
=save_opt(value
); break; /* History FName */
1732 case 40: html_ext
=save_opt(value
); break; /* HTML extension */
1733 case 41: add_nlist(value
,&html_pre
); break; /* HTML Pre code */
1734 case 42: add_nlist(value
,&html_body
); break; /* HTML Body code */
1735 case 43: add_nlist(value
,&html_end
); break; /* HTML End code */
1737 (tolower(value
[0])=='y')?1:0; break; /* Use https:// */
1738 case 45: add_nlist(value
,&include_sites
); break; /* IncludeSite */
1739 case 46: add_nlist(value
,&include_urls
); break; /* IncludeURL */
1740 case 47: add_nlist(value
,&include_refs
); break; /* IncludeReferrer*/
1741 case 48: add_nlist(value
,&include_agents
); break; /* IncludeAgent */
1742 case 49: add_nlist(value
,&page_type
); break; /* PageType */
1743 case 50: visit_timeout
=atoi(value
); break; /* VisitTimeout */
1744 case 51: graph_legend
=
1745 (tolower(value
[0])=='n')?0:1; break; /* GraphLegend */
1746 case 52: graph_lines
= atoi(value
); break; /* GraphLines */
1747 case 53: fold_seq_err
=
1748 (tolower(value
[0])=='y')?1:0; break; /* FoldSeqErr */
1749 case 54: ctry_graph
=
1750 (tolower(value
[0])=='n')?0:1; break; /* CountryGraph */
1751 case 55: ntop_sitesK
= atoi(value
); break; /* TopKSites (KB) */
1752 case 56: ntop_urlsK
= atoi(value
); break; /* TopKUrls (KB) */
1753 case 57: ntop_entry
= atoi(value
); break; /* Top Entry pgs */
1754 case 58: ntop_exit
= atoi(value
); break; /* Top Exit pages */
1755 case 59: ntop_search
= atoi(value
); break; /* Top Search pgs */
1756 case 60: log_type
=(tolower(value
[0])=='f')?
1757 LOG_FTP
:((tolower(value
[0])=='s')?
1758 LOG_SQUID
:((tolower(value
[0])=='w')?
1759 LOG_W3C
:LOG_CLF
)); break; /* LogType */
1760 case 61: add_glist(value
,&search_list
); break; /* SearchEngine */
1761 case 62: group_domains
=atoi(value
); break; /* GroupDomains */
1762 case 63: hide_sites
=
1763 (tolower(value
[0])=='y')?1:0; break; /* HideAllSites */
1765 (tolower(value
[0])=='y')?1:0; break; /* All Sites? */
1767 (tolower(value
[0])=='y')?1:0; break; /* All URLs? */
1769 (tolower(value
[0])=='y')?1:0; break; /* All Refs */
1770 case 67: all_agents
=
1771 (tolower(value
[0])=='y')?1:0; break; /* All Agents? */
1772 case 68: all_search
=
1773 (tolower(value
[0])=='y')?1:0; break; /* All Srch str */
1775 (tolower(value
[0])=='y')?1:0; break; /* All Users? */
1776 case 70: ntop_users
=atoi(value
); break; /* TopUsers */
1777 case 71: add_nlist(value
,&hidden_users
); break; /* HideUser */
1778 case 72: add_nlist(value
,&ignored_users
); break; /* IgnoreUser */
1779 case 73: add_nlist(value
,&include_users
); break; /* IncludeUser */
1780 case 74: add_glist(value
,&group_users
); break; /* GroupUser */
1781 case 75: dump_path
=save_opt(value
); break; /* DumpPath */
1782 case 76: dump_ext
=save_opt(value
); break; /* Dumpfile ext */
1783 case 77: dump_header
=
1784 (tolower(value
[0])=='y')?1:0; break; /* DumpHeader? */
1785 case 78: dump_sites
=
1786 (tolower(value
[0])=='y')?1:0; break; /* DumpSites? */
1788 (tolower(value
[0])=='y')?1:0; break; /* DumpURLs? */
1790 (tolower(value
[0])=='y')?1:0; break; /* DumpReferrers? */
1791 case 81: dump_agents
=
1792 (tolower(value
[0])=='y')?1:0; break; /* DumpAgents? */
1793 case 82: dump_users
=
1794 (tolower(value
[0])=='y')?1:0; break; /* DumpUsers? */
1795 case 83: dump_search
=
1796 (tolower(value
[0])=='y')?1:0; break; /* DumpSrchStrs? */
1798 case 84: dns_cache
=save_opt(value
); break; /* DNSCache fname */
1799 case 85: dns_children
=atoi(value
); break; /* DNSChildren */
1801 case 84: /* Disable DNSCache and DNSChildren if DNS is not enabled */
1802 case 85: printf("%s '%s' (%s)\n",msg_bad_key
,keyword
,fname
); break;
1803 #endif /* USE_DNS */
1804 case 86: daily_graph
=
1805 (tolower(value
[0])=='n')?0:1; break; /* HourlyGraph */
1806 case 87: daily_stats
=
1807 (tolower(value
[0])=='n')?0:1; break; /* HourlyStats */
1808 case 88: link_referrer
=
1809 (tolower(value
[0])=='y')?1:0; break; /* LinkReferrer */
1810 case 89: add_nlist(value
,&page_prefix
); break; /* PagePrefix */
1811 case 90: strncpy(hit_color
+1, value
, 6); break; /* ColorHit */
1812 case 91: strncpy(file_color
+1, value
, 6); break; /* ColorFile */
1813 case 92: strncpy(site_color
+1, value
, 6); break; /* ColorSite */
1814 case 93: strncpy(kbyte_color
+1,value
, 6); break; /* ColorKbyte */
1815 case 94: strncpy(page_color
+1, value
, 6); break; /* ColorPage */
1816 case 95: strncpy(visit_color
+1,value
, 6); break; /* ColorVisit */
1817 case 96: strncpy(misc_color
+1, value
, 6); break; /* ColorMisc */
1818 case 97: strncpy(pie_color1
+1, value
, 6); break; /* PieColor1 */
1819 case 98: strncpy(pie_color2
+1, value
, 6); break; /* PieColor2 */
1820 case 99: strncpy(pie_color3
+1, value
, 6); break; /* PieColor3 */
1821 case 100:strncpy(pie_color4
+1, value
, 6); break; /* PieColor4 */
1823 case 101: cache_ips
=
1824 (tolower(value
[0])=='y')?1:0; break; /* CacheIPs */
1825 case 102: cache_ttl
=atoi(value
); break; /* CacheTTL days */
1827 (tolower(value
[0])=='y')?1:0; break; /* GeoDB */
1828 case 104: geodb_fname
=save_opt(value
); break; /* GeoDBDatabase */
1830 case 101: /* Disable CacheIPs/CacheTTL/GeoDB/GeoDBDatabase if none */
1833 case 104: printf("%s '%s' (%s)\n",msg_bad_key
,keyword
,fname
); break;
1834 #endif /* USE_DNS */
1836 (tolower(value
[0])=='n')?0:1; break; /* StripCGI */
1837 case 106: trimsquid
=atoi(value
); break; /* TrimSquidURL */
1838 case 107: add_nlist(value
,&omit_page
); break; /* OmitPage */
1840 (tolower(value
[0])=='y')?1:0; break; /* HTAccess */
1841 case 109: ignore_state
=
1842 (tolower(value
[0])=='y')?1:0; break; /* IgnoreState */
1843 case 110: default_index
=
1844 (tolower(value
[0])=='n')?0:1; break; /* DefaultIndex */
1847 (tolower(value
[0])=='y')?1:0; break; /* GeoIP */
1848 case 112: geoip_db
=save_opt(value
); break; /* GeoIPDatabase */
1850 case 111: /* Disable GeoIP and GeoIPDatabase if not enabled */
1851 case 112: printf("%s '%s' (%s)\n",msg_bad_key
,keyword
,fname
); break;
1853 case 113: normalize
=
1854 (tolower(value
[0])=='n')?0:1; break; /* NormalizeURL */
1855 case 114: index_mths
=atoi(value
); break; /* IndexMonths */
1856 case 115: graph_mths
=atoi(value
); break; /* GraphMonths */
1857 case 116: year_hdrs
=
1858 (tolower(value
[0])=='n')?0:1; break; /* YearHeaders */
1859 case 117: year_totals
=
1860 (tolower(value
[0])=='n')?0:1; break; /* YearTotals */
1861 case 118: use_flags
=
1862 (tolower(value
[0])=='y')?1:0; break; /* CountryFlags */
1863 case 119: use_flags
=1; flag_dir
=save_opt(value
); break; /* FlagDir */
1864 case 120: searchcasei
=
1865 (tolower(value
[0])=='n')?0:1; break; /* SearchCaseI */
1871 /*********************************************/
1872 /* SAVE_OPT - save option from config file */
1873 /*********************************************/
1875 static char *save_opt(char *str
)
1879 if ( (cp1
=malloc(strlen(str
)+1))==NULL
) return NULL
;
1885 /*********************************************/
1886 /* CLEAR_MONTH - initialize monthly stuff */
1887 /*********************************************/
1893 init_counters(); /* reset monthly counters */
1894 del_htabs(); /* clear hash tables */
1895 if (ntop_ctrys
!=0 ) for (i
=0;i
<ntop_ctrys
;i
++) top_ctrys
[i
]=NULL
;
1898 /*********************************************/
1899 /* INIT_COUNTERS - prep counters for use */
1900 /*********************************************/
1902 void init_counters()
1905 for (i
=0;i
<TOTAL_RC
;i
++) response
[i
].count
= 0;
1906 for (i
=0;i
<31;i
++) /* monthly totals */
1909 tm_hit
[i
]=tm_file
[i
]=tm_site
[i
]=tm_page
[i
]=tm_visit
[i
]=0;
1911 for (i
=0;i
<24;i
++) /* hourly totals */
1913 th_hit
[i
]=th_file
[i
]=th_page
[i
]=0;
1916 for (i
=0;ctry
[i
].desc
;i
++) /* country totals */
1922 t_hit
=t_file
=t_site
=t_url
=t_ref
=t_agent
=t_page
=t_visit
=t_user
=0;
1924 mh_hit
= dt_site
= 0;
1928 /*********************************************/
1929 /* PRINT_OPTS - print command line options */
1930 /*********************************************/
1932 void print_opts(char *pname
)
1936 printf("%s: %s %s\n",h_usage1
,pname
,h_usage2
);
1937 for (i
=0;h_msg
[i
];i
++) printf("%s\n",h_msg
[i
]);
1941 /*********************************************/
1943 /*********************************************/
1945 void print_version()
1948 uname(&system_info
);
1950 printf("Webalizer V%s-%s (%s %s %s) %s\n%s\n",
1952 system_info
.sysname
,system_info
.release
,system_info
.machine
,
1953 language
,copyright
);
1956 strncpy(&buf
[strlen(buf
)],"DNS/GeoDB ",11);
1959 strncpy(&buf
[strlen(buf
)],"BZip2 ",7);
1962 strncpy(&buf
[strlen(buf
)],"GeoIP ",7);
1967 printf("Mod date: %s Options: ",moddate
);
1968 if (buf
[0]!=0) printf("%s",buf
);
1969 else printf("none");
1972 printf("Default GeoDB dir : %s\n",GEODB_LOC
);
1974 printf("Default config dir: %s\n",ETCDIR
);
1981 /*********************************************/
1982 /* CUR_TIME - return date/time as a string */
1983 /*********************************************/
1988 static char timestamp
[48];
1990 /* get system time */
1992 /* convert to timestamp string */
1994 strftime(timestamp
,sizeof(timestamp
),"%d-%b-%Y %H:%M %Z",
1997 strftime(timestamp
,sizeof(timestamp
),"%d-%b-%Y %H:%M GMT",
2003 /*********************************************/
2004 /* ISPAGE - determine if an HTML page or not */
2005 /*********************************************/
2007 int ispage(char *str
)
2012 if (isinlist(omit_page
,str
)!=NULL
) return 0;
2015 while (*cp1
!='\0') { if (*cp1
=='.') cp2
=cp1
; cp1
++; }
2016 if ((cp2
++==str
)||(*(--cp1
)=='/')) return 1;
2020 /* Check if a PagePrefix matches */
2021 if(strncmp(str
,t
->string
,strlen(t
->string
))==0) return 1;
2024 return (isinlist(page_type
,cp2
)!=NULL
);
2027 /*********************************************/
2028 /* ISURLCHAR - checks for valid URL chars */
2029 /*********************************************/
/*
 * isurlchar - test whether a character is acceptable inside a URL.
 *
 *   ch    character to test
 *   flag  non-zero: the CGI separator characters ";?&=" are rejected
 *         zero:     the CGI separator characters ";?&=" are accepted
 *
 * Returns 1 if the character is accepted, 0 otherwise.  Alphanumerics and
 * any character above 127 are always accepted; a NUL is never accepted.
 */
int isurlchar(unsigned char ch, int flag)
{
   /* strchr() treats the terminating NUL as part of the string it   */
   /* searches, so a NUL would otherwise be reported as a valid char */
   if (ch == '\0')  return 0;

   if (isalnum(ch)) return 1;                   /* allow letters, numbers...    */
   if (ch > 127)    return 1;                   /* allow extended chars...      */
   if (flag)                                    /* and filter some others       */
      return (strchr(":/\\.,' *!-+_@~()[]!",ch)!=NULL);     /* strip cgi vars   */
   else
      return (strchr(":/\\.,' *!-+_@~()[]!;?&=",ch)!=NULL); /* keep cgi vars    */
}
2041 /*********************************************/
2042 /* CTRY_IDX - create unique # from TLD */
2043 /*********************************************/
/*
 * ctry_idx - pack a short domain string into a single integer.
 *
 * The string is walked from its last character to its first.  Each
 * character contributes (ch - 'a' + 1): the last character is placed at
 * bit 0 and every earlier character is placed above it, the first step
 * being 7 bits wide and each following step 5 bits wide.
 *
 *   str   NUL terminated string to encode
 *
 * Returns the packed value (0 for an empty string).  The shifting is done
 * in 64-bit unsigned arithmetic; shifting a plain int would overflow from
 * the sixth character onward and is undefined once the shift count
 * reaches the width of int.  Characters that would have to be placed at
 * or beyond bit 64 are ignored.
 */
u_int64_t ctry_idx(char *str)
{
   int       i=strlen(str),j=0;
   u_int64_t idx=0;
   char      *cp=str+i;                         /* start just past the end */

   for (;(i>0)&&(j<64);i--)
   {
      /* characters below 'a' give a negative term; converting it to   */
      /* u_int64_t keeps the same modular sum the int version produced */
      idx+=((u_int64_t)(*--cp-'a'+1))<<j;
      j+=(j==0)?7:5;
   }
   return idx;
}
2055 /*********************************************/
2056 /* UN_IDX - get TLD from index # */
2057 /*********************************************/
/*
 * un_idx - turn a packed index value back into its text form.
 *
 *   idx   packed value to decode
 *
 * Returns a pointer to a static buffer that is overwritten on every
 * call.  An idx of zero yields an empty string.
 */
char *un_idx(u_int64_t idx)
{
   static char buf[8];
   int         pos, bits;

   memset(buf, 0, sizeof(buf));
   if (idx == 0) return buf;

   /* a low field above 32 marks the two character special form */
   bits = (idx & 0x7f);
   if (bits > 32)                               /* only for a1, a2 and o1 */
   {
      buf[0] = (idx >> 7) + 'a';
      buf[1] = bits - 32;
      return buf;
   }

   /* last character comes from the low 7 bits ...              */
   buf[5] = (idx & 0x7f) + 'a' - 1;

   /* ... the rest from successive 5 bit fields, blank if empty */
   for (pos = 4; pos >= 0; pos--)
   {
      bits = (idx >> (((5 - pos) * 5) + 2)) & 0x1f;
      if (bits) buf[pos] = bits + 'a' - 1;
      else      buf[pos] = ' ';
   }

   /* left-justify: drop leading blanks one position at a time  */
   while (buf[0] == ' ') memmove(buf, buf + 1, 6);

   return buf;
}
2075 /*********************************************/
2076 /* FROM_HEX - convert hex char to decimal */
2077 /*********************************************/
/*
 * from_hex - convert one hexadecimal digit character to its value.
 *
 * c - character to convert ('0'-'9', 'A'-'F' or 'a'-'f')
 *
 * Returns the value 0..15, or 0 when c is not a hex digit.  Each range
 * is tested explicitly: treating "everything else" as lowercase let
 * characters such as 'X' or '_' slip through as 1..9.
 */
char from_hex(char c)                           /* convert hex to dec      */
{
   if (c>='0'&&c<='9') return c-'0';            /* 0-9?                    */
   if (c>='A'&&c<='F') return c-'A'+10;         /* A-F?                    */
   if (c>='a'&&c<='f') return c-'a'+10;         /* a-f?                    */
   return 0;                                    /* return 0 if bad...      */
}
2087 /*********************************************/
2088 /* UNESCAPE - convert escape seqs to chars */
2089 /*********************************************/
/*
 * unescape - decode %xx escape sequences in place.
 *
 * str - NUL terminated string to decode; rewritten in place (the result
 *       is never longer than the input)
 *
 * A '%' followed by a hex digit is replaced by the decoded byte; decoded
 * control characters (below 32, or 127) become '_'.  A '%' that is not
 * followed by a hex digit is kept as a literal '%'.  An escape cut short
 * by the end of the string is dropped.
 *
 * Returns str, or NULL when str is NULL.
 */
char *unescape(char *str)
{
   unsigned char *cp1=(unsigned char *)str;     /* force unsigned so we    */
   unsigned char *cp2=cp1;                      /* can do > 127            */

   if (!str) return NULL;                       /* make sure strings valid */

   while (*cp1)
   {
      if (*cp1=='%')                            /* Found an escape?        */
      {
         cp1++;
         if (isxdigit(*cp1))                    /* ensure a hex digit      */
         {
            if (*cp1) *cp2=from_hex(*cp1++)*16; /* convert hex to an ASCII */
            if (*cp1) *cp2+=from_hex(*cp1);     /* (hopefully) character   */
            if ((*cp2<32)||(*cp2==127)) *cp2='_'; /* make '_' if its bad   */
            if (*cp1) { cp2++; cp1++; }         /* keep it only if whole   */
         }
         else *cp2++='%';                       /* lone '%', keep literal  */
      }
      else *cp2++ = *cp1++;                     /* if not, just continue   */
   }
   *cp2=*cp1;                                   /* don't forget terminator */
   return str;                                  /* return the string       */
}
2118 /*********************************************/
2119 /* OURICMP - Case insensitive string compare */
2120 /*********************************************/
/*
 * ouricmp - case insensitive string compare.
 *
 * str1, str2 - NUL terminated strings to compare
 *
 * Returns 0 when all of str1 matches the start of str2 ignoring case,
 * 1 otherwise.  Only the end of str1 is tested, so a str1 that is a
 * proper prefix of str2 also yields 0; that long-standing behaviour is
 * kept because callers may rely on it.
 */
int ouricmp(char *str1, char *str2)
{
   while ((*str1!=0) &&
          (tolower((unsigned char)*str1)==tolower((unsigned char)*str2)))
   {
      str1++;
      str2++;
   }
   return (*str1==0) ? 0 : 1;
}
2130 /*********************************************/
2131 /* SRCH_STRING - get search strings from ref */
2132 /*********************************************/
/*
 * srch_string - pull a search phrase out of a referrer query string
 * and record it.
 *
 * ptr - query string to scan; '+' characters inside the matched value
 *       are overwritten with spaces in place
 *
 * The query variable name comes from looking log_rec.refer up in
 * search_list.  That name is searched for in ptr, first prefixed with
 * '?' and then with '&'.  The value after the following '=' (up to the
 * next '&' or the end) is copied to tmpbuf with '"', ',' and '?'
 * removed, runs of spaces compressed and letters lower-cased.  Leading
 * and trailing white space is trimmed and control characters become
 * '_'.  The result is passed to put_snode() with a count of 1; if that
 * call returns non-zero a message is written to stderr.
 *
 * NOTE(review): the declarations of srch and sp_flg, several braces and
 * the statement taken when the referrer is not a search engine are not
 * visible in this span.  No bound check on writes to tmpbuf is visible
 * either - confirm ptr can never exceed BUFSIZE.
 */
2134 void srch_string(char *ptr
)
2136 /* ptr should point to unescaped query string */
2137 char tmpbuf
[BUFSIZE
];
2139 unsigned char *cp1
, *cp2
, *cps
;
2142 /* Check if search engine referrer or return */
2143 if ( (cps
=(unsigned char *)isinglist(search_list
,log_rec
.refer
))==NULL
)
2146 /* Try to find query variable */
2147 srch
[0]='?'; srch
[sizeof(srch
)-1] = '\0';
2148 strncpy(&srch
[1],(char *)cps
,sizeof(srch
)-2); /* First, try "?..." */
2149 if ((cp1
=(unsigned char *)strstr(ptr
,srch
))==NULL
)
2151 srch
[0]='&'; /* Next, try "&..." */
2152 if ((cp1
=(unsigned char *)strstr(ptr
,srch
))==NULL
) return;
2154 cp2
=(unsigned char *)tmpbuf
;
/* step past the variable name and its '=' */
2155 while (*cp1
!='=' && *cp1
!=0) cp1
++; if (*cp1
!=0) cp1
++;
/* copy the value, cleaning it up on the way */
2156 while (*cp1
!='&' && *cp1
!=0)
2158 if (*cp1
=='"' || *cp1
==',' || *cp1
=='?')
2159 { cp1
++; continue; } /* skip bad ones.. */
2162 if (*cp1
=='+') *cp1
=' '; /* change + to space */
2163 if (sp_flg
&& *cp1
==' ') { cp1
++; continue; } /* compress spaces */
2164 if (*cp1
==' ') sp_flg
=1; else sp_flg
=0; /* (flag spaces here) */
2166 *cp2
++=tolower(*cp1
++); /* normal character */
/* terminate the copy, then trim it from the front */
2170 *cp2
=0; cp2
=(unsigned char *)tmpbuf
;
2171 if (tmpbuf
[0]=='?') tmpbuf
[0]=' '; /* format fix ? */
2172 while( *cp2
!=0 && isspace((unsigned char)*cp2
) ) cp2
++; /* skip sps. */
2173 if (*cp2
==0) return;
2175 /* any trailing spaces? */
2176 cp1
=cp2
+strlen((char *)cp2
)-1;
2177 while (cp1
!=cp2
) if (isspace((unsigned char)*cp1
)) *cp1
--='\0'; else break;
2179 /* strip invalid chars */
2181 while (*cp1
!=0) { if ((*cp1
<32)||(*cp1
==127)) *cp1
='_'; cp1
++; }
/* store the cleaned phrase with a count of one */
2183 if (put_snode((char *)cp2
,(u_int64_t
)1,sr_htab
))
2186 /* Error adding search string node, skipping .... */
2187 fprintf(stderr
,"%s %s\n", msg_nomem_sc
, tmpbuf
);
2192 /*********************************************/
2193 /* GET_DOMAIN - Get domain portion of host */
2194 /*********************************************/
/*
 * get_domain - locate the domain part inside a host name.
 *
 * str - host name to examine
 *
 * Returns NULL when isipaddr() reports anything other than 0 for str.
 * Otherwise a counter starts at group_domains+1, cp starts on the last
 * character of str, and when the counter reaches zero the position just
 * after cp is returned, i.e. a pointer into str itself.
 *
 * NOTE(review): the declaration of cp, the loop that moves cp and
 * decrements the counter, and the final return are not visible in this
 * span.  Presumably the counter is decremented once per '.' found while
 * scanning backwards - confirm against the full file.
 */
2196 char *get_domain(char *str
)
2199 int i
=group_domains
+1;
/* numeric addresses (and malformed names) have no domain part */
2201 if (isipaddr(str
)) return NULL
;
2202 cp
= str
+strlen(str
)-1;
/* counter exhausted: the domain starts one past the current position */
2207 if (!(--i
)) return ++cp
;
2213 /*********************************************/
2214 /* AGENT_MANGLE - Re-format user agent */
2215 /*********************************************/
/*
 * agent_mangle - shorten the user agent string of the current record.
 *
 * str - ignored as an input: it is immediately re-pointed at
 *       log_rec.agent, which is what actually gets processed
 *
 * log_rec.agent is rewritten in place; cp1 and cp3 read from the
 * string while cp2 writes the shortened form back into the same
 * buffer.  Three families are looked for, in this order:
 * "...ompatible" agents (with a special case for Opera), then "Opera",
 * then "Mozilla".  For each one the name and part of the version
 * number are copied; the existing comments indicate an operating
 * system token from the parenthesised section may follow.
 *
 * NOTE(review): the tests on the strstr() results, the conditionals
 * that choose how much version/OS detail to keep, the else arms and the
 * final termination of the string are not visible in this span, so the
 * exact output format cannot be stated from here.
 */
2217 void agent_mangle(char *str
)
2219 char *cp1
, *cp2
, *cp3
;
/* work on the record's agent field; the argument value is discarded */
2221 str
=cp2
=log_rec
.agent
;
2222 cp1
=strstr(str
,"ompatible"); /* check known fakers */
/* move to the ';' that ends the "compatible" token */
2225 while (*cp1
!=';'&&*cp1
!='\0') cp1
++;
2226 /* kludge for Mozilla/3.01 (compatible;) */
2227 if (*cp1
++==';' && strcmp(cp1
,")\"")) /* success! */
2229 /* Opera can hide as MSIE */
2230 cp3
=strstr(str
,"Opera");
/* copy the Opera name up to the first '.', turning '/' into a space */
2233 while (*cp3
!='.'&&*cp3
!='\0')
2235 if(*cp3
=='/') *cp2
++=' ';
2243 while (*cp1
== ' ') cp1
++; /* eat spaces */
2244 while (*cp1
!='.'&&*cp1
!='\0'&&*cp1
!=';') *cp2
++=*cp1
++;
2248 while (*cp1
!='.'&&*cp1
!=';'&&*cp1
!='\0') *cp2
++=*cp1
++;
2249 if (*cp1
!=';'&&*cp1
!='\0') { *cp2
++=*cp1
++; *cp2
++=*cp1
++; }
2252 if (*cp1
>='0'&&*cp1
<='9') *cp2
++=*cp1
++;
2254 while (*cp1
!=';'&&*cp1
!='\0'&&*cp1
!='('&&*cp1
!=' ') *cp2
++=*cp1
++;
2257 /* Level 1 - try to get OS */
2258 cp1
=strstr(cp1
,")");
/* back up from the ')' to the start of the last ';' or '(' delimited token */
2263 while (*cp1
!=';'&&*cp1
!='('&&cp1
!=str
) cp1
--;
2264 if (cp1
!=str
&&*cp1
!='\0') cp1
++;
2265 while (*cp1
==' '&&*cp1
!='\0') cp1
++;
2266 while (*cp1
!=')'&&*cp1
!='\0') *cp2
++=*cp1
++;
2274 /* nothing after "compatible", should we mangle? */
2280 cp1
=strstr(str
,"Opera"); /* Opera flavor */
2283 while (*cp1
!='/'&&*cp1
!=' '&&*cp1
!='\0') *cp2
++=*cp1
++;
2284 while (*cp1
!='.'&&*cp1
!='\0')
2286 if(*cp1
=='/') *cp2
++=' ';
2292 while (*cp1
!='.'&&*cp1
!='\0') *cp2
++=*cp1
++;
2297 if (*cp1
>='0'&&*cp1
<='9') *cp2
++=*cp1
++;
2299 while (*cp1
!=' '&&*cp1
!='\0'&&*cp1
!='(') *cp2
++=*cp1
++;
2302 cp1
=strstr(cp1
,"(");
2308 while (*cp1
!=';'&&*cp1
!=')'&&*cp1
!='\0') *cp2
++=*cp1
++;
2316 cp1
=strstr(str
,"Mozilla"); /* Netscape flavor */
2319 while (*cp1
!='/'&&*cp1
!=' '&&*cp1
!='\0') *cp2
++=*cp1
++;
/* normalise "Mozilla 4" style strings to "Mozilla/4" */
2320 if (*cp1
==' ') *cp1
='/';
2321 while (*cp1
!='.'&&*cp1
!='\0') *cp2
++=*cp1
++;
2324 while (*cp1
!='.'&&*cp1
!='\0') *cp2
++=*cp1
++;
2329 if (*cp1
>='0'&&*cp1
<='9') *cp2
++=*cp1
++;
2331 while (*cp1
!=' '&&*cp1
!='\0'&&*cp1
!='(') *cp2
++=*cp1
++;
2334 /* Level 1 - Try to get OS */
2335 cp1
=strstr(cp1
,"(");
2341 while (*cp1
!=';'&&*cp1
!=')'&&*cp1
!='\0') *cp2
++=*cp1
++;
2351 /*********************************************/
2352 /* OUR_GZGETS - enhanced gzgets for log only */
2353 /*********************************************/
/*
 * our_gzgets - read one line from a compressed log into buf.
 *
 * fp   - handle passed straight to gzread() or BZ2_bzread()
 * buf  - destination for the line
 * size - capacity of buf
 *
 * Input is taken from the file-level buffer f_buf through the cursor
 * f_cp.  When the cursor has moved past the f_end bytes currently held,
 * the buffer is refilled with up to GZ_BUFSIZE bytes; BZ2_bzread() is
 * used when gz_log equals COMP_BZIP, gzread() otherwise.  A refill
 * result of zero or less makes the function return Z_NULL.  The result
 * is NUL terminated and buf is returned either after a '\n' has been
 * consumed or when the size counter runs out.
 *
 * NOTE(review): the enclosing loop, the statement that copies a byte to
 * out_cp, the reset of f_cp after a refill and the preprocessor
 * conditionals that select between the two refill statements are not
 * visible in this span.
 */
2355 char *our_gzgets(void *fp
, char *buf
, int size
)
2357 char *out_cp
=buf
; /* point to output */
/* cursor beyond the bytes held: the buffer has to be refilled */
2360 if (f_cp
>(f_buf
+f_end
-1)) /* load? */
2363 f_end
=(gz_log
==COMP_BZIP
)?
2364 BZ2_bzread(fp
, f_buf
, GZ_BUFSIZE
):
2365 gzread(fp
, f_buf
, GZ_BUFSIZE
);
2367 f_end
=gzread(fp
, f_buf
, GZ_BUFSIZE
);
/* end of file or read error */
2369 if (f_end
<=0) return Z_NULL
;
2373 if (--size
) /* more? */
/* a newline completes the line */
2376 if (*f_cp
++ == '\n') { *out_cp
='\0'; return buf
; }
/* no room left in buf: terminate and hand back what fits */
2378 else { *out_cp
='\0'; return buf
; }
2383 /*********************************************/
2384 /* bz2_rewind - our 'rewind' for bz2 files */
2385 /*********************************************/
/*
 * bz2_rewind - restart reading of a bzip2 compressed file.
 *
 * fp    - address of the handle; receives the handle returned by
 *         BZ2_bzopen()
 * fname - name of the file to open
 * mode  - not used by any statement visible here; the open mode is
 *         hard-wired to "rb"
 *
 * After the open the file-level read state is reset: f_cp is placed at
 * f_buf+GZ_BUFSIZE, f_end is set to 0 and f_buf is zero filled.
 *
 * Returns -1 when the open failed.
 *
 * NOTE(review): whatever precedes the open (presumably closing the old
 * handle) and the success return are not visible in this span.
 */
2387 int bz2_rewind( void **fp
, char *fname
, char *mode
)
2390 *fp
= BZ2_bzopen( fname
, "rb");
2391 f_cp
=f_buf
+GZ_BUFSIZE
; f_end
=0; /* reset buffer counters */
2392 memset(f_buf
, 0, sizeof(f_buf
));
2393 if (*fp
== Z_NULL
) return -1;
2396 #endif /* USE_BZIP */
2398 /*********************************************/
2399 /* ISIPADDR - Determine if str is IP address */
2400 /*********************************************/
/*
 * isipaddr - classify a host string as a name or a numeric address.
 *
 * str - NUL terminated host string (must not be NULL)
 *
 * Returns:
 *    0  a hostname (contains something other than digits and dots)
 *    1  an IPv4 dotted quad (digits and exactly three dots)
 *    2  an IPv6 address with an embedded IPv4 part
 *    3  an IPv6 address
 *   -1  malformed: a ':' together with characters that are not
 *       lowercase hex, or digits/dots that do not form four fields
 *
 * Only the character set and the number of separators are checked;
 * the numeric range of the fields is not validated.
 */
int isipaddr(char *str)
{
   int  i=1,j=0;                              /* field / dot counters */
   char *cp=str;                              /* generic ptr          */

   if (strchr(str,':')!=NULL)
   {
      /* Possible IPv6 Address */
      while (*cp!='\0' && strchr(":.abcdef0123456789",*cp)!=NULL)
      {
         if (*cp=='.') j++;
         if (*cp==':') i++;
         cp++;
      }

      if (*cp!='\0') return -1;               /* bad hostname (has ':') */
      if (i>1 && j) return 2;                 /* IPv4/IPv6              */
      return 3;                               /* IPv6                   */
   }

   /* Not an IPv6 address, check for IPv4 */
   while (*cp!='\0' && strchr(".0123456789",*cp)!=NULL)
   {
      if (*cp=='.') i++;
      cp++;
   }

   if (*cp!='\0') return 0;                   /* hostname               */
   if (i!=4) return -1;                       /* bad hostname           */
   return 1;                                  /* IPv4                   */
}
2435 /*****************************************************************/
2437 /* JDATE - Julian date calculator */
2439 /* Calculates the number of days since Jan 1, 0000. */
2441 /* Originally written by Bradford L. Barrett (03/17/1988) */
2442 /* Returns an unsigned long value representing the number of */
2443 /* days since January 1, 0000. */
2445 /* Note: Due to the changes made by Pope Gregory XIII in the */
2446 /* 16th Century (Feb 24, 1582), dates before 1583 will */
2447 /* not return a truly accurate number (will be at least */
2448 /* 10 days off). Somehow, I don't think this will */
2449 /* present much of a problem for most situations :) */
2451 /* Usage: days = jdate(day, month, year) */
2453 /* The number returned is adjusted by 5 to facilitate day of */
2454 /* week calculations. The mod of the returned value gives the */
2455 /* day of the week the date is. (ie: dow = days % 7 ) where */
2456 /* dow will return 0=Sunday, 1=Monday, 2=Tuesday, etc... */
2458 /*****************************************************************/
2460 u_int64_t
jdate( int day
, int month
, int year
)
2462 u_int64_t days
; /* value returned */
2463 int mtable
[] = {0,31,59,90,120,151,181,212,243,273,304,334};
2465 /* First, calculate base number including leap and Centenial year stuff */
2467 days
=(((u_int64_t
)year
*365)+day
+mtable
[month
-1]+
2468 ((year
+4)/4) - ((year
/100)-(year
/400)));
2470 /* now adjust for leap year before March 1st */
2472 if ((year
% 4 == 0) && !((year
% 100 == 0) &&
2473 (year
% 400 != 0)) && (month
< 3))
2476 /* done, return with calculated value */