2 webalizer - a web server log analysis program
4 Copyright (C) 1997-2011 Bradford L. Barrett
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version, and provided that the above
10 copyright and permission notice is included with all distributed
11 copies of this or derived software.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
24 /*********************************************/
25 /* STANDARD INCLUDES */
26 /*********************************************/
32 #include <unistd.h> /* normal stuff */
34 #include <sys/utsname.h>
36 /* ensure sys/types */
38 #include <sys/types.h>
41 /* some need for uint* */
46 /* need socket header? */
47 #ifdef HAVE_SYS_SOCKET_H
48 #include <sys/socket.h>
51 /* some systems need this */
56 #include "webalizer.h" /* main header */
61 /* internal function prototypes */
/*
 * Forward declarations for helpers defined later in this file.
 * Each new_*node() takes the key string and returns the matching
 * node-pointer type; update_entry()/update_exit() take a URL string;
 * hash() maps a string to an unsigned bucket index.
 */
63 HNODEPTR
new_hnode(char *); /* new host node */
64 UNODEPTR
new_unode(char *); /* new url node */
65 RNODEPTR
new_rnode(char *); /* new referrer node */
66 ANODEPTR
new_anode(char *); /* new user agent node */
67 SNODEPTR
new_snode(char *); /* new search string.. */
68 INODEPTR
new_inode(char *); /* new ident node */
70 DNODEPTR
new_dnode(char *); /* new DNS node */
73 void update_entry(char *); /* update entry/exit */
74 void update_exit(char *); /* page totals */
76 unsigned int hash(char *); /* hash function */
/*
 * File-scope hash tables.  Each table is an array of MAXHASH node
 * pointers; the put_*node() functions in this file index a table with
 * a bucket value and chain colliding nodes through the node's "next"
 * field.  find_url(), update_entry() and update_exit() index um_htab
 * with hash(str).
 */
80 HNODEPTR sm_htab
[MAXHASH
]; /* hash tables */
81 HNODEPTR sd_htab
[MAXHASH
];
82 UNODEPTR um_htab
[MAXHASH
]; /* for hits, sites, */
83 RNODEPTR rm_htab
[MAXHASH
]; /* referrers and agents... */
84 ANODEPTR am_htab
[MAXHASH
];
85 SNODEPTR sr_htab
[MAXHASH
]; /* search string table */
86 INODEPTR im_htab
[MAXHASH
]; /* ident table (username) */
88 DNODEPTR host_table
[MAXHASH
]; /* DNS hash table */
/*
 * One-entry lookup caches: put_hnode() compares the incoming key with
 * lm_hnode/ld_hnode, and put_rnode() with l_rnode, before scanning a
 * bucket.  In the lines visible in this file lm_hnode is assigned only
 * when the table being updated is sm_htab; no assignment to ld_hnode or
 * l_rnode is visible here.
 */
91 /* Last node pointers */
92 HNODEPTR lm_hnode
=NULL
;
93 HNODEPTR ld_hnode
=NULL
;
94 RNODEPTR l_rnode
=NULL
;
96 /*********************************************/
97 /* DEL_HTABS - clear out our hash tables */
98 /*********************************************/
/*
 * Body of the table-clearing routine announced by the DEL_HTABS banner
 * (the function header is not visible in this view).  The visible calls
 * release sd_htab and sm_htab through del_hlist(), um_htab through
 * del_ulist() and rm_htab through del_rlist().  The DNS table call is
 * commented out, so host_table is not released here.
 * NOTE(review): no call for am_htab, sr_htab or im_htab is visible in
 * these lines - confirm against the full file.
 */
102 del_hlist(sd_htab
); /* Clear out our various */
103 del_ulist(um_htab
); /* hash tables here by */
104 del_hlist(sm_htab
); /* calling the appropriate */
105 del_rlist(rm_htab
); /* del_* function for each */
110 /* del_dlist(host_table); */ /* delete DNS hash table */
114 /*********************************************/
115 /* NEW_HNODE - create host node */
116 /*********************************************/
/*
 * new_hnode - allocate a host node for the key string str.
 *
 * Visible behaviour:
 *   - when strlen(str) >= MAXHOST, a "[new_hnode]" diagnostic built from
 *     msg_big_one and the string length is written to stderr;
 *   - a buffer of strlen(str)+1 bytes is malloc'd for the key, and a
 *     NULL HNODEPTR is returned if that allocation fails;
 *   - a struct hnode is malloc'd and, on success, its "string" field is
 *     set to the key buffer and "lasturl" to blank_str.
 *
 * NOTE(review): lines are missing from this view (braces, locals, the
 * copy into the key buffer, the final return); confirm the remaining
 * behaviour against the full file.
 * NOTE(review): "%d" is paired with the size_t result of strlen(), so
 * the conversion specifier and argument type do not match.
 */
118 HNODEPTR
new_hnode(char *str
)
/* oversize key: report it on stderr */
123 if (strlen(str
) >= MAXHOST
)
127 fprintf(stderr
,"[new_hnode] %s (%d)",msg_big_one
,strlen(str
));
129 fprintf(stderr
,":\n--> %s",str
);
130 fprintf(stderr
,"\n");
/* storage for the key string; give up if it cannot be allocated */
135 if ( (sptr
=malloc(strlen(str
)+1))==NULL
) return (HNODEPTR
)NULL
;
/* the node itself */
138 if (( newptr
= malloc(sizeof(struct hnode
))) != NULL
)
140 newptr
->string
=sptr
;
143 newptr
->lasturl
=blank_str
;
149 /*********************************************/
150 /* PUT_HNODE - insert/update host node */
151 /*********************************************/
/*
 * put_hnode - insert a host into, or update it in, the table htab.
 *
 * Parameters (labels taken from the original trailing comments):
 *   str     hostname key
 *   type    object type; compared with OBJ_GRP
 *   count   hit count        (not referenced in the lines visible here)
 *   file    file flag        (not referenced in the lines visible here)
 *   xfer    transfer size    (not referenced in the lines visible here)
 *   ctr     counter, incremented when a node is created with
 *           type != OBJ_GRP
 *   visit   visits; new nodes get visit-1
 *   tstamp  timestamp, compared with the node's tstamp and visit_timeout
 *   lasturl URL string resolved through find_url() for new nodes
 *   htab    table to update; compared with sm_htab to decide whether
 *           lm_hnode is refreshed and whether update_entry() is called
 *           for new nodes
 *
 * Visible flow:
 *   1. Empty bucket htab[hval]: create a node with new_hnode().
 *   2. Otherwise try the lm_hnode / ld_hnode shortcuts, then compare
 *      keys with strcmp(); a match also requires the same type or that
 *      neither side is OBJ_GRP.
 *   3. On a match with ispage(log_rec.url) true and
 *      (tstamp - cptr->tstamp) >= visit_timeout, update_exit() is
 *      called for the previous lasturl and update_entry() for the
 *      current URL; lasturl is then refreshed.
 *   4. No match: create a node and link it in front of htab[hval].
 *
 * NOTE(review): hval is presumably hash(str); its assignment, the
 * braces, the else branches and the return statements are not visible
 * in this view, so the exact nesting of the statements below must be
 * confirmed against the full file.
 */
153 int put_hnode( char *str
, /* Hostname */
154 int type
, /* obj type */
155 u_int64_t count
, /* hit count */
156 u_int64_t file
, /* File flag */
157 double xfer
, /* xfer size */
158 u_int64_t
*ctr
, /* counter */
159 u_int64_t visit
, /* visits */
160 u_int64_t tstamp
,/* timestamp */
161 char *lasturl
, /* lasturl */
162 HNODEPTR
*htab
) /* ptr>next */
167 /* check if hashed */
169 if ( (cptr
= htab
[hval
]) == NULL
)
/* bucket is empty: create the first node */
172 if ( (nptr
=new_hnode(str
)) != NULL
)
/* remember the newest node when working on sm_htab */
174 if (htab
==sm_htab
) lm_hnode
=nptr
;
182 if (type
!=OBJ_GRP
) (*ctr
)++;
186 nptr
->visit
=(visit
-1);
187 nptr
->lasturl
=find_url(lasturl
);
193 if (ispage(log_rec
.url
))
195 if (htab
==sm_htab
) update_entry(log_rec
.url
);
196 nptr
->lasturl
=find_url(log_rec
.url
);
205 /* hashed (SPEEDUP) */
/* shortcut: start from a cached node when its key equals str */
208 if (lm_hnode
!=NULL
&& strcmp(lm_hnode
->string
,str
)==0) cptr
=lm_hnode
;
212 if (ld_hnode
!=NULL
&& strcmp(ld_hnode
->string
,str
)==0) cptr
=ld_hnode
;
217 if (strcmp(cptr
->string
,str
)==0)
/* same key: accept if types are equal or neither is a group */
219 if ((type
==cptr
->flag
)||((type
!=OBJ_GRP
)&&(cptr
->flag
!=OBJ_GRP
)))
221 /* found... bump counter */
226 if (ispage(log_rec
.url
))
/* visit timed out: close the old visit and open a new one */
228 if ((tstamp
-cptr
->tstamp
)>=visit_timeout
)
233 update_exit(cptr
->lasturl
);
234 update_entry(log_rec
.url
);
237 cptr
->lasturl
=find_url(log_rec
.url
);
240 if (htab
==sm_htab
) lm_hnode
=cptr
;
/* key not present in this bucket: create a node */
248 if ( (nptr
= new_hnode(str
)) != NULL
)
250 if (htab
==sm_htab
) lm_hnode
=nptr
;
/* link the new node in front of the current bucket head */
256 nptr
->next
= htab
[hval
];
258 if (type
!=OBJ_GRP
) (*ctr
)++;
262 nptr
->visit
= (visit
-1);
263 nptr
->lasturl
=find_url(lasturl
);
264 nptr
->tstamp
= tstamp
;
269 if (ispage(log_rec
.url
))
271 if (htab
==sm_htab
) update_entry(log_rec
.url
);
272 nptr
->lasturl
=find_url(log_rec
.url
);
273 nptr
->tstamp
= tstamp
;
282 /* set object type */
283 if (type
==OBJ_GRP
) nptr
->flag
=OBJ_GRP
; /* is it a grouping? */
286 /* check if it's a hidden object */
287 if ((hide_sites
)||(isinlist(hidden_sites
,nptr
->string
)!=NULL
))
289 if (htab
==sm_htab
) lm_hnode
=nptr
;
296 /*********************************************/
297 /* DEL_HLIST - delete host hash table */
298 /*********************************************/
/*
 * del_hlist - release the host nodes held in htab.
 *
 * Iterates over the MAXHASH bucket indexes; for a node aptr it frees
 * the key string first and then the node structure.
 * NOTE(review): the chain walk between the for loop and the free()
 * calls is not visible in this view - confirm against the full file.
 */
300 void del_hlist(HNODEPTR
*htab
)
302 /* free memory used by hash table */
306 for (i
=0;i
<MAXHASH
;i
++)
314 free (aptr
->string
); /* free hostname string space */
315 free (aptr
); /* free hostname structure */
325 /*********************************************/
326 /* NEW_UNODE - URL node creation */
327 /*********************************************/
/*
 * new_unode - allocate a URL node for the key string str.
 *
 * Visible behaviour:
 *   - when strlen(str) >= MAXURLH, a "[new_unode]" diagnostic built from
 *     msg_big_one and the string length is written to stderr;
 *   - a buffer of strlen(str)+1 bytes is malloc'd for the key, and a
 *     NULL UNODEPTR is returned if that allocation fails;
 *   - a struct unode is malloc'd and, on success, its "flag" field is
 *     set to OBJ_REG.
 *
 * NOTE(review): lines are missing from this view (braces, locals, other
 * field initialisation, the final return); confirm against the full
 * file.
 * NOTE(review): "%d" is paired with the size_t result of strlen(), so
 * the conversion specifier and argument type do not match.
 */
329 UNODEPTR
new_unode(char *str
)
/* oversize key: report it on stderr */
334 if (strlen(str
) >= MAXURLH
)
338 fprintf(stderr
,"[new_unode] %s (%d)",msg_big_one
,strlen(str
));
340 fprintf(stderr
,":\n--> %s",str
);
341 fprintf(stderr
,"\n");
/* storage for the key string; give up if it cannot be allocated */
346 if ( (sptr
=malloc(strlen(str
)+1))==NULL
) return (UNODEPTR
)NULL
;
/* the node itself */
349 if (( newptr
= malloc(sizeof(struct unode
))) != NULL
)
353 newptr
->flag
= OBJ_REG
;
359 /*********************************************/
360 /* PUT_UNODE - insert/update URL node */
361 /*********************************************/
/*
 * put_unode - insert a URL into, or update it in, the table htab.
 *
 * Parameters:
 *   str    URL key; a key starting with '-' is ignored (returns 0)
 *   type   object type; compared with OBJ_GRP
 *   ctr    counter, incremented when a node is created with
 *          type != OBJ_GRP
 *   htab   table to update
 *   count, xfer, entry, exit
 *          not referenced in the lines visible here
 *
 * Visible flow: an empty bucket htab[hval] gets a node from
 * new_unode(); otherwise keys are compared with strcmp() and a match
 * also requires the same type or that neither side is OBJ_GRP; when
 * nothing matches a new node is linked in front of htab[hval].  A new
 * node is flagged OBJ_GRP for group entries, otherwise its key is
 * looked up in hidden_urls.
 *
 * NOTE(review): hval is presumably hash(str); its assignment, the
 * braces, else branches, counter updates and return statements are not
 * visible in this view.
 */
363 int put_unode(char *str
, int type
, u_int64_t count
, double xfer
,
364 u_int64_t
*ctr
, u_int64_t entry
, u_int64_t exit
, UNODEPTR
*htab
)
/* keys starting with '-' are not stored */
369 if (str
[0]=='-') return 0;
372 /* check if hashed */
373 if ( (cptr
= htab
[hval
]) == NULL
)
376 if ( (nptr
=new_unode(str
)) != NULL
)
385 if (type
!=OBJ_GRP
) (*ctr
)++;
393 if (strcmp(cptr
->string
,str
)==0)
/* same key: accept if types are equal or neither is a group */
395 if ((type
==cptr
->flag
)||((type
!=OBJ_GRP
)&&(cptr
->flag
!=OBJ_GRP
)))
397 /* found... bump counter */
406 if ( (nptr
= new_unode(str
)) != NULL
)
/* link the new node in front of the current bucket head */
411 nptr
->next
= htab
[hval
];
415 if (type
!=OBJ_GRP
) (*ctr
)++;
420 if (type
==OBJ_GRP
) nptr
->flag
=OBJ_GRP
;
421 else if (isinlist(hidden_urls
,nptr
->string
)!=NULL
)
427 /*********************************************/
428 /* DEL_ULIST - delete URL hash table */
429 /*********************************************/
/*
 * del_ulist - release the URL nodes held in htab.
 *
 * Iterates over the MAXHASH bucket indexes; for a node aptr it frees
 * the key string first and then the node structure.
 * NOTE(review): the chain walk between the for loop and the free()
 * calls is not visible in this view - confirm against the full file.
 */
431 void del_ulist(UNODEPTR
*htab
)
433 /* free memory used by hash table */
437 for (i
=0;i
<MAXHASH
;i
++)
445 free (aptr
->string
); /* free up URL string memory */
446 free (aptr
); /* free up URL struct node */
454 /*********************************************/
455 /* NEW_RNODE - Referrer node creation */
456 /*********************************************/
/*
 * new_rnode - allocate a referrer node for the key string str.
 *
 * Visible behaviour:
 *   - when strlen(str) >= MAXREFH, a "[new_rnode]" diagnostic built from
 *     msg_big_one and the string length is written to stderr;
 *   - a buffer of strlen(str)+1 bytes is malloc'd for the key, and a
 *     NULL RNODEPTR is returned if that allocation fails;
 *   - a struct rnode is malloc'd and, on success, its "string" field is
 *     set to the key buffer and "flag" to OBJ_REG.
 *
 * NOTE(review): lines are missing from this view (braces, locals, the
 * copy into the key buffer, the final return); confirm against the
 * full file.
 * NOTE(review): "%d" is paired with the size_t result of strlen(), so
 * the conversion specifier and argument type do not match.
 */
458 RNODEPTR
new_rnode(char *str
)
/* oversize key: report it on stderr */
463 if (strlen(str
) >= MAXREFH
)
467 fprintf(stderr
,"[new_rnode] %s (%d)",msg_big_one
,strlen(str
));
469 fprintf(stderr
,":\n--> %s",str
);
470 fprintf(stderr
,"\n");
/* storage for the key string; give up if it cannot be allocated */
475 if ( (sptr
=malloc(strlen(str
)+1))==NULL
) return (RNODEPTR
)NULL
;
/* the node itself */
478 if (( newptr
= malloc(sizeof(struct rnode
))) != NULL
)
480 newptr
->string
= sptr
;
482 newptr
->flag
= OBJ_REG
;
488 /*********************************************/
489 /* PUT_RNODE - insert/update referrer node */
490 /*********************************************/
/*
 * put_rnode - insert a referrer into, or update it in, the table htab.
 *
 * Parameters:
 *   str    referrer key; when it starts with '-' the caller's buffer is
 *          overwritten with "- (Direct Request)" before the lookup
 *   type   object type; compared with OBJ_GRP
 *   count  not referenced in the lines visible here
 *   ctr    counter, incremented when a node is created with
 *          type != OBJ_GRP
 *   htab   table to update
 *
 * Visible flow: an empty bucket htab[hval] gets a node from
 * new_rnode(); otherwise the l_rnode shortcut is tried, keys are
 * compared with strcmp() and a match also requires the same type or
 * that neither side is OBJ_GRP; when nothing matches a new node is
 * linked in front of htab[hval].  A new node is flagged OBJ_GRP for
 * group entries, otherwise its key is looked up in hidden_refs.
 *
 * NOTE(review): the strcpy() writes 19 bytes (18 characters plus the
 * terminator) into str, so str must be writable and at least that
 * large - confirm that every caller guarantees this.
 * NOTE(review): hval is presumably hash(str); its assignment, the
 * braces, else branches, counter updates and return statements are not
 * visible in this view.
 */
492 int put_rnode(char *str
, int type
, u_int64_t count
,
493 u_int64_t
*ctr
, RNODEPTR
*htab
)
/* rewrite a '-' referrer in place before hashing */
498 if (str
[0]=='-') strcpy(str
,"- (Direct Request)");
501 /* check if hashed */
502 if ( (cptr
= htab
[hval
]) == NULL
)
505 if ( (nptr
=new_rnode(str
)) != NULL
)
511 if (type
!=OBJ_GRP
) (*ctr
)++;
516 /* hashed (SPEEDUP) */
/* shortcut: start from the cached node when its key equals str */
517 if (l_rnode
!=NULL
&& strcmp(l_rnode
->string
,str
)==0) cptr
=l_rnode
;
521 if (strcmp(cptr
->string
,str
)==0)
/* same key: accept if types are equal or neither is a group */
523 if ((type
==cptr
->flag
)||((type
!=OBJ_GRP
)&&(cptr
->flag
!=OBJ_GRP
)))
525 /* found... bump counter */
533 if ( (nptr
= new_rnode(str
)) != NULL
)
/* link the new node in front of the current bucket head */
537 nptr
->next
= htab
[hval
];
539 if (type
!=OBJ_GRP
) (*ctr
)++;
544 if (type
==OBJ_GRP
) nptr
->flag
=OBJ_GRP
;
545 else if (isinlist(hidden_refs
,nptr
->string
)!=NULL
)
552 /*********************************************/
553 /* DEL_RLIST - delete referrer hash table */
554 /*********************************************/
/*
 * del_rlist - release the referrer nodes held in htab.
 *
 * Iterates over the MAXHASH bucket indexes.
 * NOTE(review): the loop body (chain walk and free calls) is not
 * visible in this view - confirm against the full file.
 */
556 void del_rlist(RNODEPTR
*htab
)
558 /* free memory used by hash table */
562 for (i
=0;i
<MAXHASH
;i
++)
580 /*********************************************/
581 /* NEW_ANODE - User Agent node creation */
582 /*********************************************/
/*
 * new_anode - allocate a user agent node for the key string str.
 *
 * Visible behaviour:
 *   - when strlen(str) >= MAXAGENT, a "[new_anode]" diagnostic built
 *     from msg_big_one and the string length is written to stderr;
 *   - a buffer of strlen(str)+1 bytes is malloc'd for the key, and a
 *     NULL ANODEPTR is returned if that allocation fails;
 *   - a struct anode is malloc'd and, on success, its "string" field is
 *     set to the key buffer and "flag" to OBJ_REG.
 *
 * NOTE(review): lines are missing from this view (braces, locals, the
 * copy into the key buffer, the final return); confirm against the
 * full file.
 * NOTE(review): "%d" is paired with the size_t result of strlen(), so
 * the conversion specifier and argument type do not match.
 */
584 ANODEPTR
new_anode(char *str
)
/* oversize key: report it on stderr */
589 if (strlen(str
) >= MAXAGENT
)
593 fprintf(stderr
,"[new_anode] %s (%d)",msg_big_one
,strlen(str
));
595 fprintf(stderr
,":\n--> %s",str
);
596 fprintf(stderr
,"\n");
/* storage for the key string; give up if it cannot be allocated */
601 if ( (sptr
=malloc(strlen(str
)+1))==NULL
) return (ANODEPTR
)NULL
;
/* the node itself */
604 if (( newptr
= malloc(sizeof(struct anode
))) != NULL
)
606 newptr
->string
= sptr
;
608 newptr
->flag
= OBJ_REG
;
614 /*********************************************/
615 /* PUT_ANODE - insert/update user agent node */
616 /*********************************************/
/*
 * put_anode - insert a user agent into, or update it in, the table htab.
 *
 * Parameters:
 *   str    user agent key; a key starting with '-' is ignored
 *          (returns 0)
 *   type   object type; compared with OBJ_GRP
 *   count  not referenced in the lines visible here
 *   ctr    counter, incremented when a node is created with
 *          type != OBJ_GRP
 *   htab   table to update
 *
 * Visible flow: an empty bucket htab[hval] gets a node from
 * new_anode(); otherwise keys are compared with strcmp() and a match
 * also requires the same type or that neither side is OBJ_GRP; when
 * nothing matches a new node is linked in front of htab[hval].  A new
 * node is flagged OBJ_GRP for group entries, otherwise its key is
 * looked up in hidden_agents.
 *
 * NOTE(review): hval is presumably hash(str); its assignment, the
 * braces, else branches, counter updates and return statements are not
 * visible in this view.
 */
618 int put_anode(char *str
, int type
, u_int64_t count
,
619 u_int64_t
*ctr
, ANODEPTR
*htab
)
624 if (str
[0]=='-') return 0; /* skip bad user agents */
627 /* check if hashed */
628 if ( (cptr
= htab
[hval
]) == NULL
)
631 if ( (nptr
=new_anode(str
)) != NULL
)
637 if (type
!=OBJ_GRP
) (*ctr
)++;
645 if (strcmp(cptr
->string
,str
)==0)
/* same key: accept if types are equal or neither is a group */
647 if ((type
==cptr
->flag
)||((type
!=OBJ_GRP
)&&(cptr
->flag
!=OBJ_GRP
)))
649 /* found... bump counter */
657 if ( (nptr
= new_anode(str
)) != NULL
)
/* link the new node in front of the current bucket head */
661 nptr
->next
= htab
[hval
];
663 if (type
!=OBJ_GRP
) (*ctr
)++;
666 if (type
==OBJ_GRP
) nptr
->flag
=OBJ_GRP
;
667 else if (isinlist(hidden_agents
,nptr
->string
)!=NULL
)
672 /*********************************************/
673 /* DEL_ALIST - delete user agent hash table */
674 /*********************************************/
/*
 * del_alist - release the user agent nodes held in htab.
 *
 * Iterates over the MAXHASH bucket indexes.
 * NOTE(review): the loop body (chain walk and free calls) is not
 * visible in this view - confirm against the full file.
 */
676 void del_alist(ANODEPTR
*htab
)
678 /* free memory used by hash table */
682 for (i
=0;i
<MAXHASH
;i
++)
699 /*********************************************/
700 /* NEW_SNODE - Search str node creation */
701 /*********************************************/
/*
 * new_snode - allocate a search string node for the key string str.
 *
 * Visible behaviour:
 *   - when strlen(str) >= MAXSRCHH, a "[new_snode]" diagnostic built
 *     from msg_big_one and the string length is written to stderr;
 *   - a buffer of strlen(str)+1 bytes is malloc'd for the key, and a
 *     NULL SNODEPTR is returned if that allocation fails;
 *   - a struct snode is malloc'd and, on success, its "string" field is
 *     set to the key buffer.
 *
 * NOTE(review): lines are missing from this view (braces, locals, the
 * copy into the key buffer, the final return); confirm against the
 * full file.
 * NOTE(review): "%d" is paired with the size_t result of strlen(), so
 * the conversion specifier and argument type do not match.
 */
703 SNODEPTR
new_snode(char *str
)
/* oversize key: report it on stderr */
708 if (strlen(str
) >= MAXSRCHH
)
712 fprintf(stderr
,"[new_snode] %s (%d)",msg_big_one
,strlen(str
));
714 fprintf(stderr
,":\n--> %s",str
);
715 fprintf(stderr
,"\n");
/* storage for the key string; give up if it cannot be allocated */
720 if ( (sptr
=malloc(strlen(str
)+1))==NULL
) return (SNODEPTR
)NULL
;
/* the node itself */
723 if (( newptr
= malloc(sizeof(struct snode
))) != NULL
)
725 newptr
->string
= sptr
;
732 /*********************************************/
733 /* PUT_SNODE - insert/update search str node */
734 /*********************************************/
/*
 * put_snode - insert a search string into, or update it in, htab.
 *
 * Parameters:
 *   str    search string key; an empty key or one starting with a
 *          space is ignored (returns 0)
 *   count  not referenced in the lines visible here
 *   htab   table to update
 *
 * Visible flow: an empty bucket htab[hval] gets a node from
 * new_snode(); otherwise keys are compared with strcmp(); when nothing
 * matches a new node is linked in front of htab[hval].
 *
 * NOTE(review): hval is presumably hash(str); its assignment, the
 * braces, else branches, counter updates and return statements are not
 * visible in this view.
 */
736 int put_snode(char *str
, u_int64_t count
, SNODEPTR
*htab
)
741 if (str
[0]==0 || str
[0]==' ') return 0; /* skip bad search strs */
744 /* check if hashed */
745 if ( (cptr
= htab
[hval
]) == NULL
)
748 if ( (nptr
=new_snode(str
)) != NULL
)
760 if (strcmp(cptr
->string
,str
)==0)
762 /* found... bump counter */
769 if ( (nptr
= new_snode(str
)) != NULL
)
/* link the new node in front of the current bucket head */
772 nptr
->next
= htab
[hval
];
779 /*********************************************/
780 /* DEL_SLIST - delete search str hash table */
781 /*********************************************/
/*
 * del_slist - release the search string nodes held in htab.
 *
 * Iterates over the MAXHASH bucket indexes.
 * NOTE(review): the loop body (chain walk and free calls) is not
 * visible in this view - confirm against the full file.
 */
783 void del_slist(SNODEPTR
*htab
)
785 /* free memory used by hash table */
789 for (i
=0;i
<MAXHASH
;i
++)
806 /*********************************************/
807 /* NEW_INODE - create ident (username) node */
808 /*********************************************/
/*
 * new_inode - allocate an ident (username) node for the key string str.
 *
 * Visible behaviour:
 *   - when strlen(str) >= MAXIDENT, a "[new_inode]" diagnostic built
 *     from msg_big_one and the string length is written to stderr;
 *   - a buffer of strlen(str)+1 bytes is malloc'd for the key, and a
 *     NULL INODEPTR is returned if that allocation fails;
 *   - a struct inode is malloc'd and, on success, its "string" field is
 *     set to the key buffer.
 *
 * NOTE(review): lines are missing from this view (braces, locals, the
 * copy into the key buffer, the final return); confirm against the
 * full file.
 * NOTE(review): "%d" is paired with the size_t result of strlen(), so
 * the conversion specifier and argument type do not match.
 */
810 INODEPTR
new_inode(char *str
)
/* oversize key: report it on stderr */
815 if (strlen(str
) >= MAXIDENT
)
819 fprintf(stderr
,"[new_inode] %s (%d)",msg_big_one
,strlen(str
));
821 fprintf(stderr
,":\n--> %s",str
);
822 fprintf(stderr
,"\n");
/* storage for the key string; give up if it cannot be allocated */
827 if ( (sptr
=malloc(strlen(str
)+1))==NULL
) return (INODEPTR
)NULL
;
/* the node itself */
830 if (( newptr
= malloc(sizeof(struct inode
))) != NULL
)
832 newptr
->string
=sptr
;
840 /*********************************************/
841 /* PUT_INODE - insert/update ident node */
842 /*********************************************/
/*
 * put_inode - insert an ident (username) into, or update it in, htab.
 *
 * Parameters (labels taken from the original trailing comments):
 *   str     ident string; an empty key or one starting with '-' is
 *           ignored (returns 0)
 *   type    object type; compared with OBJ_GRP
 *   count   hit count        (not referenced in the lines visible here)
 *   file    file flag        (not referenced in the lines visible here)
 *   xfer    transfer size    (not referenced in the lines visible here)
 *   ctr     counter, incremented when a node is created with
 *           type != OBJ_GRP
 *   visit   visits; new nodes get visit-1
 *   tstamp  timestamp, stored in new nodes and compared with the node's
 *           tstamp and visit_timeout on a match
 *   htab    table to update
 *
 * Visible flow: an empty bucket htab[hval] gets a node from
 * new_inode(); otherwise keys are compared with strcmp() and a match
 * also requires the same type or that neither side is OBJ_GRP; when
 * nothing matches a new node is linked in front of htab[hval].  A new
 * node is flagged OBJ_GRP for group entries and its key is looked up in
 * hidden_users.
 *
 * NOTE(review): hval is presumably hash(str); its assignment, the
 * braces, else branches, the action taken on a visit timeout and the
 * return statements are not visible in this view.
 */
844 int put_inode( char *str
, /* ident str */
845 int type
, /* obj type */
846 u_int64_t count
, /* hit count */
847 u_int64_t file
, /* File flag */
848 double xfer
, /* xfer size */
849 u_int64_t
*ctr
, /* counter */
850 u_int64_t visit
, /* visits */
851 u_int64_t tstamp
,/* timestamp */
852 INODEPTR
*htab
) /* hashtable */
857 if ((str
[0]=='-') || (str
[0]==0)) return 0; /* skip if no username */
860 /* check if hashed */
861 if ( (cptr
= htab
[hval
]) == NULL
)
864 if ( (nptr
=new_inode(str
)) != NULL
)
872 if (type
!=OBJ_GRP
) (*ctr
)++;
876 nptr
->visit
=(visit
-1);
882 if (ispage(log_rec
.url
)) nptr
->tstamp
=tstamp
;
891 if (strcmp(cptr
->string
,str
)==0)
/* same key: accept if types are equal or neither is a group */
893 if ((type
==cptr
->flag
)||((type
!=OBJ_GRP
)&&(cptr
->flag
!=OBJ_GRP
)))
895 /* found... bump counter */
900 if (ispage(log_rec
.url
))
902 if ((tstamp
-cptr
->tstamp
)>=visit_timeout
)
912 if ( (nptr
= new_inode(str
)) != NULL
)
/* link the new node in front of the current bucket head */
918 nptr
->next
= htab
[hval
];
920 if (type
!=OBJ_GRP
) (*ctr
)++;
924 nptr
->visit
= (visit
-1);
925 nptr
->tstamp
= tstamp
;
930 if (ispage(log_rec
.url
)) nptr
->tstamp
= tstamp
;
937 /* set object type */
938 if (type
==OBJ_GRP
) nptr
->flag
=OBJ_GRP
; /* is it a grouping? */
941 /* check if it's a hidden object */
942 if (isinlist(hidden_users
,nptr
->string
)!=NULL
)
949 /*********************************************/
950 /* DEL_ILIST - delete ident hash table */
951 /*********************************************/
/*
 * del_ilist - release the ident nodes held in htab.
 *
 * Iterates over the MAXHASH bucket indexes; for a node aptr it frees
 * the key string first and then the node structure.
 * NOTE(review): the chain walk between the for loop and the free()
 * calls is not visible in this view - confirm against the full file.
 */
953 void del_ilist(INODEPTR
*htab
)
955 /* free memory used by hash table */
959 for (i
=0;i
<MAXHASH
;i
++)
967 free (aptr
->string
); /* free ident string space */
968 free (aptr
); /* free ident structure */
976 #ifdef USE_DNS /* only add these for DNS */
978 /*********************************************/
979 /* NEW_DNODE - DNS resolver node creation */
980 /*********************************************/
/*
 * new_dnode - allocate a DNS resolver node for the key string str.
 *
 * Visible behaviour:
 *   - when strlen(str) >= MAXHOST, a "[new_dnode]" diagnostic built from
 *     msg_big_one and the string length is written to stderr;
 *   - a buffer of strlen(str)+1 bytes is malloc'd for the key, and a
 *     NULL DNODEPTR is returned if that allocation fails;
 *   - a struct dnode is malloc'd and, on success, its "string" field is
 *     set to the key buffer.
 *
 * NOTE(review): lines are missing from this view (braces, locals, the
 * copy into the key buffer, the final return); confirm against the
 * full file.
 * NOTE(review): "%d" is paired with the size_t result of strlen(), so
 * the conversion specifier and argument type do not match.
 */
982 DNODEPTR
new_dnode(char *str
)
/* oversize key: report it on stderr */
987 if (strlen(str
) >= MAXHOST
)
991 fprintf(stderr
,"[new_dnode] %s (%d)",msg_big_one
,strlen(str
));
993 fprintf(stderr
,":\n--> %s",str
);
994 fprintf(stderr
,"\n");
/* storage for the key string; give up if it cannot be allocated */
999 if ( (sptr
=malloc(strlen(str
)+1))==NULL
) return (DNODEPTR
)NULL
;
/* the node itself */
1002 if (( newptr
= malloc(sizeof(struct dnode
))) != NULL
)
1004 newptr
->string
= sptr
;
1010 /*********************************************/
1011 /* PUT_DNODE - insert/update dns host node */
1012 /*********************************************/
/*
 * put_dnode - insert a host name into the DNS table htab if absent.
 *
 * Parameters:
 *   str   host name key; an empty key or one starting with a space is
 *         ignored (returns 0)
 *   addr  address bytes to store, or NULL
 *   len   number of bytes copied from addr; also stored as addrlen
 *   htab  table to update
 *
 * Visible flow: an empty bucket htab[hval] gets a node from
 * new_dnode(); otherwise the chain is walked and 0 is returned as soon
 * as an equal key is found; when nothing matches a new node is linked
 * in front of htab[hval].  For a new node the address field is filled
 * with len bytes from addr, or zeroed over sizeof(struct
 * sockaddr_storage) when addr is NULL.
 *
 * NOTE(review): no check of len against the size of the addr field is
 * visible before the memcpy() - confirm that callers bound it.
 * NOTE(review): hval is presumably hash(str); its assignment, the
 * braces, else branches and remaining return statements are not
 * visible in this view.
 */
1014 int put_dnode(char *str
, void *addr
, int len
, DNODEPTR
*htab
)
1019 if (str
[0]==0 || str
[0]==' ') return 0; /* skip bad hostnames */
1022 /* check if hashed */
1023 if ( (cptr
= htab
[hval
]) == NULL
)
1026 if ( (nptr
=new_dnode(str
)) != NULL
)
/* store the caller's address, or an all-zero one when none is given */
1028 if (addr
) memcpy(&nptr
->addr
, addr
, len
);
1029 else memset(&nptr
->addr
, 0, sizeof(struct sockaddr_storage
));
1030 nptr
->addrlen
= len
;
/* bucket in use: an existing equal key means nothing to do */
1038 while (cptr
!= NULL
)
1040 if (strcmp(cptr
->string
,str
)==0) return 0;
1044 if ( (nptr
= new_dnode(str
)) != NULL
)
1046 if (addr
) memcpy(&nptr
->addr
, addr
, len
);
1047 else memset(&nptr
->addr
, 0, sizeof(struct sockaddr_storage
));
1048 nptr
->addrlen
= len
;
/* link the new node in front of the current bucket head */
1049 nptr
->next
= htab
[hval
];
1056 /*********************************************/
1057 /* DEL_DLIST - delete dns hash table */
1058 /*********************************************/
/*
 * del_dlist - release the DNS nodes held in htab.
 *
 * Iterates over the MAXHASH bucket indexes, skips empty buckets, and
 * walks each chain while dptr is non-NULL, freeing the node's key
 * string.
 * NOTE(review): the statements that advance dptr and free the node
 * structure are not visible in this view - confirm against the full
 * file.
 */
1060 void del_dlist(DNODEPTR
*htab
)
1062 /* free memory used by hash table */
1066 for (i
=0;i
<MAXHASH
;i
++)
1068 if (htab
[i
] != NULL
)
1071 while (dptr
!= NULL
)
1074 free (dptr
->string
);
1083 #endif /* USE_DNS */
1085 /*********************************************/
1086 /* FIND_URL - Find URL in hash table */
1087 /*********************************************/
/*
 * find_url - look up str in the URL table um_htab.
 *
 * Walks the chain at um_htab[hash(str)] and returns the stored key
 * string of the first node whose key equals str.  When the bucket is
 * empty or no key matches, blank_str is returned.  The returned pointer
 * refers to storage owned by the table (or to blank_str), not to a
 * copy.
 * NOTE(review): the statement that advances cptr is not visible in
 * this view.
 */
1089 char *find_url(char *str
)
1093 if ( (cptr
=um_htab
[hash(str
)]) != NULL
)
1095 while (cptr
!= NULL
)
1097 if (strcmp(cptr
->string
,str
)==0)
1098 return cptr
->string
;
1102 return blank_str
; /* shouldn't get here */
1105 /*********************************************/
1106 /* UPDATE_ENTRY - update entry page total */
1107 /*********************************************/
/*
 * update_entry - locate the URL node for str in um_htab.
 *
 * Returns immediately when str is NULL or when the bucket
 * um_htab[hash(str)] is empty.  Otherwise the chain is walked and the
 * node whose key equals str is selected, provided it is not flagged
 * OBJ_GRP.
 * NOTE(review): the action applied to the selected node is not visible
 * in this view; the banner above the function describes it as updating
 * the entry page total - confirm against the full file.
 */
1109 void update_entry(char *str
)
1113 if (str
==NULL
) return;
1114 if ( (uptr
= um_htab
[hash(str
)]) == NULL
) return;
1117 while (uptr
!= NULL
)
1119 if (strcmp(uptr
->string
,str
)==0)
/* group entries are skipped */
1121 if (uptr
->flag
!=OBJ_GRP
)
1132 /*********************************************/
1133 /* UPDATE_EXIT - update exit page total */
1134 /*********************************************/
/*
 * update_exit - locate the URL node for str in um_htab.
 *
 * Returns immediately when str is NULL or when the bucket
 * um_htab[hash(str)] is empty.  Otherwise the chain is walked and the
 * node whose key equals str is selected, provided it is not flagged
 * OBJ_GRP.
 * NOTE(review): the action applied to the selected node is not visible
 * in this view; the banner above the function describes it as updating
 * the exit page total - confirm against the full file.
 */
1136 void update_exit(char *str
)
1140 if (str
==NULL
) return;
1141 if ( (uptr
= um_htab
[hash(str
)]) == NULL
) return;
1144 while (uptr
!= NULL
)
1146 if (strcmp(uptr
->string
,str
)==0)
/* group entries are skipped */
1148 if (uptr
->flag
!=OBJ_GRP
)
1159 /*********************************************/
1160 /* MONTH_UPDATE_EXIT - eom exit page update */
1161 /*********************************************/
/*
 * month_update_exit - record exit pages for visits that have timed out.
 *
 * Iterates over the MAXHASH bucket indexes; for each node that is not
 * flagged OBJ_GRP and whose tstamp lies at least visit_timeout before
 * the tstamp argument, update_exit() is called with the node's lasturl.
 * NOTE(review): which table is walked and how nptr advances are not
 * visible in this view - confirm against the full file.
 */
1163 void month_update_exit(u_int64_t tstamp
)
1168 for (i
=0;i
<MAXHASH
;i
++)
1173 if (nptr
->flag
!=OBJ_GRP
)
1175 if ((tstamp
-nptr
->tstamp
)>=visit_timeout
)
1176 update_exit(nptr
->lasturl
);
1183 /*********************************************/
1184 /* TOT_VISIT - calculate total visits */
1185 /*********************************************/
/*
 * tot_visit - sum the visit counts of the host nodes in list.
 *
 * Iterates over the MAXHASH bucket indexes and adds the "visit" field
 * of every node that is not flagged OBJ_GRP to tot.
 * NOTE(review): the initialisation of tot, the chain walk and the
 * return statement are not visible in this view; the u_int64_t return
 * value is presumably tot - confirm against the full file.
 */
1187 u_int64_t
tot_visit(HNODEPTR
*list
)
1193 for (i
=0;i
<MAXHASH
;i
++)
1198 if (hptr
->flag
!=OBJ_GRP
) tot
+=hptr
->visit
;
1206 /*********************************************/
1207 /* HASH - return hash value for string */
1208 /*********************************************/
/*
 * hash - multiplicative string hash reduced to a bucket index.
 *
 * Starting from 0, each character updates the accumulator as
 * hashval = *str + (hashval << 5) - hashval, i.e. hashval * 31 plus the
 * character.  The result is reduced modulo MAXHASH, so it is a valid
 * index into the MAXHASH-sized tables.
 * NOTE(review): the declaration of hashval is not visible in this
 * view, so its width (and therefore where the arithmetic wraps) must
 * be confirmed against the full file.  The "#else USE_OLDHASH" line
 * that follows suggests this variant is selected by USE_OLDHASH.
 */
1210 unsigned int hash(char *str
)
1214 for (hashval
= 0; *str
!= '\0'; str
++)
1215 hashval
= *str
+ (hashval
<< 5) - hashval
;
1217 return hashval
% MAXHASH
;
1220 #else /* USE_OLDHASH */
1221 /*********************************************/
1222 /* HASH (SuperFastHash by Paul Hsieh) */
1223 /*********************************************/
/*
 * get16bits(d) - read a 16-bit value starting at address d.
 * On the compilers and targets listed in the condition below the value
 * is read directly through a uint16_t pointer.
 */
1226 #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
1227 || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
1228 #define get16bits(d) (*((const uint16_t *) (d)))
/*
 * Fallback when the macro was not defined above: build the value from
 * two separate byte reads, byte [0] as the low 8 bits and byte [1]
 * shifted left by 8.
 */
1231 #if !defined (get16bits)
1232 #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
1233 +(uint32_t)(((const uint8_t *)(d))[0]) )
/*
 * hash - SuperFastHash-style string hash reduced to a bucket index.
 *
 * The accumulator starts at the string length.  Each pass of the main
 * loop mixes two 16-bit reads (at str and str+2) into it and advances
 * str by 2*sizeof(uint16_t) bytes.  A switch with cases 3, 2 and 1
 * then folds in the remaining bytes, and the result is reduced modulo
 * MAXHASH.  Empty input returns 0.
 *
 * NOTE(review): strlen(str) is evaluated in the initialiser of len,
 * before the "str == NULL" test, so that test cannot protect the
 * strlen() call from a NULL argument.
 * NOTE(review): the loop consumes four bytes per pass while counting
 * len down by one, so len is presumably rescaled between the guard and
 * the loop; those lines, the switch header, the per-case mixing steps
 * and the final avalanche statements are not visible in this view.
 * NOTE(review): str is a plain char pointer, so str[..] << 18 and
 * hash += *str depend on whether char is signed on the target.
 */
1236 unsigned int hash(char *str
)
1238 int len
=strlen(str
);
1239 uint32_t hash
= len
, tmp
;
1242 if (len
<= 0 || str
== NULL
) return 0;
/* main loop: two 16-bit reads per pass */
1248 for (;len
> 0; len
--)
1250 hash
+= get16bits (str
);
1251 tmp
= (get16bits (str
+2) << 11) ^ hash
;
1252 hash
= (hash
<< 16) ^ tmp
;
1253 str
+= 2*sizeof (uint16_t);
1257 /* Handle end cases */
1260 case 3: hash
+= get16bits (str
);
1262 hash
^= str
[sizeof (uint16_t)] << 18;
1265 case 2: hash
+= get16bits (str
);
1269 case 1: hash
+= *str
;
1274 /* Force "avalanching" of final 127 bits */
/* reduce to a valid index for the MAXHASH-sized tables */
1282 return hash
% MAXHASH
;
1284 #endif /* USE_OLDHASH */