From f086616f7bc6b8f1fd2c23530a7a2337a67c110e Mon Sep 17 00:00:00 2001 From: Adam Chlipala Date: Sun, 9 Dec 2007 18:30:50 +0000 Subject: [PATCH] Ported webbw --- Makefile | 6 +- bin/.cvsignore | 1 + configDefault/apache.cfg | 12 ++ configDefault/apache.csg | 1 + configDefault/webalizer.cfg | 3 + configDefault/webalizer.csg | 3 + src/plugins/apache.sig | 3 + src/plugins/apache.sml | 7 + src/plugins/webalizer.sml | 2 +- src/stats/.cvsignore | 1 + src/stats/webbw-main.sml | 1 + src/stats/webbw.cm | 11 ++ src/stats/webbw.mlb | 10 ++ src/stats/webbw.sml | 282 ++++++++++++++++++++++++++++++++++++ 14 files changed, 341 insertions(+), 2 deletions(-) create mode 100644 src/stats/.cvsignore create mode 100644 src/stats/webbw-main.sml create mode 100644 src/stats/webbw.cm create mode 100644 src/stats/webbw.mlb create mode 100644 src/stats/webbw.sml diff --git a/Makefile b/Makefile index 45e6cc5..9396f38 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ config.sml: mlton: bin/domtool-server bin/domtool-client bin/domtool-slave \ bin/domtool-admin bin/domtool-doc bin/dbtool bin/vmail \ - bin/smtplog bin/setsa bin/mysql-fixperms + bin/smtplog bin/setsa bin/mysql-fixperms bin/webbw smlnj: $(COMMON_DEPS) openssl/smlnj/FFI/libssl.h.cm src/domtool.cm @@ -149,6 +149,9 @@ bin/smtplog: $(COMMON_MLTON_DEPS) src/smtplog.mlb bin/mysql-fixperms: $(COMMON_MLTON_DEPS) src/mysql-fixperms.mlb $(MLTON) -output bin/mysql-fixperms src/mysql-fixperms.mlb +bin/webbw: $(COMMON_MLTON_DEPS) src/stats/webbw.mlb + mlton -output bin/webbw src/stats/webbw.mlb + elisp/domtool-tables.el: lib/*.dtl bin/domtool-doc bin/domtool-doc -basis -emacs >$@ @@ -178,6 +181,7 @@ install: -cp bin/smtplog /usr/local/bin/ -cp bin/mysql-fixperms /usr/local/bin/ -cp bin/vmailpasswd /usr/local/bin/ + -cp bin/webbw /usr/local/sbin/ cp src/plugins/domtool-postgres /usr/local/sbin/ cp src/plugins/domtool-mysql /usr/local/sbin/ -mkdir -p $(EMACS_DIR) diff --git a/bin/.cvsignore b/bin/.cvsignore index 7686142..e350300 100644 --- a/bin/.cvsignore +++ b/bin/.cvsignore @@ -9,3 +9,4 @@ setsa smtplog mysql-fixperms vmailpasswd +webbw diff --git a/configDefault/apache.cfg b/configDefault/apache.cfg index f2b9918..db278ba 100644 --- a/configDefault/apache.cfg +++ b/configDefault/apache.cfg @@ -39,6 +39,18 @@ fun logDirOf version1 user = user, "/apache/log"] +fun realLogDirOf user = + if size user < 2 then + "/bad_username" + else + String.concat ["/afs/hcoop.net/user/", + String.substring (user, 0, 1), + "/", + String.substring (user, 0, 2), + "/", + user, + "/.logs/apache"] + fun backupLogDirOf version1 = if version1 then "/afs/hcoop.net/common/etc/domtool/backup/apache/" diff --git a/configDefault/apache.csg b/configDefault/apache.csg index 0e827e7..adc3000 100644 --- a/configDefault/apache.csg +++ b/configDefault/apache.csg @@ -21,6 +21,7 @@ signature APACHE_CONFIG = sig val public_html : string val logDirOf : bool -> string -> string + val realLogDirOf : string -> string val backupLogDirOf : bool -> string end diff --git a/configDefault/webalizer.cfg b/configDefault/webalizer.cfg index 279ca9d..addea9c 100644 --- a/configDefault/webalizer.cfg +++ b/configDefault/webalizer.cfg @@ -4,4 +4,7 @@ val configDir = "/afs/hcoop.net/common/etc/domtool/webalizer/config" val outputDir = "/afs/hcoop.net/common/etc/domtool/webalizer/output" val backupDir = "/afs/hcoop.net/common/etc/domtool/backup/webalizer" +val defaultOutput = "/afs/hcoop.net/common/etc/domtool/webalizer/output/main/" +val defaultHost = "www.hcoop.net" + end diff --git a/configDefault/webalizer.csg b/configDefault/webalizer.csg index 1ac2812..a59319e 100644 --- a/configDefault/webalizer.csg +++ b/configDefault/webalizer.csg @@ -4,4 +4,7 @@ signature WEBALIZER_CONFIG = sig val outputDir : string val backupDir : string + val defaultOutput : string + val defaultHost : string + end diff --git a/src/plugins/apache.sig b/src/plugins/apache.sig index d08433d..07e56e3 100644 --- a/src/plugins/apache.sig +++ b/src/plugins/apache.sig @@ -35,6 +35,9 @@ signature APACHE = sig val logDir : {user : string, node : string, vhostId : string} -> string (* Where is a vhost's log directory located? *) + val realLogDir : {user : string, node : string, vhostId : string} -> string + (* OK, where is it _really_ located? (Target of log syncing into AFS) *) + val defaults : (string * Ast.typ * (unit -> Ast.exp)) list (* Default environment variables *) diff --git a/src/plugins/apache.sml b/src/plugins/apache.sml index c4b684f..7783d0d 100644 --- a/src/plugins/apache.sml +++ b/src/plugins/apache.sml @@ -267,6 +267,13 @@ fun logDir {user, node, vhostId} = "/", vhostId] +fun realLogDir {user, node, vhostId} = + String.concat [Config.Apache.realLogDirOf user, + "/", + node, + "/", + vhostId] + val () = Slave.registerFileHandler (fn fs => let val spl = OS.Path.splitDirFile (#file fs) diff --git a/src/plugins/webalizer.sml b/src/plugins/webalizer.sml index c73ed98..50ca6a9 100644 --- a/src/plugins/webalizer.sml +++ b/src/plugins/webalizer.sml @@ -32,7 +32,7 @@ val () = Apache.registerPre name = id ^ ".wbl"} in TextIO.output (fd, "LogFile\t"); - TextIO.output (fd, Apache.logDir {user = user, node = node, vhostId = hostname}); + TextIO.output (fd, Apache.realLogDir {user = user, node = node, vhostId = hostname}); TextIO.output (fd, "/access.log\nOutputDir\t"); TextIO.output (fd, Config.Webalizer.outputDir); TextIO.output (fd, "/"); diff --git a/src/stats/.cvsignore b/src/stats/.cvsignore new file mode 100644 index 0000000..6dc8e1a --- /dev/null +++ b/src/stats/.cvsignore @@ -0,0 +1 @@ +.cm diff --git a/src/stats/webbw-main.sml b/src/stats/webbw-main.sml new file mode 100644 index 0000000..08409b7 --- /dev/null +++ b/src/stats/webbw-main.sml @@ -0,0 +1 @@ +val _ = Webbw.doit () diff --git a/src/stats/webbw.cm b/src/stats/webbw.cm new file mode 100644 index 0000000..139b9ed --- /dev/null +++ b/src/stats/webbw.cm @@ -0,0 +1,11 @@ +Group is + +$/basis.cm +$/smlnj-lib.cm + +../configTypes.sml +../../configDefault/config.sig +../../configDefault/configDefault.sml +../../config.sml + +webbw.sml diff --git a/src/stats/webbw.mlb b/src/stats/webbw.mlb new file mode 100644 index 0000000..cfc1dc8 --- /dev/null +++ b/src/stats/webbw.mlb @@ -0,0 +1,10 @@ +$(SML_LIB)/basis/basis.mlb +$(SML_LIB)/smlnj-lib/Util/smlnj-lib.mlb + +../configTypes.sml +../../configDefault/config.sig +../../configDefault/configDefault.sml +../../config.sml + +webbw.sml +webbw-main.sml diff --git a/src/stats/webbw.sml b/src/stats/webbw.sml new file mode 100644 index 0000000..f180683 --- /dev/null +++ b/src/stats/webbw.sml @@ -0,0 +1,282 @@ +(* +Domtool 2 (http://hcoop.sf.net/) +Copyright (C) 2004-2007 Adam Chlipala + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +*) + +(* Generation of aggregate per-user/per-vhost web bandwidth statistics *) + +structure Webbw = struct + +val groupsBase = Config.Webalizer.defaultOutput +(* Where to look for grouped user statistics *) + +fun mots m = + let + open Date + in + case m of + Jan => "Jan" + | Feb => "Feb" + | Mar => "Mar" + | Apr => "Apr" + | May => "May" + | Jun => "Jun" + | Jul => "Jul" + | Aug => "Aug" + | Sep => "Sep" + | Oct => "Oct" + | Nov => "Nov" + | Dec => "Dec" + end + +fun motn m = + let + open Date + in + case m of + Jan => "01" + | Feb => "02" + | Mar => "03" + | Apr => "04" + | May => "05" + | Jun => "06" + | Jul => "07" + | Aug => "08" + | Sep => "09" + | Oct => "10" + | Nov => "11" + | Dec => "12" + end + +val monthInc = Time.fromSeconds (LargeInt.fromInt 2592000) + +fun doit () = + let + val now = Date.fromTimeLocal (Time.now ()) + + fun backupMonth t = + let + val now = Date.fromTimeLocal t + + fun backupMonth' t = + let + val d = Date.fromTimeLocal t + in + if Date.month d = Date.month now then + backupMonth' (Time.- (t, monthInc)) + else + t + end + in + backupMonth' t + end + + fun backupMulti n = + if n = 0 then + Time.now () + else + backupMonth (backupMulti (n-1)) + + val now = + case CommandLine.arguments () of + [n] => + (case Int.fromString n of + NONE => raise Fail "Invalid integer parameter" + | SOME n => + if n >= 0 then + Date.fromTimeLocal (backupMulti n) + else + raise Fail "Negative parameter") + | _ => now + + val when = mots (Date.month now) ^ " " ^ Int.toString (Date.year now) + + val groups = let + val inf = TextIO.openIn (groupsBase ^ Int.toString (Date.year now) ^ motn (Date.month now) ^ ".html") + val _ = TextIO.inputLine inf + val _ = TextIO.inputLine inf + val _ = TextIO.inputLine inf + val _ = TextIO.inputLine inf + + fun loop groups = + case TextIO.inputLine inf of + NONE => groups + | SOME line => + case String.tokens Char.isSpace line of + [hits, perc, kb, kbperc, url] => + if size url >= 4 + andalso String.sub (url, 0) = #"/" + andalso String.sub (url, 1) = #"~" + andalso String.sub (url, size url - 2) = #"/" + andalso String.sub (url, size url - 1) = #"*" then + let + val uname = String.substring (url, 2, size url - 4) + in + loop (((uname, ["www.hcoop.net"]), + valOf (Int.fromString kb)) :: groups) + end + else + loop groups + | _ => groups + + val groups : ((string * string list) * int) list ref = ref (loop []) + val _ = TextIO.closeIn inf + in + groups + end handle ex => ref [] + + fun addGroup (group, n, d, d') = + let + val groups' = if List.exists (fn ((x, _), _) => x = group) (!groups) then + map (fn v as ((gr, ds), n') => if gr = group then ((gr, d ^ ":" ^ d' :: ds), n + n') else v) (!groups) + else + ((group, [d ^ ":" ^ d']), n) :: (!groups) + in + groups := groups' + end + + fun dodir {node, host} = + let + val file = Config.Webalizer.outputDir ^ "/" ^ node ^ "/" ^ host ^ "/index.html" + in + if not (Posix.FileSys.access (file, [])) then + NONE + else + let + val inf = TextIO.openIn file + + fun andWeep () = + let + fun waste n = + if n <= 0 then + () + else + (TextIO.inputLine inf; + waste (n-1)) + + val _ = waste 5 + + val l = valOf (TextIO.inputLine inf) + val num = String.extract (l, 32, NONE) + + fun getNum i = + if Char.isDigit (String.sub (num, i)) then + getNum (i+1) + else + valOf (Int.fromString (String.substring (num, 0, i))) + in + getNum 0 + end + + fun readEm () = + case TextIO.inputLine inf of + NONE => NONE + | SOME l => + if Substring.isSubstring when (Substring.full l) then + SOME (andWeep ()) + else + readEm () + + val ret = readEm () + + val group = + if host <> Config.Webalizer.defaultHost then + let + val tokens = String.tokens (fn ch => ch = #".") host + val (tokens, ssl) = + case rev tokens of + "ssl" :: tokens => (rev tokens, true) + | _ => (tokens, false) + val (host, tokens) = + case tokens of + host :: tokens => (host, tokens) + | _ => raise Fail "Host name too short" + + val file = Config.resultRoot ^ "/" ^ node ^ "/" ^ String.concatWith "/" (rev tokens) + ^ "/" ^ host ^ "." ^ String.concatWith "." tokens ^ ".vhost" + val file = if ssl then + file ^ "_ssl" + else + file + + val inf = TextIO.openIn file + val line = case TextIO.inputLine inf of + NONE => raise Fail ("Empty file: " ^ file) + | SOME line => line + + val user = case String.tokens Char.isSpace line of + [_, _, user] => user + | _ => raise Fail ("Bad vhost file format in " ^ file) + in + TextIO.closeIn inf; + SOME user + end + else + NONE + in + (case (ret, group) of + (SOME ret, SOME group) => addGroup (group, ret, node, host) + | _ => ()); + TextIO.closeIn inf; + ret + end handle IO.Io {name, function, ...} => NONE + end + + val dir = Posix.FileSys.opendir Config.Webalizer.outputDir + + fun loop L = + case Posix.FileSys.readdir dir of + NONE => L + | SOME d => + let + val dir = Posix.FileSys.opendir (OS.Path.joinDirFile {dir = Config.Webalizer.outputDir, + file = d}) + + fun loop' L = + case Posix.FileSys.readdir dir of + NONE => L + | SOME d' => + case dodir {node = d, host = d'} of + NONE => loop' L + | SOME n => loop' (((d, d'), n) :: L) + + val L = + if d = "main" then + L + else + loop' L + in + loop L + end + + fun sort ls = ListMergeSort.sort (fn ((_, n1), (_, n2)) => n1 > n2) ls + + val doms = loop [] + val doms = sort doms + val groups = sort (!groups) + + val sum = List.foldl (fn ((_, n), s) => s+n) 0 doms + in + print ("TOTAL: " ^ Int.toString sum ^ "\n\n"); + List.app (fn ((d, d'), n) => print (d ^ ":" ^ d' ^ ": " ^ Int.toString n ^ "\n")) doms; + print "\n"; + List.app (fn ((d, ds), n) => print (d ^ "[" ^ String.concatWith "," ds ^ "]: " ^ Int.toString n ^ "\n")) groups; + Posix.FileSys.closedir dir + end + +end + -- 2.20.1