Ported webbw
authorAdam Chlipala <adamc@hcoop.net>
Sun, 9 Dec 2007 18:30:50 +0000 (18:30 +0000)
committerAdam Chlipala <adamc@hcoop.net>
Sun, 9 Dec 2007 18:30:50 +0000 (18:30 +0000)
14 files changed:
Makefile
bin/.cvsignore
configDefault/apache.cfg
configDefault/apache.csg
configDefault/webalizer.cfg
configDefault/webalizer.csg
src/plugins/apache.sig
src/plugins/apache.sml
src/plugins/webalizer.sml
src/stats/.cvsignore [new file with mode: 0644]
src/stats/webbw-main.sml [new file with mode: 0644]
src/stats/webbw.cm [new file with mode: 0644]
src/stats/webbw.mlb [new file with mode: 0644]
src/stats/webbw.sml [new file with mode: 0644]

index 45e6cc5..9396f38 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -15,7 +15,7 @@ config.sml:
 
 mlton: bin/domtool-server bin/domtool-client bin/domtool-slave \
        bin/domtool-admin bin/domtool-doc bin/dbtool bin/vmail \
 
 mlton: bin/domtool-server bin/domtool-client bin/domtool-slave \
        bin/domtool-admin bin/domtool-doc bin/dbtool bin/vmail \
-       bin/smtplog bin/setsa bin/mysql-fixperms
+       bin/smtplog bin/setsa bin/mysql-fixperms bin/webbw
 
 smlnj: $(COMMON_DEPS) openssl/smlnj/FFI/libssl.h.cm src/domtool.cm
 
 
 smlnj: $(COMMON_DEPS) openssl/smlnj/FFI/libssl.h.cm src/domtool.cm
 
@@ -149,6 +149,9 @@ bin/smtplog: $(COMMON_MLTON_DEPS) src/smtplog.mlb
 bin/mysql-fixperms: $(COMMON_MLTON_DEPS) src/mysql-fixperms.mlb
        $(MLTON) -output bin/mysql-fixperms src/mysql-fixperms.mlb
 
 bin/mysql-fixperms: $(COMMON_MLTON_DEPS) src/mysql-fixperms.mlb
        $(MLTON) -output bin/mysql-fixperms src/mysql-fixperms.mlb
 
+bin/webbw: $(COMMON_MLTON_DEPS) src/stats/webbw.mlb
+       mlton -output bin/webbw src/stats/webbw.mlb
+
 elisp/domtool-tables.el: lib/*.dtl bin/domtool-doc
        bin/domtool-doc -basis -emacs >$@
 
 elisp/domtool-tables.el: lib/*.dtl bin/domtool-doc
        bin/domtool-doc -basis -emacs >$@
 
@@ -178,6 +181,7 @@ install:
        -cp bin/smtplog /usr/local/bin/
        -cp bin/mysql-fixperms /usr/local/bin/
        -cp bin/vmailpasswd /usr/local/bin/
        -cp bin/smtplog /usr/local/bin/
        -cp bin/mysql-fixperms /usr/local/bin/
        -cp bin/vmailpasswd /usr/local/bin/
+       -cp bin/webbw /usr/local/sbin/
        cp src/plugins/domtool-postgres /usr/local/sbin/
        cp src/plugins/domtool-mysql /usr/local/sbin/
        -mkdir -p $(EMACS_DIR)
        cp src/plugins/domtool-postgres /usr/local/sbin/
        cp src/plugins/domtool-mysql /usr/local/sbin/
        -mkdir -p $(EMACS_DIR)
index 7686142..e350300 100644 (file)
@@ -9,3 +9,4 @@ setsa
 smtplog
 mysql-fixperms
 vmailpasswd
 smtplog
 mysql-fixperms
 vmailpasswd
+webbw
index f2b9918..db278ba 100644 (file)
@@ -39,6 +39,18 @@ fun logDirOf version1 user =
                       user,
                       "/apache/log"]
 
                       user,
                       "/apache/log"]
 
+fun realLogDirOf user =
+    if size user < 2 then
+       "/bad_username"
+    else
+       String.concat ["/afs/hcoop.net/user/",
+                      String.substring (user, 0, 1),
+                      "/",
+                      String.substring (user, 0, 2),
+                      "/",
+                      user,
+                      "/.logs/apache"]
+
 fun backupLogDirOf version1 =
     if version1 then
        "/afs/hcoop.net/common/etc/domtool/backup/apache/"
 fun backupLogDirOf version1 =
     if version1 then
        "/afs/hcoop.net/common/etc/domtool/backup/apache/"
index 0e827e7..adc3000 100644 (file)
@@ -21,6 +21,7 @@ signature APACHE_CONFIG = sig
     val        public_html : string
 
     val logDirOf : bool -> string -> string
     val        public_html : string
 
     val logDirOf : bool -> string -> string
+    val realLogDirOf : string -> string
     val backupLogDirOf : bool -> string
 
 end
     val backupLogDirOf : bool -> string
 
 end
index 279ca9d..addea9c 100644 (file)
@@ -4,4 +4,7 @@ val configDir = "/afs/hcoop.net/common/etc/domtool/webalizer/config"
 val outputDir = "/afs/hcoop.net/common/etc/domtool/webalizer/output"
 val backupDir = "/afs/hcoop.net/common/etc/domtool/backup/webalizer"
 
 val outputDir = "/afs/hcoop.net/common/etc/domtool/webalizer/output"
 val backupDir = "/afs/hcoop.net/common/etc/domtool/backup/webalizer"
 
+val defaultOutput = "/afs/hcoop.net/common/etc/domtool/webalizer/output/main/"
+val defaultHost = "www.hcoop.net"
+
 end
 end
index 1ac2812..a59319e 100644 (file)
@@ -4,4 +4,7 @@ signature WEBALIZER_CONFIG = sig
     val outputDir : string
     val backupDir : string
 
     val outputDir : string
     val backupDir : string
 
+    val defaultOutput : string
+    val defaultHost : string
+
 end
 end
index d08433d..07e56e3 100644 (file)
@@ -35,6 +35,9 @@ signature APACHE = sig
     val logDir : {user : string, node : string, vhostId : string} -> string
     (* Where is a vhost's log directory located? *)
 
     val logDir : {user : string, node : string, vhostId : string} -> string
     (* Where is a vhost's log directory located? *)
 
+    val realLogDir : {user : string, node : string, vhostId : string} -> string
+    (* OK, where is it _really_ located?  (Target of log syncing into AFS) *)
+
     val defaults : (string * Ast.typ * (unit -> Ast.exp)) list
     (* Default environment variables *)
 
     val defaults : (string * Ast.typ * (unit -> Ast.exp)) list
     (* Default environment variables *)
 
index c4b684f..7783d0d 100644 (file)
@@ -267,6 +267,13 @@ fun logDir {user, node, vhostId} =
                   "/",
                   vhostId]
 
                   "/",
                   vhostId]
 
+fun realLogDir {user, node, vhostId} =
+    String.concat [Config.Apache.realLogDirOf user,
+                  "/",
+                  node,
+                  "/",
+                  vhostId]
+
 val () = Slave.registerFileHandler (fn fs =>
                                       let
                                           val spl = OS.Path.splitDirFile (#file fs)
 val () = Slave.registerFileHandler (fn fs =>
                                       let
                                           val spl = OS.Path.splitDirFile (#file fs)
index c73ed98..50ca6a9 100644 (file)
@@ -32,7 +32,7 @@ val () = Apache.registerPre
                                                                   name = id ^ ".wbl"}
                                   in
                                       TextIO.output (fd, "LogFile\t");
                                                                   name = id ^ ".wbl"}
                                   in
                                       TextIO.output (fd, "LogFile\t");
-                                      TextIO.output (fd, Apache.logDir {user = user, node = node, vhostId = hostname});
+                                      TextIO.output (fd, Apache.realLogDir {user = user, node = node, vhostId = hostname});
                                       TextIO.output (fd, "/access.log\nOutputDir\t");
                                       TextIO.output (fd, Config.Webalizer.outputDir);
                                       TextIO.output (fd, "/");
                                       TextIO.output (fd, "/access.log\nOutputDir\t");
                                       TextIO.output (fd, Config.Webalizer.outputDir);
                                       TextIO.output (fd, "/");
diff --git a/src/stats/.cvsignore b/src/stats/.cvsignore
new file mode 100644 (file)
index 0000000..6dc8e1a
--- /dev/null
@@ -0,0 +1 @@
+.cm
diff --git a/src/stats/webbw-main.sml b/src/stats/webbw-main.sml
new file mode 100644 (file)
index 0000000..08409b7
--- /dev/null
@@ -0,0 +1 @@
+val _ = Webbw.doit ()
diff --git a/src/stats/webbw.cm b/src/stats/webbw.cm
new file mode 100644 (file)
index 0000000..139b9ed
--- /dev/null
@@ -0,0 +1,11 @@
+Group is
+
+$/basis.cm
+$/smlnj-lib.cm
+
+../configTypes.sml
+../../configDefault/config.sig
+../../configDefault/configDefault.sml
+../../config.sml
+
+webbw.sml
diff --git a/src/stats/webbw.mlb b/src/stats/webbw.mlb
new file mode 100644 (file)
index 0000000..cfc1dc8
--- /dev/null
@@ -0,0 +1,10 @@
+$(SML_LIB)/basis/basis.mlb
+$(SML_LIB)/smlnj-lib/Util/smlnj-lib.mlb
+
+../configTypes.sml
+../../configDefault/config.sig
+../../configDefault/configDefault.sml
+../../config.sml
+
+webbw.sml
+webbw-main.sml
diff --git a/src/stats/webbw.sml b/src/stats/webbw.sml
new file mode 100644 (file)
index 0000000..f180683
--- /dev/null
@@ -0,0 +1,282 @@
+(*
+Domtool 2 (http://hcoop.sf.net/)
+Copyright (C) 2004-2007  Adam Chlipala
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+*)
+
+(* Generation of aggregate per-user/per-vhost web bandwidth statistics *)
+
+structure Webbw = struct
+
+val groupsBase = Config.Webalizer.defaultOutput
+(* Where to look for grouped user statistics *)
+
+fun mots m =
+    let
+       open Date
+    in
+       case m of 
+           Jan => "Jan"
+         | Feb => "Feb"
+         | Mar => "Mar"
+         | Apr => "Apr"
+         | May => "May"
+         | Jun => "Jun"
+         | Jul => "Jul"
+         | Aug => "Aug"
+         | Sep => "Sep"
+         | Oct => "Oct"
+         | Nov => "Nov"
+         | Dec => "Dec"
+    end
+
+fun motn m =
+    let
+       open Date
+    in
+       case m of 
+           Jan => "01"
+         | Feb => "02"
+         | Mar => "03"
+         | Apr => "04"
+         | May => "05"
+         | Jun => "06"
+         | Jul => "07"
+         | Aug => "08"
+         | Sep => "09"
+         | Oct => "10"
+         | Nov => "11"
+         | Dec => "12"
+    end
+
+val monthInc = Time.fromSeconds (LargeInt.fromInt 2592000)
+
+fun doit () =
+    let
+       val now = Date.fromTimeLocal (Time.now ())
+
+       fun backupMonth t =
+           let
+               val now = Date.fromTimeLocal t
+
+               fun backupMonth' t =
+                   let
+                       val d = Date.fromTimeLocal t
+                   in
+                       if Date.month d = Date.month now then
+                           backupMonth' (Time.- (t, monthInc))
+                       else
+                           t
+                   end
+           in
+               backupMonth' t
+           end
+
+       fun backupMulti n =
+           if n = 0 then
+               Time.now ()
+           else
+               backupMonth (backupMulti (n-1))
+
+       val now =
+           case CommandLine.arguments () of
+               [n] =>
+               (case Int.fromString n of
+                    NONE => raise Fail "Invalid integer parameter"
+                  | SOME n =>
+                    if n >= 0 then
+                        Date.fromTimeLocal (backupMulti n)
+                    else
+                        raise Fail "Negative parameter")
+             | _ => now
+
+       val when = mots (Date.month now) ^ " " ^ Int.toString (Date.year now)
+
+       val groups = let
+           val inf = TextIO.openIn (groupsBase ^ Int.toString (Date.year now) ^ motn (Date.month now) ^ ".html")
+           val _ = TextIO.inputLine inf
+           val _ = TextIO.inputLine inf
+           val _ = TextIO.inputLine inf
+           val _ = TextIO.inputLine inf
+
+           fun loop groups =
+               case TextIO.inputLine inf of
+                   NONE => groups
+                 | SOME line =>
+                   case String.tokens Char.isSpace line of
+                       [hits, perc, kb, kbperc, url] =>
+                       if size url >= 4
+                          andalso String.sub (url, 0) = #"/"
+                          andalso String.sub (url, 1) = #"~"
+                          andalso String.sub (url, size url - 2) = #"/"
+                          andalso String.sub (url, size url - 1) = #"*" then
+                           let
+                               val uname = String.substring (url, 2, size url - 4)
+                           in
+                               loop (((uname, ["www.hcoop.net"]),
+                                      valOf (Int.fromString kb)) :: groups)
+                           end
+                       else
+                           loop groups
+                     | _ => groups
+
+           val groups : ((string * string list) * int) list ref = ref (loop [])
+           val _ = TextIO.closeIn inf
+       in
+           groups
+       end handle ex => ref []
+
+       fun addGroup (group, n, d, d') =
+           let
+               val groups' = if List.exists (fn ((x, _), _) => x = group) (!groups) then
+                                 map (fn v as ((gr, ds), n') => if gr = group then ((gr, d ^ ":" ^ d' :: ds), n + n') else v) (!groups)
+                             else
+                                 ((group, [d ^ ":" ^ d']), n) :: (!groups)
+           in
+               groups := groups'
+           end
+
+       fun dodir {node, host} =
+           let
+               val file = Config.Webalizer.outputDir ^ "/" ^ node ^ "/" ^ host ^ "/index.html"
+           in
+               if not (Posix.FileSys.access (file, [])) then
+                   NONE
+               else
+                   let
+                       val inf = TextIO.openIn file
+
+                       fun andWeep () =
+                           let
+                               fun waste n =
+                                   if n <= 0 then
+                                       ()
+                                   else
+                                       (TextIO.inputLine inf;
+                                        waste (n-1))
+
+                               val _ = waste 5
+
+                               val l = valOf (TextIO.inputLine inf)
+                               val num = String.extract (l, 32, NONE)
+                                         
+                               fun getNum i =
+                                   if Char.isDigit (String.sub (num, i)) then
+                                       getNum (i+1)
+                                   else
+                                       valOf (Int.fromString (String.substring (num, 0, i)))
+                           in
+                               getNum 0
+                           end
+
+                       fun readEm () =
+                           case TextIO.inputLine inf of
+                               NONE => NONE
+                             | SOME l =>
+                               if Substring.isSubstring when (Substring.full l) then
+                                   SOME (andWeep ())
+                               else
+                                   readEm ()
+
+                       val ret = readEm ()
+
+                       val group =
+                           if host <> Config.Webalizer.defaultHost then
+                               let
+                                   val tokens = String.tokens (fn ch => ch = #".") host
+                                   val (tokens, ssl) =
+                                       case rev tokens of
+                                           "ssl" :: tokens => (rev tokens, true)
+                                         | _ => (tokens, false)
+                                   val (host, tokens) =
+                                       case tokens of
+                                           host :: tokens => (host, tokens)
+                                         | _ => raise Fail "Host name too short"
+
+                                   val file = Config.resultRoot ^ "/" ^ node ^ "/" ^ String.concatWith "/" (rev tokens)
+                                              ^ "/" ^ host ^ "." ^ String.concatWith "." tokens ^ ".vhost"
+                                   val file = if ssl then
+                                                  file ^ "_ssl"
+                                              else
+                                                  file
+                                                  
+                                   val inf = TextIO.openIn file
+                                   val line = case TextIO.inputLine inf of
+                                                  NONE => raise Fail ("Empty file: " ^ file)
+                                                | SOME line => line
+                                                               
+                                   val user = case String.tokens Char.isSpace line of
+                                                  [_, _, user] => user
+                                                | _ => raise Fail ("Bad vhost file format in " ^ file)
+                               in
+                                   TextIO.closeIn inf;
+                                   SOME user
+                               end
+                           else
+                               NONE
+                   in
+                       (case (ret, group) of
+                            (SOME ret, SOME group) => addGroup (group, ret, node, host)
+                          | _ => ());
+                       TextIO.closeIn inf;
+                       ret
+                   end handle IO.Io {name, function, ...} => NONE
+           end
+
+       val dir = Posix.FileSys.opendir Config.Webalizer.outputDir
+
+       fun loop L =
+           case Posix.FileSys.readdir dir of
+               NONE => L
+             | SOME d =>
+               let
+                   val dir = Posix.FileSys.opendir (OS.Path.joinDirFile {dir = Config.Webalizer.outputDir,
+                                                                         file = d})
+
+                   fun loop' L =
+                       case Posix.FileSys.readdir dir of
+                           NONE => L
+                         | SOME d' =>
+                           case dodir {node = d, host = d'} of
+                               NONE => loop' L
+                             | SOME n => loop' (((d, d'), n) :: L)
+               
+                   val L =
+                       if d = "main" then
+                           L
+                       else
+                           loop' L
+               in
+                   loop L
+               end
+
+       fun sort ls = ListMergeSort.sort (fn ((_, n1), (_, n2)) => n1 > n2) ls
+
+       val doms = loop []
+       val doms = sort doms
+       val groups = sort (!groups)
+
+       val sum = List.foldl (fn ((_, n), s) => s+n) 0 doms
+    in
+       print ("TOTAL: " ^ Int.toString sum ^ "\n\n");
+       List.app (fn ((d, d'), n) => print (d ^ ":" ^ d' ^ ": " ^ Int.toString n ^ "\n")) doms;
+       print "\n";
+       List.app (fn ((d, ds), n) => print (d ^ "[" ^ String.concatWith "," ds ^ "]: " ^ Int.toString n ^ "\n")) groups;
+       Posix.FileSys.closedir dir
+    end
+
+end
+