message format spec fixes, commit # 13
[bpt/emacs.git] / lisp / mh-e / mh-junk.el
CommitLineData
924df208
BW
1;;; mh-junk.el --- Interface to anti-spam measures
2
e495eaec 3;; Copyright (C) 2003, 2004 Free Software Foundation, Inc.
924df208
BW
4
5;; Author: Satyaki Das <satyaki@theforce.stanford.edu>,
6;; Bill Wohler <wohler@newt.com>
7;; Maintainer: Bill Wohler <wohler@newt.com>
8;; Keywords: mail, spam
9
10;; This file is part of GNU Emacs.
11
12;; GNU Emacs is free software; you can redistribute it and/or modify
13;; it under the terms of the GNU General Public License as published by
14;; the Free Software Foundation; either version 2, or (at your option)
15;; any later version.
16
17;; GNU Emacs is distributed in the hope that it will be useful,
18;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20;; GNU General Public License for more details.
21
22;; You should have received a copy of the GNU General Public License
23;; along with GNU Emacs; see the file COPYING. If not, write to the
3a35cf56
LK
24;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
25;; Boston, MA 02110-1301, USA.
924df208
BW
26
27;;; Commentary:
28
29;; Spam handling in MH-E.
30
31;;; Change Log:
32
33;;; Code:
34
f0d73c14
BW
35(eval-when-compile (require 'mh-acros))
36(mh-require-cl)
924df208
BW
37(require 'mh-e)
38
39;; Interactive functions callable from the folder buffer
40;;;###mh-autoload
a66894d8
BW
(defun mh-junk-blacklist (range)
  "Blacklist RANGE as spam.

This command trains the spam program in use (see the `mh-junk-program' option)
with the content of the range (see `mh-interactive-range') and then handles
the message(s) as specified by the `mh-junk-disposition' option.

If `mh-junk-disposition' is nil, the messages are deleted.  Otherwise they
are refiled: an empty string means `+', a value starting with `+' is used
as is, a value starting with `@' is taken relative to the current folder,
and any other value is prefixed with `+'.

For more information about using your particular spam fighting program, see:

  - `mh-spamassassin-blacklist'
  - `mh-bogofilter-blacklist'
  - `mh-spamprobe-blacklist'"
  (interactive (list (mh-interactive-range "Blacklist")))
  (let ((blacklist-func (nth 1 (assoc mh-junk-choice mh-junk-function-alist))))
    (unless blacklist-func
      (error "Customize `mh-junk-program' appropriately"))
    ;; Compute the destination folder once; nil means "delete".
    (let ((dest (cond ((null mh-junk-disposition) nil)
                      ((equal mh-junk-disposition "") "+")
                      ((eq (aref mh-junk-disposition 0) ?+)
                       mh-junk-disposition)
                      ((eq (aref mh-junk-disposition 0) ?@)
                       (concat mh-current-folder "/"
                               (substring mh-junk-disposition 1)))
                      (t (concat "+" mh-junk-disposition)))))
      (mh-iterate-on-range msg range
        ;; Hand the format string and its argument straight to `message';
        ;; wrapping them in `format' first would make `message' re-interpret
        ;; the result as a format control string.
        (message "Blacklisting message %d..." msg)
        ;; Calling through the symbol lets `funcall' resolve autoloads.
        (funcall blacklist-func msg)
        (message "Blacklisting message %d...done" msg)
        ;; Record the message in `mh-seen-list' (once).
        (unless (memq msg mh-seen-list)
          (setq mh-seen-list (cons msg mh-seen-list)))
        (if dest
            (mh-refile-a-msg nil (intern dest))
          (mh-delete-a-msg nil)))
      (mh-next-msg))))
75
76;;;###mh-autoload
(defun mh-junk-whitelist (range)
  "Whitelist RANGE as ham.

This command reclassifies a range of messages (see `mh-interactive-range') as
ham if they were incorrectly classified as spam.  It then refiles the messages
into the folder named by `mh-inbox'.

The `mh-junk-program' option specifies the spam program in use."
  (interactive (list (mh-interactive-range "Whitelist")))
  (let ((whitelist-func (nth 2 (assoc mh-junk-choice mh-junk-function-alist))))
    (unless whitelist-func
      (error "Customize `mh-junk-program' appropriately"))
    (mh-iterate-on-range msg range
      ;; Give `message' the format string directly instead of a
      ;; pre-formatted string, which it would re-scan for `%' specs.
      (message "Whitelisting message %d..." msg)
      ;; Calling through the symbol lets `funcall' resolve autoloads.
      (funcall whitelist-func msg)
      (message "Whitelisting message %d...done" msg)
      (mh-refile-a-msg nil (intern mh-inbox)))
    (mh-next-msg)))
95
96\f
97
f0d73c14 98;; Spamassassin Interface
924df208 99
f0d73c14
BW
(defvar mh-spamassassin-executable (executable-find "spamassassin")
  "File name of the `spamassassin' program, or nil if it was not found.
The value is computed with `executable-find' when this file is loaded.")

(defvar mh-sa-learn-executable (executable-find "sa-learn")
  "File name of the `sa-learn' program, or nil if it was not found.
The value is computed with `executable-find' when this file is loaded.")
924df208 102
f0d73c14
BW
(defun mh-spamassassin-blacklist (msg)
  "Blacklist MSG with SpamAssassin.

SpamAssassin is one of the more popular spam filtering programs. Get it from
your local distribution or from http://spamassassin.org/.

To use SpamAssassin, add the following recipes to `.procmailrc':

    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with SpamAssassin.
    :0fw
    | spamc

    # Anything with a spam level of 10 or more is junked immediately.
    :0:
    * ^X-Spam-Level: ..........
    /dev/null

    :0:
    * ^X-Spam-Status: Yes
    spam/.

If you don't use `spamc', use `spamassassin -P -a'.

Note that one of the recipes above throws away messages with a score greater
than or equal to 10. Here's how you can determine a value that works best for
you.

First, run `spamassassin -t' on every mail message in your archive and use
Gnumeric to verify that the average plus the standard deviation of good mail
is under 5, the SpamAssassin default for \"spam\".

Using Gnumeric, sort the messages by score and view the messages with the
highest score. Determine the score which encompasses all of your interesting
messages and add a couple of points to be conservative. Add that many dots to
the `X-Spam-Level:' header field above to send messages with that score down
the drain.

In the example above, messages with a score of 5-9 are set aside in the
`+spam' folder for later review. The major weakness of rules-based filters is
a plethora of false positives so it is worthwhile to check.

If SpamAssassin classifies a message incorrectly, or is unsure, you can use
the MH-E commands \\[mh-junk-blacklist] and \\[mh-junk-whitelist].

The \\[mh-junk-blacklist] command adds a `blacklist_from' entry to
`~/.spamassassin/user_prefs', deletes the message, and sends the message to the
Razor, so that others might not see this spam. If the `sa-learn' command is
available, the message is also recategorized as spam.

The \\[mh-junk-whitelist] command adds a `whitelist_from' rule to the
`~/.spamassassin/user_prefs' file. If the `sa-learn' command is available, the
message is also recategorized as ham.

Over time, you'll observe that the same host or domain occurs repeatedly in
the `blacklist_from' entries, so you might think that you could avoid future
spam by blacklisting all mail from a particular domain. The utility function
`mh-spamassassin-identify-spammers' helps you do precisely that. This function
displays a frequency count of the hosts and domains in the `blacklist_from'
entries from the last blank line in `~/.spamassassin/user_prefs' to the end of
the file. This information can be used so that you can replace multiple
`blacklist_from' entries with a single wildcard entry such as:

    blacklist_from *@*amazingoffersdirect2u.com

In versions of SpamAssassin (2.50 and on) that support a Bayesian classifier,
\\[mh-junk-blacklist] uses the `sa-learn' program to recategorize the message
as spam. Neither MH-E, nor SpamAssassin, rebuilds the database after adding
words, so you will need to run `sa-learn --rebuild' periodically. This can be
done by adding the following to your crontab:

    0 * * * * sa-learn --rebuild > /dev/null 2>&1"
  (unless mh-spamassassin-executable
    (error "Unable to find the spamassassin executable"))
  ;; Capture the folder before switching buffers below (presumably
  ;; `mh-current-folder' is buffer-local -- confirm against mh-e.el).
  (let ((current-folder mh-current-folder)
        (msg-file (mh-msg-filename msg mh-current-folder))
        sender)
    (save-excursion
      (message "Reporting message %d..." msg)
      (mh-truncate-log-buffer)
      ;; The message file is supplied on standard input.
      (call-process mh-spamassassin-executable msg-file mh-log-buffer nil
                    ;;"--report" "--remove-from-whitelist"
                    "-r" "-R")          ; spamassassin V2.20
      (when mh-sa-learn-executable
        (message "Recategorizing this message as spam...")
        (call-process mh-sa-learn-executable msg-file mh-log-buffer nil
                      "--single" "--spam" "--local" "--no-rebuild"))
      (message "Blacklisting message %d..." msg)
      (set-buffer (get-buffer-create mh-temp-buffer))
      (erase-buffer)
      ;; The scan output must land in the current (temp) buffer, because
      ;; the sender is parsed from it right below.  Using
      ;; `mh-junk-background' as the destination would discard the output
      ;; (or not wait for it), leaving nothing to search.
      (call-process (expand-file-name mh-scan-prog mh-progs)
                    nil t nil
                    (format "%s" msg) current-folder
                    "-format" "%<(mymbox{from})%|%(addr{from})%>")
      (goto-char (point-min))
      ;; The scan format prints nothing when the sender is the user's own
      ;; address, so an empty buffer means "do not blacklist".
      (if (search-forward-regexp "^\\(.+\\)$" nil t)
          (progn
            (setq sender (match-string 0))
            (mh-spamassassin-add-rule "blacklist_from" sender)
            (message "Blacklisting message %d...done" msg))
        (message "Blacklisting message %d...not done (from my address)"
                 msg)))))
924df208
BW
205
(defun mh-spamassassin-whitelist (msg)
  "Whitelist MSG with SpamAssassin.

The \\[mh-junk-whitelist] command adds a `whitelist_from' rule to the
`~/.spamassassin/user_prefs' file. If the `sa-learn' command is available, the
message is also recategorized as ham.

The message file is first rewritten with the output of `spamassassin -d'.

See `mh-spamassassin-blacklist' for more information."
  (unless mh-spamassassin-executable
    (error "Unable to find the spamassassin executable"))
  (let ((msg-file (mh-msg-filename msg mh-current-folder))
        (show-buffer (get-buffer mh-show-buffer))
        from)
    (save-excursion
      (set-buffer (get-buffer-create mh-temp-buffer))
      (erase-buffer)
      (message "Removing spamassassin markup from message...")
      (call-process mh-spamassassin-executable msg-file mh-temp-buffer nil
                    ;; "--remove-markup"
                    "-d")               ; spamassassin V2.20
      ;; Drop any show buffer before the message file is overwritten.
      (if show-buffer
          (kill-buffer show-buffer))
      (write-file msg-file)
      (when mh-sa-learn-executable
        (message "Recategorizing this message as ham...")
        ;; Each option must be its own argument; "--local --no-rebuild"
        ;; as a single string reaches sa-learn as one unknown option.
        (call-process mh-sa-learn-executable msg-file mh-temp-buffer nil
                      "--single" "--ham" "--local" "--no-rebuild"))
      (message "Whitelisting message %d..." msg)
      (setq from
            (car (mh-funcall-if-exists
                  ietf-drums-parse-address (mh-get-header-field "From:"))))
      ;; Kill the current buffer, which now visits the message file.
      (kill-buffer nil)
      (unless (or (null from) (equal from ""))
        (mh-spamassassin-add-rule "whitelist_from" from))
      (message "Whitelisting message %d...done" msg))))
924df208
BW
241
(defun mh-spamassassin-add-rule (rule body)
  "Add a new rule to `~/.spamassassin/user_prefs'.
The name of the rule is RULE and its body is BODY.

The rule is written as RULE, a tab, and BODY on a line of its own.  It is
appended, and the file saved, only if the same line (compared ignoring
case) is not already present.  If the file was not being visited before
this call, its buffer is killed afterwards."
  (save-window-excursion
    (let* ((line (format "%s\t%s\n" rule body))
           (case-fold-search t)
           (file (expand-file-name "~/.spamassassin/user_prefs"))
           (buffer-exists (find-buffer-visiting file)))
      (find-file file)
      ;; Search the whole buffer: when the file is already being visited,
      ;; point may be anywhere, and a search from point would miss rules
      ;; located before it and add a duplicate.
      (goto-char (point-min))
      ;; Anchoring with "^" also matches a rule on the very first line,
      ;; which a search for a leading newline cannot.
      (unless (re-search-forward (concat "^" (regexp-quote line)) nil t)
        (goto-char (point-max))
        (insert (if (bolp) "" "\n") line)
        (save-buffer))
      (unless buffer-exists
        (kill-buffer nil)))))
258
(defun mh-spamassassin-identify-spammers ()
  "Identify spammers who are repeat offenders.

This function displays a frequency count of the hosts and domains in the
`blacklist_from' entries from the last blank line in
`~/.spamassassin/user_prefs' to the end of the file. This information can be
used so that you can replace multiple `blacklist_from' entries with a single
wildcard entry such as:

    blacklist_from *@*amazingoffersdirect2u.com

If the file contains no blank line, all of its entries are counted.  Only
names that occur more than twice are listed, most frequent first, in the
buffer `*MH-E Spammer Frequencies*'."
  (interactive)
  (let ((file (expand-file-name "~/.spamassassin/user_prefs"))
        (domains (make-hash-table :test 'equal)))
    (find-file file)
    ;; Only consider entries between last blank line and end of file.
    ;; Start one character before the end so that the empty position after
    ;; a trailing newline is not itself taken for the blank line.
    (goto-char (max (point-min) (1- (point-max))))
    ;; Without a blank line, fall back to scanning the whole file instead
    ;; of signaling a search error.
    (unless (search-backward-regexp "^$" nil t)
      (goto-char (point-min)))
    ;; Perform frequency count.
    (save-excursion
      (while (search-forward-regexp "^blacklist_from\\s-*\\(.*\\)@\\(.*\\)$"
                                    nil t)
        ;; Split the host on dots, drop the top-level domain (the last
        ;; component), and count every remaining component.
        (dolist (part (cdr (reverse (split-string (match-string 2) "\\."))))
          (puthash part (1+ (gethash part domains 0)) domains))))

    ;; Output
    (delete-other-windows)
    (pop-to-buffer (get-buffer-create "*MH-E Spammer Frequencies*"))
    (erase-buffer)
    (maphash (lambda (key value)
               (if (> value 2)
                   (insert (format "%s %s\n" key value))))
             domains)
    ;; Sort ascending on the count column, then flip for descending order.
    (sort-numeric-fields 2 (point-min) (point-max))
    (reverse-region (point-min) (point-max))
    (goto-char (point-min))))
301
f0d73c14
BW
302\f
303
304;; Bogofilter Interface
305
(defvar mh-bogofilter-executable (executable-find "bogofilter")
  "File name of the `bogofilter' program, or nil if it was not found.
The value is computed with `executable-find' when this file is loaded.")
307
(defun mh-bogofilter-blacklist (msg)
  "Blacklist MSG with Bogofilter.

Bogofilter is a Bayesian spam filtering program. Get it from your local
distribution or from http://bogofilter.sourceforge.net/.

Bogofilter is taught by running:

    bogofilter -n < good-message

on every good message, and

    bogofilter -s < spam-message

on every spam message. This is called a full training; three other
training methods are described in the FAQ that is distributed with bogofilter.
Note that most Bayesian filters need 1000 to 5000 of each type of message to
start doing a good job.

To use Bogofilter, add the following recipes to `.procmailrc':

    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with Bogofilter.
    :0fw
    | bogofilter -3 -e -p

    :0:
    * ^X-Bogosity: Yes, tests=bogofilter
    spam/.

    :0:
    * ^X-Bogosity: Unsure, tests=bogofilter
    spam/unsure/.

If Bogofilter classifies a message incorrectly, or is unsure, you can use the
MH-E commands \\[mh-junk-blacklist] and \\[mh-junk-whitelist] to update
Bogofilter's training.

The \"Bogofilter FAQ\" suggests that you run the following
occasionally to shrink the database:

    bogoutil -d wordlist.db | bogoutil -l wordlist.db.new
    mv wordlist.db wordlist.db.prv
    mv wordlist.db.new wordlist.db

The \"Bogofilter tuning HOWTO\" describes how you can fine-tune Bogofilter."
  (or mh-bogofilter-executable
      (error "Unable to find the bogofilter executable"))
  ;; The message file is fed to bogofilter on standard input; "-s" is the
  ;; spam-training switch shown in the docstring above.
  (call-process mh-bogofilter-executable
                (mh-msg-filename msg mh-current-folder)
                mh-junk-background
                nil
                "-s"))
360
(defun mh-bogofilter-whitelist (msg)
  "Whitelist MSG with Bogofilter.

See `mh-bogofilter-blacklist' for more information."
  (or mh-bogofilter-executable
      (error "Unable to find the bogofilter executable"))
  ;; The message file is fed to bogofilter on standard input with the
  ;; "-n" switch.
  (call-process mh-bogofilter-executable
                (mh-msg-filename msg mh-current-folder)
                mh-junk-background
                nil
                "-n"))
370
371\f
372
373;; Spamprobe Interface
374
(defvar mh-spamprobe-executable (executable-find "spamprobe")
  "File name of the `spamprobe' program, or nil if it was not found.
The value is computed with `executable-find' when this file is loaded.")
376
(defun mh-spamprobe-blacklist (msg)
  "Blacklist MSG with SpamProbe.

SpamProbe is a Bayesian spam filtering program. Get it from your local
distribution or from http://spamprobe.sourceforge.net.

To use SpamProbe, add the following recipes to `.procmailrc':

    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with SpamProbe.
    :0
    SCORE=| spamprobe receive

    :0 wf
    | formail -I \"X-SpamProbe: $SCORE\"

    :0:
    *^X-SpamProbe: SPAM
    spam/.

If SpamProbe classifies a message incorrectly, you can use the MH-E commands
\\[mh-junk-blacklist] and \\[mh-junk-whitelist] to update SpamProbe's
training."
  (or mh-spamprobe-executable
      (error "Unable to find the spamprobe executable"))
  ;; The message file is fed to spamprobe on standard input with the
  ;; "spam" command.
  (call-process mh-spamprobe-executable
                (mh-msg-filename msg mh-current-folder)
                mh-junk-background
                nil
                "spam"))
406
(defun mh-spamprobe-whitelist (msg)
  "Whitelist MSG with SpamProbe.

See `mh-spamprobe-blacklist' for more information."
  (or mh-spamprobe-executable
      (error "Unable to find the spamprobe executable"))
  ;; The message file is fed to spamprobe on standard input with the
  ;; "good" command.
  (call-process mh-spamprobe-executable
                (mh-msg-filename msg mh-current-folder)
                mh-junk-background
                nil
                "good"))
416
924df208
BW
417(provide 'mh-junk)
418
419;;; Local Variables:
420;;; indent-tabs-mode: nil
421;;; sentence-end-double-space: nil
422;;; End:
423
ab5796a9 424;;; arch-tag: 603335f1-77ff-4306-8828-5d3dad51abe1
924df208 425;;; mh-junk.el ends here