message format spec fixes, commit # 13
[bpt/emacs.git] / lisp / mh-e / mh-junk.el
1 ;;; mh-junk.el --- Interface to anti-spam measures
2
3 ;; Copyright (C) 2003, 2004 Free Software Foundation, Inc.
4
5 ;; Author: Satyaki Das <satyaki@theforce.stanford.edu>,
6 ;; Bill Wohler <wohler@newt.com>
7 ;; Maintainer: Bill Wohler <wohler@newt.com>
8 ;; Keywords: mail, spam
9
10 ;; This file is part of GNU Emacs.
11
12 ;; GNU Emacs is free software; you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation; either version 2, or (at your option)
15 ;; any later version.
16
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
21
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs; see the file COPYING. If not, write to the
24 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
25 ;; Boston, MA 02110-1301, USA.
26
27 ;;; Commentary:
28
29 ;; Spam handling in MH-E.
30
31 ;;; Change Log:
32
33 ;;; Code:
34
35 (eval-when-compile (require 'mh-acros))
36 (mh-require-cl)
37 (require 'mh-e)
38
39 ;; Interactive functions callable from the folder buffer
40 ;;;###mh-autoload
(defun mh-junk-blacklist (range)
  "Blacklist RANGE as spam.

This command trains the spam program in use (see the `mh-junk-program' option)
with the content of the range (see `mh-interactive-range') and then handles
the message(s) as specified by the `mh-junk-disposition' option: if that
option is nil the messages are deleted, otherwise they are refiled into the
folder it names.

For more information about using your particular spam fighting program, see:

  - `mh-spamassassin-blacklist'
  - `mh-bogofilter-blacklist'
  - `mh-spamprobe-blacklist'"
  (interactive (list (mh-interactive-range "Blacklist")))
  (let ((blacklist-func (nth 1 (assoc mh-junk-choice mh-junk-function-alist))))
    (unless blacklist-func
      (error "Customize `mh-junk-program' appropriately"))
    ;; Turn `mh-junk-disposition' into a folder name: nil means delete,
    ;; a leading `+' is taken verbatim, a leading `@' is relative to the
    ;; current folder, and anything else gets a `+' prepended.
    (let ((dest (cond ((null mh-junk-disposition) nil)
                      ((equal mh-junk-disposition "") "+")
                      ((eq (aref mh-junk-disposition 0) ?+)
                       mh-junk-disposition)
                      ((eq (aref mh-junk-disposition 0) ?@)
                       (concat mh-current-folder "/"
                               (substring mh-junk-disposition 1)))
                      (t (concat "+" mh-junk-disposition)))))
      (mh-iterate-on-range msg range
        ;; Pass the format string straight to `message'; wrapping it in
        ;; `format' would make `message' re-interpret the result as a
        ;; format string.
        (message "Blacklisting message %d..." msg)
        ;; Calling through the symbol lets autoloaded functions work.
        (funcall blacklist-func msg)
        (message "Blacklisting message %d...done" msg)
        ;; Record the message as seen, avoiding duplicates.
        (unless (memq msg mh-seen-list)
          (setq mh-seen-list (cons msg mh-seen-list)))
        (if dest
            (mh-refile-a-msg nil (intern dest))
          (mh-delete-a-msg nil)))
      (mh-next-msg))))
75
76 ;;;###mh-autoload
(defun mh-junk-whitelist (range)
  "Whitelist RANGE as ham.

This command reclassifies a range of messages (see `mh-interactive-range') as
ham if it were incorrectly classified as spam. It then refiles the message
into the `+inbox' folder (the folder named by `mh-inbox').

The `mh-junk-program' option specifies the spam program in use."
  (interactive (list (mh-interactive-range "Whitelist")))
  (let ((whitelist-func (nth 2 (assoc mh-junk-choice mh-junk-function-alist))))
    (unless whitelist-func
      (error "Customize `mh-junk-program' appropriately"))
    (mh-iterate-on-range msg range
      ;; Pass the format string straight to `message'; wrapping it in
      ;; `format' would make `message' re-interpret the result as a
      ;; format string.
      (message "Whitelisting message %d..." msg)
      ;; Calling through the symbol lets autoloaded functions work.
      (funcall whitelist-func msg)
      (message "Whitelisting message %d...done" msg)
      (mh-refile-a-msg nil (intern mh-inbox)))
    (mh-next-msg)))
95
96 \f
97
98 ;; Spamassassin Interface
99
(defvar mh-spamassassin-executable
  (executable-find "spamassassin")
  "File name of the `spamassassin' program, or nil if it was not found.
The value is computed with `executable-find' when this file is loaded.")

(defvar mh-sa-learn-executable
  (executable-find "sa-learn")
  "File name of the `sa-learn' program, or nil if it was not found.
The value is computed with `executable-find' when this file is loaded.")
102
(defun mh-spamassassin-blacklist (msg)
  "Blacklist MSG with SpamAssassin.

SpamAssassin is one of the more popular spam filtering programs. Get it from
your local distribution or from http://spamassassin.org/.

To use SpamAssassin, add the following recipes to `.procmailrc':

    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with SpamAssassin.
    :0fw
    | spamc

    # Anything with a spam level of 10 or more is junked immediately.
    :0:
    * ^X-Spam-Level: ..........
    /dev/null

    :0:
    * ^X-Spam-Status: Yes
    spam/.

If you don't use `spamc', use `spamassassin -P -a'.

Note that one of the recipes above throws away messages with a score greater
than or equal to 10. Here's how you can determine a value that works best for
you.

First, run `spamassassin -t' on every mail message in your archive and use
Gnumeric to verify that the average plus the standard deviation of good mail
is under 5, the SpamAssassin default for \"spam\".

Using Gnumeric, sort the messages by score and view the messages with the
highest score. Determine the score which encompasses all of your interesting
messages and add a couple of points to be conservative. Add that many dots to
the `X-Spam-Level:' header field above to send messages with that score down
the drain.

In the example above, messages with a score of 5-9 are set aside in the
`+spam' folder for later review. The major weakness of rules-based filters is
a plethora of false positives so it is worthwhile to check.

If SpamAssassin classifies a message incorrectly, or is unsure, you can use
the MH-E commands \\[mh-junk-blacklist] and \\[mh-junk-whitelist].

The \\[mh-junk-blacklist] command adds a `blacklist_from' entry to
`~/.spamassassin/user_prefs', deletes the message, and sends the message to the
Razor, so that others might not see this spam. If the `sa-learn' command is
available, the message is also recategorized as spam.

The \\[mh-junk-whitelist] command adds a `whitelist_from' rule to the
`~/.spamassassin/user_prefs' file. If the `sa-learn' command is available, the
message is also recategorized as ham.

Over time, you'll observe that the same host or domain occurs repeatedly in
the `blacklist_from' entries, so you might think that you could avoid future
spam by blacklisting all mail from a particular domain. The utility function
`mh-spamassassin-identify-spammers' helps you do precisely that. This function
displays a frequency count of the hosts and domains in the `blacklist_from'
entries from the last blank line in `~/.spamassassin/user_prefs' to the end of
the file. This information can be used so that you can replace multiple
`blacklist_from' entries with a single wildcard entry such as:

    blacklist_from *@*amazingoffersdirect2u.com

In versions of SpamAssassin (2.50 and on) that support a Bayesian classifier,
\\[mh-junk-blacklist] uses the `sa-learn' program to recategorize the message
as spam. Neither MH-E, nor SpamAssassin, rebuilds the database after adding
words, so you will need to run `sa-learn --rebuild' periodically. This can be
done by adding the following to your crontab:

    0 * * * * sa-learn --rebuild > /dev/null 2>&1"
  (unless mh-spamassassin-executable
    (error "Unable to find the spamassassin executable"))
  (let ((current-folder mh-current-folder)
        (msg-file (mh-msg-filename msg mh-current-folder))
        sender)
    (save-excursion
      ;; Report the message; the program's output goes to the MH-E log.
      (message "Reporting message %d..." msg)
      (mh-truncate-log-buffer)
      (call-process mh-spamassassin-executable msg-file mh-log-buffer nil
                    ;;"--report" "--remove-from-whitelist"
                    "-r" "-R")          ; spamassassin V2.20
      (when mh-sa-learn-executable
        (message "Recategorizing this message as spam...")
        (call-process mh-sa-learn-executable msg-file mh-log-buffer nil
                      "--single" "--spam" "--local" "--no-rebuild"))
      (message "Blacklisting message %d..." msg)
      (set-buffer (get-buffer-create mh-temp-buffer))
      (erase-buffer)
      ;; The sender is parsed from the scan output below, so the output
      ;; must be inserted synchronously into the current (temp) buffer:
      ;; the destination has to be t rather than `mh-junk-background'.
      (call-process (expand-file-name mh-scan-prog mh-progs)
                    nil t nil
                    (format "%s" msg) current-folder
                    "-format" "%<(mymbox{from})%|%(addr{from})%>")
      (goto-char (point-min))
      ;; The scan format prints nothing when the message is from the
      ;; user's own mailbox, so an empty result means "do not blacklist".
      (if (search-forward-regexp "^\\(.+\\)$" nil t)
          (progn
            (setq sender (match-string 0))
            (mh-spamassassin-add-rule "blacklist_from" sender)
            (message "Blacklisting message %d...done" msg))
        (message "Blacklisting message %d...not done (from my address)"
                 msg)))))
205
(defun mh-spamassassin-whitelist (msg)
  "Whitelist MSG with SpamAssassin.

The \\[mh-junk-whitelist] command adds a `whitelist_from' rule to the
`~/.spamassassin/user_prefs' file. If the `sa-learn' command is available, the
message is also recategorized as ham.

Before that, the SpamAssassin markup is stripped from the message and the
cleaned text is written back over the message file.

See `mh-spamassassin-blacklist' for more information."
  (unless mh-spamassassin-executable
    (error "Unable to find the spamassassin executable"))
  (let ((msg-file (mh-msg-filename msg mh-current-folder))
        (show-buffer (get-buffer mh-show-buffer))
        from)
    (save-excursion
      (set-buffer (get-buffer-create mh-temp-buffer))
      (erase-buffer)
      (message "Removing spamassassin markup from message...")
      (call-process mh-spamassassin-executable msg-file mh-temp-buffer nil
                    ;; "--remove-markup"
                    "-d")               ; spamassassin V2.20
      ;; Drop any show buffer before the message file is rewritten.
      (if show-buffer
          (kill-buffer show-buffer))
      (write-file msg-file)
      (when mh-sa-learn-executable
        (message "Recategorizing this message as ham...")
        ;; Each option must be its own argument; "--local --no-rebuild"
        ;; as a single string reaches sa-learn as one unknown option.
        (call-process mh-sa-learn-executable msg-file mh-temp-buffer nil
                      "--single" "--ham" "--local" "--no-rebuild"))
      (message "Whitelisting message %d..." msg)
      (setq from
            (car (mh-funcall-if-exists
                  ietf-drums-parse-address (mh-get-header-field "From:"))))
      (kill-buffer nil)
      (unless (or (null from) (equal from ""))
        (mh-spamassassin-add-rule "whitelist_from" from))
      (message "Whitelisting message %d...done" msg))))
241
(defun mh-spamassassin-add-rule (rule body)
  "Add a new rule to `~/.spamassassin/user_prefs'.
The name of the rule is RULE and its body is BODY.

The line \"RULE<TAB>BODY\" is appended and the file saved unless an identical
line (compared case-insensitively) is already present. If the file was not
being visited before the call, its buffer is killed afterwards."
  (save-window-excursion
    (let* ((line (format "%s\t%s\n" rule body))
           (case-fold-search t)
           (file (expand-file-name "~/.spamassassin/user_prefs"))
           (buffer-exists (find-buffer-visiting file)))
      (find-file file)
      ;; An already-open buffer may have point anywhere; search the whole
      ;; file so existing rules are not appended a second time.
      (goto-char (point-min))
      (unless (or
               ;; A rule on the very first line has no preceding newline.
               (looking-at (regexp-quote line))
               (search-forward (format "\n%s" line) nil t))
        (goto-char (point-max))
        (insert (if (bolp) "" "\n") line)
        (save-buffer))
      (if (not buffer-exists)
          (kill-buffer nil)))))
258
(defun mh-spamassassin-identify-spammers ()
  "Identify spammers who are repeat offenders.

This function displays a frequency count of the hosts and domains in the
`blacklist_from' entries from the last blank line in
`~/.spamassassin/user_prefs' to the end of the file. This information can be
used so that you can replace multiple `blacklist_from' entries with a single
wildcard entry such as:

    blacklist_from *@*amazingoffersdirect2u.com

If the file contains no blank line, the whole file is considered. Only
names that occur more than twice are listed, most frequent first, in the
buffer `*MH-E Spammer Frequencies*'."
  (interactive)
  (let* ((file (expand-file-name "~/.spamassassin/user_prefs"))
         (domains (make-hash-table :test 'equal)))
    (find-file file)
    ;; Only consider entries between last blank line and end of file.
    (goto-char (1- (point-max)))
    ;; Without a blank line fall back to the start of the file instead
    ;; of signalling a search error.
    (unless (search-backward-regexp "^$" nil t)
      (goto-char (point-min)))
    ;; Perform frequency count.
    (save-excursion
      (while (search-forward-regexp "^blacklist_from\\s-*\\(.*\\)@\\(.*\\)$"
                                    nil t)
        ;; Remove top-level-domain from hostname, then add counts for
        ;; each remaining host and domain part.
        (dolist (part (cdr (reverse (split-string (match-string 2) "\\."))))
          (puthash part (1+ (gethash part domains 0)) domains))))

    ;; Output
    (delete-other-windows)
    (pop-to-buffer (get-buffer-create "*MH-E Spammer Frequencies*"))
    (erase-buffer)
    (maphash (lambda (key value)
               (if (> value 2)
                   (insert (format "%s %s\n" key value))))
             domains)
    (sort-numeric-fields 2 (point-min) (point-max))
    (reverse-region (point-min) (point-max))
    (goto-char (point-min))))
301
302 \f
303
304 ;; Bogofilter Interface
305
(defvar mh-bogofilter-executable
  (executable-find "bogofilter")
  "File name of the `bogofilter' program, or nil if it was not found.
The value is computed with `executable-find' when this file is loaded.")
307
(defun mh-bogofilter-blacklist (msg)
  "Blacklist MSG with Bogofilter.

Bogofilter is a Bayesian spam filtering program. Get it from your local
distribution or from http://bogofilter.sourceforge.net/.

Bogofilter is taught by running:

    bogofilter -n < good-message

on every good message, and

    bogofilter -s < spam-message

on every spam message. This is called a full training; three other
training methods are described in the FAQ that is distributed with bogofilter.
Note that most Bayesian filters need 1000 to 5000 of each type of message to
start doing a good job.

To use Bogofilter, add the following recipes to `.procmailrc':

    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with Bogofilter.
    :0fw
    | bogofilter -3 -e -p

    :0:
    * ^X-Bogosity: Yes, tests=bogofilter
    spam/.

    :0:
    * ^X-Bogosity: Unsure, tests=bogofilter
    spam/unsure/.

If Bogofilter classifies a message incorrectly, or is unsure, you can use the
MH-E commands \\[mh-junk-blacklist] and \\[mh-junk-whitelist] to update
Bogofilter's training.

The \"Bogofilter FAQ\" suggests that you run the following
occasionally to shrink the database:

    bogoutil -d wordlist.db | bogoutil -l wordlist.db.new
    mv wordlist.db wordlist.db.prv
    mv wordlist.db.new wordlist.db

The \"Bogofilter tuning HOWTO\" describes how you can fine-tune Bogofilter."
  (or mh-bogofilter-executable
      (error "Unable to find the bogofilter executable"))
  ;; Feed the message file to `bogofilter -s' on standard input.
  (call-process mh-bogofilter-executable
                (mh-msg-filename msg mh-current-folder)
                mh-junk-background
                nil
                "-s"))
360
(defun mh-bogofilter-whitelist (msg)
  "Whitelist MSG with Bogofilter.

The message file is fed to `bogofilter -n' on standard input.

See `mh-bogofilter-blacklist' for more information."
  (or mh-bogofilter-executable
      (error "Unable to find the bogofilter executable"))
  (call-process mh-bogofilter-executable
                (mh-msg-filename msg mh-current-folder)
                mh-junk-background
                nil
                "-n"))
370
371 \f
372
373 ;; Spamprobe Interface
374
(defvar mh-spamprobe-executable
  (executable-find "spamprobe")
  "File name of the `spamprobe' program, or nil if it was not found.
The value is computed with `executable-find' when this file is loaded.")
376
(defun mh-spamprobe-blacklist (msg)
  "Blacklist MSG with SpamProbe.

SpamProbe is a Bayesian spam filtering program. Get it from your local
distribution or from http://spamprobe.sourceforge.net.

To use SpamProbe, add the following recipes to `.procmailrc':

    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with SpamProbe.
    :0
    SCORE=| spamprobe receive

    :0 wf
    | formail -I \"X-SpamProbe: $SCORE\"

    :0:
    *^X-SpamProbe: SPAM
    spam/.

If SpamProbe classifies a message incorrectly, you can use the MH-E commands
\\[mh-junk-blacklist] and \\[mh-junk-whitelist] to update SpamProbe's
training."
  (or mh-spamprobe-executable
      (error "Unable to find the spamprobe executable"))
  ;; Feed the message file to `spamprobe spam' on standard input.
  (call-process mh-spamprobe-executable
                (mh-msg-filename msg mh-current-folder)
                mh-junk-background
                nil
                "spam"))
406
(defun mh-spamprobe-whitelist (msg)
  "Whitelist MSG with SpamProbe.

The message file is fed to `spamprobe good' on standard input.

See `mh-spamprobe-blacklist' for more information."
  (or mh-spamprobe-executable
      (error "Unable to find the spamprobe executable"))
  (call-process mh-spamprobe-executable
                (mh-msg-filename msg mh-current-folder)
                mh-junk-background
                nil
                "good"))
416
417 (provide 'mh-junk)
418
419 ;;; Local Variables:
420 ;;; indent-tabs-mode: nil
421 ;;; sentence-end-double-space: nil
422 ;;; End:
423
424 ;;; arch-tag: 603335f1-77ff-4306-8828-5d3dad51abe1
425 ;;; mh-junk.el ends here