Merge from emacs-23
[bpt/emacs.git] / lisp / mh-e / mh-junk.el
CommitLineData
dda00b2c 1;;; mh-junk.el --- MH-E interface to anti-spam measures
924df208 2
5df4f04c 3;; Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
eb2ffb18 4;; Free Software Foundation, Inc.
924df208
BW
5
6;; Author: Satyaki Das <satyaki@theforce.stanford.edu>,
7;; Bill Wohler <wohler@newt.com>
8;; Maintainer: Bill Wohler <wohler@newt.com>
9;; Keywords: mail, spam
10
11;; This file is part of GNU Emacs.
12
5e809f55 13;; GNU Emacs is free software: you can redistribute it and/or modify
924df208 14;; it under the terms of the GNU General Public License as published by
5e809f55
GM
15;; the Free Software Foundation, either version 3 of the License, or
16;; (at your option) any later version.
924df208
BW
17
18;; GNU Emacs is distributed in the hope that it will be useful,
19;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21;; GNU General Public License for more details.
22
23;; You should have received a copy of the GNU General Public License
5e809f55 24;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
924df208
BW
25
26;;; Commentary:
27
28;; Spam handling in MH-E.
29
30;;; Change Log:
31
32;;; Code:
33
34(require 'mh-e)
dda00b2c
BW
35(require 'mh-scan)
36(mh-require-cl)
924df208 37
924df208 38;;;###mh-autoload
a66894d8
BW
(defun mh-junk-blacklist (range)
  "Blacklist RANGE as spam.

This command trains the spam program in use (see the option
`mh-junk-program') with the content of RANGE and then handles the
message(s) as specified by the option `mh-junk-disposition'.

Check the documentation of `mh-interactive-range' to see how RANGE is
read in interactive use.

For more information about using your particular spam fighting
program, see:

  - `mh-spamassassin-blacklist'
  - `mh-bogofilter-blacklist'
  - `mh-spamprobe-blacklist'"
  (interactive (list (mh-interactive-range "Blacklist")))
  ;; The blacklist function is the second element of the entry for the
  ;; chosen program in `mh-junk-function-alist'.
  (let ((blacklist-func (nth 1 (assoc mh-junk-choice mh-junk-function-alist))))
    (unless blacklist-func
      (error "Customize `mh-junk-program' appropriately"))
    ;; Work out what to do with the messages afterwards: a nil DEST
    ;; means they are deleted, otherwise DEST names the folder they
    ;; are refiled to.
    (let ((dest (cond ((null mh-junk-disposition) nil)
                      ((equal mh-junk-disposition "") "+")
                      ((eq (aref mh-junk-disposition 0) ?+)
                       mh-junk-disposition)
                      ;; A leading "@" is replaced by the current folder.
                      ((eq (aref mh-junk-disposition 0) ?@)
                       (concat mh-current-folder "/"
                               (substring mh-junk-disposition 1)))
                      (t (concat "+" mh-junk-disposition)))))
      (mh-iterate-on-range msg range
        (message "Blacklisting message %d..." msg)
        ;; Call through the symbol, not its raw function cell, so that
        ;; an autoload stub is resolved by `funcall' itself.
        (funcall blacklist-func msg)
        (message "Blacklisting message %d...done" msg)
        ;; Record the message in `mh-seen-list' once.
        (unless (memq msg mh-seen-list)
          (setq mh-seen-list (cons msg mh-seen-list)))
        (if dest
            (mh-refile-a-msg nil (intern dest))
          (mh-delete-a-msg nil)))
      (mh-next-msg))))
77
78;;;###mh-autoload
(defun mh-junk-whitelist (range)
  "Whitelist RANGE as ham.

This command reclassifies the RANGE as ham if it were incorrectly
classified as spam (see the option `mh-junk-program'). It then
refiles the message into the \"+inbox\" folder.

Check the documentation of `mh-interactive-range' to see how
RANGE is read in interactive use."
  (interactive (list (mh-interactive-range "Whitelist")))
  ;; The whitelist function is the third element of the entry for the
  ;; chosen program in `mh-junk-function-alist'.
  (let ((whitelist-func (nth 2 (assoc mh-junk-choice mh-junk-function-alist))))
    (unless whitelist-func
      (error "Customize `mh-junk-program' appropriately"))
    (mh-iterate-on-range msg range
      (message "Whitelisting message %d..." msg)
      ;; Call through the symbol, not its raw function cell, so that
      ;; an autoload stub is resolved by `funcall' itself.
      (funcall whitelist-func msg)
      (message "Whitelisting message %d...done" msg)
      ;; The refile target is the folder named by `mh-inbox'.
      (mh-refile-a-msg nil (intern mh-inbox)))
    (mh-next-msg)))
98
99\f
100
f0d73c14 101;; Spamassassin Interface
924df208 102
f0d73c14
BW
(defvar mh-spamassassin-executable
  (executable-find "spamassassin")
  "Result of looking up the \"spamassassin\" program when this file was loaded.
Nil if the program could not be found.")

(defvar mh-sa-learn-executable
  (executable-find "sa-learn")
  "Result of looking up the \"sa-learn\" program when this file was loaded.
Nil if the program could not be found.")
924df208 105
dda00b2c 106;;;###mh-autoload
f0d73c14
BW
(defun mh-spamassassin-blacklist (msg)
  "Blacklist MSG with SpamAssassin.

SpamAssassin is one of the more popular spam filtering programs.
Get it from your local distribution or from the SpamAssassin web
site at URL `http://spamassassin.org/'.

To use SpamAssassin, add the following recipes to
\".procmailrc\":

    PATH=$PATH:/usr/bin/mh
    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with SpamAssassin.
    :0fw
    | spamc

    # Anything with a spam level of 10 or more is junked immediately.
    :0:
    * ^X-Spam-Level: ..........
    /dev/null

    :0:
    * ^X-Spam-Status: Yes
    spam/.

If you don't use \"spamc\", use \"spamassassin -P -a\".

Note that one of the recipes above throws away messages with a
score greater than or equal to 10. Here's how you can determine a
value that works best for you.

First, run \"spamassassin -t\" on every mail message in your
archive and use Gnumeric to verify that the average plus the
standard deviation of good mail is under 5, the SpamAssassin
default for \"spam\".

Using Gnumeric, sort the messages by score and view the messages
with the highest score. Determine the score which encompasses all
of your interesting messages and add a couple of points to be
conservative. Add that many dots to the \"X-Spam-Level:\" header
field above to send messages with that score down the drain.

In the example above, messages with a score of 5-9 are set aside
in the \"+spam\" folder for later review. The major weakness of
rules-based filters is a plethora of false positives so it is
worthwhile to check.

If SpamAssassin classifies a message incorrectly, or is unsure,
you can use the MH-E commands \\[mh-junk-blacklist] and
\\[mh-junk-whitelist].

The command \\[mh-junk-blacklist] adds a \"blacklist_from\" entry
to \"~/.spamassassin/user_prefs\", deletes the message, and sends
the message to the Razor, so that others might not see this spam.
If the \"sa-learn\" command is available, the message is also
recategorized as spam.

The command \\[mh-junk-whitelist] adds a \"whitelist_from\" rule
to the \"~/.spamassassin/user_prefs\" file. If the \"sa-learn\"
command is available, the message is also recategorized as ham.

Over time, you'll observe that the same host or domain occurs
repeatedly in the \"blacklist_from\" entries, so you might think
that you could avoid future spam by blacklisting all mail from a
particular domain. The utility function
`mh-spamassassin-identify-spammers' helps you do precisely that.
This function displays a frequency count of the hosts and domains
in the \"blacklist_from\" entries from the last blank line in
\"~/.spamassassin/user_prefs\" to the end of the file. This
information can be used so that you can replace multiple
\"blacklist_from\" entries with a single wildcard entry such as:

    blacklist_from *@*amazingoffersdirect2u.com

In versions of SpamAssassin (2.50 and on) that support a Bayesian
classifier, \\[mh-junk-blacklist] uses the program \"sa-learn\"
to recategorize the message as spam. Neither MH-E, nor
SpamAssassin, rebuilds the database after adding words, so you
will need to run \"sa-learn --rebuild\" periodically. This can be
done by adding the following to your crontab:

    0 * * * * sa-learn --rebuild > /dev/null 2>&1"
  (unless mh-spamassassin-executable
    (error "Unable to find the spamassassin executable"))
  ;; Read the folder and message file name before switching buffers;
  ;; the forms below run with the log and temp buffers current
  ;; (presumably `mh-current-folder' is buffer-local -- confirm).
  (let ((current-folder mh-current-folder)
        (msg-file (mh-msg-filename msg mh-current-folder)))
    (message "Reporting message %d..." msg)
    (mh-truncate-log-buffer)
    ;; Put call-process output in log buffer if we are saving it
    ;; (this happens if mh-junk-background is t).
    (with-current-buffer mh-log-buffer
      (call-process mh-spamassassin-executable msg-file mh-junk-background nil
                    ;;"--report" "--remove-from-whitelist"
                    "-r" "-R")          ; spamassassin V2.20
      (when mh-sa-learn-executable
        (message "Recategorizing message %d as spam..." msg)
        (mh-truncate-log-buffer)
        (call-process mh-sa-learn-executable msg-file mh-junk-background nil
                      "--single" "--spam" "--local" "--no-rebuild")))
    (message "Blacklisting sender of message %d..." msg)
    (with-current-buffer (get-buffer-create mh-temp-buffer)
      (erase-buffer)
      ;; Run scan on the message with a format built from
      ;; mymbox{from} and addr{from}; a non-empty output line is taken
      ;; to be the sender's address, and no such line is reported
      ;; below as the message being from the user's own address.
      (call-process (expand-file-name mh-scan-prog mh-progs)
                    nil t nil
                    (format "%d" msg) current-folder
                    "-format" "%<(mymbox{from})%|%(addr{from})%>")
      (goto-char (point-min))
      (if (search-forward-regexp "^\\(.+\\)$" nil t)
          (progn
            (mh-spamassassin-add-rule "blacklist_from" (match-string 0))
            (message "Blacklisting sender of message %d...done" msg))
        (message "Blacklisting sender of message %d...not done (from my address)" msg)))))
924df208 222
dda00b2c 223;;;###mh-autoload
(defun mh-spamassassin-whitelist (msg)
  "Whitelist MSG with SpamAssassin.

The \\[mh-junk-whitelist] command adds a \"whitelist_from\" rule to
the \"~/.spamassassin/user_prefs\" file. If the \"sa-learn\" command
is available, the message is also recategorized as ham.

The message file is rewritten with the SpamAssassin markup
removed. An error is signaled, and the message file is left
untouched, if SpamAssassin fails or produces no output.

See `mh-spamassassin-blacklist' for more information."
  (unless mh-spamassassin-executable
    (error "Unable to find the spamassassin executable"))
  (let ((msg-file (mh-msg-filename msg mh-current-folder))
        (show-buffer (get-buffer mh-show-buffer))
        from)
    (with-current-buffer (get-buffer-create mh-temp-buffer)
      (erase-buffer)
      (message "Removing spamassassin markup from message %d..." msg)
      ;; The buffer contents replace the message file below, so only
      ;; standard output may land here (standard error is discarded),
      ;; and a failed or empty run must not overwrite the message.
      (let ((status (call-process mh-spamassassin-executable msg-file
                                  '(t nil) nil
                                  ;; "--remove-markup"
                                  "-d")))   ; spamassassin V2.20
        (unless (and (eq status 0) (> (buffer-size) 0))
          (error "Unable to remove spamassassin markup from message %d"
                 msg)))
      (if show-buffer
          (kill-buffer show-buffer))
      (write-file msg-file)
      (when mh-sa-learn-executable
        (message "Recategorizing message %d as ham..." msg)
        (mh-truncate-log-buffer)
        ;; Put call-process output in log buffer if we are saving it
        ;; (this happens if mh-junk-background is t).
        (with-current-buffer mh-log-buffer
          (call-process mh-sa-learn-executable msg-file mh-junk-background nil
                        "--single" "--ham" "--local" "--no-rebuild")))
      (message "Whitelisting sender of message %d..." msg)
      ;; FROM stays nil when `ietf-drums-parse-address' is unavailable.
      (setq from
            (car (mh-funcall-if-exists
                  ietf-drums-parse-address (mh-get-header-field "From:"))))
      ;; `write-file' made this buffer visit the message file; it is
      ;; no longer needed.
      (kill-buffer nil)
      (unless (or (null from) (equal from ""))
        (mh-spamassassin-add-rule "whitelist_from" from))
      (message "Whitelisting sender of message %d...done" msg))))
924df208
BW
262
(defun mh-spamassassin-add-rule (rule body)
  "Add a new rule to \"~/.spamassassin/user_prefs\".
The name of the rule is RULE and its body is BODY.

The rule is written as RULE, a tab and BODY on a line of its own
at the end of the file. Nothing is added if such a line already
exists anywhere in the file (the comparison ignores case). If the
file was not being visited before the call, its buffer is killed
afterwards."
  (save-window-excursion
    (let* ((entry (format "%s\t%s" rule body))
           (case-fold-search t)
           (file (expand-file-name "~/.spamassassin/user_prefs"))
           (buffer-exists (find-buffer-visiting file)))
      (find-file file)
      ;; Look for an existing copy from the top of the buffer: point
      ;; may be anywhere in an already visited file, and the rule may
      ;; sit on the very first line or on a last line that lacks its
      ;; final newline.
      (goto-char (point-min))
      (unless (re-search-forward (concat "^" (regexp-quote entry) "$") nil t)
        (goto-char (point-max))
        (insert (if (bolp) "" "\n") entry "\n")
        (save-buffer))
      (if (not buffer-exists)
          (kill-buffer nil)))))
279
dda00b2c 280;;;###mh-autoload
(defun mh-spamassassin-identify-spammers ()
  "Identify spammers who are repeat offenders.

This function displays a frequency count of the hosts and domains
in the \"blacklist_from\" entries from the last blank line in
\"~/.spamassassin/user_prefs\" to the end of the file. This
information can be used so that you can replace multiple
\"blacklist_from\" entries with a single wildcard entry such as:

    blacklist_from *@*amazingoffersdirect2u.com

If the file contains no blank line, all of its entries are
counted. Only hosts and domains that occur more than twice are
listed, the most frequent first."
  (interactive)
  (let* ((file (expand-file-name "~/.spamassassin/user_prefs"))
         (domains (make-hash-table :test 'equal)))
    (find-file file)
    ;; Only consider entries between last blank line and end of file.
    ;; Without a blank line, start from the top instead of signaling
    ;; a search failure.
    (goto-char (1- (point-max)))
    (unless (search-backward-regexp "^$" nil t)
      (goto-char (point-min)))
    ;; Perform frequency count.
    (save-excursion
      (while (search-forward-regexp "^blacklist_from\\s-*\\(.*\\)@\\(.*\\)$"
                                    nil t)
        ;; Remove top-level-domain from hostname, then add counts for
        ;; each remaining host and domain part.
        (dolist (part (cdr (reverse (split-string (match-string 2) "\\."))))
          (puthash part (1+ (gethash part domains 0)) domains))))

    ;; Output
    (delete-other-windows)
    (pop-to-buffer (get-buffer-create "*MH-E Spammer Frequencies*"))
    (erase-buffer)
    (maphash (lambda (key value)
               (if (> value 2)
                   (insert (format "%s %s\n" key value))))
             domains)
    ;; Sort ascending on the count field, then flip the lines so the
    ;; highest counts come first.
    (sort-numeric-fields 2 (point-min) (point-max))
    (reverse-region (point-min) (point-max))
    (goto-char (point-min))))
323
f0d73c14
BW
324\f
325
326;; Bogofilter Interface
327
(defvar mh-bogofilter-executable
  (executable-find "bogofilter")
  "Result of looking up the \"bogofilter\" program when this file was loaded.
Nil if the program could not be found.")
329
dda00b2c 330;;;###mh-autoload
(defun mh-bogofilter-blacklist (msg)
  "Blacklist MSG with bogofilter.

Bogofilter is a Bayesian spam filtering program. Get it from your
local distribution or from the bogofilter web site at URL
`http://bogofilter.sourceforge.net/'.

Bogofilter is taught by running:

    bogofilter -n < good-message

on every good message, and

    bogofilter -s < spam-message

on every spam message. This is called a full training; three other
training methods are described in the FAQ that is distributed with
bogofilter. Note that most Bayesian filters need 1000 to 5000 of each
type of message to start doing a good job.

To use bogofilter, add the following recipes to \".procmailrc\":

    PATH=$PATH:/usr/bin/mh
    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with bogofilter.
    :0fw
    | bogofilter -3 -e -p

    :0:
    * ^X-Bogosity: Yes, tests=bogofilter
    spam/.

    :0:
    * ^X-Bogosity: Unsure, tests=bogofilter
    spam/unsure/.

If bogofilter classifies a message incorrectly, or is unsure, you can
use the MH-E commands \\[mh-junk-blacklist] and \\[mh-junk-whitelist]
to update bogofilter's training.

The \"Bogofilter FAQ\" suggests that you run the following
occasionally to shrink the database:

    bogoutil -d wordlist.db | bogoutil -l wordlist.db.new
    mv wordlist.db wordlist.db.prv
    mv wordlist.db.new wordlist.db

The \"Bogofilter tuning HOWTO\" describes how you can fine-tune Bogofilter."
  (or mh-bogofilter-executable
      (error "Unable to find the bogofilter executable"))
  ;; The file name is computed before the log buffer is made current.
  (let ((spam-file (mh-msg-filename msg mh-current-folder)))
    (mh-truncate-log-buffer)
    ;; bogofilter reads the message on its standard input.  Its output
    ;; lands in the log buffer only when `mh-junk-background' is t.
    (save-current-buffer
      (set-buffer mh-log-buffer)
      (call-process mh-bogofilter-executable
                    spam-file
                    mh-junk-background
                    nil
                    "-s"))))
f0d73c14 389
dda00b2c 390;;;###mh-autoload
(defun mh-bogofilter-whitelist (msg)
  "Whitelist MSG with bogofilter.

The file of MSG is fed to bogofilter with the \"-n\" option.

See `mh-bogofilter-blacklist' for more information."
  (or mh-bogofilter-executable
      (error "Unable to find the bogofilter executable"))
  ;; The file name is computed before the log buffer is made current.
  (let ((ham-file (mh-msg-filename msg mh-current-folder)))
    (mh-truncate-log-buffer)
    ;; bogofilter reads the message on its standard input.  Its output
    ;; lands in the log buffer only when `mh-junk-background' is t.
    (save-current-buffer
      (set-buffer mh-log-buffer)
      (call-process mh-bogofilter-executable
                    ham-file
                    mh-junk-background
                    nil
                    "-n"))))
f0d73c14
BW
404
405\f
406
407;; Spamprobe Interface
408
(defvar mh-spamprobe-executable
  (executable-find "spamprobe")
  "Result of looking up the \"spamprobe\" program when this file was loaded.
Nil if the program could not be found.")
410
dda00b2c 411;;;###mh-autoload
f0d73c14
BW
(defun mh-spamprobe-blacklist (msg)
  "Blacklist MSG with SpamProbe.

SpamProbe is a Bayesian spam filtering program. Get it from your
local distribution or from the SpamProbe web site at URL
`http://spamprobe.sourceforge.net'.

To use SpamProbe, add the following recipes to \".procmailrc\":

    PATH=$PATH:/usr/bin/mh
    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with SpamProbe.
    :0
    SCORE=| spamprobe receive

    :0 wf
    | formail -I \"X-SpamProbe: $SCORE\"

    :0:
    *^X-SpamProbe: SPAM
    spam/.

If SpamProbe classifies a message incorrectly, you can use the
MH-E commands \\[mh-junk-blacklist] and \\[mh-junk-whitelist] to
update SpamProbe's training."
  (or mh-spamprobe-executable
      (error "Unable to find the spamprobe executable"))
  ;; The file name is computed before the log buffer is made current.
  (let ((spam-file (mh-msg-filename msg mh-current-folder)))
    (mh-truncate-log-buffer)
    ;; spamprobe reads the message on its standard input.  Its output
    ;; lands in the log buffer only when `mh-junk-background' is t.
    (save-current-buffer
      (set-buffer mh-log-buffer)
      (call-process mh-spamprobe-executable
                    spam-file
                    mh-junk-background
                    nil
                    "spam"))))
f0d73c14 447
dda00b2c 448;;;###mh-autoload
f0d73c14
BW
(defun mh-spamprobe-whitelist (msg)
  "Whitelist MSG with SpamProbe.

The file of MSG is fed to spamprobe with the \"good\" command.

See `mh-spamprobe-blacklist' for more information."
  (or mh-spamprobe-executable
      (error "Unable to find the spamprobe executable"))
  ;; The file name is computed before the log buffer is made current.
  (let ((ham-file (mh-msg-filename msg mh-current-folder)))
    (mh-truncate-log-buffer)
    ;; spamprobe reads the message on its standard input.  Its output
    ;; lands in the log buffer only when `mh-junk-background' is t.
    (save-current-buffer
      (set-buffer mh-log-buffer)
      (call-process mh-spamprobe-executable
                    ham-file
                    mh-junk-background
                    nil
                    "good"))))
f0d73c14 462
924df208
BW
463(provide 'mh-junk)
464
cee9f5c6
BW
465;; Local Variables:
466;; indent-tabs-mode: nil
467;; sentence-end-double-space: nil
468;; End:
924df208 469
cee9f5c6 470;; arch-tag: 603335f1-77ff-4306-8828-5d3dad51abe1
924df208 471;;; mh-junk.el ends here