Commit | Line | Data |
---|---|---|
dda00b2c | 1 | ;;; mh-junk.el --- MH-E interface to anti-spam measures |
924df208 | 2 | |
af435184 | 3 | ;; Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc. |
924df208 BW |
4 | |
5 | ;; Author: Satyaki Das <satyaki@theforce.stanford.edu>, | |
6 | ;; Bill Wohler <wohler@newt.com> | |
7 | ;; Maintainer: Bill Wohler <wohler@newt.com> | |
8 | ;; Keywords: mail, spam | |
9 | ||
10 | ;; This file is part of GNU Emacs. | |
11 | ||
12 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
13 | ;; it under the terms of the GNU General Public License as published by | |
14 | ;; the Free Software Foundation; either version 2, or (at your option) | |
15 | ;; any later version. | |
16 | ||
17 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
18 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ;; GNU General Public License for more details. | |
21 | ||
22 | ;; You should have received a copy of the GNU General Public License | |
23 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
3a35cf56 LK |
24 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
25 | ;; Boston, MA 02110-1301, USA. | |
924df208 BW |
26 | |
27 | ;;; Commentary: | |
28 | ||
29 | ;; Spam handling in MH-E. | |
30 | ||
31 | ;;; Change Log: | |
32 | ||
33 | ;;; Code: | |
34 | ||
35 | (require 'mh-e) | |
dda00b2c BW |
36 | (require 'mh-scan) |
37 | (mh-require-cl) | |
924df208 | 38 | |
924df208 | 39 | ;;;###mh-autoload |
(defun mh-junk-blacklist (range)
  "Blacklist RANGE as spam.

The spam program selected with the option `mh-junk-program' is
trained on every message in RANGE. Each message is then disposed
of as the option `mh-junk-disposition' dictates: it is deleted
when that option is nil and refiled into the named folder
otherwise.

See `mh-interactive-range' for how RANGE is read in interactive
use.

Details for the individual spam fighting programs are in:

  - `mh-spamassassin-blacklist'
  - `mh-bogofilter-blacklist'
  - `mh-spamprobe-blacklist'"
  (interactive (list (mh-interactive-range "Blacklist")))
  (let ((trainer (nth 1 (assoc mh-junk-choice mh-junk-function-alist))))
    (if (null trainer)
        (error "Customize `mh-junk-program' appropriately"))
    ;; Turn `mh-junk-disposition' into a folder name, or nil for "delete":
    ;;   ""      -> "+"
    ;;   "+name" -> used as is
    ;;   "@name" -> name below the current folder
    ;;   "name"  -> "+name"
    (let ((target
           (and mh-junk-disposition
                (if (equal mh-junk-disposition "")
                    "+"
                  (let ((sigil (aref mh-junk-disposition 0)))
                    (cond ((eq sigil ?+)
                           mh-junk-disposition)
                          ((eq sigil ?@)
                           (concat mh-current-folder "/"
                                   (substring mh-junk-disposition 1)))
                          (t
                           (concat "+" mh-junk-disposition))))))))
      (mh-iterate-on-range msg range
        (message "Blacklisting message %d..." msg)
        (funcall (symbol-function trainer) msg)
        (message "Blacklisting message %d...done" msg)
        ;; Record the message in `mh-seen-list' once.
        (unless (memq msg mh-seen-list)
          (setq mh-seen-list (cons msg mh-seen-list)))
        (cond (target
               (mh-refile-a-msg nil (intern target)))
              (t
               (mh-delete-a-msg nil))))
      (mh-next-msg))))
78 | ||
79 | ;;;###mh-autoload | |
(defun mh-junk-whitelist (range)
  "Whitelist RANGE as ham.

Use this command on messages that were incorrectly classified as
spam. The spam program selected with the option `mh-junk-program'
is told that every message in RANGE is ham, and each message is
then refiled into the folder named by `mh-inbox'.

See `mh-interactive-range' for how RANGE is read in interactive
use."
  (interactive (list (mh-interactive-range "Whitelist")))
  (let ((trainer (nth 2 (assoc mh-junk-choice mh-junk-function-alist))))
    (if (null trainer)
        (error "Customize `mh-junk-program' appropriately"))
    (mh-iterate-on-range msg range
      (message "Whitelisting message %d..." msg)
      (funcall (symbol-function trainer) msg)
      (message "Whitelisting message %d...done" msg)
      (mh-refile-a-msg nil (intern mh-inbox)))
    (mh-next-msg)))
99 | ||
100 | \f | |
101 | ||
f0d73c14 | 102 | ;; Spamassassin Interface |
924df208 | 103 | |
(defvar mh-spamassassin-executable
  (executable-find "spamassassin")
  "Result of looking up the \"spamassassin\" program with `executable-find'.
The SpamAssassin commands signal an error when this is nil.")

(defvar mh-sa-learn-executable
  (executable-find "sa-learn")
  "Result of looking up the \"sa-learn\" program with `executable-find'.
When this is nil, the SpamAssassin commands skip the \"sa-learn\" step.")
924df208 | 106 | |
dda00b2c | 107 | ;;;###mh-autoload |
(defun mh-spamassassin-blacklist (msg)
  "Blacklist MSG with SpamAssassin.

SpamAssassin is one of the more popular spam filtering programs.
Get it from your local distribution or from
http://spamassassin.org/.

To use SpamAssassin, add the following recipes to
\".procmailrc\":

    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with SpamAssassin.
    :0fw
    | spamc

    # Anything with a spam level of 10 or more is junked immediately.
    :0:
    * ^X-Spam-Level: ..........
    /dev/null

    :0:
    * ^X-Spam-Status: Yes
    spam/.

If you don't use \"spamc\", use \"spamassassin -P -a\".

Note that one of the recipes above throws away messages with a
score greater than or equal to 10. Here's how you can determine a
value that works best for you.

First, run \"spamassassin -t\" on every mail message in your
archive and use Gnumeric to verify that the average plus the
standard deviation of good mail is under 5, the SpamAssassin
default for \"spam\".

Using Gnumeric, sort the messages by score and view the messages
with the highest score. Determine the score which encompasses all
of your interesting messages and add a couple of points to be
conservative. Add that many dots to the \"X-Spam-Level:\" header
field above to send messages with that score down the drain.

In the example above, messages with a score of 5-9 are set aside
in the \"+spam\" folder for later review. The major weakness of
rules-based filters is a plethora of false positives so it is
worthwhile to check.

If SpamAssassin classifies a message incorrectly, or is unsure,
you can use the MH-E commands \\[mh-junk-blacklist] and
\\[mh-junk-whitelist].

The command \\[mh-junk-blacklist] adds a \"blacklist_from\" entry
to \"~/.spamassassin/user_prefs\", deletes the message, and sends
the message to the Razor, so that others might not see this spam.
If the \"sa-learn\" command is available, the message is also
recategorized as spam.

The command \\[mh-junk-whitelist] adds a \"whitelist_from\" rule
to the \"~/.spamassassin/user_prefs\" file. If the \"sa-learn\"
command is available, the message is also recategorized as ham.

Over time, you'll observe that the same host or domain occurs
repeatedly in the \"blacklist_from\" entries, so you might think
that you could avoid future spam by blacklisting all mail from a
particular domain. The utility function
`mh-spamassassin-identify-spammers' helps you do precisely that.
This function displays a frequency count of the hosts and domains
in the \"blacklist_from\" entries from the last blank line in
\"~/.spamassassin/user_prefs\" to the end of the file. This
information can be used so that you can replace multiple
\"blacklist_from\" entries with a single wildcard entry such as:

    blacklist_from *@*amazingoffersdirect2u.com

In versions of SpamAssassin (2.50 and on) that support a Bayesian
classifier, \\[mh-junk-blacklist] uses the program \"sa-learn\"
to recategorize the message as spam. Neither MH-E, nor
SpamAssassin, rebuilds the database after adding words, so you
will need to run \"sa-learn --rebuild\" periodically. This can be
done by adding the following to your crontab:

    0 * * * * sa-learn --rebuild > /dev/null 2>&1"
  (unless mh-spamassassin-executable
    (error "Unable to find the spamassassin executable"))
  ;; Read the folder and the message file name before the current buffer
  ;; is switched to the temporary buffer below.
  (let ((current-folder mh-current-folder)
        (msg-file (mh-msg-filename msg mh-current-folder)))
    (save-excursion
      (message "Reporting message %d..." msg)
      (mh-truncate-log-buffer)
      ;; The message file is fed to the program on standard input and the
      ;; program's output is collected in the MH-E log buffer.
      (call-process mh-spamassassin-executable msg-file mh-log-buffer nil
                    ;;"--report" "--remove-from-whitelist"
                    "-r" "-R")          ; spamassassin V2.20
      (when mh-sa-learn-executable
        (message "Recategorizing this message as spam...")
        (call-process mh-sa-learn-executable msg-file mh-log-buffer nil
                      "--single" "--spam" "--local" "--no-rebuild"))
      (message "Blacklisting message %d..." msg)
      (set-buffer (get-buffer-create mh-temp-buffer))
      (erase-buffer)
      ;; The scan output must be inserted into the current (temporary)
      ;; buffer, because that buffer is searched for the sender right
      ;; below.  A destination of nil or 0 would discard the output and
      ;; no rule would ever be added, so pass t here.
      (call-process (expand-file-name mh-scan-prog mh-progs)
                    nil t nil
                    (format "%s" msg) current-folder
                    "-format" "%<(mymbox{from})%|%(addr{from})%>")
      (goto-char (point-min))
      ;; A non-empty line is taken to be the sender's address.  No such
      ;; line is reported as "from my address"; presumably the scan
      ;; format prints nothing for the user's own mail.
      (if (search-forward-regexp "^\\(.+\\)$" nil t)
          (progn
            (mh-spamassassin-add-rule "blacklist_from" (match-string 0))
            (message "Blacklisting message %d...done" msg))
        (message "Blacklisting message %d...not done (from my address)"
                 msg)))))
924df208 | 219 | |
dda00b2c | 220 | ;;;###mh-autoload |
(defun mh-spamassassin-whitelist (msg)
  "Whitelist MSG with SpamAssassin.

The \\[mh-junk-whitelist] command adds a \"whitelist_from\" rule to
the \"~/.spamassassin/user_prefs\" file. If the \"sa-learn\" command
is available, the message is also recategorized as ham.

The SpamAssassin markup is removed from the message file itself.

See `mh-spamassassin-blacklist' for more information."
  (unless mh-spamassassin-executable
    (error "Unable to find the spamassassin executable"))
  (let ((msg-file (mh-msg-filename msg mh-current-folder))
        (show-buffer (get-buffer mh-show-buffer))
        from)
    (save-excursion
      (set-buffer (get-buffer-create mh-temp-buffer))
      (erase-buffer)
      (message "Removing spamassassin markup from message...")
      ;; Collect the cleaned-up message in the temporary buffer ...
      (call-process mh-spamassassin-executable msg-file mh-temp-buffer nil
                    ;; "--remove-markup"
                    "-d")               ; spamassassin V2.20
      (if show-buffer
          (kill-buffer show-buffer))
      ;; ... and write it back over the message file.  From here on the
      ;; current buffer visits MSG-FILE.
      (write-file msg-file)
      (when mh-sa-learn-executable
        (message "Recategorizing this message as ham...")
        ;; Every option must be its own argument: `call-process' does not
        ;; split strings on whitespace, so "--local --no-rebuild" would
        ;; reach sa-learn as a single unknown option.
        (call-process mh-sa-learn-executable msg-file mh-temp-buffer nil
                      "--single" "--ham" "--local" "--no-rebuild"))
      (message "Whitelisting message %d..." msg)
      ;; `ietf-drums-parse-address' is only called if it is defined;
      ;; the car of its result is used as the sender's address.
      (setq from
            (car (mh-funcall-if-exists
                  ietf-drums-parse-address (mh-get-header-field "From:"))))
      ;; Done with the buffer that visits the message file.
      (kill-buffer nil)
      (unless (or (null from) (equal from ""))
        (mh-spamassassin-add-rule "whitelist_from" from))
      (message "Whitelisting message %d...done" msg))))
924df208 BW |
256 | |
(defun mh-spamassassin-add-rule (rule body)
  "Add a new rule to \"~/.spamassassin/user_prefs\".
The name of the rule is RULE and its body is BODY.

The rule is written as a line of the form \"RULE<TAB>BODY\" at the
end of the file and the file is saved. Nothing is changed if such
a line, compared without regard to case, is already present
anywhere in the file.

If the file was not being visited before the call, the buffer
created for it is killed afterwards."
  (let* ((entry (format "%s\t%s" rule body))
         (file (expand-file-name "~/.spamassassin/user_prefs"))
         (buffer-exists (find-buffer-visiting file))
         (buffer (or buffer-exists (find-file-noselect file))))
    (save-excursion
      (set-buffer buffer)
      ;; Look at the whole file, whatever point and narrowing an
      ;; already existing buffer happens to have.
      (save-excursion
        (save-restriction
          (widen)
          (goto-char (point-min))
          (let ((case-fold-search t))
            ;; Anchoring with ^ and $ also finds the rule on the first
            ;; line and on a last line that lacks a final newline.
            (unless (re-search-forward
                     (concat "^" (regexp-quote entry) "$") nil t)
              (goto-char (point-max))
              (insert (if (bolp) "" "\n") entry "\n")
              (save-buffer))))))
    (unless buffer-exists
      (kill-buffer buffer))))
273 | ||
dda00b2c | 274 | ;;;###mh-autoload |
(defun mh-spamassassin-identify-spammers ()
  "Identify spammers who are repeat offenders.

This function displays a frequency count of the hosts and domains
in the \"blacklist_from\" entries from the last blank line in
\"~/.spamassassin/user_prefs\" to the end of the file. If the file
contains no blank line, all of its entries are counted. Only
names that occur more than twice are listed, most frequent first.
This information can be used so that you can replace multiple
\"blacklist_from\" entries with a single wildcard entry such as:

    blacklist_from *@*amazingoffersdirect2u.com"
  (interactive)
  (let ((file (expand-file-name "~/.spamassassin/user_prefs"))
        (domains (make-hash-table :test 'equal)))
    (find-file file)
    ;; Only consider entries between last blank line and end of file.
    ;; Without a blank line, start from the beginning of the buffer
    ;; instead of signaling a search error.
    (goto-char (1- (point-max)))
    (unless (search-backward-regexp "^$" nil t)
      (goto-char (point-min)))
    ;; Perform frequency count.
    (save-excursion
      (while (search-forward-regexp "^blacklist_from\\s-*\\(.*\\)@\\(.*\\)$"
                                    nil t)
        ;; Split the part after the \"@\" at the dots, drop the last
        ;; component (the top-level domain) and count every remaining
        ;; host and domain part.
        (dolist (part (cdr (reverse (split-string (match-string 2) "\\."))))
          (puthash part (1+ (gethash part domains 0)) domains))))

    ;; Output
    (delete-other-windows)
    (pop-to-buffer (get-buffer-create "*MH-E Spammer Frequencies*"))
    (erase-buffer)
    (maphash (lambda (key value)
               (if (> value 2)
                   (insert (format "%s %s\n" key value))))
             domains)
    ;; Sort on the count (second field) and flip the lines so that the
    ;; highest count comes first.
    (sort-numeric-fields 2 (point-min) (point-max))
    (reverse-region (point-min) (point-max))
    (goto-char (point-min))))
317 | ||
f0d73c14 BW |
318 | \f |
319 | ||
320 | ;; Bogofilter Interface | |
321 | ||
(defvar mh-bogofilter-executable
  (executable-find "bogofilter")
  "Result of looking up the \"bogofilter\" program with `executable-find'.
The bogofilter commands signal an error when this is nil.")
323 | ||
dda00b2c | 324 | ;;;###mh-autoload |
(defun mh-bogofilter-blacklist (msg)
  "Blacklist MSG with bogofilter.

Bogofilter is a Bayesian spam filtering program. Get it from your
local distribution or from http://bogofilter.sourceforge.net/.

Bogofilter is taught by running:

    bogofilter -n < good-message

on every good message, and

    bogofilter -s < spam-message

on every spam message. This is called a full training; three other
training methods are described in the FAQ that is distributed with
bogofilter. Note that most Bayesian filters need 1000 to 5000 of each
type of message to start doing a good job.

To use bogofilter, add the following recipes to \".procmailrc\":

    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with bogofilter.
    :0fw
    | bogofilter -3 -e -p

    :0:
    * ^X-Bogosity: Yes, tests=bogofilter
    spam/.

    :0:
    * ^X-Bogosity: Unsure, tests=bogofilter
    spam/unsure/.

If bogofilter classifies a message incorrectly, or is unsure, you can
use the MH-E commands \\[mh-junk-blacklist] and \\[mh-junk-whitelist]
to update bogofilter's training.

The \"Bogofilter FAQ\" suggests that you run the following
occasionally to shrink the database:

    bogoutil -d wordlist.db | bogoutil -l wordlist.db.new
    mv wordlist.db wordlist.db.prv
    mv wordlist.db.new wordlist.db

The \"Bogofilter tuning HOWTO\" describes how you can fine-tune Bogofilter."
  (if (null mh-bogofilter-executable)
      (error "Unable to find the bogofilter executable"))
  ;; Run "bogofilter -s" with the message file as standard input.
  (call-process mh-bogofilter-executable
                (mh-msg-filename msg mh-current-folder)
                mh-junk-background
                nil
                "-s"))
377 | ||
dda00b2c | 378 | ;;;###mh-autoload |
(defun mh-bogofilter-whitelist (msg)
  "Whitelist MSG with bogofilter.

See `mh-bogofilter-blacklist' for more information."
  (if (null mh-bogofilter-executable)
      (error "Unable to find the bogofilter executable"))
  ;; Run "bogofilter -n" with the message file as standard input.
  (call-process mh-bogofilter-executable
                (mh-msg-filename msg mh-current-folder)
                mh-junk-background
                nil
                "-n"))
388 | ||
389 | \f | |
390 | ||
391 | ;; Spamprobe Interface | |
392 | ||
(defvar mh-spamprobe-executable
  (executable-find "spamprobe")
  "Result of looking up the \"spamprobe\" program with `executable-find'.
The SpamProbe commands signal an error when this is nil.")
394 | ||
dda00b2c | 395 | ;;;###mh-autoload |
(defun mh-spamprobe-blacklist (msg)
  "Blacklist MSG with SpamProbe.

SpamProbe is a Bayesian spam filtering program. Get it from your local
distribution or from http://spamprobe.sourceforge.net.

To use SpamProbe, add the following recipes to \".procmailrc\":

    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with SpamProbe.
    :0
    SCORE=| spamprobe receive

    :0 wf
    | formail -I \"X-SpamProbe: $SCORE\"

    :0:
    *^X-SpamProbe: SPAM
    spam/.

If SpamProbe classifies a message incorrectly, you can use the
MH-E commands \\[mh-junk-blacklist] and \\[mh-junk-whitelist] to
update SpamProbe's training."
  (if (null mh-spamprobe-executable)
      (error "Unable to find the spamprobe executable"))
  ;; Run "spamprobe spam" with the message file as standard input.
  (call-process mh-spamprobe-executable
                (mh-msg-filename msg mh-current-folder)
                mh-junk-background
                nil
                "spam"))
425 | ||
dda00b2c | 426 | ;;;###mh-autoload |
(defun mh-spamprobe-whitelist (msg)
  "Whitelist MSG with SpamProbe.

See `mh-spamprobe-blacklist' for more information."
  (if (null mh-spamprobe-executable)
      (error "Unable to find the spamprobe executable"))
  ;; Run "spamprobe good" with the message file as standard input.
  (call-process mh-spamprobe-executable
                (mh-msg-filename msg mh-current-folder)
                mh-junk-background
                nil
                "good"))
436 | ||
924df208 BW |
437 | (provide 'mh-junk) |
438 | ||
cee9f5c6 BW |
439 | ;; Local Variables: |
440 | ;; indent-tabs-mode: nil | |
441 | ;; sentence-end-double-space: nil | |
442 | ;; End: | |
924df208 | 443 | |
cee9f5c6 | 444 | ;; arch-tag: 603335f1-77ff-4306-8828-5d3dad51abe1 |
924df208 | 445 | ;;; mh-junk.el ends here |