Commit | Line | Data |
---|---|---|
924df208 BW |
1 | ;;; mh-junk.el --- Interface to anti-spam measures |
2 | ||
e495eaec | 3 | ;; Copyright (C) 2003, 2004 Free Software Foundation, Inc. |
924df208 BW |
4 | |
5 | ;; Author: Satyaki Das <satyaki@theforce.stanford.edu>, | |
6 | ;; Bill Wohler <wohler@newt.com> | |
7 | ;; Maintainer: Bill Wohler <wohler@newt.com> | |
8 | ;; Keywords: mail, spam | |
9 | ||
10 | ;; This file is part of GNU Emacs. | |
11 | ||
12 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
13 | ;; it under the terms of the GNU General Public License as published by | |
14 | ;; the Free Software Foundation; either version 2, or (at your option) | |
15 | ;; any later version. | |
16 | ||
17 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
18 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ;; GNU General Public License for more details. | |
21 | ||
22 | ;; You should have received a copy of the GNU General Public License | |
23 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
3a35cf56 LK |
24 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
25 | ;; Boston, MA 02110-1301, USA. | |
924df208 BW |
26 | |
27 | ;;; Commentary: | |
28 | ||
29 | ;; Spam handling in MH-E. | |
30 | ||
31 | ;;; Change Log: | |
32 | ||
33 | ;;; Code: | |
34 | ||
f0d73c14 BW |
35 | (eval-when-compile (require 'mh-acros)) |
36 | (mh-require-cl) | |
924df208 BW |
37 | (require 'mh-e) |
38 | ||
39 | ;; Interactive functions callable from the folder buffer | |
40 | ;;;###mh-autoload | |
a66894d8 BW |
(defun mh-junk-blacklist (range)
  "Blacklist RANGE as spam.

This command trains the spam program in use (see the `mh-junk-program' option)
with the content of the range (see `mh-interactive-range') and then handles
the message(s) as specified by the `mh-junk-disposition' option.

For more information about using your particular spam fighting program, see:

  - `mh-spamassassin-blacklist'
  - `mh-bogofilter-blacklist'
  - `mh-spamprobe-blacklist'"
  (interactive (list (mh-interactive-range "Blacklist")))
  ;; The second element of the `mh-junk-function-alist' entry selected by
  ;; `mh-junk-choice' is the function that blacklists a single message.
  (let ((blacklist-func (nth 1 (assoc mh-junk-choice mh-junk-function-alist))))
    (unless blacklist-func
      (error "Customize `mh-junk-program' appropriately"))
    ;; Turn `mh-junk-disposition' into a folder name.  nil means the messages
    ;; are deleted rather than refiled; a leading "@" is replaced by the
    ;; current folder name followed by "/"; anything else gets a leading "+"
    ;; unless it already has one.
    (let ((dest (cond ((null mh-junk-disposition) nil)
                      ((equal mh-junk-disposition "") "+")
                      ((eq (aref mh-junk-disposition 0) ?+)
                       mh-junk-disposition)
                      ((eq (aref mh-junk-disposition 0) ?@)
                       (concat mh-current-folder "/"
                               (substring mh-junk-disposition 1)))
                      (t (concat "+" mh-junk-disposition)))))
      (mh-iterate-on-range msg range
        ;; `message' formats its arguments itself; wrapping the text in
        ;; `format' first would make it interpret the result a second time.
        (message "Blacklisting message %d..." msg)
        (funcall blacklist-func msg)
        (message "Blacklisting message %d...done" msg)
        ;; Record the message in `mh-seen-list' once.
        (if (not (memq msg mh-seen-list))
            (setq mh-seen-list (cons msg mh-seen-list)))
        (if dest
            (mh-refile-a-msg nil (intern dest))
          (mh-delete-a-msg nil)))
      (mh-next-msg))))
75 | ||
76 | ;;;###mh-autoload | |
(defun mh-junk-whitelist (range)
  "Whitelist RANGE as ham.

This command reclassifies a range of messages (see `mh-interactive-range') as
ham if they were incorrectly classified as spam. It then refiles the messages
into the `+inbox' folder.

The `mh-junk-program' option specifies the spam program in use."
  (interactive (list (mh-interactive-range "Whitelist")))
  ;; The third element of the `mh-junk-function-alist' entry selected by
  ;; `mh-junk-choice' is the function that whitelists a single message.
  (let ((whitelist-func (nth 2 (assoc mh-junk-choice mh-junk-function-alist))))
    (unless whitelist-func
      (error "Customize `mh-junk-program' appropriately"))
    (mh-iterate-on-range msg range
      ;; `message' formats its arguments itself; wrapping the text in
      ;; `format' first would make it interpret the result a second time.
      (message "Whitelisting message %d..." msg)
      (funcall whitelist-func msg)
      (message "Whitelisting message %d...done" msg)
      ;; The refile destination is the folder named by `mh-inbox'.
      (mh-refile-a-msg nil (intern mh-inbox)))
    (mh-next-msg)))
95 | ||
96 | \f | |
97 | ||
f0d73c14 | 98 | ;; Spamassassin Interface |
924df208 | 99 | |
f0d73c14 BW |
(defvar mh-spamassassin-executable (executable-find "spamassassin")
  "File name of the `spamassassin' program as found by `executable-find'.
This is nil if the program was not found, in which case the SpamAssassin
blacklist and whitelist functions signal an error.")
(defvar mh-sa-learn-executable (executable-find "sa-learn")
  "File name of the `sa-learn' program as found by `executable-find'.
This is nil if the program was not found, in which case the SpamAssassin
functions skip the step that recategorizes the message.")
924df208 | 102 | |
f0d73c14 BW |
(defun mh-spamassassin-blacklist (msg)
  "Blacklist MSG with SpamAssassin.

SpamAssassin is one of the more popular spam filtering programs. Get it from
your local distribution or from http://spamassassin.org/.

To use SpamAssassin, add the following recipes to `.procmailrc':

    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with SpamAssassin.
    :0fw
    | spamc

    # Anything with a spam level of 10 or more is junked immediately.
    :0:
    * ^X-Spam-Level: ..........
    /dev/null

    :0:
    * ^X-Spam-Status: Yes
    spam/.

If you don't use `spamc', use `spamassassin -P -a'.

Note that one of the recipes above throws away messages with a score greater
than or equal to 10. Here's how you can determine a value that works best for
you.

First, run `spamassassin -t' on every mail message in your archive and use
Gnumeric to verify that the average plus the standard deviation of good mail
is under 5, the SpamAssassin default for \"spam\".

Using Gnumeric, sort the messages by score and view the messages with the
highest score. Determine the score which encompasses all of your interesting
messages and add a couple of points to be conservative. Add that many dots to
the `X-Spam-Level:' header field above to send messages with that score down
the drain.

In the example above, messages with a score of 5-9 are set aside in the
`+spam' folder for later review. The major weakness of rules-based filters is
a plethora of false positives so it is worthwhile to check.

If SpamAssassin classifies a message incorrectly, or is unsure, you can use
the MH-E commands \\[mh-junk-blacklist] and \\[mh-junk-whitelist].

The \\[mh-junk-blacklist] command adds a `blacklist_from' entry to
`~/.spamassassin/user_prefs', deletes the message, and sends the message to the
Razor, so that others might not see this spam. If the `sa-learn' command is
available, the message is also recategorized as spam.

The \\[mh-junk-whitelist] command adds a `whitelist_from' rule to the
`~/.spamassassin/user_prefs' file. If the `sa-learn' command is available, the
message is also recategorized as ham.

Over time, you'll observe that the same host or domain occurs repeatedly in
the `blacklist_from' entries, so you might think that you could avoid future
spam by blacklisting all mail from a particular domain. The utility function
`mh-spamassassin-identify-spammers' helps you do precisely that. This function
displays a frequency count of the hosts and domains in the `blacklist_from'
entries from the last blank line in `~/.spamassassin/user_prefs' to the end of
the file. This information can be used so that you can replace multiple
`blacklist_from' entries with a single wildcard entry such as:

    blacklist_from *@*amazingoffersdirect2u.com

In versions of SpamAssassin (2.50 and on) that support a Bayesian classifier,
\\[mh-junk-blacklist] uses the `sa-learn' program to recategorize the message
as spam. Neither MH-E, nor SpamAssassin, rebuilds the database after adding
words, so you will need to run `sa-learn --rebuild' periodically. This can be
done by adding the following to your crontab:

    0 * * * * sa-learn --rebuild > /dev/null 2>&1"
  (unless mh-spamassassin-executable
    (error "Unable to find the spamassassin executable"))
  ;; Capture the folder name before `set-buffer' below switches buffers
  ;; (presumably `mh-current-folder' is buffer-local -- confirm).
  (let ((current-folder mh-current-folder)
        (msg-file (mh-msg-filename msg mh-current-folder)))
    (save-excursion
      ;; Report the message; the program reads it on stdin and its output is
      ;; collected in `mh-log-buffer'.
      (message "Reporting message %d..." msg)
      (mh-truncate-log-buffer)
      (call-process mh-spamassassin-executable msg-file mh-log-buffer nil
                    ;;"--report" "--remove-from-whitelist"
                    "-r" "-R")          ; spamassassin V2.20
      (when mh-sa-learn-executable
        (message "Recategorizing this message as spam...")
        (call-process mh-sa-learn-executable msg-file mh-log-buffer nil
                      "--single" "--spam" "--local" "--no-rebuild"))
      (message "Blacklisting message %d..." msg)
      (set-buffer (get-buffer-create mh-temp-buffer))
      (erase-buffer)
      ;; The output of scan is parsed right below, so it has to be inserted
      ;; into the current buffer (destination t) and the call has to be
      ;; synchronous.  `mh-junk-background' is the destination this file uses
      ;; for filter programs whose output is not read back, so it is not
      ;; suitable here.
      (call-process (expand-file-name mh-scan-prog mh-progs)
                    nil t nil
                    (format "%s" msg) current-folder
                    "-format" "%<(mymbox{from})%|%(addr{from})%>")
      (goto-char (point-min))
      ;; The first non-empty output line is the sender's address.  If there is
      ;; none, the "not done (from my address)" message below is shown and no
      ;; rule is added.
      (if (search-forward-regexp "^\\(.+\\)$" nil t)
          (let ((sender (match-string 0)))
            (mh-spamassassin-add-rule "blacklist_from" sender)
            (message "Blacklisting message %d...done" msg))
        (message "Blacklisting message %d...not done (from my address)" msg)))))
924df208 BW |
205 | |
(defun mh-spamassassin-whitelist (msg)
  "Whitelist MSG with SpamAssassin.

The \\[mh-junk-whitelist] command adds a `whitelist_from' rule to the
`~/.spamassassin/user_prefs' file. If the `sa-learn' command is available, the
message is also recategorized as ham.

See `mh-spamassassin-blacklist' for more information."
  (unless mh-spamassassin-executable
    (error "Unable to find the spamassassin executable"))
  (let ((msg-file (mh-msg-filename msg mh-current-folder))
        (show-buffer (get-buffer mh-show-buffer))
        from)
    (save-excursion
      (set-buffer (get-buffer-create mh-temp-buffer))
      (erase-buffer)
      ;; Run the message through spamassassin -d and collect the result in
      ;; the temporary buffer.
      (message "Removing spamassassin markup from message...")
      (call-process mh-spamassassin-executable msg-file mh-temp-buffer nil
                    ;; "--remove-markup"
                    "-d")               ; spamassassin V2.20
      ;; Kill the show buffer, if any, before the message file is rewritten.
      (if show-buffer
          (kill-buffer show-buffer))
      ;; Overwrite the message file with the buffer's contents.
      (write-file msg-file)
      (when mh-sa-learn-executable
        (message "Recategorizing this message as ham...")
        ;; Every option must be its own argument; "--local --no-rebuild" as a
        ;; single string would reach sa-learn as one unknown option.
        (call-process mh-sa-learn-executable msg-file mh-temp-buffer nil
                      "--single" "--ham" "--local" "--no-rebuild"))
      (message "Whitelisting message %d..." msg)
      ;; Extract the sender's address from the From: header field of the
      ;; buffer; this is nil when `ietf-drums-parse-address' is unavailable.
      (setq from
            (car (mh-funcall-if-exists
                  ietf-drums-parse-address (mh-get-header-field "From:"))))
      (kill-buffer nil)
      (unless (or (null from) (equal from ""))
        (mh-spamassassin-add-rule "whitelist_from" from))
      (message "Whitelisting message %d...done" msg))))
924df208 BW |
241 | |
(defun mh-spamassassin-add-rule (rule body)
  "Add a new rule to `~/.spamassassin/user_prefs'.
The name of the rule is RULE and its body is BODY.

The rule is written as a line of the form \"RULE<TAB>BODY\". Nothing is added
if the file already contains that line (compared without regard to case);
otherwise the line is appended and the file is saved. A buffer that was
already visiting the file is left alive; a buffer created here is killed."
  (save-window-excursion
    (let* ((line (format "%s\t%s\n" rule body))
           (case-fold-search t)
           (file (expand-file-name "~/.spamassassin/user_prefs"))
           (buffer-exists (find-buffer-visiting file)))
      (find-file file)
      ;; An already existing buffer can have point anywhere, so start the
      ;; duplicate check from the top.  Anchoring with "^" instead of a
      ;; literal preceding newline also finds a rule on the very first line.
      (goto-char (point-min))
      (unless (re-search-forward (concat "^" (regexp-quote line)) nil t)
        (goto-char (point-max))
        ;; Make sure the new rule starts on a line of its own.
        (insert (if (bolp) "" "\n") line)
        (save-buffer))
      (unless buffer-exists
        (kill-buffer nil)))))
258 | ||
(defun mh-spamassassin-identify-spammers ()
  "Identify spammers who are repeat offenders.

This function displays a frequency count of the hosts and domains in the
`blacklist_from' entries from the last blank line in
`~/.spamassassin/user_prefs' to the end of the file. This information can be
used so that you can replace multiple `blacklist_from' entries with a single
wildcard entry such as:

    blacklist_from *@*amazingoffersdirect2u.com

If the file contains no blank line, all of its entries are considered. Only
host and domain parts that occur more than twice are listed, most frequent
first, in the buffer `*MH-E Spammer Frequencies*'."
  (interactive)
  (let* ((file (expand-file-name "~/.spamassassin/user_prefs"))
         (domains (make-hash-table :test 'equal)))
    (find-file file)
    ;; Only consider entries between last blank line and end of file.
    (goto-char (1- (point-max)))
    ;; Without a blank line the search fails; fall back to the beginning of
    ;; the file instead of signaling a "Search failed" error.
    (unless (search-backward-regexp "^$" nil t)
      (goto-char (point-min)))
    ;; Perform frequency count.
    (save-excursion
      (while (search-forward-regexp "^blacklist_from\\s-*\\(.*\\)@\\(.*\\)$"
                                    nil t)
        ;; Split the part after the "@" on dots, drop the top-level domain
        ;; (the first element of the reversed list), and count every
        ;; remaining host and domain part.
        (dolist (part (cdr (reverse (split-string (match-string 2) "\\."))))
          (puthash part (1+ (gethash part domains 0)) domains))))

    ;; Output
    (delete-other-windows)
    (pop-to-buffer (get-buffer-create "*MH-E Spammer Frequencies*"))
    (erase-buffer)
    (maphash (lambda (key value)
               (if (> value 2)
                   (insert (format "%s %s\n" key value))))
             domains)
    ;; Sort ascending on the count in field 2, then flip the lines so that
    ;; the highest counts come first.
    (sort-numeric-fields 2 (point-min) (point-max))
    (reverse-region (point-min) (point-max))
    (goto-char (point-min))))
301 | ||
f0d73c14 BW |
302 | \f |
303 | ||
304 | ;; Bogofilter Interface | |
305 | ||
(defvar mh-bogofilter-executable (executable-find "bogofilter")
  "File name of the `bogofilter' program as found by `executable-find'.
This is nil if the program was not found, in which case the Bogofilter
blacklist and whitelist functions signal an error.")
307 | ||
(defun mh-bogofilter-blacklist (msg)
  "Blacklist MSG with Bogofilter.

Bogofilter is a Bayesian spam filtering program. Get it from your local
distribution or from http://bogofilter.sourceforge.net/.

Bogofilter is taught by running:

    bogofilter -n < good-message

on every good message, and

    bogofilter -s < spam-message

on every spam message. This is called a full training; three other
training methods are described in the FAQ that is distributed with bogofilter.
Note that most Bayesian filters need 1000 to 5000 of each type of message to
start doing a good job.

To use Bogofilter, add the following recipes to `.procmailrc':

    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with Bogofilter.
    :0fw
    | bogofilter -3 -e -p

    :0:
    * ^X-Bogosity: Yes, tests=bogofilter
    spam/.

    :0:
    * ^X-Bogosity: Unsure, tests=bogofilter
    spam/unsure/.

If Bogofilter classifies a message incorrectly, or is unsure, you can use the
MH-E commands \\[mh-junk-blacklist] and \\[mh-junk-whitelist] to update
Bogofilter's training.

The \"Bogofilter FAQ\" suggests that you run the following
occasionally to shrink the database:

    bogoutil -d wordlist.db | bogoutil -l wordlist.db.new
    mv wordlist.db wordlist.db.prv
    mv wordlist.db.new wordlist.db

The \"Bogofilter tuning HOWTO\" describes how you can fine-tune Bogofilter."
  (or mh-bogofilter-executable
      (error "Unable to find the bogofilter executable"))
  ;; Feed the message file to "bogofilter -s" on standard input; the output
  ;; destination is whatever `mh-junk-background' specifies.
  (call-process mh-bogofilter-executable
                (mh-msg-filename msg mh-current-folder)
                mh-junk-background
                nil
                "-s"))
360 | ||
(defun mh-bogofilter-whitelist (msg)
  "Whitelist MSG with Bogofilter.

See `mh-bogofilter-blacklist' for more information."
  (or mh-bogofilter-executable
      (error "Unable to find the bogofilter executable"))
  ;; Feed the message file to "bogofilter -n" on standard input; the output
  ;; destination is whatever `mh-junk-background' specifies.
  (call-process mh-bogofilter-executable
                (mh-msg-filename msg mh-current-folder)
                mh-junk-background
                nil
                "-n"))
370 | ||
371 | \f | |
372 | ||
373 | ;; Spamprobe Interface | |
374 | ||
(defvar mh-spamprobe-executable (executable-find "spamprobe")
  "File name of the `spamprobe' program as found by `executable-find'.
This is nil if the program was not found, in which case the SpamProbe
blacklist and whitelist functions signal an error.")
376 | ||
(defun mh-spamprobe-blacklist (msg)
  "Blacklist MSG with SpamProbe.

SpamProbe is a Bayesian spam filtering program. Get it from your local
distribution or from http://spamprobe.sourceforge.net.

To use SpamProbe, add the following recipes to `.procmailrc':

    MAILDIR=$HOME/`mhparam Path`

    # Fight spam with SpamProbe.
    :0
    SCORE=| spamprobe receive

    :0 wf
    | formail -I \"X-SpamProbe: $SCORE\"

    :0:
    *^X-SpamProbe: SPAM
    spam/.

If SpamProbe classifies a message incorrectly, you can use the MH-E commands
\\[mh-junk-blacklist] and \\[mh-junk-whitelist] to update SpamProbe's
training."
  (or mh-spamprobe-executable
      (error "Unable to find the spamprobe executable"))
  ;; Feed the message file to "spamprobe spam" on standard input; the output
  ;; destination is whatever `mh-junk-background' specifies.
  (call-process mh-spamprobe-executable
                (mh-msg-filename msg mh-current-folder)
                mh-junk-background
                nil
                "spam"))
406 | ||
(defun mh-spamprobe-whitelist (msg)
  "Whitelist MSG with SpamProbe.

See `mh-spamprobe-blacklist' for more information."
  (or mh-spamprobe-executable
      (error "Unable to find the spamprobe executable"))
  ;; Feed the message file to "spamprobe good" on standard input; the output
  ;; destination is whatever `mh-junk-background' specifies.
  (call-process mh-spamprobe-executable
                (mh-msg-filename msg mh-current-folder)
                mh-junk-background
                nil
                "good"))
416 | ||
924df208 BW |
417 | (provide 'mh-junk) |
418 | ||
419 | ;;; Local Variables: | |
420 | ;;; indent-tabs-mode: nil | |
421 | ;;; sentence-end-double-space: nil | |
422 | ;;; End: | |
423 | ||
ab5796a9 | 424 | ;;; arch-tag: 603335f1-77ff-4306-8828-5d3dad51abe1 |
924df208 | 425 | ;;; mh-junk.el ends here |