Commit | Line | Data |
---|---|---|
924df208 BW |
1 | ;;; mh-junk.el --- Interface to anti-spam measures |
2 | ||
3 | ;; Copyright (C) 2003 Free Software Foundation, Inc. | |
4 | ||
5 | ;; Author: Satyaki Das <satyaki@theforce.stanford.edu>, | |
6 | ;; Bill Wohler <wohler@newt.com> | |
7 | ;; Maintainer: Bill Wohler <wohler@newt.com> | |
8 | ;; Keywords: mail, spam | |
9 | ||
10 | ;; This file is part of GNU Emacs. | |
11 | ||
12 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
13 | ;; it under the terms of the GNU General Public License as published by | |
14 | ;; the Free Software Foundation; either version 2, or (at your option) | |
15 | ;; any later version. | |
16 | ||
17 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
18 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | ;; GNU General Public License for more details. | |
21 | ||
22 | ;; You should have received a copy of the GNU General Public License | |
23 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
24 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
25 | ;; Boston, MA 02111-1307, USA. | |
26 | ||
27 | ;;; Commentary: | |
28 | ||
29 | ;; Spam handling in MH-E. | |
30 | ||
31 | ;;; Change Log: | |
32 | ||
33 | ;;; Code: | |
34 | ||
35 | (require 'mh-e) | |
36 | ||
37 | ;; Interactive functions callable from the folder buffer | |
38 | ;;;###mh-autoload | |
(defun mh-junk-blacklist (msg-or-seq)
  "Blacklist MSG-OR-SEQ as spam.
Default is the displayed message.
If optional prefix argument is provided, then prompt for the message sequence.
If variable `transient-mark-mode' is non-nil and the mark is active, then the
selected region is blacklisted.
In a program, MSG-OR-SEQ can be a message number, a list of message numbers, a
region in a cons cell, or a sequence.

First the appropriate function is called depending on the value of
`mh-junk-choice'. Then if `mh-junk-mail-folder' is a string then the message is
refiled to that folder. If nil, the message is deleted.

To change the spam program being used, customize `mh-junk-program'. Directly
setting `mh-junk-choice' is not recommended.

The documentation for the following functions describes what setup is needed
for the different spam fighting programs:

  - `mh-bogofilter-blacklist'
  - `mh-spamprobe-blacklist'
  - `mh-spamassassin-blacklist'"
  (interactive (list (mh-interactive-msg-or-seq "Blacklist")))
  ;; The second element of the `mh-junk-function-alist' entry selected by
  ;; `mh-junk-choice' is the function that blacklists one message.
  (let ((trainer (car (cdr (assoc mh-junk-choice mh-junk-function-alist)))))
    (if (null trainer)
        (error "Customize `mh-junk-program' appropriately"))
    (let* ((folder mh-junk-mail-folder)
           ;; Normalize the junk folder into a folder name, or nil when
           ;; blacklisted messages are to be deleted instead of refiled.
           (target
            (cond ((null folder) nil)
                  ((equal folder "") "+")
                  (t
                   (let ((lead (aref folder 0)))
                     (cond ((eq lead ?+) folder)
                           ;; A leading "@" names a folder below the
                           ;; current one.
                           ((eq lead ?@)
                            (concat mh-current-folder "/"
                                    (substring folder 1)))
                           (t (concat "+" folder))))))))
      (mh-iterate-on-msg-or-seq msg msg-or-seq
        (funcall (symbol-function trainer) msg)
        (if (null target)
            (mh-delete-a-msg nil)
          (mh-refile-a-msg nil (intern target))))
      (mh-next-msg))))
79 | ||
80 | ;;;###mh-autoload | |
(defun mh-junk-whitelist (msg-or-seq)
  "Whitelist MSG-OR-SEQ incorrectly classified as spam.
Default is the displayed message.
If optional prefix argument is provided, then prompt for the message sequence.
If variable `transient-mark-mode' is non-nil and the mark is active, then the
selected region is whitelisted.
In a program, MSG-OR-SEQ can be a message number, a list of message numbers, a
region in a cons cell, or a sequence.

First the appropriate function is called depending on the value of
`mh-junk-choice'. Then the message is refiled to `mh-inbox'.

To change the spam program being used, customize `mh-junk-program'. Directly
setting `mh-junk-choice' is not recommended."
  (interactive (list (mh-interactive-msg-or-seq "Whitelist")))
  ;; The third element of the `mh-junk-function-alist' entry selected by
  ;; `mh-junk-choice' is the function that whitelists one message.
  (let ((trainer (car (nthcdr 2 (assoc mh-junk-choice
                                       mh-junk-function-alist)))))
    (if (null trainer)
        (error "Customize `mh-junk-program' appropriately"))
    (mh-iterate-on-msg-or-seq msg msg-or-seq
      (funcall (symbol-function trainer) msg)
      (mh-refile-a-msg nil (intern mh-inbox)))
    (mh-next-msg)))
103 | ||
104 | \f | |
105 | ||
106 | ;; Bogofilter Interface | |
107 | ||
(defvar mh-bogofilter-executable (executable-find "bogofilter")
  "Value returned by `executable-find' for the \"bogofilter\" program.
It is nil if the program was not found when this variable was defined.")
109 | ||
(defun mh-bogofilter-blacklist (msg)
  "Classify MSG as spam.
Tell bogofilter that the message is spam.

Bogofilter is a Bayesian spam filtering program. Get it from your local
distribution or from:
   http://bogofilter.sourceforge.net/

You first need to teach bogofilter. This is done by running

   bogofilter -n < good-message

on every good message, and

   bogofilter -s < spam-message

on every spam message. Most Bayesian filters need 1000 to 5000 of each to
start doing a good job.

To use bogofilter, add the following .procmailrc recipes which you can also
find in the bogofilter man page:

   # Bogofilter
   :0fw
   | bogofilter -u -e -p

   :0
   * ^X-Bogosity: Yes, tests=bogofilter
   $SPAM

Bogofilter continues to feed the messages it classifies back into its
database. Occasionally it misses, and those messages need to be reclassified.
MH-E can do this for you. Use \\[mh-junk-blacklist] to reclassify messages in
your +inbox as spam, and \\[mh-junk-whitelist] to reclassify messages in your
spambox as good messages."
  (if (null mh-bogofilter-executable)
      (error "Couldn't find the bogofilter executable"))
  ;; The message file is given to bogofilter as standard input.  A
  ;; destination of 0 discards the output without waiting for the program.
  (call-process mh-bogofilter-executable
                (mh-msg-filename msg mh-current-folder)
                0 nil "-Ns"))
149 | ||
(defun mh-bogofilter-whitelist (msg)
  "Reinstate incorrectly filtered MSG.
Train bogofilter to think of the message as non-spam."
  (if (null mh-bogofilter-executable)
      (error "Couldn't find the bogofilter executable"))
  ;; The message file is given to bogofilter as standard input.  A
  ;; destination of 0 discards the output without waiting for the program.
  (call-process mh-bogofilter-executable
                (mh-msg-filename msg mh-current-folder)
                0 nil "-Sn"))
157 | ||
158 | \f | |
159 | ||
160 | ;; Spamprobe Interface | |
161 | ||
(defvar mh-spamprobe-executable (executable-find "spamprobe")
  "Value returned by `executable-find' for the \"spamprobe\" program.
It is nil if the program was not found when this variable was defined.")
163 | ||
(defun mh-spamprobe-blacklist (msg)
  "Classify MSG as spam.
Tell spamprobe that the message is spam.

Spamprobe is a Bayesian spam filtering program. More info about the program can
be found at:
   http://spamprobe.sourceforge.net

Here is a procmail recipe to store incoming spam mail into the folder +spam
and good mail in /home/user/Mail/mdrop/mbox. This recipe is provided as an
example in the spamprobe man page.

   PATH=/bin:/usr/bin:/usr/local/bin
   DEFAULT=/home/user/Mail/mdrop/mbox
   SPAM=/home/user/Mail/spam/.

   # Spamprobe filtering
   :0
   SCORE=| spamprobe receive
   :0 wf
   | formail -I \"X-SpamProbe: $SCORE\"
   :0 a:
   *^X-SpamProbe: SPAM
   $SPAM

Occasionally some good mail gets misclassified as spam. You can use
\\[mh-junk-whitelist] to reclassify that as good mail."
  (if (null mh-spamprobe-executable)
      (error "Couldn't find the spamprobe executable"))
  ;; The message file is given to spamprobe as standard input.  A
  ;; destination of 0 discards the output without waiting for the program.
  (call-process mh-spamprobe-executable
                (mh-msg-filename msg mh-current-folder)
                0 nil "spam"))
195 | ||
(defun mh-spamprobe-whitelist (msg)
  "Reinstate incorrectly filtered MSG.
Train spamprobe to think of the message as non-spam."
  (if (null mh-spamprobe-executable)
      (error "Couldn't find the spamprobe executable"))
  ;; The message file is given to spamprobe as standard input.  A
  ;; destination of 0 discards the output without waiting for the program.
  (call-process mh-spamprobe-executable
                (mh-msg-filename msg mh-current-folder)
                0 nil "good"))
203 | ||
204 | \f | |
205 | ||
206 | ;; Spamassassin Interface | |
207 | ||
(defvar mh-spamassassin-executable (executable-find "spamassassin")
  "Value returned by `executable-find' for the \"spamassassin\" program.
It is nil if the program was not found when this variable was defined.")

(defvar mh-sa-learn-executable (executable-find "sa-learn")
  "Value returned by `executable-find' for the \"sa-learn\" program.
It is nil if the program was not found when this variable was defined.")
210 | ||
(defun mh-spamassassin-blacklist (msg)
  "Blacklist MSG.
This is done by sending the message to Razor and by appending the sender to
~/.spamassassin/user_prefs in a blacklist_from rule. If sa-learn is available,
the message is also recategorized as spam.

Spamassassin is an excellent spam filter. For more information, see:
   http://spamassassin.org/.

I ran \"spamassassin -t\" on every mail message in my archive and ran an
analysis in Gnumeric to find that the standard deviation of good mail
scored under 5 (coincidentally, the spamassassin default for \"spam\").

Furthermore, I observed that there weren't any messages with a score of 8
or more that were interesting, so I added a couple of points to be
conservative and send any message with a score of 10 or more down the
drain. You might want to use a score of 12 or 13 to be really conservative.
I have found that this really decreases the amount of junk to review.

Messages with a score of 5-9 are set aside for later review. The major
weakness of rules-based filters is a plethora of false positives\; I catch one
or two legitimate messages in here a week, so it is worthwhile to check.

You might choose to do this analysis yourself to pick a good score for
deleting spam sight unseen, or you might pick a score out of a hat, or you
might choose to be very conservative and not delete any messages at all.

Based upon this discussion, here is what the associated ~/.procmailrc
entries look like. These rules appear before my list filters so that spam
sent to mailing lists gets pruned too.

   #
   # Spam
   #
   :0fw
   | spamc

   # Anything with a spam level of 10 or more is junked immediately.
   :0:
   * ^X-Spam-Level: ..........
   /dev/null

   :0
   * ^X-Spam-Status: Yes
   $SPAM

If you don't use \"spamc\", use \"spamassassin -P -a\".

A handful of spam does find its way into +inbox. In this case, use
\\[mh-junk-blacklist] to add a \"blacklist_from\" line to
~/.spamassassin/user_prefs, delete the message, and send the message to the
Razor, so that others might not see this spam.

Over time, you see some patterns in the blacklisted addresses and can
replace several lines with wildcards. For example, it is clear that High
Speed Media is the biggest bunch of jerks on the Net. Here are some of the
entries I have for them, and the list continues to grow.

   blacklist_from *@*-hsm-*.com
   blacklist_from *@*182*643*.com
   blacklist_from *@*antarhsm*.com
   blacklist_from *@*h*speed*
   blacklist_from *@*hsm*182*.com
   blacklist_from *@*hsm*643*.com
   blacklist_from *@*hsmridi2983cslt227.com
   blacklist_from *@*list*hsm*.com
   blacklist_from *@h*s*media*
   blacklist_from *@hsmdrct.com
   blacklist_from *@hsmridi2983csltsite.com

The function `mh-spamassassin-identify-spammers' is provided that shows the
frequency counts of the host and domain names in your blacklist_from
entries. This can be helpful when editing the blacklist_from entries.

In versions of spamassassin (2.50 and on) that support a Bayesian classifier,
\\[mh-junk-blacklist] uses the sa-learn program to recategorize the message as
spam. Neither MH-E, nor spamassassin, rebuilds the database after adding
words, so you will need to run \"sa-learn --rebuild\" periodically. This can
be done by adding the following to your crontab:

   0 * * * * sa-learn --rebuild > /dev/null 2>&1"
  (unless mh-spamassassin-executable
    (error "Couldn't find the spamassassin executable"))
  ;; The folder name is captured before `set-buffer' below switches to the
  ;; temporary buffer (presumably `mh-current-folder' is buffer-local).
  (let ((current-folder mh-current-folder)
        (msg-file (mh-msg-filename msg mh-current-folder))
        sender)
    (save-excursion
      (message "Giving this message the Razor...")
      (mh-truncate-log-buffer)
      (call-process mh-spamassassin-executable msg-file mh-log-buffer nil
                    "--report" "--remove-from-whitelist")
      (when mh-sa-learn-executable
        (message "Recategorizing this message as spam...")
        ;; `call-process' hands each string to the program as one argument,
        ;; so every option has to be a separate string.
        (call-process mh-sa-learn-executable msg-file mh-log-buffer nil
                      "--single" "--spam" "--local" "--no-rebuild"))
      (message "Blacklisting address...")
      (set-buffer (get-buffer-create mh-temp-buffer))
      (erase-buffer)
      ;; Run scan on the message with a format that selects on
      ;; (mymbox{from}); a non-empty output line is used as the sender.
      (call-process (expand-file-name mh-scan-prog mh-progs) nil t nil
                    (format "%s" msg) current-folder
                    "-format" "%<(mymbox{from})%|%(addr{from})%>")
      (goto-char (point-min))
      (if (search-forward-regexp "^\\(.+\\)$" nil t)
          (progn
            (setq sender (match-string 0))
            (mh-spamassassin-add-rule "blacklist_from" sender)
            (message "Blacklisting address...done"))
        (message "Blacklisting address...not done (from my address)")))))
319 | ||
(defun mh-spamassassin-whitelist (msg)
  "Whitelist MSG.
Add a whitelist_from rule to the ~/.spamassassin/user_prefs file. If sa-learn
is available, then the message is recategorized as ham.

The message file is first rewritten with the output of
\"spamassassin --remove-markup\". No rule is added when no address can be
extracted from the From: header field."
  (unless mh-spamassassin-executable
    (error "Couldn't find the spamassassin executable"))
  (let ((msg-file (mh-msg-filename msg mh-current-folder))
        (show-buffer (get-buffer mh-show-buffer))
        from)
    (save-excursion
      (set-buffer (get-buffer-create mh-temp-buffer))
      (erase-buffer)
      (message "Removing spamassassin markup from message...")
      (call-process mh-spamassassin-executable msg-file mh-temp-buffer nil
                    "--remove-markup")
      ;; Kill the show buffer, if any, before the message file is rewritten.
      (if show-buffer
          (kill-buffer show-buffer))
      (write-file msg-file)
      (when mh-sa-learn-executable
        (message "Recategorizing this message as ham...")
        ;; `call-process' hands each string to the program as one argument,
        ;; so every option has to be a separate string.
        (call-process mh-sa-learn-executable msg-file mh-temp-buffer nil
                      "--single" "--ham" "--local" "--no-rebuild"))
      (message "Whitelisting address...")
      (setq from (car (ietf-drums-parse-address (mh-get-header-field "From:"))))
      (kill-buffer nil)
      ;; FROM is nil when the header could not be parsed; without this guard
      ;; a rule for the literal text "nil" would be written.
      (unless (or (null from) (equal from ""))
        (mh-spamassassin-add-rule "whitelist_from" from))
      (message "Whitelisting address...done"))))
348 | ||
(defun mh-spamassassin-add-rule (rule body)
  "Add a new rule to ~/.spamassassin/user_prefs.
The name of the rule is RULE and its body is BODY.

The rule is written as RULE, a tab, and BODY on a line of its own, appended
to the end of the file, and the file is saved. If a line with the same text
already exists anywhere in the file (ignoring case), nothing is changed.
If the file was not being visited before the call, its buffer is killed
afterwards."
  (save-window-excursion
    (let* ((entry (format "%s\t%s" rule body))
           (case-fold-search t)
           (file (expand-file-name "~/.spamassassin/user_prefs"))
           (buffer-exists (find-buffer-visiting file)))
      (find-file file)
      ;; `find-file' leaves point where it was in a buffer that is already
      ;; being visited, so look for an existing rule from the start of the
      ;; buffer.  `save-excursion' puts point back afterwards.
      (save-excursion
        (goto-char (point-min))
        ;; Anchoring at both ends also finds the rule on the first line and
        ;; on a last line that has no trailing newline.
        (unless (re-search-forward (concat "^" (regexp-quote entry) "$")
                                   nil t)
          (goto-char (point-max))
          (insert (if (bolp) "" "\n") entry "\n")
          (save-buffer)))
      (if (not buffer-exists)
          (kill-buffer nil)))))
365 | ||
(defun mh-spamassassin-identify-spammers ()
  "Identifies spammers who are repeat offenders.

For each blacklist_from entry from the last blank line of
~/.spamassassin/user_prefs to the end of the file, a list of host and domain
names along with their frequency counts is displayed. This information can be
used to replace multiple blacklist_from entries with a single wildcard entry
such as:

   blacklist_from *@*amazingoffersdirect2u.com

If the file contains no blank line, all of its entries are considered. Only
names that occur more than twice are listed, most frequent first."
  (interactive)
  (let* ((file (expand-file-name "~/.spamassassin/user_prefs"))
         (domains (make-hash-table :test 'equal)))
    (find-file file)
    ;; Only consider entries between last blank line and end of file.
    ;; Step back one character first so that the empty line following a
    ;; final newline is not taken for the blank line.
    (goto-char (1- (point-max)))
    (unless (search-backward-regexp "^$" nil t)
      ;; No blank line at all: count from the start of the file instead of
      ;; signaling a search error.
      (goto-char (point-min)))
    ;; Perform frequency count.
    (save-excursion
      (while (search-forward-regexp "^blacklist_from\\s-*\\(.*\\)@\\(.*\\)$"
                                    nil t)
        ;; Split the part after the "@" on dots and drop the last piece
        ;; (the top-level domain); count each remaining piece.
        (let ((parts (cdr (reverse (split-string (match-string 2) "\\.")))))
          (while parts
            (puthash (car parts)
                     (1+ (or (gethash (car parts) domains) 0))
                     domains)
            (setq parts (cdr parts))))))

    ;; Output
    (delete-other-windows)
    (pop-to-buffer (get-buffer-create "*MH-E Spammer Frequencies*"))
    (erase-buffer)
    (maphash (lambda (key value)
               (if (> value 2)
                   (insert (format "%s %s\n" key value))))
             domains)
    ;; Sort ascending on the count field, then flip for descending order.
    (sort-numeric-fields 2 (point-min) (point-max))
    (reverse-region (point-min) (point-max))
    (goto-char (point-min))))
408 | ||
409 | (provide 'mh-junk) | |
410 | ||
411 | ;;; Local Variables: | |
412 | ;;; indent-tabs-mode: nil | |
413 | ;;; sentence-end-double-space: nil | |
414 | ;;; End: | |
415 | ||
6b61353c | 416 | ;;; arch-tag: 603335f1-77ff-4306-8828-5d3dad51abe1 |
924df208 | 417 | ;;; mh-junk.el ends here |