Move lisp/emacs-lisp/authors.el to admin/
[bpt/emacs.git] / lisp / gnus / deuglify.el
CommitLineData
23f87bed
MB
1;;; deuglify.el --- deuglify broken Outlook (Express) articles
2
ba318903 3;; Copyright (C) 2001-2014 Free Software Foundation, Inc.
23f87bed
MB
4
5;; Author: Raymond Scholz <rscholz@zonix.de>
1cc3c18f
GM
6;; Thomas Steffen
7;; (unwrapping algorithm, based on an idea of Stefan Monnier)
23f87bed
MB
8;; Keywords: mail, news
9
10;; This file is part of GNU Emacs.
11
5e809f55 12;; GNU Emacs is free software: you can redistribute it and/or modify
23f87bed 13;; it under the terms of the GNU General Public License as published by
5e809f55
GM
14;; the Free Software Foundation, either version 3 of the License, or
15;; (at your option) any later version.
23f87bed
MB
16
17;; GNU Emacs is distributed in the hope that it will be useful,
18;; but WITHOUT ANY WARRANTY; without even the implied warranty of
5e809f55 19;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23f87bed
MB
20;; GNU General Public License for more details.
21
22;; You should have received a copy of the GNU General Public License
5e809f55 23;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
23f87bed
MB
24
25;;; Commentary:
26
27;; This file enables Gnus to repair broken citations produced by
28;; common user agents like MS Outlook (Express). It may repair
29;; articles of other user agents too.
30;;
31;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
32
33;;
34;; Outlook sometimes wraps cited lines before sending a message as
35;; seen in this example:
36;;
37;; Example #1
38;; ----------
39;;
40;; John Doe wrote:
41;;
42;; > This sentence no verb. This sentence no verb. This sentence
43;; no
44;; > verb. This sentence no verb. This sentence no verb. This
45;; > sentence no verb.
46;;
47;; The function `gnus-article-outlook-unwrap-lines' tries to recognize those
48;; erroneously wrapped lines and will unwrap them. I.e. putting the
49;; wrapped parts ("no" in this example) back where they belong (at the
50;; end of the cited line above).
51;;
52;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
53;;
54;; Note that some people not only use broken user agents but also
55;; practice a bad citation style by omitting blank lines between the
56;; cited text and their own text.
57;;
58;; Example #2
59;; ----------
60;;
61;; John Doe wrote:
62;;
63;; > This sentence no verb. This sentence no verb. This sentence no
64;; You forgot in all your sentences.
65;; > verb. This sentence no verb. This sentence no verb. This
66;; > sentence no verb.
67;;
0138efd4 68;; Unwrapping "You forgot in all your sentences." would be invalid as
23f87bed
MB
69;; this part wasn't intended to be cited text.
70;; `gnus-article-outlook-unwrap-lines' will only unwrap lines if the resulting
71;; citation line will be of a certain maximum length. You can control
72;; this by adjusting `gnus-outlook-deuglify-unwrap-max'. Also
73;; unwrapping will only be done if the line above the (possibly)
74;; wrapped line has a minimum length of `gnus-outlook-deuglify-unwrap-min'.
75;;
76;; Furthermore no unwrapping will be undertaken if the last character
77;; is one of the chars specified in
78;; `gnus-outlook-deuglify-unwrap-stop-chars'. Setting this to ".?!"
79;; inhibits unwrapping if the cited line ends with a full stop,
80;; question mark or exclamation mark. Note that this variable
81;; defaults to `nil', triggering a few false positives but generally
82;; giving you better results.
83;;
84;; Unwrapping works on every level of citation. Thus you will be able to
85;; repair broken citations of broken user agents citing broken
86;; citations of broken user agents citing broken citations...
87;;
88;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
89;;
90;; Citations are commonly introduced with an attribution line
91;; indicating who wrote the cited text. Outlook adds superfluous
92;; information that can be found in the header of the message to this
93;; line and often wraps it.
94;;
95;; If that weren't enough, lots of people write their own text above
96;; the cited text and cite the complete original article below.
97;;
98;; Example #3
99;; ----------
100;;
101;; Hey, John. There's no in all your sentences!
102;;
103;; John Doe <john.doe@some.domain> wrote in message
104;; news:a87usw8$dklsssa$2@some.news.server...
105;; > This sentence no verb. This sentence no verb. This sentence
106;; no
107;; > verb. This sentence no verb. This sentence no verb. This
108;; > sentence no verb.
109;; >
110;; > Bye, John
111;;
112;; Repairing the attribution line will be done by function
113;; `gnus-article-outlook-repair-attribution', which calls other functions
114;; that try to recognize and repair broken attribution lines. See variable
115;; `gnus-outlook-deuglify-attrib-cut-regexp' for stuff that should be
116;; cut off from the beginning of an attribution line and variable
117;; `gnus-outlook-deuglify-attrib-verb-regexp' for the verbs that are
118;; required to be found in an attribution line. These functions return
119;; the point where the repaired attribution line starts.
120;;
121;; Rearranging the article so that the cited text appears above the
122;; new text will be done by function
123;; `gnus-article-outlook-rearrange-citation'. This function calls
124;; `gnus-article-outlook-repair-attribution' to find and repair an attribution
125;; line.
126;;
127;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
128;;
129;; Well, and that's what the message will look like after applying
130;; deuglification:
131;;
132;; Example #3 (deuglified)
133;; -----------------------
134;;
135;; John Doe <john.doe@some.domain> wrote:
136;;
137;; > This sentence no verb. This sentence no verb. This sentence no
138;; > verb. This sentence no verb. This sentence no verb. This
139;; > sentence no verb.
140;; >
141;; > Bye, John
142;;
143;; Hey, John. There's no in all your sentences!
144;;
145;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
bf247b6e 146;;
23f87bed
MB
147;; Usage
148;; -----
149;;
150;; Press `W k' in the Summary Buffer.
151;;
152;; Non recommended usage :-)
153;; ---------------------
154;;
155;; To automatically invoke deuglification on every article you read,
156;; put something like that in your .gnus:
157;;
158;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-unwrap-lines)
159;;
160;; or _one_ of the following lines:
161;;
162;; ;; repair broken attribution lines
163;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-repair-attribution)
164;;
165;; ;; repair broken attribution lines and citations
166;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-rearrange-citation)
167;;
168;; Note that there always may be some false positives, so I suggest
169;; using the manual invocation. After deuglification you may want to
170;; refill the whole article using `W w'.
171;;
172;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
173;;
174;; Limitations
175;; -----------
176;;
177;; As I said before there may (or will) be a few false positives on
178;; unwrapping cited lines with `gnus-article-outlook-unwrap-lines'.
179;;
180;; `gnus-article-outlook-repair-attribution' will only fix the first
181;; attribution line found in the article. Furthermore it is limited to
182;; certain kinds of attributions. And there may be horribly many
183;; false positives, vanishing lines and so on -- so don't trust your
184;; eyes. Again I recommend manual invocation.
185;;
186;; `gnus-article-outlook-rearrange-citation' carries all the limitations of
187;; `gnus-article-outlook-repair-attribution'.
188;;
189;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
190;;
191;; See ChangeLog for other changes.
192;;
193;; Revision 1.5 2002/01/27 14:39:17 rscholz
194;; * New variable `gnus-outlook-deuglify-no-wrap-chars' to inhibit
195;; unwrapping if one of these chars is first in the possibly wrapped line.
196;; * Improved rearranging of the article.
197;; * New function `gnus-outlook-repair-attribution-block' for repairing
198;; those big "Original Message (following some headers)" attributions.
199;;
200;; Revision 1.4 2002/01/03 14:05:00 rscholz
201;; Renamed `gnus-outlook-deuglify-article' to
202;; `gnus-article-outlook-deuglify-article'.
203;; Made it easier to deuglify the article while being in Gnus' Article
204;; Edit Mode. (suggested by Phil Nitschke)
205;;
206;;
207;; Revision 1.3 2002/01/02 23:35:54 rscholz
208;; Fix a bug that caused succeeding long attribution lines to be
209;; unwrapped. Minor doc fixes and regular expression tuning.
210;;
211;; Revision 1.2 2001/12/30 20:14:34 rscholz
212;; Clean up source.
213;;
214;; Revision 1.1 2001/12/30 20:13:32 rscholz
215;; Initial revision
216;;
217;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
218
219;;; Code:
220
221(require 'gnus-art)
222(require 'gnus-sum)
223
(defconst gnus-outlook-deuglify-version
  "1.5 Gnus version"
  "Version string of the gnus-outlook-deuglify code.")
226
227;;; User Customizable Variables:
228
;; Customization group that collects every deuglify option; it is
;; itself a member of the `gnus' group.
(defgroup gnus-outlook-deuglify nil
  "Repair articles produced by broken user agents like MS Outlook (Express)."
  :group 'gnus
  :version "22.1")
23f87bed 233
23f87bed
MB
(defcustom gnus-outlook-deuglify-unwrap-min 45
  "Minimum length a cited line must have to be considered wrapped.
Only when the cited line above a (possibly) wrapped line is longer
than this many characters is the line below joined to it."
  :group 'gnus-outlook-deuglify
  :type 'integer
  :version "22.1")
239
23f87bed
MB
(defcustom gnus-outlook-deuglify-unwrap-max 95
  "Upper bound on the length of a cited line after unwrapping.
A wrapped line is only joined to the cited line above it when the
resulting line is shorter than this many characters."
  :group 'gnus-outlook-deuglify
  :type 'integer
  :version "22.1")
245
(defcustom gnus-outlook-deuglify-cite-marks ">|#%"
  "String of characters that mark a line as cited.
The characters are spliced into regexp bracket expressions by the
deuglify functions."
  :group 'gnus-outlook-deuglify
  :type 'string
  :version "22.1")
251
(defcustom gnus-outlook-deuglify-unwrap-stop-chars nil ;; ".?!" or nil
  "Characters that inhibit unwrapping when they end the cited line.
If the cited line above the possibly wrapped line ends with one of
these characters, the line below is not joined to it.  The value is
either nil (no such characters) or a string such as \".?!\"."
  :group 'gnus-outlook-deuglify
  :type '(radio (const :format "None " nil)
                (string :value ".?!"))
  :version "22.1")
258
(defcustom gnus-outlook-deuglify-no-wrap-chars "`"
  "Characters that inhibit unwrapping when they start the wrapped line.
A possibly wrapped line whose first character is one of these is
left where it is."
  :group 'gnus-outlook-deuglify
  :type 'string
  :version "22.1")
264
(defcustom gnus-outlook-deuglify-attrib-cut-regexp
  "\\(On \\|Am \\)?\\(Mon\\|Tue\\|Wed\\|Thu\\|Fri\\|Sat\\|Sun\\),[^,]+, "
  "Regexp matching the start of an attribution line that is cut off.
Text matched by this regexp at the beginning of an attribution line
is dropped when the line is repaired."
  :group 'gnus-outlook-deuglify
  :type 'string
  :version "22.1")
271
(defcustom gnus-outlook-deuglify-attrib-verb-regexp
  "wrote\\|writes\\|says\\|schrieb\\|schreibt\\|meinte\\|skrev\\|a écrit\\|schreef\\|escribió"
  "Regexp matching the verb of an attribution line.
A line is only treated as an attribution line if it contains a match
for this regexp."
  :group 'gnus-outlook-deuglify
  :type 'string
  :version "22.1")
278
(defcustom gnus-outlook-deuglify-attrib-end-regexp
  ": *\\|\\.\\.\\."
  "Regexp matching the text that terminates an attribution line.
The default matches a colon with optional trailing spaces, or an
ellipsis of three dots."
  :group 'gnus-outlook-deuglify
  :type 'string
  :version "22.1")
285
23f87bed
MB
(defcustom gnus-outlook-display-hook nil
  "Hook run after a deuglified article has been prepared for display.
It is run after `gnus-article-prepare-hook'."
  :group 'gnus-outlook-deuglify
  :type 'hook
  :version "22.1")
292
293;; Functions
294
(defun gnus-outlook-display-article-buffer ()
  "Redisplay the article buffer, or the current buffer if there is none.
Highlights and treats the article, then runs
`gnus-article-prepare-hook' followed by `gnus-outlook-display-hook'."
  (let ((target (or gnus-article-buffer (current-buffer))))
    (with-current-buffer target
      ;; This imitates `gnus-article-prepare-display' instead of
      ;; calling it: calling that function on an article that has
      ;; already been prepared removes all MIME parts.  (The original
      ;; author was unsure whether that is a bug.)
      (gnus-article-highlight t)
      (gnus-treat-article nil)
      (gnus-run-hooks 'gnus-article-prepare-hook
                      'gnus-outlook-display-hook))))
306
;;;###autoload
(defun gnus-article-outlook-unwrap-lines (&optional nodisplay)
  "Unwrap lines that appear to be wrapped citation lines.
A line is joined to the cited line above it when it follows the same
citation prefix, when the cited line is longer than
`gnus-outlook-deuglify-unwrap-min', and when the joined line would be
shorter than `gnus-outlook-deuglify-unwrap-max'.  The characters in
`gnus-outlook-deuglify-unwrap-stop-chars' and
`gnus-outlook-deuglify-no-wrap-chars' further inhibit unwrapping.
If NODISPLAY is non-nil, don't redisplay the article buffer."
  (interactive "P")
  (let* ((inhibit-read-only t)
         (cite-marks gnus-outlook-deuglify-cite-marks)
         (no-wrap gnus-outlook-deuglify-no-wrap-chars)
         (stop-chars gnus-outlook-deuglify-unwrap-stop-chars)
         ;; The regexp does not change while looping, so build it once.
         ;; Group 1 is the leading citation prefix, group 2 the cited
         ;; line, group 3 the wrapped remainder on the following line.
         (wrapped-re
          (concat
           "^\\([ \t" cite-marks "]*\\)"
           "\\([" cite-marks "].*[^\n " stop-chars "]\\)[ \t]?\n"
           "\\1\\([^\n " cite-marks no-wrap "]+.*\\)$")))
    (gnus-with-article-buffer
      (article-goto-body)
      ;; Bind `case-fold-search' while the article buffer is current,
      ;; so that the binding is certain to apply to the searches made
      ;; there even if that buffer has its own local value.
      (let ((case-fold-search nil))
        (while (re-search-forward wrapped-re nil t)
          (let ((len12 (- (match-end 2) (match-beginning 1)))
                (len3 (- (match-end 3) (match-beginning 3))))
            (when (and (> len12 gnus-outlook-deuglify-unwrap-min)
                       (< (+ len12 len3) gnus-outlook-deuglify-unwrap-max))
              (replace-match "\\1\\2 \\3")
              ;; Rescan the joined line: the next line may be a
              ;; further wrapped part of it.
              (goto-char (match-beginning 0))))))))
  (unless nodisplay (gnus-outlook-display-article-buffer)))
335
(defun gnus-outlook-rearrange-article (attr-start)
  "Move the text starting at ATTR-START to the top of the article body.
ATTR-START is the buffer position of the attribution line.  The text
from there up to the signature (or the end of the buffer) is swapped
with the text between the start of the body and ATTR-START.  If the
line after the attribution line does not start with \">\", that text
is first indented with `message-indent-citation'."
  ;; FIXME: 1. (*) text/plain ( ) text/html
  (let ((inhibit-read-only t))
    (gnus-with-article-buffer
      (article-goto-body)
      ;; Nothing to do when the article already starts with the
      ;; attribution line.
      (unless (= (point) attr-start)
        (gnus-kill-all-overlays)
        (let ((cur (point))
              ;; End of the text to move: just before the signature,
              ;; or the end of the buffer.
              (to (if (gnus-article-search-signature)
                      (point)
                    (point-max))))
          ;; Handle the case where the full quote is below the
          ;; signature.
          (when (< to attr-start)
            (setq to (point-max)))
          (save-excursion
            ;; `save-restriction' undoes the narrowing even if
            ;; `message-indent-citation' signals an error, and keeps
            ;; any restriction the caller had in effect.
            (save-restriction
              (narrow-to-region attr-start to)
              (goto-char attr-start)
              (forward-line)
              (unless (looking-at ">")
                (message-indent-citation (point) (point-max) 'yank-only)
                (goto-char (point-max))
                (newline)
                ;; Indenting changed the size of the region.
                (setq to (point-max)))))
          (transpose-regions cur attr-start attr-start to))))))
23f87bed
MB
366
367;; John Doe <john.doe@some.domain> wrote in message
368;; news:a87usw8$dklsssa$2@some.news.server...
369
(defun gnus-outlook-repair-attribution-outlook ()
  "Repair a broken attribution line (Outlook).
Search the article body for an attribution such as

  John Doe <john.doe@some.domain> wrote in message
  news:a87usw8$dklsssa$2@some.news.server...

and replace it with the text before the verb, the verb matched by
`gnus-outlook-deuglify-attrib-verb-regexp' and the terminator matched
by `gnus-outlook-deuglify-attrib-end-regexp'.
Return the position where the repaired line starts, or nil if no
attribution line was found."
  (let* ((inhibit-read-only t)
         (cite-marks gnus-outlook-deuglify-cite-marks)
         (verb-re gnus-outlook-deuglify-attrib-verb-regexp)
         ;; Group 1 is the text before the verb, group 2 the verb and
         ;; group 3 the optional junk after it.  The terminator is the
         ;; fourth group only if the user's verb regexp contains no
         ;; groups of its own, so account for those.
         (end-group (+ 4 (regexp-opt-depth verb-re))))
    (gnus-with-article-buffer
      (article-goto-body)
      ;; Bind `case-fold-search' while the article buffer is current,
      ;; so that the binding is certain to apply to the search.
      (let ((case-fold-search nil))
        (when (re-search-forward
               (concat "^\\([^" cite-marks "].+\\)"
                       "\\(" verb-re "\\)"
                       "\\(.*\n?[^\n" cite-marks "].*\\)?"
                       "\\(" gnus-outlook-deuglify-attrib-end-regexp "\\)$")
               nil t)
          (gnus-kill-all-overlays)
          ;; Insert the pieces literally and with their case untouched.
          (replace-match (concat (match-string 1)
                                 (match-string 2)
                                 (match-string end-group))
                         t t)
          (match-beginning 0))))))
23f87bed
MB
386
387
388;; ----- Original Message -----
389;; From: "John Doe" <john.doe@some.domain>
390;; To: "Doe Foundation" <info@doefnd.org>
391;; Sent: Monday, November 19, 2001 12:13 PM
392;; Subject: More Doenuts
393
(defun gnus-outlook-repair-attribution-block ()
  "Repair a big broken attribution block.
Search the article body for a block such as

  ----- Original Message -----
  From: \"John Doe\" <john.doe@some.domain>
  To: \"Doe Foundation\" <info@doefnd.org>
  Sent: Monday, November 19, 2001 12:13 PM
  Subject: More Doenuts

and replace it with a single line made of the value of the first
header field followed by \" wrote:\".
Return the position where the repaired line starts, or nil if no
such block was found."
  (let ((inhibit-read-only t)
        (cite-marks gnus-outlook-deuglify-cite-marks))
    (gnus-with-article-buffer
      (article-goto-body)
      ;; Bind `case-fold-search' while the article buffer is current,
      ;; so that the binding is certain to apply to the search.
      (let ((case-fold-search nil))
        (when (re-search-forward
               (concat "^[" cite-marks " \t]*--* ?[^-]+ [^-]+ ?--*\\s *\n"
                       "[^\n:]+:[ \t]*\\([^\n]+\\)\n"
                       "\\([^\n:]+:[ \t]*[^\n]+\n\\)+")
               nil t)
          (gnus-kill-all-overlays)
          ;; FIXEDCASE is t: otherwise `replace-match' would turn the
          ;; inserted text into \" WROTE:\" when the replaced block
          ;; happens to be written entirely in upper case.
          (replace-match "\\1 wrote:\n" t)
          (match-beginning 0))))))
23f87bed
MB
409
410;; On Wed, 16 Jan 2002 23:23:30 +0100, John Doe <john.doe@some.domain> wrote:
411
(defun gnus-outlook-repair-attribution-other ()
  "Repair a broken attribution line (other user agents than Outlook).
Search the article body for an attribution that was wrapped onto a
second line, such as

  On Wed, 16 Jan 2002 23:23:30 +0100, John Doe
  <john.doe@some.domain> wrote:

Drop the leading part matched by
`gnus-outlook-deuglify-attrib-cut-regexp' and join the rest into one
line that ends with the verb and the terminator.
Return the position where the repaired line starts, or nil if no
attribution line was found."
  (let* ((inhibit-read-only t)
         (cite-marks gnus-outlook-deuglify-cite-marks)
         (cut-re gnus-outlook-deuglify-attrib-cut-regexp)
         (verb-re gnus-outlook-deuglify-attrib-verb-regexp)
         ;; The cut and verb regexps are user options and may contain
         ;; any number of groups, so compute the numbers of our own
         ;; groups instead of hard-coding them.  Group 1 wraps the cut
         ;; regexp; its inner groups follow.
         (cut-depth (regexp-opt-depth cut-re))
         (name-group (+ 2 cut-depth))   ; first line of the attribution
         (rest-group (+ 3 cut-depth))   ; second line, up to the verb
         (verb-group (+ 4 cut-depth))
         (end-group (+ 5 cut-depth (regexp-opt-depth verb-re))))
    (gnus-with-article-buffer
      (article-goto-body)
      ;; Bind `case-fold-search' while the article buffer is current,
      ;; so that the binding is certain to apply to the search.
      (let ((case-fold-search nil))
        (when (re-search-forward
               (concat "^\\(" cut-re "\\)?"
                       "\\([^" cite-marks "].+\\)\n\\([^\n" cite-marks "].*\\)?"
                       "\\(" verb-re "\\).*"
                       "\\(" gnus-outlook-deuglify-attrib-end-regexp "\\)$")
               nil t)
          (gnus-kill-all-overlays)
          ;; `match-string' is nil for the optional group when it did
          ;; not take part in the match; `concat' treats that as empty.
          ;; Insert the pieces literally and with their case untouched.
          (replace-match (concat (match-string name-group)
                                 " "
                                 (match-string rest-group)
                                 (match-string verb-group)
                                 (match-string end-group))
                         t t)
          (match-beginning 0))))))
23f87bed
MB
428
;;;###autoload
(defun gnus-article-outlook-repair-attribution (&optional nodisplay)
  "Repair a broken attribution line.
The repair functions are tried in turn until one of them succeeds.
Return the value of the one that succeeded, or nil if none did.
If NODISPLAY is non-nil, don't redisplay the article buffer."
  (interactive "P")
  ;; Each clause consists of a test only, so `cond' yields the first
  ;; non-nil result and skips the remaining repair functions.
  (let ((attrib-start
         (cond ((gnus-outlook-repair-attribution-other))
               ((gnus-outlook-repair-attribution-block))
               ((gnus-outlook-repair-attribution-outlook)))))
    (or nodisplay (gnus-outlook-display-article-buffer))
    attrib-start))
441
(defun gnus-article-outlook-rearrange-citation (&optional nodisplay)
  "Repair broken citations.
Repair the attribution line first; if one was recognized, rearrange
the article starting from it.
If NODISPLAY is non-nil, don't redisplay the article buffer."
  (interactive "P")
  (let ((start (gnus-article-outlook-repair-attribution 'nodisplay)))
    ;; Without a recognized attribution line there is nothing to move.
    (when start
      (gnus-outlook-rearrange-article start)))
  (when (null nodisplay)
    (gnus-outlook-display-article-buffer)))
451
;;;###autoload
(defun gnus-outlook-deuglify-article (&optional nodisplay)
  "Full deuglify of broken Outlook (Express) articles.
The steps are, in order: treat dumbquotes, unwrap lines, then repair
the attribution and rearrange the citation.
If NODISPLAY is non-nil, don't redisplay the article buffer."
  (interactive "P")
  ;; Step 1: dumb quotes.
  (gnus-article-treat-dumbquotes)
  ;; Step 2: wrapped cited lines.  Redisplay is suppressed here and in
  ;; the next step.
  (gnus-article-outlook-unwrap-lines 'nodisplay)
  ;; Step 3: attribution line and citation order.
  (gnus-article-outlook-rearrange-citation 'nodisplay)
  (when (null nodisplay)
    (gnus-outlook-display-article-buffer)))
465
;;;###autoload
(defun gnus-article-outlook-deuglify-article ()
  "Deuglify broken Outlook (Express) articles and redisplay.
This calls `gnus-outlook-deuglify-article' with redisplay enabled."
  (interactive)
  (let ((nodisplay nil))
    (gnus-outlook-deuglify-article nodisplay)))
471
472(provide 'deuglify)
473
474;; Local Variables:
c38e0c97 475;; coding: utf-8
23f87bed
MB
476;; End:
477
23f87bed 478;;; deuglify.el ends here