Add 2012 to FSF copyright years for Emacs files (do not merge to trunk)
[bpt/emacs.git] / lisp / gnus / deuglify.el
Content-type: text/html HCoop Git - bpt/emacs.git/blame - lisp/gnus/deuglify.el


500 - Internal Server Error

Malformed UTF-8 character (fatal) at (eval 8) line 1, <$fd> line 657.
CommitLineData
23f87bed
MB
1;;; deuglify.el --- deuglify broken Outlook (Express) articles
2
49f70d46 3;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
b2599f09 4;; Free Software Foundation, Inc.
23f87bed
MB
5
6;; Author: Raymond Scholz <rscholz@zonix.de>
1cc3c18f
GM
7;; Thomas Steffen
8;; (unwrapping algorithm, based on an idea of Stefan Monnier)
23f87bed
MB
9;; Keywords: mail, news
10
11;; This file is part of GNU Emacs.
12
5e809f55 13;; GNU Emacs is free software: you can redistribute it and/or modify
23f87bed 14;; it under the terms of the GNU General Public License as published by
5e809f55
GM
15;; the Free Software Foundation, either version 3 of the License, or
16;; (at your option) any later version.
23f87bed
MB
17
18;; GNU Emacs is distributed in the hope that it will be useful,
19;; but WITHOUT ANY WARRANTY; without even the implied warranty of
5e809f55 20;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23f87bed
MB
21;; GNU General Public License for more details.
22
23;; You should have received a copy of the GNU General Public License
5e809f55 24;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
23f87bed
MB
25
26;;; Commentary:
27
28;; This file enables Gnus to repair broken citations produced by
29;; common user agents like MS Outlook (Express). It may repair
30;; articles of other user agents too.
31;;
32;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33
34;;
35;; Outlook sometimes wraps cited lines before sending a message as
36;; seen in this example:
37;;
38;; Example #1
39;; ----------
40;;
41;; John Doe wrote:
42;;
43;; > This sentence no verb. This sentence no verb. This sentence
44;; no
45;; > verb. This sentence no verb. This sentence no verb. This
46;; > sentence no verb.
47;;
48;; The function `gnus-article-outlook-unwrap-lines' tries to recognize those
49;; erroneously wrapped lines and will unwrap them. I.e. putting the
50;; wrapped parts ("no" in this example) back where they belong (at the
51;; end of the cited line above).
52;;
53;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
54;;
55;; Note that some people not only use broken user agents but also
56;; practice a bad citation style by omitting blank lines between the
57;; cited text and their own text.
58;;
59;; Example #2
60;; ----------
61;;
62;; John Doe wrote:
63;;
64;; > This sentence no verb. This sentence no verb. This sentence no
65;; You forgot in all your sentences.
66;; > verb. This sentence no verb. This sentence no verb. This
67;; > sentence no verb.
68;;
0138efd4 69;; Unwrapping "You forgot in all your sentences." would be invalid as
23f87bed
MB
70;; this part wasn't intended to be cited text.
71;; `gnus-article-outlook-unwrap-lines' will only unwrap lines if the resulting
72;; citation line will be of a certain maximum length. You can control
73;; this by adjusting `gnus-outlook-deuglify-unwrap-max'. Also
74;; unwrapping will only be done if the line above the (possibly)
75;; wrapped line has a minimum length of `gnus-outlook-deuglify-unwrap-min'.
76;;
77;; Furthermore no unwrapping will be undertaken if the last character
78;; is one of the chars specified in
79;; `gnus-outlook-deuglify-unwrap-stop-chars'. Setting this to ".?!"
80;; inhibits unwrapping if the cited line ends with a full stop,
81;; question mark or exclamation mark. Note that this variable
82;; defaults to `nil', triggering a few false positives but generally
83;; giving you better results.
84;;
85;; Unwrapping works on every level of citation. Thus you will be able
86;; to repair broken citations of broken user agents citing broken
87;; citations of broken user agents citing broken citations...
88;;
89;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
90;;
91;; Citations are commonly introduced with an attribution line
92;; indicating who wrote the cited text. Outlook adds superfluous
93;; information that can be found in the header of the message to this
94;; line and often wraps it.
95;;
96;; If that weren't enough, lots of people write their own text above
97;; the cited text and cite the complete original article below.
98;;
99;; Example #3
100;; ----------
101;;
102;; Hey, John. There's no in all your sentences!
103;;
104;; John Doe <john.doe@some.domain> wrote in message
105;; news:a87usw8$dklsssa$2@some.news.server...
106;; > This sentence no verb. This sentence no verb. This sentence
107;; no
108;; > verb. This sentence no verb. This sentence no verb. This
109;; > sentence no verb.
110;; >
111;; > Bye, John
112;;
113;; Repairing the attribution line will be done by function
114;; `gnus-article-outlook-repair-attribution', which calls other functions that
115;; try to recognize and repair broken attribution lines. See variable
116;; `gnus-outlook-deuglify-attrib-cut-regexp' for stuff that should be
117;; cut off from the beginning of an attribution line and variable
118;; `gnus-outlook-deuglify-attrib-verb-regexp' for the verbs that are
119;; required to be found in an attribution line. These functions return
120;; the point where the repaired attribution line starts.
121;;
122;; Rearranging the article so that the cited text appears above the
123;; new text will be done by function
124;; `gnus-article-outlook-rearrange-citation'. This function calls
125;; `gnus-article-outlook-repair-attribution' to find and repair an attribution
126;; line.
127;;
128;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
129;;
130;; Well, and that's what the message will look like after applying
131;; deuglification:
132;;
133;; Example #3 (deuglified)
134;; -----------------------
135;;
136;; John Doe <john.doe@some.domain> wrote:
137;;
138;; > This sentence no verb. This sentence no verb. This sentence no
139;; > verb. This sentence no verb. This sentence no verb. This
140;; > sentence no verb.
141;; >
142;; > Bye, John
143;;
144;; Hey, John. There's no in all your sentences!
145;;
146;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
bf247b6e 147;;
23f87bed
MB
148;; Usage
149;; -----
150;;
151;; Press `W k' in the Summary Buffer.
152;;
153;; Non recommended usage :-)
154;; ---------------------
155;;
156;; To automatically invoke deuglification on every article you read,
157;; put something like this in your .gnus:
158;;
159;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-unwrap-lines)
160;;
161;; or _one_ of the following lines:
162;;
163;; ;; repair broken attribution lines
164;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-repair-attribution)
165;;
166;; ;; repair broken attribution lines and citations
167;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-rearrange-citation)
168;;
169;; Note that there always may be some false positives, so I suggest
170;; using the manual invocation. After deuglification you may want to
171;; refill the whole article using `W w'.
172;;
173;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
174;;
175;; Limitations
176;; -----------
177;;
178;; As I said before there may (or will) be a few false positives on
179;; unwrapping cited lines with `gnus-article-outlook-unwrap-lines'.
180;;
181;; `gnus-article-outlook-repair-attribution' will only fix the first
182;; attribution line found in the article. Furthermore it is limited to
183;; certain kinds of attributions. And there may be horribly many
184;; false positives, vanishing lines and so on -- so don't trust your
185;; eyes. Again I recommend manual invocation.
186;;
187;; `gnus-article-outlook-rearrange-citation' carries all the limitations of
188;; `gnus-article-outlook-repair-attribution'.
189;;
190;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
191;;
192;; See ChangeLog for other changes.
193;;
194;; Revision 1.5 2002/01/27 14:39:17 rscholz
195;; * New variable `gnus-outlook-deuglify-no-wrap-chars' to inhibit
196;; unwrapping if one of these chars is first in the possibly wrapped line.
197;; * Improved rearranging of the article.
198;; * New function `gnus-outlook-repair-attribution-block' for repairing
199;; those big "Original Message (following some headers)" attributions.
200;;
201;; Revision 1.4 2002/01/03 14:05:00 rscholz
202;; Renamed `gnus-outlook-deuglify-article' to
203;; `gnus-article-outlook-deuglify-article'.
204;; Made it easier to deuglify the article while being in Gnus' Article
205;; Edit Mode. (suggested by Phil Nitschke)
206;;
207;;
208;; Revision 1.3 2002/01/02 23:35:54 rscholz
209;; Fix a bug that caused succeeding long attribution lines to be
210;; unwrapped. Minor doc fixes and regular expression tuning.
211;;
212;; Revision 1.2 2001/12/30 20:14:34 rscholz
213;; Clean up source.
214;;
215;; Revision 1.1 2001/12/30 20:13:32 rscholz
216;; Initial revision
217;;
218;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
219
220;;; Code:
221
222(require 'gnus-art)
223(require 'gnus-sum)
224
;; Version string of this library; the "1.5" matches the most recent
;; "Revision 1.5" entry in the commentary's revision history.
225(defconst gnus-outlook-deuglify-version "1.5 Gnus version"
226 "Version of gnus-outlook-deuglify.")
227
228;;; User Customizable Variables:
229
;; Customization group for the deuglify options in this file.  It has no
;; initial members (nil) and is itself registered under the `gnus' group.
230(defgroup gnus-outlook-deuglify nil
e2642250 231 "Deuglify articles generated by broken user agents like MS Outlook (Express)."
d0859c9a
MB
232 :version "22.1"
233 :group 'gnus)
23f87bed 234
23f87bed
MB
;; Lower bound used by the unwrapping heuristic: per the commentary,
;; unwrapping is only done if the line above the (possibly) wrapped line
;; has at least this length.  Integer option, default 45.
235(defcustom gnus-outlook-deuglify-unwrap-min 45
236 "Minimum length of the cited line above the (possibly) wrapped line."
bf247b6e 237 :version "22.1"
23f87bed
MB
238 :type 'integer
239 :group 'gnus-outlook-deuglify)
240
23f87bed
MB
;; Upper bound used by the unwrapping heuristic: per the commentary, lines
;; are only unwrapped if the resulting citation line does not exceed this
;; length.  Integer option, default 95.
241(defcustom gnus-outlook-deuglify-unwrap-max 95
242 "Maximum length of the cited line after unwrapping."
bf247b6e 243 :version "22.1"
23f87bed
MB
244 :type 'integer
245 :group 'gnus-outlook-deuglify)
246
;; String option listing the citation-marker characters; the default
;; contains `>', `|', `#' and `%'.
;; NOTE(review): how the string is consumed (e.g. spliced into a regexp
;; character class) is not visible in this part of the file -- confirm
;; before adding characters that are special inside regexps.
247(defcustom gnus-outlook-deuglify-cite-marks ">|#%"
248 "Characters that indicate cited lines."
bf247b6e 249 :version "22.1"
23f87bed
MB
250 :type 'string
251 :group 'gnus-outlook-deuglify)
252
;; Optional set of line-ending characters that veto unwrapping.  The
;; default is nil; the commentary notes that this default triggers a few
;; false positives but generally gives better results.  The customize
;; widget is a radio choice between nil (labelled "None") and a string
;; whose suggested value is ".?!".
253(defcustom gnus-outlook-deuglify-unwrap-stop-chars nil ;; ".?!" or nil
254 "Characters that inhibit unwrapping if they are the last one on the cited line above the possible wrapped line."
bf247b6e 255 :version "22.1"
23f87bed 256 :type '(radio (const :format "None " nil)
ad136a7c 257 (string :value ".?!"))
23f87bed
MB
258 :group 'gnus-outlook-deuglify)
259
;; Set of characters that veto unwrapping when one of them starts the
;; candidate wrapped line (introduced in revision 1.5 according to the
;; commentary).  String option; the default is a single backquote.
260(defcustom gnus-outlook-deuglify-no-wrap-chars "`"
261 "Characters that inhibit unwrapping if they are the first one in the possibly wrapped line."
bf247b6e 262 :version "22.1"
23f87bed
MB
263 :type 'string
264 :group 'gnus-outlook-deuglify)
265
;; Regexp describing the leading part of an attribution line that should
;; be cut off.  The default matches an optional "On " or "Am ", then an
;; English three-letter weekday abbreviation followed by a comma, then a
;; run of non-comma characters, and finally a comma and a space.
266(defcustom gnus-outlook-deuglify-attrib-cut-regexp
267 "\\(On \\|Am \\)?\\(Mon\\|Tue\\|Wed\\|Thu\\|Fri\\|Sat\\|Sun\\),[^,]+, "
268 "Regular expression matching the beginning of an attribution line that should be cut off."
bf247b6e 269 :version "22.1"
23f87bed
MB
270 :type 'string
271 :group 'gnus-outlook-deuglify)
272
273(defcustom gnus-outlook-deuglify-attrib-verb-regexp
274