Commit | Line | Data |
---|---|---|
23f87bed MB |
1 | ;;; deuglify.el --- deuglify broken Outlook (Express) articles |
2 | ||
49f70d46 | 3 | ;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 |
b2599f09 | 4 | ;; Free Software Foundation, Inc. |
23f87bed MB |
5 | |
6 | ;; Author: Raymond Scholz <rscholz@zonix.de> | |
1cc3c18f GM |
7 | ;; Thomas Steffen |
8 | ;; (unwrapping algorithm, based on an idea of Stefan Monnier) | |
23f87bed MB |
9 | ;; Keywords: mail, news |
10 | ||
11 | ;; This file is part of GNU Emacs. | |
12 | ||
5e809f55 | 13 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
23f87bed | 14 | ;; it under the terms of the GNU General Public License as published by |
5e809f55 GM |
15 | ;; the Free Software Foundation, either version 3 of the License, or |
16 | ;; (at your option) any later version. | |
23f87bed MB |
17 | |
18 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
19 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
5e809f55 | 20 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
23f87bed MB |
21 | ;; GNU General Public License for more details. |
22 | ||
23 | ;; You should have received a copy of the GNU General Public License | |
5e809f55 | 24 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
23f87bed MB |
25 | |
26 | ;;; Commentary: | |
27 | ||
28 | ;; This file enables Gnus to repair broken citations produced by | |
29 | ;; common user agents like MS Outlook (Express). It may repair | |
30 | ;; articles of other user agents too. | |
31 | ;; | |
32 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
33 | ||
34 | ;; | |
35 | ;; Outlook sometimes wraps cited lines before sending a message as | |
36 | ;; seen in this example: | |
37 | ;; | |
38 | ;; Example #1 | |
39 | ;; ---------- | |
40 | ;; | |
41 | ;; John Doe wrote: | |
42 | ;; | |
43 | ;; > This sentence no verb. This sentence no verb. This sentence | |
44 | ;; no | |
45 | ;; > verb. This sentence no verb. This sentence no verb. This | |
46 | ;; > sentence no verb. | |
47 | ;; | |
48 | ;; The function `gnus-article-outlook-unwrap-lines' tries to recognize those | |
49 | ;; erroneously wrapped lines and will unwrap them. I.e. putting the | |
50 | ;; wrapped parts ("no" in this example) back where they belong (at the | |
51 | ;; end of the cited line above). | |
52 | ;; | |
53 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
54 | ;; | |
55 | ;; Note that some people not only use broken user agents but also | |
56 | ;; practice a bad citation style by omitting blank lines between the | |
57 | ;; cited text and their own text. | |
58 | ;; |
59 | ;; Example #2 | |
60 | ;; ---------- | |
61 | ;; | |
62 | ;; John Doe wrote: | |
63 | ;; | |
64 | ;; > This sentence no verb. This sentence no verb. This sentence no | |
65 | ;; You forgot in all your sentences. | |
66 | ;; > verb. This sentence no verb. This sentence no verb. This | |
67 | ;; > sentence no verb. | |
68 | ;; | |
0138efd4 | 69 | ;; Unwrapping "You forgot in all your sentences." would be invalid as |
23f87bed MB |
70 | ;; this part wasn't intended to be cited text. |
71 | ;; `gnus-article-outlook-unwrap-lines' will only unwrap lines if the resulting | |
72 | ;; citation line will be of a certain maximum length. You can control | |
73 | ;; this by adjusting `gnus-outlook-deuglify-unwrap-max'. Also | |
74 | ;; unwrapping will only be done if the line above the (possibly) | |
75 | ;; wrapped line has a minimum length of `gnus-outlook-deuglify-unwrap-min'. | |
76 | ;; | |
77 | ;; Furthermore no unwrapping will be undertaken if the last character | |
78 | ;; is one of the chars specified in | |
79 | ;; `gnus-outlook-deuglify-unwrap-stop-chars'. Setting this to ".?!" | |
80 | ;; inhibits unwrapping if the cited line ends with a full stop, | |
81 | ;; question mark or exclamation mark. Note that this variable | |
82 | ;; defaults to `nil', triggering a few false positives but generally | |
83 | ;; giving you better results. | |
84 | ;; | |
85 | ;; Unwrapping works on every level of citation. Thus you will be able | |
86 | ;; to repair broken citations of broken user agents citing broken | |
87 | ;; citations of broken user agents citing broken citations... | |
88 | ;; | |
89 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
90 | ;; | |
91 | ;; Citations are commonly introduced with an attribution line | |
92 | ;; indicating who wrote the cited text. Outlook adds superfluous | |
93 | ;; information that can be found in the header of the message to this | |
94 | ;; line and often wraps it. | |
95 | ;; | |
96 | ;; If that weren't enough, lots of people write their own text above | |
97 | ;; the cited text and cite the complete original article below. | |
98 | ;; | |
99 | ;; Example #3 | |
100 | ;; ---------- | |
101 | ;; | |
102 | ;; Hey, John. There's no in all your sentences! | |
103 | ;; | |
104 | ;; John Doe <john.doe@some.domain> wrote in message | |
105 | ;; news:a87usw8$dklsssa$2@some.news.server... | |
106 | ;; > This sentence no verb. This sentence no verb. This sentence | |
107 | ;; no | |
108 | ;; > verb. This sentence no verb. This sentence no verb. This | |
109 | ;; > sentence no verb. | |
110 | ;; > | |
111 | ;; > Bye, John | |
112 | ;; | |
113 | ;; Repairing the attribution line will be done by function | |
114 | ;; `gnus-article-outlook-repair-attribution', which calls other functions that | |
115 | ;; try to recognize and repair broken attribution lines. See variable | |
116 | ;; `gnus-outlook-deuglify-attrib-cut-regexp' for stuff that should be | |
117 | ;; cut off from the beginning of an attribution line and variable | |
118 | ;; `gnus-outlook-deuglify-attrib-verb-regexp' for the verbs that are | |
119 | ;; required to be found in an attribution line. These functions return | |
120 | ;; the point where the repaired attribution line starts. | |
121 | ;; | |
122 | ;; Rearranging the article so that the cited text appears above the | |
123 | ;; new text will be done by function | |
124 | ;; `gnus-article-outlook-rearrange-citation'. This function calls | |
125 | ;; `gnus-article-outlook-repair-attribution' to find and repair an attribution | |
126 | ;; line. | |
127 | ;; | |
128 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
129 | ;; | |
130 | ;; Well, and that's what the message will look like after applying | |
131 | ;; deuglification: | |
132 | ;; | |
133 | ;; Example #3 (deuglified) | |
134 | ;; ----------------------- | |
135 | ;; | |
136 | ;; John Doe <john.doe@some.domain> wrote: | |
137 | ;; | |
138 | ;; > This sentence no verb. This sentence no verb. This sentence no | |
139 | ;; > verb. This sentence no verb. This sentence no verb. This | |
140 | ;; > sentence no verb. | |
141 | ;; > | |
142 | ;; > Bye, John | |
143 | ;; | |
144 | ;; Hey, John. There's no in all your sentences! | |
145 | ;; | |
146 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
bf247b6e | 147 | ;; |
23f87bed MB |
148 | ;; Usage |
149 | ;; ----- | |
150 | ;; | |
151 | ;; Press `W k' in the Summary Buffer. | |
152 | ;; | |
153 | ;; Non-recommended usage :-) | |
154 | ;; --------------------- | |
155 | ;; | |
156 | ;; To automatically invoke deuglification on every article you read, | |
157 | ;; put something like this in your .gnus: | |
158 | ;; | |
159 | ;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-unwrap-lines) | |
160 | ;; | |
161 | ;; or _one_ of the following lines: | |
162 | ;; | |
163 | ;; ;; repair broken attribution lines | |
164 | ;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-repair-attribution) | |
165 | ;; | |
166 | ;; ;; repair broken attribution lines and citations | |
167 | ;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-rearrange-citation) | |
168 | ;; | |
169 | ;; Note that there always may be some false positives, so I suggest | |
170 | ;; using the manual invocation. After deuglification you may want to | |
171 | ;; refill the whole article using `W w'. | |
172 | ;; | |
173 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
174 | ;; | |
175 | ;; Limitations | |
176 | ;; ----------- | |
177 | ;; | |
178 | ;; As I said before there may (or will) be a few false positives on | |
179 | ;; unwrapping cited lines with `gnus-article-outlook-unwrap-lines'. | |
180 | ;; | |
181 | ;; `gnus-article-outlook-repair-attribution' will only fix the first | |
182 | ;; attribution line found in the article. Furthermore it is limited to | |
183 | ;; certain kinds of attributions. And there may be horribly many | |
184 | ;; false positives, vanishing lines and so on -- so don't trust your | |
185 | ;; eyes. Again I recommend manual invocation. | |
186 | ;; | |
187 | ;; `gnus-article-outlook-rearrange-citation' carries all the limitations of | |
188 | ;; `gnus-article-outlook-repair-attribution'. | |
189 | ;; | |
190 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
191 | ;; | |
192 | ;; See ChangeLog for other changes. | |
193 | ;; | |
194 | ;; Revision 1.5 2002/01/27 14:39:17 rscholz | |
195 | ;; * New variable `gnus-outlook-deuglify-no-wrap-chars' to inhibit | |
196 | ;; unwrapping if one of these chars is first in the possibly wrapped line. | |
197 | ;; * Improved rearranging of the article. | |
198 | ;; * New function `gnus-outlook-repair-attribution-block' for repairing | |
199 | ;; those big "Original Message (following some headers)" attributions. | |
200 | ;; | |
201 | ;; Revision 1.4 2002/01/03 14:05:00 rscholz | |
202 | ;; Renamed `gnus-outlook-deuglify-article' to | |
203 | ;; `gnus-article-outlook-deuglify-article'. | |
204 | ;; Made it easier to deuglify the article while being in Gnus' Article | |
205 | ;; Edit Mode. (suggested by Phil Nitschke) | |
206 | ;; | |
207 | ;; | |
208 | ;; Revision 1.3 2002/01/02 23:35:54 rscholz | |
209 | ;; Fix a bug that caused succeeding long attribution lines to be | |
210 | ;; unwrapped. Minor doc fixes and regular expression tuning. | |
211 | ;; | |
212 | ;; Revision 1.2 2001/12/30 20:14:34 rscholz | |
213 | ;; Clean up source. | |
214 | ;; | |
215 | ;; Revision 1.1 2001/12/30 20:13:32 rscholz | |
216 | ;; Initial revision | |
217 | ;; | |
218 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
219 | ||
220 | ;;; Code: | |
221 | ||
222 | (require 'gnus-art) | |
223 | (require 'gnus-sum) | |
224 | ||
;; Version identifier of this library, kept as a constant string.
225 | (defconst gnus-outlook-deuglify-version "1.5 Gnus version" | |
226 | "Version of gnus-outlook-deuglify.") | |
227 | ||
228 | ;;; User Customizable Variables: | |
229 | ||
;; Customization group for the user options of this library.  It is
;; filed under the `gnus' group and carries the :version tag 22.1.
230 | (defgroup gnus-outlook-deuglify nil | |
e2642250 | 231 | "Deuglify articles generated by broken user agents like MS Outlook (Express)." |
d0859c9a MB |
232 | :version "22.1" |
233 | :group 'gnus) | |
23f87bed | 234 | |
;; Integer option, default 45.  As the Commentary at the top of this
;; file explains, unwrapping is only done when the cited line above the
;; (possibly) wrapped line has at least this length.
23f87bed MB |
235 | (defcustom gnus-outlook-deuglify-unwrap-min 45 |
236 | "Minimum length of the cited line above the (possibly) wrapped line." | |
bf247b6e | 237 | :version "22.1" |
23f87bed MB |
238 | :type 'integer |
239 | :group 'gnus-outlook-deuglify) | |
240 | ||
;; Integer option, default 95.  Per the Commentary at the top of this
;; file, lines are only unwrapped when the resulting citation line stays
;; within this maximum length.
23f87bed MB |
241 | (defcustom gnus-outlook-deuglify-unwrap-max 95 |
242 | "Maximum length of the cited line after unwrapping." | |
bf247b6e | 243 | :version "22.1" |
23f87bed MB |
244 | :type 'integer |
245 | :group 'gnus-outlook-deuglify) | |
246 | ||
;; String option; the default holds four punctuation characters.
;; NOTE(review): how the characters are matched is decided by code that
;; is not visible in this part of the file -- confirm before adding
;; characters that are special in regular expressions.
247 | (defcustom gnus-outlook-deuglify-cite-marks ">|#%" | |
248 | "Characters that indicate cited lines." | |
bf247b6e | 249 | :version "22.1" |
23f87bed MB |
250 | :type 'string |
251 | :group 'gnus-outlook-deuglify) | |
252 | ||
;; Either nil (the default, offered as None in the radio widget) or a
;; string of characters.  The Commentary at the top of this file gives
;; .?! as an example value: it inhibits unwrapping when the cited line
;; ends with a full stop, question mark or exclamation mark.
253 | (defcustom gnus-outlook-deuglify-unwrap-stop-chars nil ;; ".?!" or nil | |
254 | "Characters that inhibit unwrapping if they are the last one on the cited line above the possible wrapped line." | |
bf247b6e | 255 | :version "22.1" |
23f87bed | 256 | :type '(radio (const :format "None " nil) |
ad136a7c | 257 | (string :value ".?!")) |
23f87bed MB |
258 | :group 'gnus-outlook-deuglify) |
259 | ||
;; String option; the default contains only the backquote character.
;; A candidate line that starts with one of these characters is not
;; unwrapped (see the Revision 1.5 note in the Commentary).
260 | (defcustom gnus-outlook-deuglify-no-wrap-chars "`" | |
261 | "Characters that inhibit unwrapping if they are the first one in the possibly wrapped line." | |
bf247b6e | 262 | :version "22.1" |
23f87bed MB |
263 | :type 'string |
264 | :group 'gnus-outlook-deuglify) | |
265 | ||
;; String option holding a regular expression.  The default matches an
;; optional leading On or Am, a three-letter English weekday
;; abbreviation followed by a comma, a run of non-comma characters, and
;; a closing comma plus space.
266 | (defcustom gnus-outlook-deuglify-attrib-cut-regexp | |
267 | "\\(On \\|Am \\)?\\(Mon\\|Tue\\|Wed\\|Thu\\|Fri\\|Sat\\|Sun\\),[^,]+, " | |
268 | "Regular expression matching the beginning of an attribution line that should be cut off." | |
bf247b6e | 269 | :version "22.1" |
23f87bed MB |
270 | :type 'string |
271 | :group 'gnus-outlook-deuglify) | |
272 | ||
273 | (defcustom gnus-outlook-deuglify-attrib-verb-regexp | |
274 | Content-type: text/html