Commit | Line | Data |
---|---|---|
23f87bed MB |
1 | ;;; deuglify.el --- deuglify broken Outlook (Express) articles |
2 | ||
e3fe4da0 | 3 | ;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 |
b2599f09 | 4 | ;; Free Software Foundation, Inc. |
23f87bed MB |
5 | |
6 | ;; Author: Raymond Scholz <rscholz@zonix.de> | |
7 | ;; Thomas Steffen (unwrapping algorithm, | |
8 | ;; based on an idea of Stefan Monnier) | |
9 | ;; Keywords: mail, news | |
10 | ||
11 | ;; This file is part of GNU Emacs. | |
12 | ||
13 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
14 | ;; it under the terms of the GNU General Public License as published by | |
5a9dffec | 15 | ;; the Free Software Foundation; either version 3, or (at your option) |
23f87bed MB |
16 | ;; any later version. |
17 | ||
18 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
19 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
20 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
21 | ;; GNU General Public License for more details. | |
22 | ||
23 | ;; You should have received a copy of the GNU General Public License | |
24 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
3a35cf56 LK |
25 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
26 | ;; Boston, MA 02110-1301, USA. | |
23f87bed MB |
27 | |
28 | ;;; Commentary: | |
29 | ||
30 | ;; This file enables Gnus to repair broken citations produced by | |
31 | ;; common user agents like MS Outlook (Express). It may repair | |
32 | ;; articles of other user agents too. | |
33 | ;; | |
34 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
35 | ||
36 | ;; | |
37 | ;; Outlook sometimes wraps cited lines before sending a message as | |
38 | ;; seen in this example: | |
39 | ;; | |
40 | ;; Example #1 | |
41 | ;; ---------- | |
42 | ;; | |
43 | ;; John Doe wrote: | |
44 | ;; | |
45 | ;; > This sentence no verb. This sentence no verb. This sentence | |
46 | ;; no | |
47 | ;; > verb. This sentence no verb. This sentence no verb. This | |
48 | ;; > sentence no verb. | |
49 | ;; | |
50 | ;; The function `gnus-article-outlook-unwrap-lines' tries to recognize those | |
51 | ;; erroneously wrapped lines and will unwrap them. I.e. putting the | |
52 | ;; wrapped parts ("no" in this example) back where they belong (at the | |
53 | ;; end of the cited line above). | |
54 | ;; | |
55 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
56 | ;; | |
57 | ;; Note that some people not only use broken user agents but also | |
58 | ;; practice a bad citation style by omitting blank lines between the | |
59 | ;; cited text and their own text. | |
60 | ;; | |
61 | ;; Example #2 | |
62 | ;; ---------- | |
63 | ;; | |
64 | ;; John Doe wrote: | |
65 | ;; | |
66 | ;; > This sentence no verb. This sentence no verb. This sentence no | |
67 | ;; You forgot in all your sentences. | |
68 | ;; > verb. This sentence no verb. This sentence no verb. This | |
69 | ;; > sentence no verb. | |
70 | ;; | |
0138efd4 | 71 | ;; Unwrapping "You forgot in all your sentences." would be invalid as |
23f87bed MB |
72 | ;; this part wasn't intended to be cited text. |
73 | ;; `gnus-article-outlook-unwrap-lines' will only unwrap lines if the resulting | |
74 | ;; citation line will be of a certain maximum length. You can control | |
75 | ;; this by adjusting `gnus-outlook-deuglify-unwrap-max'. Also | |
76 | ;; unwrapping will only be done if the line above the (possibly) | |
77 | ;; wrapped line has a minimum length of `gnus-outlook-deuglify-unwrap-min'. | |
78 | ;; | |
79 | ;; Furthermore no unwrapping will be undertaken if the last character | |
80 | ;; is one of the chars specified in | |
81 | ;; `gnus-outlook-deuglify-unwrap-stop-chars'. Setting this to ".?!" | |
82 | ;; inhibits unwrapping if the cited line ends with a full stop, | |
83 | ;; question mark or exclamation mark. Note that this variable | |
84 | ;; defaults to `nil', triggering a few false positives but generally | |
85 | ;; giving you better results. | |
86 | ;; | |
87 | ;; Unwrapping works on every level of citation. Thus you will be able | |
88 | ;; to repair broken citations of broken user agents citing broken | |
89 | ;; citations of broken user agents citing broken citations... | |
90 | ;; | |
91 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
92 | ;; | |
93 | ;; Citations are commonly introduced with an attribution line | |
94 | ;; indicating who wrote the cited text. Outlook adds superfluous | |
95 | ;; information that can be found in the header of the message to this | |
96 | ;; line and often wraps it. | |
97 | ;; | |
98 | ;; If that weren't enough, lots of people write their own text above | |
99 | ;; the cited text and cite the complete original article below. | |
100 | ;; | |
101 | ;; Example #3 | |
102 | ;; ---------- | |
103 | ;; | |
104 | ;; Hey, John. There's no in all your sentences! | |
105 | ;; | |
106 | ;; John Doe <john.doe@some.domain> wrote in message | |
107 | ;; news:a87usw8$dklsssa$2@some.news.server... | |
108 | ;; > This sentence no verb. This sentence no verb. This sentence | |
109 | ;; no | |
110 | ;; > verb. This sentence no verb. This sentence no verb. This | |
111 | ;; > sentence no verb. | |
112 | ;; > | |
113 | ;; > Bye, John | |
114 | ;; | |
115 | ;; Repairing the attribution line will be done by function | |
116 | ;; `gnus-article-outlook-repair-attribution' which calls other functions that | |
117 | ;; try to recognize and repair broken attribution lines. See variable | |
118 | ;; `gnus-outlook-deuglify-attrib-cut-regexp' for stuff that should be | |
119 | ;; cut off from the beginning of an attribution line and variable | |
120 | ;; `gnus-outlook-deuglify-attrib-verb-regexp' for the verbs that are | |
121 | ;; required to be found in an attribution line. These functions return | |
122 | ;; the point where the repaired attribution line starts. | |
123 | ;; | |
124 | ;; Rearranging the article so that the cited text appears above the | |
125 | ;; new text will be done by function | |
126 | ;; `gnus-article-outlook-rearrange-citation'. This function calls | |
127 | ;; `gnus-article-outlook-repair-attribution' to find and repair an attribution | |
128 | ;; line. | |
129 | ;; | |
130 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
131 | ;; | |
132 | ;; Well, and that's what the message will look like after applying | |
133 | ;; deuglification: | |
134 | ;; | |
135 | ;; Example #3 (deuglified) | |
136 | ;; ----------------------- | |
137 | ;; | |
138 | ;; John Doe <john.doe@some.domain> wrote: | |
139 | ;; | |
140 | ;; > This sentence no verb. This sentence no verb. This sentence no | |
141 | ;; > verb. This sentence no verb. This sentence no verb. This | |
142 | ;; > sentence no verb. | |
143 | ;; > | |
144 | ;; > Bye, John | |
145 | ;; | |
146 | ;; Hey, John. There's no in all your sentences! | |
147 | ;; | |
148 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
bf247b6e | 149 | ;; |
23f87bed MB |
150 | ;; Usage |
151 | ;; ----- | |
152 | ;; | |
153 | ;; Press `W k' in the Summary Buffer. | |
154 | ;; | |
155 | ;; Non recommended usage :-) | |
156 | ;; --------------------- | |
157 | ;; | |
158 | ;; To automatically invoke deuglification on every article you read, | |
159 | ;; put something like this in your .gnus: | |
160 | ;; | |
161 | ;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-unwrap-lines) | |
162 | ;; | |
163 | ;; or _one_ of the following lines: | |
164 | ;; | |
165 | ;; ;; repair broken attribution lines | |
166 | ;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-repair-attribution) | |
167 | ;; | |
168 | ;; ;; repair broken attribution lines and citations | |
169 | ;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-rearrange-citation) | |
170 | ;; | |
171 | ;; Note that there always may be some false positives, so I suggest | |
172 | ;; using the manual invocation. After deuglification you may want to | |
173 | ;; refill the whole article using `W w'. | |
174 | ;; | |
175 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
176 | ;; | |
177 | ;; Limitations | |
178 | ;; ----------- | |
179 | ;; | |
180 | ;; As I said before there may (or will) be a few false positives on | |
181 | ;; unwrapping cited lines with `gnus-article-outlook-unwrap-lines'. | |
182 | ;; | |
183 | ;; `gnus-article-outlook-repair-attribution' will only fix the first | |
184 | ;; attribution line found in the article. Furthermore it is limited to | |
185 | ;; certain kinds of attributions. And there may be horribly many | |
186 | ;; false positives, vanishing lines and so on -- so don't trust your | |
187 | ;; eyes. Again I recommend manual invocation. | |
188 | ;; | |
189 | ;; `gnus-article-outlook-rearrange-citation' carries all the limitations of | |
190 | ;; `gnus-article-outlook-repair-attribution'. | |
191 | ;; | |
192 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
193 | ;; | |
194 | ;; See ChangeLog for other changes. | |
195 | ;; | |
196 | ;; Revision 1.5 2002/01/27 14:39:17 rscholz | |
197 | ;; * New variable `gnus-outlook-deuglify-no-wrap-chars' to inhibit | |
198 | ;; unwrapping if one of these chars is first in the possibly wrapped line. | |
199 | ;; * Improved rearranging of the article. | |
200 | ;; * New function `gnus-outlook-repair-attribution-block' for repairing | |
201 | ;; those big "Original Message (following some headers)" attributions. | |
202 | ;; | |
203 | ;; Revision 1.4 2002/01/03 14:05:00 rscholz | |
204 | ;; Renamed `gnus-outlook-deuglify-article' to | |
205 | ;; `gnus-article-outlook-deuglify-article'. | |
206 | ;; Made it easier to deuglify the article while being in Gnus' Article | |
207 | ;; Edit Mode. (suggested by Phil Nitschke) | |
208 | ;; | |
209 | ;; | |
210 | ;; Revision 1.3 2002/01/02 23:35:54 rscholz | |
211 | ;; Fix a bug that caused succeeding long attribution lines to be | |
212 | ;; unwrapped. Minor doc fixes and regular expression tuning. | |
213 | ;; | |
214 | ;; Revision 1.2 2001/12/30 20:14:34 rscholz | |
215 | ;; Clean up source. | |
216 | ;; | |
217 | ;; Revision 1.1 2001/12/30 20:13:32 rscholz | |
218 | ;; Initial revision | |
219 | ;; | |
220 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
221 | ||
222 | ;;; Code: | |
223 | ||
224 | (require 'gnus-art) | |
225 | (require 'gnus-sum) | |
226 | ||
;; Version string of this library.  The leading 1.5 corresponds to the
;; newest entry (Revision 1.5) in the revision history of the Commentary.
227 | (defconst gnus-outlook-deuglify-version "1.5 Gnus version" | |
228 | "Version of gnus-outlook-deuglify.") | |
229 | ||
230 | ;;; User Customizable Variables: | |
231 | ||
;; Customization group that collects every user option defined below.
;; It is registered as a subgroup of `gnus' and tagged as introduced in
;; Emacs 22.1.  The group itself has no member list (the nil argument);
;; options join it through their own :group keyword.
232 | (defgroup gnus-outlook-deuglify nil | |
e2642250 | 233 | "Deuglify articles generated by broken user agents like MS Outlook (Express)." |
d0859c9a MB |
234 | :version "22.1" |
235 | :group 'gnus) | |
23f87bed | 236 | |
;; Lower bound used by the unwrapping heuristic: per the Commentary,
;; unwrapping is only attempted when the cited line above the (possibly)
;; wrapped line is at least this long.  Default is 45.
;; NOTE(review): presumably measured in characters -- confirm in the
;; unwrapping code, which is not visible in this part of the file.
23f87bed MB |
237 | (defcustom gnus-outlook-deuglify-unwrap-min 45 |
238 | "Minimum length of the cited line above the (possibly) wrapped line." | |
bf247b6e | 239 | :version "22.1" |
23f87bed MB |
240 | :type 'integer |
241 | :group 'gnus-outlook-deuglify) | |
242 | ||
;; Upper bound used by the unwrapping heuristic: per the Commentary,
;; lines are only joined if the resulting citation line stays within
;; this maximum length.  Default is 95.
;; NOTE(review): presumably measured in characters -- confirm in the
;; unwrapping code, which is not visible in this part of the file.
23f87bed MB |
243 | (defcustom gnus-outlook-deuglify-unwrap-max 95 |
244 | "Maximum length of the cited line after unwrapping." | |
bf247b6e | 245 | :version "22.1" |
23f87bed MB |
246 | :type 'integer |
247 | :group 'gnus-outlook-deuglify) | |
248 | ||
;; Set of citation prefix characters, stored as one string.  The default
;; contains four marks: greater-than, vertical bar, hash and percent.
;; NOTE(review): how the string is spliced into a regexp is not visible
;; here -- verify before adding characters that are special inside a
;; bracket expression.
249 | (defcustom gnus-outlook-deuglify-cite-marks ">|#%" | |
250 | "Characters that indicate cited lines." | |
bf247b6e | 251 | :version "22.1" |
23f87bed MB |
252 | :type 'string |
253 | :group 'gnus-outlook-deuglify) | |
254 | ||
;; Optional set of line-final characters that veto unwrapping.  The
;; default nil disables the check; the Commentary notes that this causes
;; a few false positives but generally gives better results.  The
;; customize widget offers either nil (labelled None) or a string, with
;; full stop, question mark and exclamation mark as the suggested value.
255 | (defcustom gnus-outlook-deuglify-unwrap-stop-chars nil ;; ".?!" or nil | |
256 | "Characters that inhibit unwrapping if they are the last one on the cited line above the possible wrapped line." | |
bf247b6e | 257 | :version "22.1" |
23f87bed | 258 | :type '(radio (const :format "None " nil) |
ad136a7c | 259 | (string :value ".?!")) |
23f87bed MB |
260 | :group 'gnus-outlook-deuglify) |
261 | ||
;; Set of characters that veto unwrapping when one of them is the first
;; character of the possibly wrapped line (introduced in Revision 1.5
;; according to the history in the Commentary).  The default contains
;; only the backquote character.
262 | (defcustom gnus-outlook-deuglify-no-wrap-chars "`" | |
263 | "Characters that inhibit unwrapping if they are the first one in the possibly wrapped line." | |
bf247b6e | 264 | :version "22.1" |
23f87bed MB |
265 | :type 'string |
266 | :group 'gnus-outlook-deuglify) | |
267 | ||
;; Regexp for the leading part of an attribution line that is to be
;; removed.  The default matches, in order:
;;   - an optional prefix, either On or Am, followed by a space,
;;   - a three-letter English weekday abbreviation (Mon through Sun),
;;   - a comma,
;;   - one or more characters that are not a comma,
;;   - a comma followed by a single space.
;; The backslashes are doubled because the pattern is written as a Lisp
;; string literal.
268 | (defcustom gnus-outlook-deuglify-attrib-cut-regexp | |
269 | "\\(On \\|Am \\)?\\(Mon\\|Tue\\|Wed\\|Thu\\|Fri\\|Sat\\|Sun\\),[^,]+, " | |
270 | "Regular expression matching the beginning of an attribution line that should be cut off." | |
bf247b6e | 271 | :version "22.1" |
23f87bed MB |
272 | :type 'string |
273 | :group 'gnus-outlook-deuglify) | |
274 | ||
275 | (defcustom gnus-outlook-deuglify-attrib-verb-regexp | |
276 | Content-type: text/html