| 1 | ;;; deuglify.el --- deuglify broken Outlook (Express) articles |
| 2 | |
| 3 | ;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 |
| 4 | ;; Free Software Foundation, Inc. |
| 5 | |
| 6 | ;; Author: Raymond Scholz <rscholz@zonix.de> |
| 7 | ;; Thomas Steffen (unwrapping algorithm, |
| 8 | ;; based on an idea of Stefan Monnier) |
| 9 | ;; Keywords: mail, news |
| 10 | |
| 11 | ;; This file is part of GNU Emacs. |
| 12 | |
| 13 | ;; GNU Emacs is free software; you can redistribute it and/or modify |
| 14 | ;; it under the terms of the GNU General Public License as published by |
| 15 | ;; the Free Software Foundation; either version 3, or (at your option) |
| 16 | ;; any later version. |
| 17 | |
| 18 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 19 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 20 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 21 | ;; GNU General Public License for more details. |
| 22 | |
| 23 | ;; You should have received a copy of the GNU General Public License |
| 24 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
| 25 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| 26 | ;; Boston, MA 02110-1301, USA. |
| 27 | |
| 28 | ;;; Commentary: |
| 29 | |
| 30 | ;; This file enables Gnus to repair broken citations produced by |
| 31 | ;; common user agents like MS Outlook (Express). It may repair |
| 32 | ;; articles of other user agents too. |
| 33 | ;; |
| 34 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 35 | |
| 36 | ;; |
| 37 | ;; Outlook sometimes wraps cited lines before sending a message as |
| 38 | ;; seen in this example: |
| 39 | ;; |
| 40 | ;; Example #1 |
| 41 | ;; ---------- |
| 42 | ;; |
| 43 | ;; John Doe wrote: |
| 44 | ;; |
| 45 | ;; > This sentence no verb. This sentence no verb. This sentence |
| 46 | ;; no |
| 47 | ;; > verb. This sentence no verb. This sentence no verb. This |
| 48 | ;; > sentence no verb. |
| 49 | ;; |
| 50 | ;; The function `gnus-article-outlook-unwrap-lines' tries to recognize those |
| 51 | ;; erroneously wrapped lines and will unwrap them. I.e. putting the |
| 52 | ;; wrapped parts ("no" in this example) back where they belong (at the |
| 53 | ;; end of the cited line above). |
| 54 | ;; |
| 55 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 56 | ;; |
| 57 | ;; Note that some people not only use broken user agents but also |
| 58 | ;; practice a bad citation style by omitting blank lines between the |
| 59 | ;; cited text and their own text. |
| 60 | ;; |
| 61 | ;; Example #2 |
| 62 | ;; ---------- |
| 63 | ;; |
| 64 | ;; John Doe wrote: |
| 65 | ;; |
| 66 | ;; > This sentence no verb. This sentence no verb. This sentence no |
| 67 | ;; You forgot in all your sentences. |
| 68 | ;; > verb. This sentence no verb. This sentence no verb. This |
| 69 | ;; > sentence no verb. |
| 70 | ;; |
| 71 | ;; Unwrapping "You forgot in all your sentences." would be illegal as |
| 72 | ;; this part wasn't intended to be cited text. |
| 73 | ;; `gnus-article-outlook-unwrap-lines' will only unwrap lines if the resulting |
| 74 | ;; citation line will be of a certain maximum length. You can control |
| 75 | ;; this by adjusting `gnus-outlook-deuglify-unwrap-max'. Also |
| 76 | ;; unwrapping will only be done if the line above the (possibly) |
| 77 | ;; wrapped line has a minimum length of `gnus-outlook-deuglify-unwrap-min'. |
| 78 | ;; |
| 79 | ;; Furthermore no unwrapping will be undertaken if the last character |
| 80 | ;; is one of the chars specified in |
| 81 | ;; `gnus-outlook-deuglify-unwrap-stop-chars'. Setting this to ".?!" |
| 82 | ;; inhibits unwrapping if the cited line ends with a full stop, |
| 83 | ;; question mark or exclamation mark. Note that this variable |
| 84 | ;; defaults to `nil', triggering a few false positives but generally |
| 85 | ;; giving you better results. |
| 86 | ;; |
| 87 | ;; Unwrapping works on every level of citation. Thus you will be able |
| 88 | ;; to repair broken citations of broken user agents citing broken |
| 89 | ;; citations of broken user agents citing broken citations... |
| 90 | ;; |
| 91 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 92 | ;; |
| 93 | ;; Citations are commonly introduced with an attribution line |
| 94 | ;; indicating who wrote the cited text. Outlook adds superfluous |
| 95 | ;; information that can be found in the header of the message to this |
| 96 | ;; line and often wraps it. |
| 97 | ;; |
| 98 | ;; If that weren't enough, lots of people write their own text above |
| 99 | ;; the cited text and cite the complete original article below. |
| 100 | ;; |
| 101 | ;; Example #3 |
| 102 | ;; ---------- |
| 103 | ;; |
| 104 | ;; Hey, John. There's no in all your sentences! |
| 105 | ;; |
| 106 | ;; John Doe <john.doe@some.domain> wrote in message |
| 107 | ;; news:a87usw8$dklsssa$2@some.news.server... |
| 108 | ;; > This sentence no verb. This sentence no verb. This sentence |
| 109 | ;; no |
| 110 | ;; > verb. This sentence no verb. This sentence no verb. This |
| 111 | ;; > sentence no verb. |
| 112 | ;; > |
| 113 | ;; > Bye, John |
| 114 | ;; |
| 115 | ;; Repairing the attribution line will be done by function |
| 116 | ;; `gnus-article-outlook-repair-attribution' which calls other functions that |
| 117 | ;; try to recognize and repair broken attribution lines. See variable |
| 118 | ;; `gnus-outlook-deuglify-attrib-cut-regexp' for stuff that should be |
| 119 | ;; cut off from the beginning of an attribution line and variable |
| 120 | ;; `gnus-outlook-deuglify-attrib-verb-regexp' for the verbs that are |
| 121 | ;; required to be found in an attribution line. These functions return |
| 122 | ;; the point where the repaired attribution line starts. |
| 123 | ;; |
| 124 | ;; Rearranging the article so that the cited text appears above the |
| 125 | ;; new text will be done by function |
| 126 | ;; `gnus-article-outlook-rearrange-citation'. This function calls |
| 127 | ;; `gnus-article-outlook-repair-attribution' to find and repair an attribution |
| 128 | ;; line. |
| 129 | ;; |
| 130 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 131 | ;; |
| 132 | ;; Well, and that's what the message will look like after applying |
| 133 | ;; deuglification: |
| 134 | ;; |
| 135 | ;; Example #3 (deuglified) |
| 136 | ;; ----------------------- |
| 137 | ;; |
| 138 | ;; John Doe <john.doe@some.domain> wrote: |
| 139 | ;; |
| 140 | ;; > This sentence no verb. This sentence no verb. This sentence no |
| 141 | ;; > verb. This sentence no verb. This sentence no verb. This |
| 142 | ;; > sentence no verb. |
| 143 | ;; > |
| 144 | ;; > Bye, John |
| 145 | ;; |
| 146 | ;; Hey, John. There's no in all your sentences! |
| 147 | ;; |
| 148 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 149 | ;; |
| 150 | ;; Usage |
| 151 | ;; ----- |
| 152 | ;; |
| 153 | ;; Press `W k' in the Summary Buffer. |
| 154 | ;; |
| 155 | ;; Non recommended usage :-) |
| 156 | ;; --------------------- |
| 157 | ;; |
| 158 | ;; To automatically invoke deuglification on every article you read, |
| 159 | ;; put something like this in your .gnus: |
| 160 | ;; |
| 161 | ;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-unwrap-lines) |
| 162 | ;; |
| 163 | ;; or _one_ of the following lines: |
| 164 | ;; |
| 165 | ;; ;; repair broken attribution lines |
| 166 | ;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-repair-attribution) |
| 167 | ;; |
| 168 | ;; ;; repair broken attribution lines and citations |
| 169 | ;; (add-hook 'gnus-article-decode-hook 'gnus-article-outlook-rearrange-citation) |
| 170 | ;; |
| 171 | ;; Note that there always may be some false positives, so I suggest |
| 172 | ;; using the manual invocation. After deuglification you may want to |
| 173 | ;; refill the whole article using `W w'. |
| 174 | ;; |
| 175 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 176 | ;; |
| 177 | ;; Limitations |
| 178 | ;; ----------- |
| 179 | ;; |
| 180 | ;; As I said before there may (or will) be a few false positives on |
| 181 | ;; unwrapping cited lines with `gnus-article-outlook-unwrap-lines'. |
| 182 | ;; |
| 183 | ;; `gnus-article-outlook-repair-attribution' will only fix the first |
| 184 | ;; attribution line found in the article. Furthermore it is limited to |
| 185 | ;; certain kinds of attributions. And there may be horribly many |
| 186 | ;; false positives, vanishing lines and so on -- so don't trust your |
| 187 | ;; eyes. Again I recommend manual invocation. |
| 188 | ;; |
| 189 | ;; `gnus-article-outlook-rearrange-citation' carries all the limitations of |
| 190 | ;; `gnus-article-outlook-repair-attribution'. |
| 191 | ;; |
| 192 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 193 | ;; |
| 194 | ;; See ChangeLog for other changes. |
| 195 | ;; |
| 196 | ;; Revision 1.5 2002/01/27 14:39:17 rscholz |
| 197 | ;; * New variable `gnus-outlook-deuglify-no-wrap-chars' to inhibit |
| 198 | ;; unwrapping if one of these chars is first in the possibly wrapped line. |
| 199 | ;; * Improved rearranging of the article. |
| 200 | ;; * New function `gnus-outlook-repair-attribution-block' for repairing |
| 201 | ;; those big "Original Message (following some headers)" attributions. |
| 202 | ;; |
| 203 | ;; Revision 1.4 2002/01/03 14:05:00 rscholz |
| 204 | ;; Renamed `gnus-outlook-deuglify-article' to |
| 205 | ;; `gnus-article-outlook-deuglify-article'. |
| 206 | ;; Made it easier to deuglify the article while being in Gnus' Article |
| 207 | ;; Edit Mode. (suggested by Phil Nitschke) |
| 208 | ;; |
| 209 | ;; |
| 210 | ;; Revision 1.3 2002/01/02 23:35:54 rscholz |
| 211 | ;; Fix a bug that caused succeeding long attribution lines to be |
| 212 | ;; unwrapped. Minor doc fixes and regular expression tuning. |
| 213 | ;; |
| 214 | ;; Revision 1.2 2001/12/30 20:14:34 rscholz |
| 215 | ;; Clean up source. |
| 216 | ;; |
| 217 | ;; Revision 1.1 2001/12/30 20:13:32 rscholz |
| 218 | ;; Initial revision |
| 219 | ;; |
| 220 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 221 | |
| 222 | ;;; Code: |
| 223 | |
| 224 | (require 'gnus-art) |
| 225 | (require 'gnus-sum) |
| 226 | |
| 227 | (defconst gnus-outlook-deuglify-version "1.5 Gnus version" ;; presumably standalone Revision 1.5 as maintained in Gnus -- confirm |
| 228 | "Version of gnus-outlook-deuglify.") |
| 229 | |
| 230 | ;;; User Customizable Variables: |
| 231 | |
| 232 | (defgroup gnus-outlook-deuglify nil ;; no members listed here; the options below join via :group |
| 233 | "Deuglify articles generated by broken user agents like MS Outlook (Express)." |
| 234 | :version "22.1" |
| 235 | :group 'gnus) ;; parent customization group |
| 236 | |
| 237 | (defcustom gnus-outlook-deuglify-unwrap-min 45 ;; per the Commentary, shorter cited lines are never unwrapped onto |
| 238 | "Minimum length of the cited line above the (possibly) wrapped line." |
| 239 | :version "22.1" |
| 240 | :type 'integer |
| 241 | :group 'gnus-outlook-deuglify) |
| 242 | |
| 243 | (defcustom gnus-outlook-deuglify-unwrap-max 95 ;; per the Commentary, a line is only unwrapped if the joined result stays within this |
| 244 | "Maximum length of the cited line after unwrapping." |
| 245 | :version "22.1" |
| 246 | :type 'integer |
| 247 | :group 'gnus-outlook-deuglify) |
| 248 | |
| 249 | (defcustom gnus-outlook-deuglify-cite-marks ">|#%" ;; a string; per the docstring, each character in it is a cite mark |
| 250 | "Characters that indicate cited lines." |
| 251 | :version "22.1" |
| 252 | :type 'string |
| 253 | :group 'gnus-outlook-deuglify) |
| 254 | |
| 255 | (defcustom gnus-outlook-deuglify-unwrap-stop-chars nil ;; ".?!" or nil |
| 256 | "Characters that inhibit unwrapping when last on the preceding cited line." |
| 257 | :version "22.1" |
| 258 | :type '(radio (const :format "None " nil) ;; nil: no stop characters (the default) |
| 259 | (string :value ".?!")) ;; e.g. ".?!": full stop, question mark, exclamation mark |
| 260 | :group 'gnus-outlook-deuglify) |
| 261 | |
| 262 | (defcustom gnus-outlook-deuglify-no-wrap-chars "`" |
| 263 | "Characters that inhibit unwrapping when first in the possibly wrapped line." |
| 264 | :version "22.1" |
| 265 | :type 'string |
| 266 | :group 'gnus-outlook-deuglify) |
| 267 | |
| 268 | (defcustom gnus-outlook-deuglify-attrib-cut-regexp |
| 269 | "\\(On \\|Am \\)?\\(Mon\\|Tue\\|Wed\\|Thu\\|Fri\\|Sat\\|Sun\\),[^,]+, " ;; optional "On "/"Am ", weekday abbreviation, comma, text up to the next comma and space |
| 270 | "Regexp matching the start of an attribution line that should be cut off." |
| 271 | :version "22.1" |
| 272 | :type 'string |
| 273 | :group 'gnus-outlook-deuglify) |
| 274 | |
| 275 | (defcustom gnus-outlook-deuglify-attrib-verb-regexp |
| 276 | Content-type: text/html
HCoop Git - bpt/emacs.git/blame_incremental - lisp/gnus/deuglify.el
500 - Internal Server Error
Malformed UTF-8 character (fatal) at (eval 8) line 1, <$fd> line 276.