Commit | Line | Data |
---|---|---|
715a2ca2 | 1 | ;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages |
7daa20fc | 2 | ;; Copyright (C) 1998,1999,2000,02,03,2004 Free Software Foundation, Inc. |
c113de23 GM |
3 | |
4 | ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | |
5 | ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> | |
6 | ;; This file is part of GNU Emacs. | |
7 | ||
8 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
9 | ;; it under the terms of the GNU General Public License as published by | |
10 | ;; the Free Software Foundation; either version 2, or (at your option) | |
11 | ;; any later version. | |
12 | ||
13 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
14 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | ;; GNU General Public License for more details. | |
17 | ||
18 | ;; You should have received a copy of the GNU General Public License | |
19 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
20 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
21 | ;; Boston, MA 02111-1307, USA. | |
22 | ||
23 | ;;; Commentary: | |
24 | ||
d49a4975 DL |
25 | ;; RFC 2047 is "MIME (Multipurpose Internet Mail Extensions) Part |
26 | ;; Three: Message Header Extensions for Non-ASCII Text". | |
27 | ||
c113de23 GM |
28 | ;;; Code: |
29 | ||
7f0321ff DL |
30 | (eval-when-compile |
31 | (require 'cl) | |
32 | (defvar message-posting-charset)) | |
c113de23 GM |
33 | |
34 | (require 'qp) | |
35 | (require 'mm-util) | |
7f0321ff | 36 | ;; Fixme: Avoid this (used for mail-parse-charset) mm dependence on gnus. |
c113de23 | 37 | (require 'mail-prsvr) |
f2307f18 | 38 | (require 'base64) |
f2307f18 | 39 | (autoload 'mm-body-7-or-8 "mm-bodies") |
1c33719f | 40 | |
;; Each element is (KEY . METHOD).  `rfc2047-encode-message-header'
;; walks this list in order and uses the METHOD of the first element
;; whose KEY is t or is a regexp matching at the start of the field.
(defvar rfc2047-header-encoding-alist
  '(("Newsgroups\\|Followup-To" . nil)
    ("Message-ID" . nil)
    ("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\\)" .
     address-mime)
    ;; Catch-all: every other header is encoded as plain `mime'.
    (t . mime))
  "*Header/encoding method alist.
The list is traversed sequentially.  The keys can either be
header regexps or t.

The values can be:

1) nil, in which case no encoding is done;
2) `mime', in which case the header will be encoded according to RFC2047;
3) `address-mime', like `mime', but takes account of the rules for address
   fields (where quoted strings and comments must be treated separately);
4) a charset, in which case it will be encoded as that charset;
5) `default', in which case the field will be encoded as the rest
   of the article.")
60 | ||
;; Each element is (CHARSET . ENCODING).  `rfc2047-encode' looks the
;; charset up with `assq' and falls back to `B' for charsets that are
;; not listed here.
(defvar rfc2047-charset-encoding-alist
  '((us-ascii . nil)
    (iso-8859-1 . Q)
    (iso-8859-2 . Q)
    (iso-8859-3 . Q)
    (iso-8859-4 . Q)
    (iso-8859-5 . B)
    (koi8-r . B)
    (iso-8859-7 . B)
    (iso-8859-8 . B)
    (iso-8859-9 . Q)
    (iso-8859-14 . Q)
    (iso-8859-15 . Q)
    (iso-2022-jp . B)
    (iso-2022-kr . B)
    (gb2312 . B)
    (big5 . B)
    (cn-big5 . B)
    (cn-gb . B)
    (cn-gb-2312 . B)
    (euc-kr . B)
    (iso-2022-jp-2 . B)
    (iso-2022-int-1 . B))
  "Alist of MIME charsets to RFC2047 encodings.
Valid encodings are nil, `Q' and `B'.  These indicate binary (no) encoding,
quoted-printable and base64 respectively.")
c113de23 GM |
87 | |
;; `rfc2047-encode' calls the function found here with two arguments,
;; the start and end of the region to encode.
(defvar rfc2047-encoding-function-alist
  '((Q . rfc2047-q-encode-region)
    (B . rfc2047-b-encode-region)
    ;; A nil encoding leaves the text untouched.
    (nil . ignore))
  "Alist of RFC2047 encodings to encoding functions.")
93 | ||
;; Each element is (REGEXP . CLASS).  `rfc2047-q-encode-region' uses
;; the CLASS of the first element whose REGEXP matches at the start of
;; the region and passes it to `quoted-printable-encode-region' as the
;; set of characters that need not be encoded.
(defvar rfc2047-q-encoding-alist
  '(("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\\):"
     . "-A-Za-z0-9!*+/" )
    ;; = (\075), _ (\137), ? (\077) are used in the encoded word.
    ;; Avoid using 8bit characters.
    ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?"
    ("." . "\010\012\014\040-\074\076\100-\136\140-\177"))
  "Alist of header regexps and valid Q characters.")
102 | ||
103 | ;;; | |
104 | ;;; Functions for encoding RFC2047 messages | |
105 | ;;; | |
106 | ||
(defun rfc2047-narrow-to-field ()
  "Narrow the buffer to the header field that starts on the current line.
The field extends up to, but not including, the next line that does
not begin with a space, tab or newline, or to the end of the
accessible buffer when there is no such line.  Point is left at the
beginning of the narrowed region."
  (beginning-of-line)
  (let ((field-start (point)))
    ;; Continuation lines begin with whitespace, so the field ends
    ;; just before the next line that starts with anything else.
    (forward-line 1)
    (narrow-to-region
     field-start
     (if (re-search-forward "^[^ \n\t]" nil t)
         (match-beginning 0)
       (point-max))))
  (goto-char (point-min)))
120 | ||
7f0321ff DL |
;; `rfc2047-encode-region' only tests this for `mime'; any other value
;; selects the address-aware (`address-mime') tokenizing path.
(defvar rfc2047-encoding-type 'address-mime
  "The type of encoding done by `rfc2047-encode-region'.
This should be dynamically bound around calls to
`rfc2047-encode-region' to either `mime' or `address-mime'.  See
`rfc2047-header-encoding-alist', for definitions.")
126 | ||
c113de23 GM |
(defun rfc2047-encode-message-header ()
  "Encode the message header according to `rfc2047-header-encoding-alist'.
Should be called narrowed to the head of the message.

Each header field is processed in turn.  A field for which
`rfc2047-encodable-p' returns nil is only folded (after possibly being
encoded in the posting charset when it contains 8-bit text); any other
field is encoded with the method selected from
`rfc2047-header-encoding-alist'."
  (interactive "*")
  (save-excursion
    (goto-char (point-min))
    (let (alist elem method)
      ;; One iteration per header field.
      (while (not (eobp))
        (save-restriction
          (rfc2047-narrow-to-field)
          (if (not (rfc2047-encodable-p))
              (prog1
                  (if (and (eq (mm-body-7-or-8) '8bit)
                           (mm-multibyte-p)
                           (mm-coding-system-p
                            (car message-posting-charset)))
                      ;; 8-bit text is encoded in the posting charset
                      ;; (the car of `message-posting-charset').
                      (mm-encode-coding-region
                       (point-min) (point-max)
                       (mm-charset-to-coding-system
                        (car message-posting-charset))))
                ;; No encoding necessary, but folding is nice
                (rfc2047-fold-region
                 ;; Start folding after the "Name: " prefix.
                 (save-excursion
                   (goto-char (point-min))
                   (skip-chars-forward "^:")
                   (when (looking-at ": ")
                     (forward-char 2))
                   (point))
                 (point-max)))
            ;; We found something that may perhaps be encoded.
            (setq method nil
                  alist rfc2047-header-encoding-alist)
            ;; Pick the method of the first matching alist element;
            ;; clearing ALIST terminates the search.
            (while (setq elem (pop alist))
              (when (or (and (stringp (car elem))
                             (looking-at (car elem)))
                        (eq (car elem) t))
                (setq alist nil
                      method (cdr elem))))
            ;; Move past the header name so only the body is encoded.
            (goto-char (point-min))
            (re-search-forward "^[^:]+: *" nil t)
            (cond
             ((eq method 'address-mime)
              ;; Uses whatever `rfc2047-encoding-type' is currently bound to.
              (rfc2047-encode-region (point) (point-max)))
             ((eq method 'mime)
              (let ((rfc2047-encoding-type method))
                (rfc2047-encode-region (point) (point-max))))
             ((eq method 'default)
              (if (and (featurep 'mule)
                       (if (boundp 'default-enable-multibyte-characters)
                           default-enable-multibyte-characters)
                       mail-parse-charset)
                  (mm-encode-coding-region (point) (point-max)
                                           mail-parse-charset)))
             ;; METHOD names a coding system: encode the field with it.
             ((mm-coding-system-p method)
              (if (and (featurep 'mule)
                       (if (boundp 'default-enable-multibyte-characters)
                           default-enable-multibyte-characters))
                  (mm-encode-coding-region (point) (point-max) method)))
             ;; Hm.
             (t)))
          ;; Leave point at the end of this field so that, once the
          ;; restriction is undone, the loop continues with the next one.
          (goto-char (point-max)))))))
189 | ||
a553a9f5 DL |
190 | ;; Fixme: This, and the require below may not be the Right Thing, but |
191 | ;; should be safe just before release. -- fx 2001-02-08 | |
192 | (eval-when-compile (defvar message-posting-charset)) | |
193 | ||
f2307f18 DL |
(defun rfc2047-encodable-p ()
  "Return non-nil if any characters in current buffer need encoding in headers.
The buffer may be narrowed.

Return nil when `mm-find-mime-charset-region' reports no charsets for
the accessible text, or when the only charset it reports is the posting
charset, i.e. the car of `message-posting-charset'."
  (require 'message)                    ; for message-posting-charset
  (let ((charsets
         (mm-find-mime-charset-region (point-min) (point-max))))
    ;; `message-posting-charset' is a list whose car is the charset (see
    ;; its use in `rfc2047-encode-message-header'), so compare against
    ;; that car rather than against the whole list.
    (and charsets
         (not (equal charsets (list (car message-posting-charset)))))))
201 | ||
;; Syntax table used while splitting a header into regions that may
;; need encoding.  Every character defaults to word syntax; the double
;; quote delimits strings, the backslash quotes the next character,
;; and the remaining RFC 2822 specials are punctuation, so that
;; forward-sexp/forward-word stop at the right places.  Nb. ietf-drums
;; does things differently.
(defconst rfc2047-syntax-table
  (let ((syntax (make-char-table 'syntax-table '(2))))
    ;; RFC 2822 specials other than `"' and `\' are plain punctuation.
    (dolist (special '(?\( ?\) ?\< ?\> ?\[ ?\] ?: ?\; ?, ?@))
      (modify-syntax-entry special "." syntax))
    (modify-syntax-entry ?\" "\"" syntax)
    (modify-syntax-entry ?\\ "\\" syntax)
    syntax))
c113de23 GM |
223 | |
(defun rfc2047-encode-region (b e)
  "Encode words in region B to E that need encoding.
By default, the region is treated as containing RFC2822 addresses.
Dynamically bind `rfc2047-encoding-type' to change that.

With `rfc2047-encoding-type' bound to `mime' the whole region is
encoded as a single word if it contains any non-ASCII character.
Otherwise the region is split into quoted strings, specials and
ordinary tokens, and only the quoted strings and tokens containing
non-ASCII characters are encoded.  The result is folded with
`rfc2047-fold-region'.

Signal an error if the region cannot be parsed or encoded."
  (save-restriction
    (narrow-to-region b e)
    (if (eq 'mime rfc2047-encoding-type)
        ;; Simple case -- treat as single word.
        (progn
          (goto-char (point-min))
          ;; Does it need encoding?
          (skip-chars-forward "\000-\177" e)
          (unless (eobp)
            (rfc2047-encode b e)))
      ;; `address-mime' case -- take care of quoted words, comments.
      (with-syntax-table rfc2047-syntax-table
        (let ((start (point))           ; start of current token
              end                       ; end of current token
              ;; Whether there's an encoded word before the current
              ;; token, either immediately or separated by space.
              last-encoded)
          (goto-char (point-min))
          (condition-case nil           ; in case of unbalanced quotes
              ;; Look for rfc2822-style: sequences of atoms, quoted
              ;; strings, specials, whitespace.  (Specials mustn't be
              ;; encoded.)
              (while (not (eobp))
                (setq start (point))
                ;; Skip whitespace.
                (unless (= 0 (skip-chars-forward " \t\n"))
                  (setq start (point)))
                (cond
                 ((not (char-after)))   ; eob
                 ;; else token start
                 ((eq ?\" (char-syntax (char-after)))
                  ;; Quoted word.
                  (forward-sexp)
                  (setq end (point))
                  ;; Does it need encoding?
                  (goto-char start)
                  (skip-chars-forward "\000-\177" end)
                  (if (= end (point))
                      (setq last-encoded nil)
                    ;; It needs encoding.  Strip the quotes first,
                    ;; since encoded words can't occur in quotes.
                    (goto-char end)
                    (delete-backward-char 1)
                    (goto-char start)
                    (delete-char 1)
                    (when last-encoded
                      ;; There was a preceding quoted word.  We need
                      ;; to include any separating whitespace in this
                      ;; word to avoid it getting lost.
                      (skip-chars-backward " \t")
                      ;; A space is needed between the encoded words.
                      (insert ? )
                      (setq start (point)
                            end (1+ end)))
                    ;; Adjust the end position for the deleted quotes.
                    (rfc2047-encode start (- end 2))
                    (setq last-encoded t))) ; record that it was encoded
                 ((eq ?. (char-syntax (char-after)))
                  ;; Skip other delimiters, but record that they've
                  ;; potentially separated quoted words.
                  (forward-char)
                  (setq last-encoded nil))
                 (t                     ; normal token/whitespace sequence
                  ;; Find the end.
                  (forward-word 1)
                  (skip-chars-backward " \t")
                  (setq end (point))
                  ;; Deal with encoding and leading space as for
                  ;; quoted words.
                  (goto-char start)
                  (skip-chars-forward "\000-\177" end)
                  (if (= end (point))
                      (setq last-encoded nil)
                    (when last-encoded
                      (goto-char start)
                      (skip-chars-backward " \t")
                      (insert ? )
                      (setq start (point)
                            end (1+ end)))
                    (rfc2047-encode start end)
                    (setq last-encoded t)))))
            ;; The region may already have been edited (quotes removed,
            ;; earlier words encoded), so B and E need no longer be valid
            ;; positions.  Report the current contents of the narrowed
            ;; region instead of risking an `args-out-of-range' error
            ;; that would hide the real problem.
            (error (error "Invalid data for rfc2047 encoding: %s"
                          (buffer-substring (point-min) (point-max))))))))
    (rfc2047-fold-region b (point))))
c113de23 GM |
312 | |
(defun rfc2047-encode-string (string)
  "Return STRING with the words that need it RFC2047-encoded.
The work is done by `rfc2047-encode-region' in a temporary buffer, so
by default STRING is treated as containing addresses; bind
`rfc2047-encoding-type' to change that."
  (with-temp-buffer
    (insert string)
    (rfc2047-encode-region (point-min) (point-max))
    (buffer-substring (point-min) (point-max))))
321 | ||
7f0321ff DL |
(defun rfc2047-encode (b e)
  "Encode the word(s) in the region B to E.
The text must belong to at most one MIME charset, as reported by
`mm-find-mime-charset-region'; otherwise an error is signalled.  The
encoding is looked up in `rfc2047-charset-encoding-alist' and defaults
to `B' for charsets not listed there.  For the `B' encoding the text
is first split into lines of at most 15 characters.  Each resulting
line becomes one encoded word of the form =?CHARSET?ENCODING?TEXT?=,
and every word after the first is preceded by a space."
  (let* ((mime-charset (mm-find-mime-charset-region b e))
         (cs (if (> (length mime-charset) 1)
                 ;; Fixme: Instead of this, try to break region into
                 ;; parts that can be encoded separately.
                 (error "Can't rfc2047-encode `%s'"
                        (buffer-substring b e))
               ;; From here on MIME-CHARSET is a single charset symbol
               ;; (or nil), no longer a list.
               (setq mime-charset (car mime-charset))
               (mm-charset-to-coding-system mime-charset)))
         ;; Fixme: Better, calculate the number of non-ASCII
         ;; characters, at least for 8-bit charsets.
         (encoding (if (assq mime-charset
                             rfc2047-charset-encoding-alist)
                       (cdr (assq mime-charset
                                  rfc2047-charset-encoding-alist))
                     'B))
         ;; Prefix of every encoded word, e.g. "=?iso-8859-1?q?".
         (start (concat
                 "=?" (downcase (symbol-name mime-charset)) "?"
                 (downcase (symbol-name encoding)) "?"))
         (first t))
    (if mime-charset
        (save-restriction
          (narrow-to-region b e)
          (when (eq encoding 'B)
            ;; break into lines before encoding
            (goto-char (point-min))
            (while (not (eobp))
              (goto-char (min (point-max) (+ 15 (point))))
              (unless (eobp)
                (insert ?\n))))
          (if (and (mm-multibyte-p)
                   (mm-coding-system-p cs))
              (mm-encode-coding-region (point-min) (point-max) cs))
          ;; Apply the Q or B transformation to the (now encoded) text.
          (funcall (cdr (assq encoding rfc2047-encoding-function-alist))
                   (point-min) (point-max))
          ;; Wrap every line as an encoded word.
          (goto-char (point-min))
          (while (not (eobp))
            (unless first
              (insert ? ))
            (setq first nil)
            (insert start)
            (end-of-line)
            (insert "?=")
            (forward-line 1))))))
c113de23 GM |
369 | |
(defun rfc2047-fold-region (b e)
  "Fold long lines in region B to E.
A line is broken once point has moved more than 76 characters past
its beginning and a break position is known.  Breaks are made at
whitespace when possible, otherwise just before an encoded word
\(text starting with \"=?\")."
  (save-restriction
    (narrow-to-region b e)
    (goto-char (point-min))
    (let ((break nil)       ; last whitespace position usable as a break
          (qword-break nil) ; start of an encoded word usable as a break
          (first t)         ; non-nil until the first whitespace run is seen
          ;; Beginning of the current line, measured in the widened
          ;; buffer so that text before B counts towards the length.
          (bol (save-restriction
                 (widen)
                 (mm-point-at-bol))))
      (while (not (eobp))
        (when (and (or break qword-break) (> (- (point) bol) 76))
          (goto-char (or break qword-break))
          (setq break nil
                qword-break nil)
          ;; Existing whitespace serves as the continuation indent;
          ;; otherwise a space is added after the newline.
          (if (looking-at "[ \t]")
              (insert ?\n)
            (insert "\n "))
          (setq bol (1- (point)))
          ;; Don't break before the first non-LWSP characters.
          (skip-chars-forward " \t")
          (unless (eobp) (forward-char 1)))
        (cond
         ((eq (char-after) ?\n)
          ;; An existing line break: start measuring a new line.
          (forward-char 1)
          (setq bol (point)
                break nil
                qword-break nil)
          (skip-chars-forward " \t")
          (unless (or (eobp) (eq (char-after) ?\n))
            (forward-char 1)))
         ((eq (char-after) ?\r)
          (forward-char 1))
         ((memq (char-after) '(? ?\t))
          (skip-chars-forward " \t")
          (if first
              ;; Don't break just after the header name.
              (setq first nil)
            (setq break (1- (point)))))
         ((not break)
          ;; No whitespace break is known yet: look for the start of
          ;; an encoded word to use instead.
          (if (not (looking-at "=\\?[^=]"))
              (if (eq (char-after) ?=)
                  (forward-char 1)
                (skip-chars-forward "^ \t\n\r="))
            (setq qword-break (point))
            (skip-chars-forward "^ \t\n\r")))
         (t
          (skip-chars-forward "^ \t\n\r"))))
      ;; Same check once more for the text scanned by the last iteration.
      (when (and (or break qword-break) (> (- (point) bol) 76))
        (goto-char (or break qword-break))
        (setq break nil
              qword-break nil)
        (if (looking-at "[ \t]")
            (insert ?\n)
          (insert "\n "))
        (setq bol (1- (point)))
        ;; Don't break before the first non-LWSP characters.
        (skip-chars-forward " \t")
        (unless (eobp) (forward-char 1))))))
f2307f18 DL |
430 | |
(defun rfc2047-unfold-region (b e)
  "Unfold lines in region B to E.
A continuation line (one starting with a space or tab) is joined to
the preceding line when the end of the continuation line is less than
76 characters away from the beginning of the line being built up."
  (save-restriction
    (narrow-to-region b e)
    (goto-char (point-min))
    (let ((line-start (save-restriction
                        (widen)
                        (mm-point-at-bol)))
          (prev-eol (mm-point-at-eol)))
      (forward-line 1)
      (while (not (eobp))
        (cond
         ((and (looking-at "[ \t]")
               (< (- (mm-point-at-eol) line-start) 76))
          ;; Join: drop the line terminator at the end of the
          ;; previous line.
          (goto-char prev-eol)
          (skip-chars-forward "\r\n")
          (delete-region prev-eol (point)))
         (t
          ;; Not joined: this line starts a new logical line.
          (setq line-start (mm-point-at-bol))))
        (setq prev-eol (mm-point-at-eol))
        (forward-line 1)))))
c113de23 GM |
452 | |
(defun rfc2047-b-encode-region (b e)
  "Base64-encode the header contained in region B to E.
Every line of the region is encoded on its own, without line breaks
being inserted into the encoded text."
  (goto-char b)
  (save-restriction
    (narrow-to-region b e)
    (while (not (eobp))
      (let ((line-start (point)))
        (end-of-line)
        (base64-encode-region line-start (point) t))
      ;; An empty line encodes to nothing; remove the newline before it.
      (when (and (bolp) (eolp))
        (delete-backward-char 1))
      (forward-line 1))))
462 | ||
(defun rfc2047-q-encode-region (b e)
  "Quoted-printable-encode the header in region B to E.
The set of characters left unencoded comes from the first element of
`rfc2047-q-encoding-alist' whose regexp matches at B.  Spaces in the
result are replaced by underscores.  If the encoded text is 56
characters or longer, newlines are inserted to split it into shorter
pieces."
  (save-excursion
    (save-restriction
      (narrow-to-region (goto-char b) e)
      (let ((alist rfc2047-q-encoding-alist)
            ;; Beginning of the current line in the widened buffer.
            (bol (save-restriction
                   (widen)
                   (mm-point-at-bol))))
        ;; Encode with the first matching character class; clearing
        ;; ALIST ends the loop.
        (while alist
          (when (looking-at (caar alist))
            (quoted-printable-encode-region b e nil (cdar alist))
            (subst-char-in-region (point-min) (point-max) ? ?_)
            (setq alist nil))
          (pop alist))
        ;; The size of QP encapsulation is about 20, so set limit to
        ;; 56=76-20.
        (unless (< (- (point-max) (point-min)) 56)
          ;; Don't break if it could fit in one line.
          ;; Let rfc2047-encode-region break it later.
          (goto-char (1+ (point-min)))
          (while (and (not (bobp)) (not (eobp)))
            (goto-char (min (point-max) (+ 56 bol)))
            ;; Back up to an "=" at most two characters behind point,
            ;; so that a "=XX" escape is not split.
            (search-backward "=" (- (point) 2) t)
            (unless (or (bobp) (eobp))
              (insert "\n")
              (setq bol (point)))))))))
c113de23 GM |
490 | |
491 | ;;; | |
492 | ;;; Functions for decoding RFC2047 messages | |
493 | ;;; | |
494 | ||
652dbc07 DL |
(defvar rfc2047-encoded-word-regexp
  "=\\?\\([^][\000-\040()<>@,\;:\\\"/?.=]+\\)\\?\\(B\\|Q\\)\\?\\([!->@-~ +]+\\)\\?="
  "Regexp matching an RFC2047 encoded word.
Subexpression 1 is the charset, 2 the encoding (B or Q) and 3 the
encoded text; see `rfc2047-parse-and-decode'.")
c113de23 GM |
497 | |
(defun rfc2047-decode-region (start end)
  "Decode MIME-encoded words in region between START and END.
Whitespace between adjacent encoded words is removed first.  Each
encoded word is then replaced by the result of
`rfc2047-parse-and-decode'.  In a multibyte session the text between
encoded words is decoded with `mail-parse-charset' when that is set.
Finally the region is unfolded with `rfc2047-unfold-region'."
  (interactive "r")
  (let ((case-fold-search t)
        b e)
    (save-excursion
      (save-restriction
        (narrow-to-region start end)
        (goto-char (point-min))
        ;; Remove whitespace between encoded words.
        ;; Group 1 is the first encoded word and group 6 the second;
        ;; groups 2-5 are the first word's inner groups and the
        ;; separating whitespace.
        (while (re-search-forward
                (concat "\\(" rfc2047-encoded-word-regexp "\\)"
                        "\\(\n?[ \t]\\)+"
                        "\\(" rfc2047-encoded-word-regexp "\\)")
                nil t)
          ;; Point is left at the end of the first word so that the
          ;; second word can in turn pair up with a third one.
          (delete-region (goto-char (match-end 1)) (match-beginning 6)))
        ;; Decode the encoded words.
        ;; B marks the start of the not yet handled plain text and E
        ;; the start of the encoded word that ends it.
        (setq b (goto-char (point-min)))
        (while (re-search-forward rfc2047-encoded-word-regexp nil t)
          (setq e (match-beginning 0))
          (insert (rfc2047-parse-and-decode
                   (prog1
                       (match-string 0)
                     (delete-region (match-beginning 0) (match-end 0)))))
          (when (and (mm-multibyte-p)
                     mail-parse-charset
                     (not (eq mail-parse-charset 'gnus-decoded)))
            (mm-decode-coding-region b e mail-parse-charset))
          (setq b (point)))
        ;; Decode the plain text after the last encoded word.
        (when (and (mm-multibyte-p)
                   mail-parse-charset
                   (not (eq mail-parse-charset 'us-ascii))
                   (not (eq mail-parse-charset 'gnus-decoded)))
          (mm-decode-coding-region b (point-max) mail-parse-charset))
        (rfc2047-unfold-region (point-min) (point-max))))))
c113de23 GM |
533 | |
(defun rfc2047-decode-string (string)
  "Decode the MIME-encoded words in STRING and return the result.
The decoding is done by `rfc2047-decode-region' in a temporary buffer,
which is made multibyte if `mm-multibyte-p' returns non-nil in the
calling context."
  (let ((multibyte (mm-multibyte-p)))
    (with-temp-buffer
      (if multibyte
          (mm-enable-multibyte))
      (insert string)
      (inline
        (rfc2047-decode-region (point-min) (point-max)))
      (buffer-string))))
544 | ||
(defun rfc2047-parse-and-decode (word)
  "Decode WORD and return it if it is an encoded word.
Return WORD itself if it does not match `rfc2047-encoded-word-regexp',
if `rfc2047-decode' returns nil, or if decoding signals an error."
  (or (and (string-match rfc2047-encoded-word-regexp word)
           (condition-case nil
               (rfc2047-decode
                (match-string 1 word)           ; charset
                (upcase (match-string 2 word))  ; encoding, "B" or "Q"
                (match-string 3 word))          ; encoded text
             ;; A decoding failure falls through to returning WORD.
             (error nil)))
      word))
558 | ||
(defun rfc2047-decode (charset encoding string)
  "Decode STRING from the given MIME CHARSET in the given ENCODING.
Valid ENCODINGs are \"B\" and \"Q\".
If your Emacs implementation can't decode CHARSET, return nil.

CHARSET may be a string or a symbol.  `mail-parse-charset' is used
instead of CHARSET when CHARSET is nil or is ignored according to
`mail-parse-ignored-charsets'.  Signal an error if ENCODING is
neither \"B\" nor \"Q\"."
  ;; Normalize a charset name to a lower-case symbol.
  (if (stringp charset)
      (setq charset (intern (downcase charset))))
  (if (or (not charset)
          (eq 'gnus-all mail-parse-ignored-charsets)
          (memq 'gnus-all mail-parse-ignored-charsets)
          (memq charset mail-parse-ignored-charsets))
      (setq charset mail-parse-charset))
  (let ((cs (mm-charset-to-coding-system charset)))
    ;; No coding system for CHARSET: fall back to `mail-parse-charset'
    ;; if `gnus-unknown' is among the ignored charsets.
    (if (and (not cs) charset
             (listp mail-parse-ignored-charsets)
             (memq 'gnus-unknown mail-parse-ignored-charsets))
        (setq cs (mm-charset-to-coding-system mail-parse-charset)))
    (when cs
      (when (and (eq cs 'ascii)
                 mail-parse-charset)
        (setq cs mail-parse-charset))
      ;; Ensure unibyte result in Emacs 20.
      (let (default-enable-multibyte-characters)
        (with-temp-buffer
          (mm-decode-coding-string
           (cond
            ((equal "B" encoding)
             (base64-decode-string string))
            ((equal "Q" encoding)
             ;; In the Q encoding an underscore stands for a space.
             (quoted-printable-decode-string
              (mm-replace-chars-in-string string ?_ ? )))
            (t (error "Invalid encoding: %s" encoding)))
           cs))))))
c113de23 GM |
591 | |
592 | (provide 'rfc2047) | |
593 | ||
ab5796a9 | 594 | ;;; arch-tag: a07fe3d4-22b5-4c4a-bd89-b1f82d5d36f6 |
c113de23 | 595 | ;;; rfc2047.el ends here |