Commit | Line | Data |
---|---|---|
715a2ca2 | 1 | ;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages |
270a576a | 2 | |
e84b4b86 | 3 | ;; Copyright (C) 1998, 1999, 2000, 2002, 2003, 2004, |
88e6695f | 4 | ;; 2005, 2006 Free Software Foundation, Inc. |
c113de23 GM |
5 | |
6 | ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | |
7 | ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> | |
8 | ;; This file is part of GNU Emacs. | |
9 | ||
10 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
11 | ;; it under the terms of the GNU General Public License as published by | |
12 | ;; the Free Software Foundation; either version 2, or (at your option) | |
13 | ;; any later version. | |
14 | ||
15 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | ;; GNU General Public License for more details. | |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
21 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
3a35cf56 LK |
22 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
23 | ;; Boston, MA 02110-1301, USA. | |
c113de23 GM |
24 | |
25 | ;;; Commentary: | |
26 | ||
d49a4975 DL |
27 | ;; RFC 2047 is "MIME (Multipurpose Internet Mail Extensions) Part |
28 | ;; Three: Message Header Extensions for Non-ASCII Text". | |
29 | ||
c113de23 GM |
30 | ;;; Code: |
31 | ||
7f0321ff DL |
32 | (eval-when-compile |
33 | (require 'cl) | |
23f87bed MB |
34 | (defvar message-posting-charset) |
35 | (unless (fboundp 'with-syntax-table) ; not in Emacs 20 | |
36 | (defmacro with-syntax-table (table &rest body) | |
37 | "Evaluate BODY with syntax table of current buffer set to TABLE. | |
38 | The syntax table of the current buffer is saved, BODY is evaluated, and the | |
39 | saved table is restored, even in case of an abnormal exit. | |
40 | Value is what BODY returns." | |
41 | (let ((old-table (make-symbol "table")) | |
42 | (old-buffer (make-symbol "buffer"))) | |
43 | `(let ((,old-table (syntax-table)) | |
44 | (,old-buffer (current-buffer))) | |
45 | (unwind-protect | |
46 | (progn | |
47 | (set-syntax-table ,table) | |
48 | ,@body) | |
49 | (save-current-buffer | |
50 | (set-buffer ,old-buffer) | |
51 | (set-syntax-table ,old-table)))))))) | |
c113de23 GM |
52 | |
53 | (require 'qp) | |
54 | (require 'mm-util) | |
270a576a | 55 | (require 'ietf-drums) |
7f0321ff | 56 | ;; Fixme: Avoid this (used for mail-parse-charset) mm dependence on gnus. |
c113de23 | 57 | (require 'mail-prsvr) |
f2307f18 | 58 | (require 'base64) |
f2307f18 | 59 | (autoload 'mm-body-7-or-8 "mm-bodies") |
1c33719f | 60 | |
23f87bed MB |
61 | (eval-and-compile |
62 | ;; Avoid gnus-util for mm- code. | |
63 | (defalias 'rfc2047-point-at-bol | |
64 | (if (fboundp 'point-at-bol) | |
65 | 'point-at-bol | |
66 | 'line-beginning-position)) | |
67 | ||
68 | (defalias 'rfc2047-point-at-eol | |
69 | (if (fboundp 'point-at-eol) | |
70 | 'point-at-eol | |
71 | 'line-end-position))) | |
72 | ||
c113de23 | 73 | (defvar rfc2047-header-encoding-alist |
23f87bed MB |
74 | '(("Newsgroups" . nil) |
75 | ("Followup-To" . nil) | |
c113de23 | 76 | ("Message-ID" . nil) |
ad136a7c | 77 | ("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|\\(In-\\)?Reply-To\\|Sender\ |
23f87bed | 78 | \\|Mail-Followup-To\\|Mail-Copies-To\\|Approved\\)" . address-mime) |
c113de23 GM |
79 | (t . mime)) |
80 | "*Header/encoding method alist. | |
81 | The list is traversed sequentially. The keys can either be | |
f2307f18 | 82 | header regexps or t. |
c113de23 GM |
83 | |
84 | The values can be: | |
85 | ||
86 | 1) nil, in which case no encoding is done; | |
87 | 2) `mime', in which case the header will be encoded according to RFC2047; | |
7f0321ff DL |
88 | 3) `address-mime', like `mime', but takes account of the rules for address |
89 | fields (where quoted strings and comments must be treated separately); | |
90 | 4) a charset, in which case it will be encoded as that charset; | |
91 | 5) `default', in which case the field will be encoded as the rest | |
c113de23 GM |
92 | of the article.") |
93 | ||
94 | (defvar rfc2047-charset-encoding-alist | |
95 | '((us-ascii . nil) | |
96 | (iso-8859-1 . Q) | |
97 | (iso-8859-2 . Q) | |
98 | (iso-8859-3 . Q) | |
99 | (iso-8859-4 . Q) | |
100 | (iso-8859-5 . B) | |
101 | (koi8-r . B) | |
7f0321ff DL |
102 | (iso-8859-7 . B) |
103 | (iso-8859-8 . B) | |
c113de23 | 104 | (iso-8859-9 . Q) |
f2307f18 DL |
105 | (iso-8859-14 . Q) |
106 | (iso-8859-15 . Q) | |
c113de23 GM |
107 | (iso-2022-jp . B) |
108 | (iso-2022-kr . B) | |
109 | (gb2312 . B) | |
676a7cc9 SZ |
110 | (big5 . B) |
111 | (cn-big5 . B) | |
c113de23 GM |
112 | (cn-gb . B) |
113 | (cn-gb-2312 . B) | |
114 | (euc-kr . B) | |
115 | (iso-2022-jp-2 . B) | |
23f87bed MB |
116 | (iso-2022-int-1 . B) |
117 | (viscii . Q)) | |
c113de23 | 118 | "Alist of MIME charsets to RFC2047 encodings. |
7f0321ff DL |
119 | Valid encodings are nil, `Q' and `B'. These indicate binary (no) encoding, |
120 | quoted-printable and base64 respectively.") | |
c113de23 | 121 | |
10ace8ea MB |
122 | (defvar rfc2047-encode-function-alist |
123 | '((Q . rfc2047-q-encode-string) | |
124 | (B . rfc2047-b-encode-string) | |
125 | (nil . identity)) | |
c113de23 GM |
126 | "Alist of RFC2047 encodings to encoding functions.") |
127 | ||
10ace8ea MB |
128 | (defvar rfc2047-encode-encoded-words t |
129 | "Whether encoded words should be encoded again.") | |
130 | ||
c113de23 GM |
131 | ;;; |
132 | ;;; Functions for encoding RFC2047 messages | |
133 | ;;; | |
134 | ||
f4dd4ae8 MB |
135 | (defun rfc2047-qp-or-base64 () |
136 | "Return the type with which to encode the buffer. | |
137 | This is either `base64' or `quoted-printable'." | |
138 | (save-excursion | |
139 | (let ((limit (min (point-max) (+ 2000 (point-min)))) | |
140 | (n8bit 0)) | |
141 | (goto-char (point-min)) | |
142 | (skip-chars-forward "\x20-\x7f\r\n\t" limit) | |
143 | (while (< (point) limit) | |
144 | (incf n8bit) | |
145 | (forward-char 1) | |
146 | (skip-chars-forward "\x20-\x7f\r\n\t" limit)) | |
147 | (if (or (< (* 6 n8bit) (- limit (point-min))) | |
148 | ;; Don't base64, say, a short line with a single | |
149 | ;; non-ASCII char when splitting parts by charset. | |
150 | (= n8bit 1)) | |
151 | 'quoted-printable | |
152 | 'base64)))) | |
153 | ||
c113de23 GM |
154 | (defun rfc2047-narrow-to-field () |
155 | "Narrow the buffer to the header on the current line." | |
156 | (beginning-of-line) | |
157 | (narrow-to-region | |
158 | (point) | |
159 | (progn | |
160 | (forward-line 1) | |
161 | (if (re-search-forward "^[^ \n\t]" nil t) | |
23f87bed | 162 | (rfc2047-point-at-bol) |
c113de23 GM |
163 | (point-max)))) |
164 | (goto-char (point-min))) | |
165 | ||
23f87bed MB |
166 | (defun rfc2047-field-value () |
167 | "Return the value of the field at point." | |
168 | (save-excursion | |
169 | (save-restriction | |
170 | (rfc2047-narrow-to-field) | |
171 | (re-search-forward ":[ \t\n]*" nil t) | |
10ace8ea | 172 | (buffer-substring-no-properties (point) (point-max))))) |
23f87bed | 173 | |
7f0321ff DL |
174 | (defvar rfc2047-encoding-type 'address-mime |
175 | "The type of encoding done by `rfc2047-encode-region'. | |
176 | This should be dynamically bound around calls to | |
177 | `rfc2047-encode-region' to either `mime' or `address-mime'. See | |
178 | `rfc2047-header-encoding-alist', for definitions.") | |
179 | ||
c113de23 GM |
180 | (defun rfc2047-encode-message-header () |
181 | "Encode the message header according to `rfc2047-header-encoding-alist'. | |
182 | Should be called narrowed to the head of the message." | |
183 | (interactive "*") | |
184 | (save-excursion | |
185 | (goto-char (point-min)) | |
186 | (let (alist elem method) | |
187 | (while (not (eobp)) | |
188 | (save-restriction | |
189 | (rfc2047-narrow-to-field) | |
190 | (if (not (rfc2047-encodable-p)) | |
c6e26ce2 | 191 | (prog1 |
10ace8ea MB |
192 | (if (and (eq (mm-body-7-or-8) '8bit) |
193 | (mm-multibyte-p) | |
194 | (mm-coding-system-p | |
195 | (car message-posting-charset))) | |
196 | ;; 8 bit must be decoded. | |
197 | (mm-encode-coding-region | |
198 | (point-min) (point-max) | |
199 | (mm-charset-to-coding-system | |
200 | (car message-posting-charset)))) | |
c6e26ce2 | 201 | ;; No encoding necessary, but folding is nice |
10ace8ea MB |
202 | (when nil |
203 | (rfc2047-fold-region | |
204 | (save-excursion | |
205 | (goto-char (point-min)) | |
206 | (skip-chars-forward "^:") | |
207 | (when (looking-at ": ") | |
208 | (forward-char 2)) | |
209 | (point)) | |
210 | (point-max)))) | |
c113de23 GM |
211 | ;; We found something that may perhaps be encoded. |
212 | (setq method nil | |
213 | alist rfc2047-header-encoding-alist) | |
214 | (while (setq elem (pop alist)) | |
215 | (when (or (and (stringp (car elem)) | |
216 | (looking-at (car elem))) | |
217 | (eq (car elem) t)) | |
218 | (setq alist nil | |
219 | method (cdr elem)))) | |
7f0321ff | 220 | (re-search-forward "^[^:]+: *" nil t) |
c113de23 | 221 | (cond |
7f0321ff DL |
222 | ((eq method 'address-mime) |
223 | (rfc2047-encode-region (point) (point-max))) | |
c113de23 | 224 | ((eq method 'mime) |
23f87bed | 225 | (let ((rfc2047-encoding-type 'mime)) |
7f0321ff | 226 | (rfc2047-encode-region (point) (point-max)))) |
c113de23 GM |
227 | ((eq method 'default) |
228 | (if (and (featurep 'mule) | |
1bde0b39 DL |
229 | (if (boundp 'default-enable-multibyte-characters) |
230 | default-enable-multibyte-characters) | |
c113de23 | 231 | mail-parse-charset) |
7f0321ff | 232 | (mm-encode-coding-region (point) (point-max) |
c113de23 | 233 | mail-parse-charset))) |
23f87bed MB |
234 | ;; We get this when CC'ing messsages to newsgroups with |
235 | ;; 8-bit names. The group name mail copy just got | |
236 | ;; unconditionally encoded. Previously, it would ask | |
237 | ;; whether to encode, which was quite confusing for the | |
238 | ;; user. If the new behaviour is wrong, tell me. I have | |
239 | ;; left the old code commented out below. | |
240 | ;; -- Per Abrahamsen <abraham@dina.kvl.dk> Date: 2001-10-07. | |
241 | ;; Modified by Dave Love, with the commented-out code changed | |
242 | ;; in accordance with changes elsewhere. | |
243 | ((null method) | |
244 | (rfc2047-encode-region (point) (point-max))) | |
245 | ;;; ((null method) | |
246 | ;;; (if (or (message-options-get | |
247 | ;;; 'rfc2047-encode-message-header-encode-any) | |
248 | ;;; (message-options-set | |
249 | ;;; 'rfc2047-encode-message-header-encode-any | |
250 | ;;; (y-or-n-p | |
251 | ;;; "Some texts are not encoded. Encode anyway?"))) | |
252 | ;;; (rfc2047-encode-region (point-min) (point-max)) | |
253 | ;;; (error "Cannot send unencoded text"))) | |
c113de23 | 254 | ((mm-coding-system-p method) |
1bde0b39 DL |
255 | (if (and (featurep 'mule) |
256 | (if (boundp 'default-enable-multibyte-characters) | |
257 | default-enable-multibyte-characters)) | |
7f0321ff | 258 | (mm-encode-coding-region (point) (point-max) method))) |
c113de23 GM |
259 | ;; Hm. |
260 | (t))) | |
261 | (goto-char (point-max))))))) | |
262 | ||
a553a9f5 DL |
263 | ;; Fixme: This, and the require below may not be the Right Thing, but |
264 | ;; should be safe just before release. -- fx 2001-02-08 | |
265 | (eval-when-compile (defvar message-posting-charset)) | |
266 | ||
f2307f18 DL |
267 | (defun rfc2047-encodable-p () |
268 | "Return non-nil if any characters in current buffer need encoding in headers. | |
269 | The buffer may be narrowed." | |
a553a9f5 | 270 | (require 'message) ; for message-posting-charset |
c113de23 | 271 | (let ((charsets |
7f0321ff | 272 | (mm-find-mime-charset-region (point-min) (point-max)))) |
10ace8ea MB |
273 | (goto-char (point-min)) |
274 | (or (and rfc2047-encode-encoded-words | |
275 | (prog1 | |
276 | (search-forward "=?" nil t) | |
277 | (goto-char (point-min)))) | |
278 | (and charsets | |
279 | (not (equal charsets (list (car message-posting-charset)))))))) | |
7f0321ff DL |
280 | |
281 | ;; Use this syntax table when parsing into regions that may need | |
282 | ;; encoding. Double quotes are string delimiters, backslash is | |
283 | ;; character quoting, and all other RFC 2822 special characters are | |
284 | ;; treated as punctuation so we can use forward-sexp/forward-word to | |
285 | ;; skip to the end of regions appropriately. Nb. ietf-drums does | |
286 | ;; things differently. | |
287 | (defconst rfc2047-syntax-table | |
23f87bed MB |
288 | ;; (make-char-table 'syntax-table '(2)) only works in Emacs. |
289 | (let ((table (make-syntax-table))) | |
290 | ;; The following is done to work for setting all elements of the table | |
291 | ;; in Emacs 21 and 22 and XEmacs; it appears to be the cleanest way. | |
292 | ;; Play safe and don't assume the form of the word syntax entry -- | |
293 | ;; copy it from ?a. | |
294 | (if (fboundp 'set-char-table-range) ; Emacs | |
295 | (funcall (intern "set-char-table-range") | |
296 | table t (aref (standard-syntax-table) ?a)) | |
297 | (if (fboundp 'put-char-table) | |
298 | (if (fboundp 'get-char-table) ; warning avoidance | |
299 | (put-char-table t (get-char-table ?a (standard-syntax-table)) | |
300 | table)))) | |
7f0321ff DL |
301 | (modify-syntax-entry ?\\ "\\" table) |
302 | (modify-syntax-entry ?\" "\"" table) | |
10ace8ea MB |
303 | (modify-syntax-entry ?\( "(" table) |
304 | (modify-syntax-entry ?\) ")" table) | |
7f0321ff DL |
305 | (modify-syntax-entry ?\< "." table) |
306 | (modify-syntax-entry ?\> "." table) | |
307 | (modify-syntax-entry ?\[ "." table) | |
308 | (modify-syntax-entry ?\] "." table) | |
309 | (modify-syntax-entry ?: "." table) | |
310 | (modify-syntax-entry ?\; "." table) | |
311 | (modify-syntax-entry ?, "." table) | |
312 | (modify-syntax-entry ?@ "." table) | |
313 | table)) | |
c113de23 GM |
314 | |
315 | (defun rfc2047-encode-region (b e) | |
7f0321ff DL |
316 | "Encode words in region B to E that need encoding. |
317 | By default, the region is treated as containing RFC2822 addresses. | |
318 | Dynamically bind `rfc2047-encoding-type' to change that." | |
319 | (save-restriction | |
320 | (narrow-to-region b e) | |
10ace8ea MB |
321 | (let ((encodable-regexp (if rfc2047-encode-encoded-words |
322 | "[^\000-\177]+\\|=\\?" | |
323 | "[^\000-\177]+")) | |
324 | start ; start of current token | |
325 | end begin csyntax | |
326 | ;; Whether there's an encoded word before the current token, | |
327 | ;; either immediately or separated by space. | |
328 | last-encoded | |
329 | (orig-text (buffer-substring-no-properties b e))) | |
330 | (if (eq 'mime rfc2047-encoding-type) | |
331 | ;; Simple case. Continuous words in which all those contain | |
332 | ;; non-ASCII characters are encoded collectively. Encoding | |
333 | ;; ASCII words, including `Re:' used in Subject headers, is | |
334 | ;; avoided for interoperability with non-MIME clients and | |
335 | ;; for making it easy to find keywords. | |
336 | (progn | |
337 | (goto-char (point-min)) | |
338 | (while (progn (skip-chars-forward " \t\n") | |
339 | (not (eobp))) | |
340 | (setq start (point)) | |
341 | (while (and (looking-at "[ \t\n]*\\([^ \t\n]+\\)") | |
342 | (progn | |
343 | (setq end (match-end 0)) | |
344 | (re-search-forward encodable-regexp end t))) | |
345 | (goto-char end)) | |
346 | (if (> (point) start) | |
347 | (rfc2047-encode start (point)) | |
348 | (goto-char end)))) | |
349 | ;; `address-mime' case -- take care of quoted words, comments. | |
350 | (with-syntax-table rfc2047-syntax-table | |
7f0321ff | 351 | (goto-char (point-min)) |
10ace8ea | 352 | (condition-case err ; in case of unbalanced quotes |
7f0321ff DL |
353 | ;; Look for rfc2822-style: sequences of atoms, quoted |
354 | ;; strings, specials, whitespace. (Specials mustn't be | |
355 | ;; encoded.) | |
356 | (while (not (eobp)) | |
7f0321ff | 357 | ;; Skip whitespace. |
10ace8ea MB |
358 | (skip-chars-forward " \t\n") |
359 | (setq start (point)) | |
7f0321ff DL |
360 | (cond |
361 | ((not (char-after))) ; eob | |
362 | ;; else token start | |
10ace8ea | 363 | ((eq ?\" (setq csyntax (char-syntax (char-after)))) |
7f0321ff DL |
364 | ;; Quoted word. |
365 | (forward-sexp) | |
366 | (setq end (point)) | |
367 | ;; Does it need encoding? | |
368 | (goto-char start) | |
10ace8ea MB |
369 | (if (re-search-forward encodable-regexp end 'move) |
370 | ;; It needs encoding. Strip the quotes first, | |
371 | ;; since encoded words can't occur in quotes. | |
372 | (progn | |
373 | (goto-char end) | |
374 | (delete-backward-char 1) | |
375 | (goto-char start) | |
376 | (delete-char 1) | |
377 | (when last-encoded | |
378 | ;; There was a preceding quoted word. We need | |
379 | ;; to include any separating whitespace in this | |
380 | ;; word to avoid it getting lost. | |
381 | (skip-chars-backward " \t") | |
382 | ;; A space is needed between the encoded words. | |
383 | (insert ? ) | |
384 | (setq start (point) | |
385 | end (1+ end))) | |
386 | ;; Adjust the end position for the deleted quotes. | |
387 | (rfc2047-encode start (- end 2)) | |
388 | (setq last-encoded t)) ; record that it was encoded | |
389 | (setq last-encoded nil))) | |
390 | ((eq ?. csyntax) | |
7f0321ff DL |
391 | ;; Skip other delimiters, but record that they've |
392 | ;; potentially separated quoted words. | |
393 | (forward-char) | |
394 | (setq last-encoded nil)) | |
10ace8ea MB |
395 | ((eq ?\) csyntax) |
396 | (error "Unbalanced parentheses")) | |
397 | ((eq ?\( csyntax) | |
398 | ;; Look for the end of parentheses. | |
399 | (forward-list) | |
400 | ;; Encode text as an unstructured field. | |
401 | (let ((rfc2047-encoding-type 'mime)) | |
402 | (rfc2047-encode-region (1+ start) (1- (point)))) | |
403 | (skip-chars-forward ")")) | |
7f0321ff DL |
404 | (t ; normal token/whitespace sequence |
405 | ;; Find the end. | |
10ace8ea MB |
406 | ;; Skip one ASCII word, or encode continuous words |
407 | ;; in which all those contain non-ASCII characters. | |
408 | (setq end nil) | |
409 | (while (not (or end (eobp))) | |
410 | (when (looking-at "[\000-\177]+") | |
411 | (setq begin (point) | |
412 | end (match-end 0)) | |
413 | (when (progn | |
414 | (while (and (or (re-search-forward | |
415 | "[ \t\n]\\|\\Sw" end 'move) | |
416 | (setq end nil)) | |
417 | (eq ?\\ (char-syntax (char-before)))) | |
418 | ;; Skip backslash-quoted characters. | |
419 | (forward-char)) | |
420 | end) | |
421 | (setq end (match-beginning 0)) | |
422 | (if rfc2047-encode-encoded-words | |
423 | (progn | |
424 | (goto-char begin) | |
425 | (when (search-forward "=?" end 'move) | |
426 | (goto-char (match-beginning 0)) | |
427 | (setq end nil))) | |
428 | (goto-char end)))) | |
429 | ;; Where the value nil of `end' means there may be | |
430 | ;; text to have to be encoded following the point. | |
431 | ;; Otherwise, the point reached to the end of ASCII | |
432 | ;; words separated by whitespace or a special char. | |
433 | (unless end | |
434 | (when (looking-at encodable-regexp) | |
435 | (goto-char (setq begin (match-end 0))) | |
436 | (while (and (looking-at "[ \t\n]+\\([^ \t\n]+\\)") | |
437 | (setq end (match-end 0)) | |
438 | (progn | |
439 | (while (re-search-forward | |
440 | encodable-regexp end t)) | |
441 | (< begin (point))) | |
442 | (goto-char begin) | |
443 | (or (not (re-search-forward "\\Sw" end t)) | |
444 | (progn | |
445 | (goto-char (match-beginning 0)) | |
446 | nil))) | |
447 | (goto-char end)) | |
448 | (when (looking-at "[^ \t\n]+") | |
449 | (setq end (match-end 0)) | |
450 | (if (re-search-forward "\\Sw+" end t) | |
451 | ;; There are special characters better | |
452 | ;; to be encoded so that MTAs may parse | |
453 | ;; them safely. | |
454 | (cond ((= end (point))) | |
455 | ((looking-at (concat "\\sw*\\(" | |
456 | encodable-regexp | |
457 | "\\)")) | |
458 | (setq end nil)) | |
459 | (t | |
460 | (goto-char (1- (match-end 0))) | |
461 | (unless (= (point) (match-beginning 0)) | |
462 | ;; Separate encodable text and | |
463 | ;; delimiter. | |
464 | (insert " ")))) | |
465 | (goto-char end) | |
466 | (skip-chars-forward " \t\n") | |
467 | (if (and (looking-at "[^ \t\n]+") | |
468 | (string-match encodable-regexp | |
469 | (match-string 0))) | |
470 | (setq end nil) | |
471 | (goto-char end))))))) | |
472 | (skip-chars-backward " \t\n") | |
7f0321ff | 473 | (setq end (point)) |
7f0321ff | 474 | (goto-char start) |
10ace8ea MB |
475 | (if (re-search-forward encodable-regexp end 'move) |
476 | (progn | |
477 | (unless (memq (char-before start) '(nil ?\t ? )) | |
478 | (if (progn | |
479 | (goto-char start) | |
480 | (skip-chars-backward "^ \t\n") | |
481 | (and (looking-at "\\Sw+") | |
482 | (= (match-end 0) start))) | |
483 | ;; Also encode bogus delimiters. | |
484 | (setq start (point)) | |
485 | ;; Separate encodable text and delimiter. | |
486 | (goto-char start) | |
487 | (insert " ") | |
488 | (setq start (1+ start) | |
489 | end (1+ end)))) | |
490 | (rfc2047-encode start end) | |
491 | (setq last-encoded t)) | |
492 | (setq last-encoded nil))))) | |
23f87bed | 493 | (error |
10ace8ea MB |
494 | (if (or debug-on-quit debug-on-error) |
495 | (signal (car err) (cdr err)) | |
496 | (error "Invalid data for rfc2047 encoding: %s" | |
497 | (mm-replace-in-string orig-text "[ \t\n]+" " ")))))))) | |
498 | (rfc2047-fold-region b (point)) | |
499 | (goto-char (point-max)))) | |
c113de23 GM |
500 | |
501 | (defun rfc2047-encode-string (string) | |
7f0321ff DL |
502 | "Encode words in STRING. |
503 | By default, the string is treated as containing addresses (see | |
23f87bed | 504 | `rfc2047-encoding-type')." |
10ace8ea | 505 | (mm-with-multibyte-buffer |
c113de23 GM |
506 | (insert string) |
507 | (rfc2047-encode-region (point-min) (point-max)) | |
508 | (buffer-string))) | |
509 | ||
10ace8ea MB |
510 | (defvar rfc2047-encode-max-chars 76 |
511 | "Maximum characters of each header line that contain encoded-words. | |
512 | If it is nil, encoded-words will not be folded. Too small value may | |
513 | cause an error. Don't change this for no particular reason.") | |
514 | ||
515 | (defun rfc2047-encode-1 (column string cs encoder start crest tail | |
516 | &optional eword) | |
517 | "Subroutine used by `rfc2047-encode'." | |
518 | (cond ((string-equal string "") | |
519 | (or eword "")) | |
520 | ((not rfc2047-encode-max-chars) | |
521 | (concat start | |
522 | (funcall encoder (if cs | |
523 | (mm-encode-coding-string string cs) | |
524 | string)) | |
525 | "?=")) | |
526 | ((>= column rfc2047-encode-max-chars) | |
527 | (when eword | |
528 | (cond ((string-match "\n[ \t]+\\'" eword) | |
529 | ;; Reomove a superfluous empty line. | |
530 | (setq eword (substring eword 0 (match-beginning 0)))) | |
531 | ((string-match "(+\\'" eword) | |
532 | ;; Break the line before the open parenthesis. | |
533 | (setq crest (concat crest (match-string 0 eword)) | |
534 | eword (substring eword 0 (match-beginning 0)))))) | |
535 | (rfc2047-encode-1 (length crest) string cs encoder start " " tail | |
536 | (concat eword "\n" crest))) | |
537 | (t | |
538 | (let ((index 0) | |
539 | (limit (1- (length string))) | |
540 | (prev "") | |
541 | next len) | |
542 | (while (and prev | |
543 | (<= index limit)) | |
544 | (setq next (concat start | |
545 | (funcall encoder | |
546 | (if cs | |
547 | (mm-encode-coding-string | |
548 | (substring string 0 (1+ index)) | |
549 | cs) | |
550 | (substring string 0 (1+ index)))) | |
551 | "?=") | |
552 | len (+ column (length next))) | |
553 | (if (> len rfc2047-encode-max-chars) | |
554 | (setq next prev | |
555 | prev nil) | |
556 | (if (or (< index limit) | |
557 | (<= (+ len (or (string-match "\n" tail) | |
558 | (length tail))) | |
559 | rfc2047-encode-max-chars)) | |
560 | (setq prev next | |
561 | index (1+ index)) | |
562 | (if (string-match "\\`)+" tail) | |
563 | ;; Break the line after the close parenthesis. | |
564 | (setq tail (concat (substring tail 0 (match-end 0)) | |
565 | "\n " | |
566 | (substring tail (match-end 0))) | |
567 | prev next | |
568 | index (1+ index)) | |
569 | (setq next prev | |
570 | prev nil))))) | |
571 | (if (> index limit) | |
572 | (concat eword next tail) | |
573 | (if (= 0 index) | |
574 | (if (and eword | |
575 | (string-match "(+\\'" eword)) | |
576 | (setq crest (concat crest (match-string 0 eword)) | |
577 | eword (substring eword 0 (match-beginning 0))) | |
578 | (setq eword (concat eword next))) | |
579 | (setq crest " " | |
580 | eword (concat eword next))) | |
581 | (when (string-match "\n[ \t]+\\'" eword) | |
582 | ;; Reomove a superfluous empty line. | |
583 | (setq eword (substring eword 0 (match-beginning 0)))) | |
584 | (rfc2047-encode-1 (length crest) (substring string index) | |
585 | cs encoder start " " tail | |
586 | (concat eword "\n" crest))))))) | |
587 | ||
7f0321ff DL |
588 | (defun rfc2047-encode (b e) |
589 | "Encode the word(s) in the region B to E. | |
10ace8ea MB |
590 | Point moves to the end of the region." |
591 | (let ((mime-charset (or (mm-find-mime-charset-region b e) (list 'us-ascii))) | |
592 | cs encoding tail crest eword) | |
593 | (cond ((> (length mime-charset) 1) | |
594 | (error "Can't rfc2047-encode `%s'" | |
595 | (buffer-substring-no-properties b e))) | |
596 | ((= (length mime-charset) 1) | |
597 | (setq mime-charset (car mime-charset) | |
598 | cs (mm-charset-to-coding-system mime-charset)) | |
599 | (unless (and (mm-multibyte-p) | |
600 | (mm-coding-system-p cs)) | |
601 | (setq cs nil)) | |
602 | (save-restriction | |
603 | (narrow-to-region b e) | |
604 | (setq encoding | |
605 | (or (cdr (assq mime-charset | |
c113de23 | 606 | rfc2047-charset-encoding-alist)) |
23f87bed MB |
607 | ;; For the charsets that don't have a preferred |
608 | ;; encoding, choose the one that's shorter. | |
10ace8ea MB |
609 | (if (eq (rfc2047-qp-or-base64) 'base64) |
610 | 'B | |
611 | 'Q))) | |
612 | (widen) | |
613 | (goto-char e) | |
614 | (skip-chars-forward "^ \t\n") | |
615 | ;; `tail' may contain a close parenthesis. | |
616 | (setq tail (buffer-substring-no-properties e (point))) | |
617 | (goto-char b) | |
618 | (setq b (point-marker) | |
619 | e (set-marker (make-marker) e)) | |
620 | (rfc2047-fold-region (rfc2047-point-at-bol) b) | |
621 | (goto-char b) | |
622 | (skip-chars-backward "^ \t\n") | |
623 | (unless (= 0 (skip-chars-backward " \t")) | |
624 | ;; `crest' may contain whitespace and an open parenthesis. | |
625 | (setq crest (buffer-substring-no-properties (point) b))) | |
626 | (setq eword (rfc2047-encode-1 | |
627 | (- b (rfc2047-point-at-bol)) | |
628 | (mm-replace-in-string | |
629 | (buffer-substring-no-properties b e) | |
630 | "\n\\([ \t]?\\)" "\\1") | |
631 | cs | |
632 | (or (cdr (assq encoding | |
633 | rfc2047-encode-function-alist)) | |
634 | 'identity) | |
635 | (concat "=?" (downcase (symbol-name mime-charset)) | |
636 | "?" (upcase (symbol-name encoding)) "?") | |
637 | (or crest " ") | |
638 | tail)) | |
639 | (delete-region (if (eq (aref eword 0) ?\n) | |
640 | (if (bolp) | |
641 | ;; The line was folded before encoding. | |
642 | (1- (point)) | |
643 | (point)) | |
644 | (goto-char b)) | |
645 | (+ e (length tail))) | |
646 | ;; `eword' contains `crest' and `tail'. | |
647 | (insert eword) | |
648 | (set-marker b nil) | |
649 | (set-marker e nil) | |
650 | (unless (or (/= 0 (length tail)) | |
651 | (eobp) | |
652 | (looking-at "[ \t\n)]")) | |
653 | (insert " ")))) | |
654 | (t | |
655 | (goto-char e))))) | |
c113de23 | 656 | |
23f87bed MB |
657 | (defun rfc2047-fold-field () |
658 | "Fold the current header field." | |
659 | (save-excursion | |
660 | (save-restriction | |
661 | (rfc2047-narrow-to-field) | |
662 | (rfc2047-fold-region (point-min) (point-max))))) | |
663 | ||
c113de23 | 664 | (defun rfc2047-fold-region (b e) |
a553a9f5 | 665 | "Fold long lines in region B to E." |
c113de23 GM |
666 | (save-restriction |
667 | (narrow-to-region b e) | |
668 | (goto-char (point-min)) | |
f2307f18 DL |
669 | (let ((break nil) |
670 | (qword-break nil) | |
c6e26ce2 | 671 | (first t) |
f2307f18 DL |
672 | (bol (save-restriction |
673 | (widen) | |
23f87bed | 674 | (rfc2047-point-at-bol)))) |
c113de23 | 675 | (while (not (eobp)) |
23f87bed MB |
676 | (when (and (or break qword-break) |
677 | (> (- (point) bol) 76)) | |
f2307f18 DL |
678 | (goto-char (or break qword-break)) |
679 | (setq break nil | |
680 | qword-break nil) | |
10ace8ea | 681 | (skip-chars-backward " \t") |
c6e26ce2 | 682 | (if (looking-at "[ \t]") |
7f0321ff | 683 | (insert ?\n) |
619ac84f | 684 | (insert "\n ")) |
f2307f18 DL |
685 | (setq bol (1- (point))) |
686 | ;; Don't break before the first non-LWSP characters. | |
687 | (skip-chars-forward " \t") | |
23f87bed MB |
688 | (unless (eobp) |
689 | (forward-char 1))) | |
c113de23 | 690 | (cond |
f2307f18 DL |
691 | ((eq (char-after) ?\n) |
692 | (forward-char 1) | |
693 | (setq bol (point) | |
694 | break nil | |
695 | qword-break nil) | |
696 | (skip-chars-forward " \t") | |
697 | (unless (or (eobp) (eq (char-after) ?\n)) | |
698 | (forward-char 1))) | |
699 | ((eq (char-after) ?\r) | |
700 | (forward-char 1)) | |
c113de23 | 701 | ((memq (char-after) '(? ?\t)) |
f2307f18 | 702 | (skip-chars-forward " \t") |
10ace8ea MB |
703 | (unless first ;; Don't break just after the header name. |
704 | (setq break (point)))) | |
f2307f18 DL |
705 | ((not break) |
706 | (if (not (looking-at "=\\?[^=]")) | |
707 | (if (eq (char-after) ?=) | |
708 | (forward-char 1) | |
709 | (skip-chars-forward "^ \t\n\r=")) | |
23f87bed MB |
710 | ;; Don't break at the start of the field. |
711 | (unless (= (point) b) | |
712 | (setq qword-break (point))) | |
f2307f18 DL |
713 | (skip-chars-forward "^ \t\n\r"))) |
714 | (t | |
10ace8ea MB |
715 | (skip-chars-forward "^ \t\n\r"))) |
716 | (setq first nil)) | |
23f87bed MB |
717 | (when (and (or break qword-break) |
718 | (> (- (point) bol) 76)) | |
f2307f18 DL |
719 | (goto-char (or break qword-break)) |
720 | (setq break nil | |
721 | qword-break nil) | |
10ace8ea MB |
722 | (if (or (> 0 (skip-chars-backward " \t")) |
723 | (looking-at "[ \t]")) | |
724 | (insert ?\n) | |
725 | (insert "\n ")) | |
f2307f18 DL |
726 | (setq bol (1- (point))) |
727 | ;; Don't break before the first non-LWSP characters. | |
728 | (skip-chars-forward " \t") | |
23f87bed MB |
729 | (unless (eobp) |
730 | (forward-char 1)))))) | |
731 | ||
732 | (defun rfc2047-unfold-field () | |
733 | "Fold the current line." | |
734 | (save-excursion | |
735 | (save-restriction | |
736 | (rfc2047-narrow-to-field) | |
737 | (rfc2047-unfold-region (point-min) (point-max))))) | |
f2307f18 DL |
738 | |
739 | (defun rfc2047-unfold-region (b e) | |
a553a9f5 | 740 | "Unfold lines in region B to E." |
f2307f18 DL |
741 | (save-restriction |
742 | (narrow-to-region b e) | |
743 | (goto-char (point-min)) | |
744 | (let ((bol (save-restriction | |
745 | (widen) | |
23f87bed MB |
746 | (rfc2047-point-at-bol))) |
747 | (eol (rfc2047-point-at-eol))) | |
f2307f18 DL |
748 | (forward-line 1) |
749 | (while (not (eobp)) | |
c6e26ce2 | 750 | (if (and (looking-at "[ \t]") |
23f87bed | 751 | (< (- (rfc2047-point-at-eol) bol) 76)) |
c6e26ce2 DL |
752 | (delete-region eol (progn |
753 | (goto-char eol) | |
754 | (skip-chars-forward "\r\n") | |
755 | (point))) | |
23f87bed MB |
756 | (setq bol (rfc2047-point-at-bol))) |
757 | (setq eol (rfc2047-point-at-eol)) | |
f2307f18 | 758 | (forward-line 1))))) |
c113de23 | 759 | |
10ace8ea MB |
760 | (defun rfc2047-b-encode-string (string) |
761 | "Base64-encode the header contained in STRING." | |
762 | (base64-encode-string string t)) | |
763 | ||
764 | (defun rfc2047-q-encode-string (string) | |
765 | "Quoted-printable-encode the header in STRING." | |
766 | (mm-with-unibyte-buffer | |
767 | (insert string) | |
768 | (quoted-printable-encode-region | |
769 | (point-min) (point-max) nil | |
770 | ;; = (\075), _ (\137), ? (\077) are used in the encoded word. | |
771 | ;; Avoid using 8bit characters. | |
772 | ;; This list excludes `especials' (see the RFC2047 syntax), | |
773 | ;; meaning that some characters in non-structured fields will | |
774 | ;; get encoded when they con't need to be. The following is | |
775 | ;; what it used to be. | |
776 | ;;; ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?" | |
777 | ;;; "\010\012\014\040-\074\076\100-\136\140-\177") | |
778 | "-\b\n\f !#-'*+0-9A-Z\\^`-~\d") | |
779 | (subst-char-in-region (point-min) (point-max) ? ?_) | |
780 | (buffer-string))) | |
781 | ||
782 | (defun rfc2047-encode-parameter (param value) | |
783 | "Return and PARAM=VALUE string encoded in the RFC2047-like style. | |
784 | This is a replacement for the `rfc2231-encode-string' function. | |
785 | ||
786 | When attaching files as MIME parts, we should use the RFC2231 encoding | |
787 | to specify the file names containing non-ASCII characters. However, | |
788 | many mail softwares don't support it in practice and recipients won't | |
789 | be able to extract files with correct names. Instead, the RFC2047-like | |
790 | encoding is acceptable generally. This function provides the very | |
791 | RFC2047-like encoding, resigning to such a regrettable trend. To use | |
792 | it, put the following line in your ~/.gnus.el file: | |
793 | ||
794 | \(defalias 'mail-header-encode-parameter 'rfc2047-encode-parameter) | |
795 | " | |
796 | (let* ((rfc2047-encoding-type 'mime) | |
797 | (rfc2047-encode-max-chars nil) | |
798 | (string (rfc2047-encode-string value))) | |
799 | (if (string-match (concat "[" ietf-drums-tspecials "]") string) | |
800 | (format "%s=%S" param string) | |
801 | (concat param "=" string)))) | |
c113de23 GM |
802 | |
803 | ;;; | |
804 | ;;; Functions for decoding RFC2047 messages | |
805 | ;;; | |
806 | ||
23f87bed MB |
807 | (eval-and-compile |
808 | (defconst rfc2047-encoded-word-regexp | |
10ace8ea MB |
809 | "=\\?\\([^][\000-\040()<>@,\;:*\\\"/?.=]+\\)\\(\\*[^?]+\\)?\ |
810 | \\?\\(B\\|Q\\)\\?\\([!->@-~ ]*\\)\\?=")) | |
23f87bed | 811 | |
270a576a MB |
812 | (defvar rfc2047-quote-decoded-words-containing-tspecials nil |
813 | "If non-nil, quote decoded words containing special characters.") | |
814 | ||
292f71fe MB |
815 | (defvar rfc2047-allow-incomplete-encoded-text t |
816 | "*Non-nil means allow incomplete encoded-text in successive encoded-words. | |
817 | Dividing of encoded-text in the place other than character boundaries | |
818 | violates RFC2047 section 5, while we have a capability to decode it. | |
819 | If it is non-nil, the decoder will decode B- or Q-encoding in each | |
820 | encoded-word, concatenate them, and decode it by charset. Otherwise, | |
821 | the decoder will fully decode each encoded-word before concatenating | |
822 | them.") | |
823 | ||
824 | (defun rfc2047-charset-to-coding-system (charset) | |
825 | "Return coding-system corresponding to MIME CHARSET. | |
826 | If your Emacs implementation can't decode CHARSET, return nil." | |
827 | (when (stringp charset) | |
828 | (setq charset (intern (downcase charset)))) | |
829 | (when (or (not charset) | |
830 | (eq 'gnus-all mail-parse-ignored-charsets) | |
831 | (memq 'gnus-all mail-parse-ignored-charsets) | |
832 | (memq charset mail-parse-ignored-charsets)) | |
833 | (setq charset mail-parse-charset)) | |
3031d8b0 | 834 | (let ((cs (mm-charset-to-coding-system charset))) |
292f71fe MB |
835 | (cond ((eq cs 'ascii) |
836 | (setq cs (or (mm-charset-to-coding-system mail-parse-charset) | |
837 | 'raw-text))) | |
3031d8b0 | 838 | ((setq cs (mm-coding-system-p cs))) |
292f71fe MB |
839 | ((and charset |
840 | (listp mail-parse-ignored-charsets) | |
841 | (memq 'gnus-unknown mail-parse-ignored-charsets)) | |
842 | (setq cs (mm-charset-to-coding-system mail-parse-charset)))) | |
843 | (if (eq cs 'ascii) | |
844 | 'raw-text | |
845 | cs))) | |
846 | ||
847 | (defun rfc2047-decode-encoded-words (words) | |
848 | "Decode successive encoded-words in WORDS and return a decoded string. | |
849 | Each element of WORDS looks like (CHARSET ENCODING ENCODED-TEXT | |
850 | ENCODED-WORD)." | |
851 | (let (word charset cs encoding text rest) | |
852 | (while words | |
853 | (setq word (pop words)) | |
854 | (if (and (or (setq cs (rfc2047-charset-to-coding-system | |
855 | (setq charset (car word)))) | |
856 | (progn | |
857 | (message "Unknown charset: %s" charset) | |
858 | nil)) | |
859 | (condition-case code | |
860 | (cond ((char-equal ?B (nth 1 word)) | |
861 | (setq text (base64-decode-string | |
862 | (rfc2047-pad-base64 (nth 2 word))))) | |
863 | ((char-equal ?Q (nth 1 word)) | |
864 | (setq text (quoted-printable-decode-string | |
865 | (mm-subst-char-in-string | |
866 | ?_ ? (nth 2 word) t))))) | |
867 | (error | |
868 | (message "%s" (error-message-string code)) | |
869 | nil))) | |
870 | (if (and rfc2047-allow-incomplete-encoded-text | |
871 | (eq cs (caar rest))) | |
872 | ;; Concatenate text of which the charset is the same. | |
873 | (setcdr (car rest) (concat (cdar rest) text)) | |
874 | (push (cons cs text) rest)) | |
875 | ;; Don't decode encoded-word. | |
876 | (push (cons nil (nth 3 word)) rest))) | |
877 | (while rest | |
878 | (setq words (concat | |
879 | (or (and (setq cs (caar rest)) | |
880 | (condition-case code | |
881 | (mm-decode-coding-string (cdar rest) cs) | |
882 | (error | |
883 | (message "%s" (error-message-string code)) | |
884 | nil))) | |
885 | (concat (when (cdr rest) " ") | |
886 | (cdar rest) | |
887 | (when (and words | |
888 | (not (eq (string-to-char words) ? ))) | |
889 | " "))) | |
890 | words) | |
891 | rest (cdr rest))) | |
892 | words)) | |
893 | ||
23f87bed MB |
894 | ;; Fixme: This should decode in place, not cons intermediate strings. |
895 | ;; Also check whether it needs to worry about delimiting fields like | |
896 | ;; encoding. | |
897 | ||
898 | ;; In fact it's reported that (invalid) encoding of mailboxes in | |
899 | ;; addr-specs is in use, so delimiting fields might help. Probably | |
900 | ;; not decoding a word which isn't properly delimited is good enough | |
901 | ;; and worthwhile (is it more correct or not?), e.g. something like | |
902 | ;; `=?iso-8859-1?q?foo?=@'. | |
c113de23 GM |
903 | |
904 | (defun rfc2047-decode-region (start end) | |
905 | "Decode MIME-encoded words in region between START and END." | |
906 | (interactive "r") | |
907 | (let ((case-fold-search t) | |
292f71fe MB |
908 | (eword-regexp (eval-when-compile |
909 | ;; Ignore whitespace between encoded-words. | |
910 | (concat "[\n\t ]*\\(" rfc2047-encoded-word-regexp | |
911 | "\\)"))) | |
912 | b e match words) | |
652dbc07 DL |
913 | (save-excursion |
914 | (save-restriction | |
915 | (narrow-to-region start end) | |
292f71fe MB |
916 | (goto-char (setq b start)) |
917 | ;; Look for the encoded-words. | |
918 | (while (setq match (re-search-forward eword-regexp nil t)) | |
919 | (setq e (match-beginning 1) | |
920 | end (match-end 0) | |
921 | words nil) | |
922 | (while match | |
923 | (push (list (match-string 2) ;; charset | |
924 | (char-after (match-beginning 4)) ;; encoding | |
925 | (match-string 5) ;; encoded-text | |
926 | (match-string 1)) ;; encoded-word | |
927 | words) | |
928 | ;; Look for the subsequent encoded-words. | |
929 | (when (setq match (looking-at eword-regexp)) | |
930 | (goto-char (setq end (match-end 0))))) | |
931 | ;; Replace the encoded-words with the decoded one. | |
932 | (delete-region e end) | |
933 | (insert (rfc2047-decode-encoded-words (nreverse words))) | |
23f87bed MB |
934 | (save-restriction |
935 | (narrow-to-region e (point)) | |
936 | (goto-char e) | |
270a576a MB |
937 | ;; Remove newlines between decoded words, though such |
938 | ;; things essentially must not be there. | |
23f87bed MB |
939 | (while (re-search-forward "[\n\r]+" nil t) |
940 | (replace-match " ")) | |
270a576a MB |
941 | ;; Quote decoded words if there are special characters |
942 | ;; which might violate RFC2822. | |
943 | (when (and rfc2047-quote-decoded-words-containing-tspecials | |
944 | (let ((regexp (car (rassq | |
945 | 'address-mime | |
946 | rfc2047-header-encoding-alist)))) | |
947 | (when regexp | |
948 | (save-restriction | |
949 | (widen) | |
950 | (beginning-of-line) | |
951 | (while (and (memq (char-after) '(? ?\t)) | |
952 | (zerop (forward-line -1)))) | |
953 | (looking-at regexp))))) | |
954 | (let (quoted) | |
955 | (goto-char e) | |
956 | (skip-chars-forward " \t") | |
957 | (setq start (point)) | |
958 | (setq quoted (eq (char-after) ?\")) | |
959 | (goto-char (point-max)) | |
960 | (skip-chars-backward " \t") | |
961 | (if (setq quoted (and quoted | |
962 | (> (point) (1+ start)) | |
963 | (eq (char-before) ?\"))) | |
964 | (progn | |
965 | (backward-char) | |
966 | (setq start (1+ start) | |
967 | end (point-marker))) | |
968 | (setq end (point-marker))) | |
969 | (goto-char start) | |
970 | (while (search-forward "\"" end t) | |
971 | (when (prog2 | |
972 | (backward-char) | |
973 | (zerop (% (skip-chars-backward "\\\\") 2)) | |
974 | (goto-char (match-beginning 0))) | |
975 | (insert "\\")) | |
976 | (forward-char)) | |
977 | (when (and (not quoted) | |
978 | (progn | |
979 | (goto-char start) | |
980 | (re-search-forward | |
981 | (concat "[" ietf-drums-tspecials "]") | |
982 | end t))) | |
983 | (goto-char start) | |
984 | (insert "\"") | |
985 | (goto-char end) | |
986 | (insert "\"")) | |
987 | (set-marker end nil))) | |
23f87bed | 988 | (goto-char (point-max))) |
652dbc07 DL |
989 | (when (and (mm-multibyte-p) |
990 | mail-parse-charset | |
23f87bed | 991 | (not (eq mail-parse-charset 'us-ascii)) |
652dbc07 DL |
992 | (not (eq mail-parse-charset 'gnus-decoded))) |
993 | (mm-decode-coding-region b e mail-parse-charset)) | |
994 | (setq b (point))) | |
995 | (when (and (mm-multibyte-p) | |
996 | mail-parse-charset | |
997 | (not (eq mail-parse-charset 'us-ascii)) | |
998 | (not (eq mail-parse-charset 'gnus-decoded))) | |
23f87bed | 999 | (mm-decode-coding-region b (point-max) mail-parse-charset)))))) |
c113de23 GM |
1000 | |
1001 | (defun rfc2047-decode-string (string) | |
1002 | "Decode the quoted-printable-encoded STRING and return the results." | |
1003 | (let ((m (mm-multibyte-p))) | |
23f87bed MB |
1004 | (if (string-match "=\\?" string) |
1005 | (with-temp-buffer | |
1006 | ;; Fixme: This logic is wrong, but seems to be required by | |
1007 | ;; Gnus summary buffer generation. The value of `m' depends | |
1008 | ;; on the current buffer, not global multibyteness or that | |
1009 | ;; of the string. Also the string returned should always be | |
1010 | ;; multibyte in a multibyte session, i.e. the buffer should | |
1011 | ;; be multibyte before `buffer-string' is called. | |
1012 | (when m | |
1013 | (mm-enable-multibyte)) | |
1014 | (insert string) | |
1015 | (inline | |
1016 | (rfc2047-decode-region (point-min) (point-max))) | |
1017 | (buffer-string)) | |
1018 | ;; Fixme: As above, `m' here is inappropriate. | |
1019 | (if (and m | |
1020 | mail-parse-charset | |
1021 | (not (eq mail-parse-charset 'us-ascii)) | |
1022 | (not (eq mail-parse-charset 'gnus-decoded))) | |
10ace8ea MB |
1023 | ;; `decode-coding-string' in Emacs offers a third optional |
1024 | ;; arg NOCOPY to avoid consing a new string if the decoding | |
1025 | ;; is "trivial". Unfortunately it currently doesn't | |
1026 | ;; consider anything else than a `nil' coding system | |
1027 | ;; trivial. | |
1028 | ;; `rfc2047-decode-string' is called multiple times for each | |
1029 | ;; article during summary buffer generation, and we really | |
1030 | ;; want to avoid unnecessary consing. So we bypass | |
1031 | ;; `decode-coding-string' if the string is purely ASCII. | |
1032 | (if (and (fboundp 'detect-coding-string) | |
1033 | ;; string is purely ASCII | |
1034 | (eq (detect-coding-string string t) 'undecided)) | |
1035 | string | |
1036 | (mm-decode-coding-string string mail-parse-charset)) | |
23f87bed | 1037 | (mm-string-as-multibyte string))))) |
c113de23 | 1038 | |
23f87bed MB |
1039 | (defun rfc2047-pad-base64 (string) |
1040 | "Pad STRING to quartets." | |
1041 | ;; Be more liberal to accept buggy base64 strings. If | |
1042 | ;; base64-decode-string accepts buggy strings, this function could | |
1043 | ;; be aliased to identity. | |
f4dd4ae8 MB |
1044 | (if (= 0 (mod (length string) 4)) |
1045 | string | |
1046 | (when (string-match "=+$" string) | |
1047 | (setq string (substring string 0 (match-beginning 0)))) | |
1048 | (case (mod (length string) 4) | |
1049 | (0 string) | |
1050 | (1 string) ;; Error, don't pad it. | |
1051 | (2 (concat string "==")) | |
1052 | (3 (concat string "="))))) | |
652dbc07 | 1053 | |
c113de23 GM |
1054 | (provide 'rfc2047) |
1055 | ||
ab5796a9 | 1056 | ;;; arch-tag: a07fe3d4-22b5-4c4a-bd89-b1f82d5d36f6 |
c113de23 | 1057 | ;;; rfc2047.el ends here |