Commit | Line | Data |
---|---|---|
715a2ca2 | 1 | ;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages |
270a576a | 2 | |
e84b4b86 | 3 | ;; Copyright (C) 1998, 1999, 2000, 2002, 2003, 2004, |
88e6695f | 4 | ;; 2005, 2006 Free Software Foundation, Inc. |
c113de23 GM |
5 | |
6 | ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | |
7 | ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> | |
8 | ;; This file is part of GNU Emacs. | |
9 | ||
10 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
11 | ;; it under the terms of the GNU General Public License as published by | |
12 | ;; the Free Software Foundation; either version 2, or (at your option) | |
13 | ;; any later version. | |
14 | ||
15 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | ;; GNU General Public License for more details. | |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
21 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
3a35cf56 LK |
22 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
23 | ;; Boston, MA 02110-1301, USA. | |
c113de23 GM |
24 | |
25 | ;;; Commentary: | |
26 | ||
d49a4975 DL |
27 | ;; RFC 2047 is "MIME (Multipurpose Internet Mail Extensions) Part |
28 | ;; Three: Message Header Extensions for Non-ASCII Text". | |
29 | ||
c113de23 GM |
30 | ;;; Code: |
31 | ||
7f0321ff DL |
32 | (eval-when-compile |
33 | (require 'cl) | |
23f87bed MB |
34 | (defvar message-posting-charset) |
35 | (unless (fboundp 'with-syntax-table) ; not in Emacs 20 | |
36 | (defmacro with-syntax-table (table &rest body) | |
37 | "Evaluate BODY with syntax table of current buffer set to TABLE. | |
38 | The syntax table of the current buffer is saved, BODY is evaluated, and the | |
39 | saved table is restored, even in case of an abnormal exit. | |
40 | Value is what BODY returns." | |
41 | (let ((old-table (make-symbol "table")) | |
42 | (old-buffer (make-symbol "buffer"))) | |
43 | `(let ((,old-table (syntax-table)) | |
44 | (,old-buffer (current-buffer))) | |
45 | (unwind-protect | |
46 | (progn | |
47 | (set-syntax-table ,table) | |
48 | ,@body) | |
49 | (save-current-buffer | |
50 | (set-buffer ,old-buffer) | |
51 | (set-syntax-table ,old-table)))))))) | |
c113de23 GM |
52 | |
53 | (require 'qp) | |
54 | (require 'mm-util) | |
270a576a | 55 | (require 'ietf-drums) |
7f0321ff | 56 | ;; Fixme: Avoid this (used for mail-parse-charset) mm dependence on gnus. |
c113de23 | 57 | (require 'mail-prsvr) |
f2307f18 | 58 | (require 'base64) |
f2307f18 | 59 | (autoload 'mm-body-7-or-8 "mm-bodies") |
1c33719f | 60 | |
23f87bed MB |
61 | (eval-and-compile |
62 | ;; Avoid gnus-util for mm- code. | |
63 | (defalias 'rfc2047-point-at-bol | |
64 | (if (fboundp 'point-at-bol) | |
65 | 'point-at-bol | |
66 | 'line-beginning-position)) | |
67 | ||
68 | (defalias 'rfc2047-point-at-eol | |
69 | (if (fboundp 'point-at-eol) | |
70 | 'point-at-eol | |
71 | 'line-end-position))) | |
72 | ||
c113de23 | 73 | (defvar rfc2047-header-encoding-alist |
23f87bed MB |
74 | '(("Newsgroups" . nil) |
75 | ("Followup-To" . nil) | |
c113de23 | 76 | ("Message-ID" . nil) |
ad136a7c | 77 | ("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|\\(In-\\)?Reply-To\\|Sender\ |
23f87bed | 78 | \\|Mail-Followup-To\\|Mail-Copies-To\\|Approved\\)" . address-mime) |
c113de23 GM |
79 | (t . mime)) |
80 | "*Header/encoding method alist. | |
81 | The list is traversed sequentially. The keys can either be | |
f2307f18 | 82 | header regexps or t. |
c113de23 GM |
83 | |
84 | The values can be: | |
85 | ||
86 | 1) nil, in which case no encoding is done; | |
87 | 2) `mime', in which case the header will be encoded according to RFC2047; | |
7f0321ff DL |
88 | 3) `address-mime', like `mime', but takes account of the rules for address |
89 | fields (where quoted strings and comments must be treated separately); | |
90 | 4) a charset, in which case it will be encoded as that charset; | |
91 | 5) `default', in which case the field will be encoded as the rest | |
c113de23 GM |
92 | of the article.") |
93 | ||
94 | (defvar rfc2047-charset-encoding-alist | |
95 | '((us-ascii . nil) | |
96 | (iso-8859-1 . Q) | |
97 | (iso-8859-2 . Q) | |
98 | (iso-8859-3 . Q) | |
99 | (iso-8859-4 . Q) | |
100 | (iso-8859-5 . B) | |
101 | (koi8-r . B) | |
7f0321ff DL |
102 | (iso-8859-7 . B) |
103 | (iso-8859-8 . B) | |
c113de23 | 104 | (iso-8859-9 . Q) |
f2307f18 DL |
105 | (iso-8859-14 . Q) |
106 | (iso-8859-15 . Q) | |
c113de23 GM |
107 | (iso-2022-jp . B) |
108 | (iso-2022-kr . B) | |
109 | (gb2312 . B) | |
676a7cc9 SZ |
110 | (big5 . B) |
111 | (cn-big5 . B) | |
c113de23 GM |
112 | (cn-gb . B) |
113 | (cn-gb-2312 . B) | |
114 | (euc-kr . B) | |
115 | (iso-2022-jp-2 . B) | |
23f87bed MB |
116 | (iso-2022-int-1 . B) |
117 | (viscii . Q)) | |
c113de23 | 118 | "Alist of MIME charsets to RFC2047 encodings. |
7f0321ff DL |
119 | Valid encodings are nil, `Q' and `B'. These indicate binary (no) encoding, |
120 | quoted-printable and base64 respectively.") | |
c113de23 | 121 | |
10ace8ea MB |
122 | (defvar rfc2047-encode-function-alist |
123 | '((Q . rfc2047-q-encode-string) | |
124 | (B . rfc2047-b-encode-string) | |
125 | (nil . identity)) | |
c113de23 GM |
126 | "Alist of RFC2047 encodings to encoding functions.") |
127 | ||
10ace8ea MB |
128 | (defvar rfc2047-encode-encoded-words t |
129 | "Whether encoded words should be encoded again.") | |
130 | ||
c113de23 GM |
131 | ;;; |
132 | ;;; Functions for encoding RFC2047 messages | |
133 | ;;; | |
134 | ||
f4dd4ae8 MB |
135 | (defun rfc2047-qp-or-base64 () |
136 | "Return the type with which to encode the buffer. | |
137 | This is either `base64' or `quoted-printable'." | |
138 | (save-excursion | |
139 | (let ((limit (min (point-max) (+ 2000 (point-min)))) | |
140 | (n8bit 0)) | |
141 | (goto-char (point-min)) | |
142 | (skip-chars-forward "\x20-\x7f\r\n\t" limit) | |
143 | (while (< (point) limit) | |
144 | (incf n8bit) | |
145 | (forward-char 1) | |
146 | (skip-chars-forward "\x20-\x7f\r\n\t" limit)) | |
147 | (if (or (< (* 6 n8bit) (- limit (point-min))) | |
148 | ;; Don't base64, say, a short line with a single | |
149 | ;; non-ASCII char when splitting parts by charset. | |
150 | (= n8bit 1)) | |
151 | 'quoted-printable | |
152 | 'base64)))) | |
153 | ||
c113de23 GM |
154 | (defun rfc2047-narrow-to-field () |
155 | "Narrow the buffer to the header on the current line." | |
156 | (beginning-of-line) | |
157 | (narrow-to-region | |
158 | (point) | |
159 | (progn | |
160 | (forward-line 1) | |
161 | (if (re-search-forward "^[^ \n\t]" nil t) | |
23f87bed | 162 | (rfc2047-point-at-bol) |
c113de23 GM |
163 | (point-max)))) |
164 | (goto-char (point-min))) | |
165 | ||
23f87bed MB |
166 | (defun rfc2047-field-value () |
167 | "Return the value of the field at point." | |
168 | (save-excursion | |
169 | (save-restriction | |
170 | (rfc2047-narrow-to-field) | |
171 | (re-search-forward ":[ \t\n]*" nil t) | |
10ace8ea | 172 | (buffer-substring-no-properties (point) (point-max))))) |
23f87bed | 173 | |
343d6628 MB |
174 | (defun rfc2047-quote-special-characters-in-quoted-strings (&optional |
175 | encodable-regexp) | |
176 | "Quote special characters with `\\'s in quoted strings. | |
177 | Quoting will not be done in a quoted string if it contains characters | |
178 | matching ENCODABLE-REGEXP." | |
179 | (goto-char (point-min)) | |
180 | (let ((tspecials (concat "[" ietf-drums-tspecials "]")) | |
181 | beg) | |
182 | (with-syntax-table (standard-syntax-table) | |
183 | (while (search-forward "\"" nil t) | |
184 | (unless (eq (char-before) ?\\) | |
185 | (setq beg (match-end 0)) | |
186 | (goto-char (match-beginning 0)) | |
187 | (condition-case nil | |
188 | (progn | |
189 | (forward-sexp) | |
190 | (save-restriction | |
191 | (narrow-to-region beg (1- (point))) | |
192 | (goto-char beg) | |
193 | (unless (and encodable-regexp | |
194 | (re-search-forward encodable-regexp nil t)) | |
195 | (while (re-search-forward tspecials nil 'move) | |
196 | (unless (and (eq (char-before) ?\\) ;; Already quoted. | |
197 | (looking-at tspecials)) | |
198 | (goto-char (match-beginning 0)) | |
199 | (unless (or (eq (char-before) ?\\) | |
200 | (and rfc2047-encode-encoded-words | |
201 | (eq (char-after) ??) | |
202 | (eq (char-before) ?=))) | |
203 | (insert "\\"))) | |
204 | (forward-char))))) | |
205 | (error | |
206 | (goto-char beg)))))))) | |
207 | ||
7f0321ff DL |
208 | (defvar rfc2047-encoding-type 'address-mime |
209 | "The type of encoding done by `rfc2047-encode-region'. | |
210 | This should be dynamically bound around calls to | |
211 | `rfc2047-encode-region' to either `mime' or `address-mime'. See | |
212 | `rfc2047-header-encoding-alist', for definitions.") | |
213 | ||
c113de23 GM |
214 | (defun rfc2047-encode-message-header () |
215 | "Encode the message header according to `rfc2047-header-encoding-alist'. | |
216 | Should be called narrowed to the head of the message." | |
217 | (interactive "*") | |
218 | (save-excursion | |
219 | (goto-char (point-min)) | |
220 | (let (alist elem method) | |
221 | (while (not (eobp)) | |
222 | (save-restriction | |
223 | (rfc2047-narrow-to-field) | |
343d6628 MB |
224 | (setq method nil |
225 | alist rfc2047-header-encoding-alist) | |
226 | (while (setq elem (pop alist)) | |
227 | (when (or (and (stringp (car elem)) | |
228 | (looking-at (car elem))) | |
229 | (eq (car elem) t)) | |
230 | (setq alist nil | |
231 | method (cdr elem)))) | |
c113de23 | 232 | (if (not (rfc2047-encodable-p)) |
343d6628 MB |
233 | (prog2 |
234 | (when (eq method 'address-mime) | |
235 | (rfc2047-quote-special-characters-in-quoted-strings)) | |
10ace8ea MB |
236 | (if (and (eq (mm-body-7-or-8) '8bit) |
237 | (mm-multibyte-p) | |
238 | (mm-coding-system-p | |
239 | (car message-posting-charset))) | |
240 | ;; 8 bit must be decoded. | |
241 | (mm-encode-coding-region | |
242 | (point-min) (point-max) | |
243 | (mm-charset-to-coding-system | |
244 | (car message-posting-charset)))) | |
c6e26ce2 | 245 | ;; No encoding necessary, but folding is nice |
10ace8ea MB |
246 | (when nil |
247 | (rfc2047-fold-region | |
248 | (save-excursion | |
249 | (goto-char (point-min)) | |
250 | (skip-chars-forward "^:") | |
251 | (when (looking-at ": ") | |
252 | (forward-char 2)) | |
253 | (point)) | |
254 | (point-max)))) | |
c113de23 | 255 | ;; We found something that may perhaps be encoded. |
7f0321ff | 256 | (re-search-forward "^[^:]+: *" nil t) |
c113de23 | 257 | (cond |
7f0321ff DL |
258 | ((eq method 'address-mime) |
259 | (rfc2047-encode-region (point) (point-max))) | |
c113de23 | 260 | ((eq method 'mime) |
23f87bed | 261 | (let ((rfc2047-encoding-type 'mime)) |
7f0321ff | 262 | (rfc2047-encode-region (point) (point-max)))) |
c113de23 GM |
263 | ((eq method 'default) |
264 | (if (and (featurep 'mule) | |
1bde0b39 DL |
265 | (if (boundp 'default-enable-multibyte-characters) |
266 | default-enable-multibyte-characters) | |
c113de23 | 267 | mail-parse-charset) |
7f0321ff | 268 | (mm-encode-coding-region (point) (point-max) |
c113de23 | 269 | mail-parse-charset))) |
23f87bed MB |
270 | ;; We get this when CC'ing messsages to newsgroups with |
271 | ;; 8-bit names. The group name mail copy just got | |
272 | ;; unconditionally encoded. Previously, it would ask | |
273 | ;; whether to encode, which was quite confusing for the | |
274 | ;; user. If the new behaviour is wrong, tell me. I have | |
275 | ;; left the old code commented out below. | |
276 | ;; -- Per Abrahamsen <abraham@dina.kvl.dk> Date: 2001-10-07. | |
277 | ;; Modified by Dave Love, with the commented-out code changed | |
278 | ;; in accordance with changes elsewhere. | |
279 | ((null method) | |
280 | (rfc2047-encode-region (point) (point-max))) | |
281 | ;;; ((null method) | |
282 | ;;; (if (or (message-options-get | |
283 | ;;; 'rfc2047-encode-message-header-encode-any) | |
284 | ;;; (message-options-set | |
285 | ;;; 'rfc2047-encode-message-header-encode-any | |
286 | ;;; (y-or-n-p | |
287 | ;;; "Some texts are not encoded. Encode anyway?"))) | |
288 | ;;; (rfc2047-encode-region (point-min) (point-max)) | |
289 | ;;; (error "Cannot send unencoded text"))) | |
c113de23 | 290 | ((mm-coding-system-p method) |
1bde0b39 DL |
291 | (if (and (featurep 'mule) |
292 | (if (boundp 'default-enable-multibyte-characters) | |
293 | default-enable-multibyte-characters)) | |
7f0321ff | 294 | (mm-encode-coding-region (point) (point-max) method))) |
c113de23 GM |
295 | ;; Hm. |
296 | (t))) | |
297 | (goto-char (point-max))))))) | |
298 | ||
a553a9f5 DL |
299 | ;; Fixme: This, and the require below may not be the Right Thing, but |
300 | ;; should be safe just before release. -- fx 2001-02-08 | |
301 | (eval-when-compile (defvar message-posting-charset)) | |
302 | ||
f2307f18 DL |
303 | (defun rfc2047-encodable-p () |
304 | "Return non-nil if any characters in current buffer need encoding in headers. | |
305 | The buffer may be narrowed." | |
a553a9f5 | 306 | (require 'message) ; for message-posting-charset |
c113de23 | 307 | (let ((charsets |
7f0321ff | 308 | (mm-find-mime-charset-region (point-min) (point-max)))) |
10ace8ea MB |
309 | (goto-char (point-min)) |
310 | (or (and rfc2047-encode-encoded-words | |
311 | (prog1 | |
312 | (search-forward "=?" nil t) | |
313 | (goto-char (point-min)))) | |
314 | (and charsets | |
315 | (not (equal charsets (list (car message-posting-charset)))))))) | |
7f0321ff DL |
316 | |
317 | ;; Use this syntax table when parsing into regions that may need | |
318 | ;; encoding. Double quotes are string delimiters, backslash is | |
319 | ;; character quoting, and all other RFC 2822 special characters are | |
320 | ;; treated as punctuation so we can use forward-sexp/forward-word to | |
321 | ;; skip to the end of regions appropriately. Nb. ietf-drums does | |
322 | ;; things differently. | |
323 | (defconst rfc2047-syntax-table | |
23f87bed MB |
324 | ;; (make-char-table 'syntax-table '(2)) only works in Emacs. |
325 | (let ((table (make-syntax-table))) | |
326 | ;; The following is done to work for setting all elements of the table | |
327 | ;; in Emacs 21 and 22 and XEmacs; it appears to be the cleanest way. | |
328 | ;; Play safe and don't assume the form of the word syntax entry -- | |
329 | ;; copy it from ?a. | |
330 | (if (fboundp 'set-char-table-range) ; Emacs | |
331 | (funcall (intern "set-char-table-range") | |
332 | table t (aref (standard-syntax-table) ?a)) | |
333 | (if (fboundp 'put-char-table) | |
334 | (if (fboundp 'get-char-table) ; warning avoidance | |
335 | (put-char-table t (get-char-table ?a (standard-syntax-table)) | |
336 | table)))) | |
7f0321ff DL |
337 | (modify-syntax-entry ?\\ "\\" table) |
338 | (modify-syntax-entry ?\" "\"" table) | |
10ace8ea MB |
339 | (modify-syntax-entry ?\( "(" table) |
340 | (modify-syntax-entry ?\) ")" table) | |
7f0321ff DL |
341 | (modify-syntax-entry ?\< "." table) |
342 | (modify-syntax-entry ?\> "." table) | |
343 | (modify-syntax-entry ?\[ "." table) | |
344 | (modify-syntax-entry ?\] "." table) | |
345 | (modify-syntax-entry ?: "." table) | |
346 | (modify-syntax-entry ?\; "." table) | |
347 | (modify-syntax-entry ?, "." table) | |
348 | (modify-syntax-entry ?@ "." table) | |
349 | table)) | |
c113de23 GM |
350 | |
351 | (defun rfc2047-encode-region (b e) | |
7f0321ff DL |
352 | "Encode words in region B to E that need encoding. |
353 | By default, the region is treated as containing RFC2822 addresses. | |
354 | Dynamically bind `rfc2047-encoding-type' to change that." | |
355 | (save-restriction | |
356 | (narrow-to-region b e) | |
10ace8ea MB |
357 | (let ((encodable-regexp (if rfc2047-encode-encoded-words |
358 | "[^\000-\177]+\\|=\\?" | |
359 | "[^\000-\177]+")) | |
360 | start ; start of current token | |
361 | end begin csyntax | |
362 | ;; Whether there's an encoded word before the current token, | |
363 | ;; either immediately or separated by space. | |
364 | last-encoded | |
365 | (orig-text (buffer-substring-no-properties b e))) | |
366 | (if (eq 'mime rfc2047-encoding-type) | |
367 | ;; Simple case. Continuous words in which all those contain | |
368 | ;; non-ASCII characters are encoded collectively. Encoding | |
369 | ;; ASCII words, including `Re:' used in Subject headers, is | |
370 | ;; avoided for interoperability with non-MIME clients and | |
371 | ;; for making it easy to find keywords. | |
372 | (progn | |
373 | (goto-char (point-min)) | |
374 | (while (progn (skip-chars-forward " \t\n") | |
375 | (not (eobp))) | |
376 | (setq start (point)) | |
377 | (while (and (looking-at "[ \t\n]*\\([^ \t\n]+\\)") | |
378 | (progn | |
379 | (setq end (match-end 0)) | |
380 | (re-search-forward encodable-regexp end t))) | |
381 | (goto-char end)) | |
382 | (if (> (point) start) | |
383 | (rfc2047-encode start (point)) | |
384 | (goto-char end)))) | |
385 | ;; `address-mime' case -- take care of quoted words, comments. | |
343d6628 | 386 | (rfc2047-quote-special-characters-in-quoted-strings encodable-regexp) |
10ace8ea | 387 | (with-syntax-table rfc2047-syntax-table |
7f0321ff | 388 | (goto-char (point-min)) |
10ace8ea | 389 | (condition-case err ; in case of unbalanced quotes |
7f0321ff DL |
390 | ;; Look for rfc2822-style: sequences of atoms, quoted |
391 | ;; strings, specials, whitespace. (Specials mustn't be | |
392 | ;; encoded.) | |
393 | (while (not (eobp)) | |
7f0321ff | 394 | ;; Skip whitespace. |
10ace8ea MB |
395 | (skip-chars-forward " \t\n") |
396 | (setq start (point)) | |
7f0321ff DL |
397 | (cond |
398 | ((not (char-after))) ; eob | |
399 | ;; else token start | |
10ace8ea | 400 | ((eq ?\" (setq csyntax (char-syntax (char-after)))) |
7f0321ff DL |
401 | ;; Quoted word. |
402 | (forward-sexp) | |
403 | (setq end (point)) | |
404 | ;; Does it need encoding? | |
405 | (goto-char start) | |
10ace8ea MB |
406 | (if (re-search-forward encodable-regexp end 'move) |
407 | ;; It needs encoding. Strip the quotes first, | |
408 | ;; since encoded words can't occur in quotes. | |
409 | (progn | |
410 | (goto-char end) | |
411 | (delete-backward-char 1) | |
412 | (goto-char start) | |
413 | (delete-char 1) | |
414 | (when last-encoded | |
415 | ;; There was a preceding quoted word. We need | |
416 | ;; to include any separating whitespace in this | |
417 | ;; word to avoid it getting lost. | |
418 | (skip-chars-backward " \t") | |
419 | ;; A space is needed between the encoded words. | |
420 | (insert ? ) | |
421 | (setq start (point) | |
422 | end (1+ end))) | |
423 | ;; Adjust the end position for the deleted quotes. | |
424 | (rfc2047-encode start (- end 2)) | |
425 | (setq last-encoded t)) ; record that it was encoded | |
426 | (setq last-encoded nil))) | |
427 | ((eq ?. csyntax) | |
7f0321ff DL |
428 | ;; Skip other delimiters, but record that they've |
429 | ;; potentially separated quoted words. | |
430 | (forward-char) | |
431 | (setq last-encoded nil)) | |
10ace8ea MB |
432 | ((eq ?\) csyntax) |
433 | (error "Unbalanced parentheses")) | |
434 | ((eq ?\( csyntax) | |
435 | ;; Look for the end of parentheses. | |
436 | (forward-list) | |
437 | ;; Encode text as an unstructured field. | |
438 | (let ((rfc2047-encoding-type 'mime)) | |
439 | (rfc2047-encode-region (1+ start) (1- (point)))) | |
440 | (skip-chars-forward ")")) | |
7f0321ff DL |
441 | (t ; normal token/whitespace sequence |
442 | ;; Find the end. | |
10ace8ea MB |
443 | ;; Skip one ASCII word, or encode continuous words |
444 | ;; in which all those contain non-ASCII characters. | |
445 | (setq end nil) | |
446 | (while (not (or end (eobp))) | |
447 | (when (looking-at "[\000-\177]+") | |
448 | (setq begin (point) | |
449 | end (match-end 0)) | |
450 | (when (progn | |
451 | (while (and (or (re-search-forward | |
452 | "[ \t\n]\\|\\Sw" end 'move) | |
453 | (setq end nil)) | |
454 | (eq ?\\ (char-syntax (char-before)))) | |
455 | ;; Skip backslash-quoted characters. | |
456 | (forward-char)) | |
457 | end) | |
458 | (setq end (match-beginning 0)) | |
459 | (if rfc2047-encode-encoded-words | |
460 | (progn | |
461 | (goto-char begin) | |
462 | (when (search-forward "=?" end 'move) | |
463 | (goto-char (match-beginning 0)) | |
464 | (setq end nil))) | |
465 | (goto-char end)))) | |
466 | ;; Where the value nil of `end' means there may be | |
467 | ;; text to have to be encoded following the point. | |
468 | ;; Otherwise, the point reached to the end of ASCII | |
469 | ;; words separated by whitespace or a special char. | |
470 | (unless end | |
471 | (when (looking-at encodable-regexp) | |
472 | (goto-char (setq begin (match-end 0))) | |
473 | (while (and (looking-at "[ \t\n]+\\([^ \t\n]+\\)") | |
474 | (setq end (match-end 0)) | |
475 | (progn | |
476 | (while (re-search-forward | |
477 | encodable-regexp end t)) | |
478 | (< begin (point))) | |
479 | (goto-char begin) | |
480 | (or (not (re-search-forward "\\Sw" end t)) | |
481 | (progn | |
482 | (goto-char (match-beginning 0)) | |
483 | nil))) | |
484 | (goto-char end)) | |
485 | (when (looking-at "[^ \t\n]+") | |
486 | (setq end (match-end 0)) | |
487 | (if (re-search-forward "\\Sw+" end t) | |
488 | ;; There are special characters better | |
489 | ;; to be encoded so that MTAs may parse | |
490 | ;; them safely. | |
491 | (cond ((= end (point))) | |
492 | ((looking-at (concat "\\sw*\\(" | |
493 | encodable-regexp | |
494 | "\\)")) | |
495 | (setq end nil)) | |
496 | (t | |
497 | (goto-char (1- (match-end 0))) | |
498 | (unless (= (point) (match-beginning 0)) | |
499 | ;; Separate encodable text and | |
500 | ;; delimiter. | |
501 | (insert " ")))) | |
502 | (goto-char end) | |
503 | (skip-chars-forward " \t\n") | |
504 | (if (and (looking-at "[^ \t\n]+") | |
505 | (string-match encodable-regexp | |
506 | (match-string 0))) | |
507 | (setq end nil) | |
508 | (goto-char end))))))) | |
509 | (skip-chars-backward " \t\n") | |
7f0321ff | 510 | (setq end (point)) |
7f0321ff | 511 | (goto-char start) |
10ace8ea MB |
512 | (if (re-search-forward encodable-regexp end 'move) |
513 | (progn | |
514 | (unless (memq (char-before start) '(nil ?\t ? )) | |
515 | (if (progn | |
516 | (goto-char start) | |
517 | (skip-chars-backward "^ \t\n") | |
518 | (and (looking-at "\\Sw+") | |
519 | (= (match-end 0) start))) | |
520 | ;; Also encode bogus delimiters. | |
521 | (setq start (point)) | |
522 | ;; Separate encodable text and delimiter. | |
523 | (goto-char start) | |
524 | (insert " ") | |
525 | (setq start (1+ start) | |
526 | end (1+ end)))) | |
527 | (rfc2047-encode start end) | |
528 | (setq last-encoded t)) | |
529 | (setq last-encoded nil))))) | |
23f87bed | 530 | (error |
10ace8ea MB |
531 | (if (or debug-on-quit debug-on-error) |
532 | (signal (car err) (cdr err)) | |
533 | (error "Invalid data for rfc2047 encoding: %s" | |
534 | (mm-replace-in-string orig-text "[ \t\n]+" " ")))))))) | |
535 | (rfc2047-fold-region b (point)) | |
536 | (goto-char (point-max)))) | |
c113de23 GM |
537 | |
538 | (defun rfc2047-encode-string (string) | |
7f0321ff DL |
539 | "Encode words in STRING. |
540 | By default, the string is treated as containing addresses (see | |
23f87bed | 541 | `rfc2047-encoding-type')." |
10ace8ea | 542 | (mm-with-multibyte-buffer |
c113de23 GM |
543 | (insert string) |
544 | (rfc2047-encode-region (point-min) (point-max)) | |
545 | (buffer-string))) | |
546 | ||
10ace8ea MB |
547 | (defvar rfc2047-encode-max-chars 76 |
548 | "Maximum characters of each header line that contain encoded-words. | |
549 | If it is nil, encoded-words will not be folded. Too small value may | |
550 | cause an error. Don't change this for no particular reason.") | |
551 | ||
552 | (defun rfc2047-encode-1 (column string cs encoder start crest tail | |
553 | &optional eword) | |
554 | "Subroutine used by `rfc2047-encode'." | |
555 | (cond ((string-equal string "") | |
556 | (or eword "")) | |
557 | ((not rfc2047-encode-max-chars) | |
558 | (concat start | |
559 | (funcall encoder (if cs | |
560 | (mm-encode-coding-string string cs) | |
561 | string)) | |
562 | "?=")) | |
563 | ((>= column rfc2047-encode-max-chars) | |
564 | (when eword | |
565 | (cond ((string-match "\n[ \t]+\\'" eword) | |
566 | ;; Reomove a superfluous empty line. | |
567 | (setq eword (substring eword 0 (match-beginning 0)))) | |
568 | ((string-match "(+\\'" eword) | |
569 | ;; Break the line before the open parenthesis. | |
570 | (setq crest (concat crest (match-string 0 eword)) | |
571 | eword (substring eword 0 (match-beginning 0)))))) | |
572 | (rfc2047-encode-1 (length crest) string cs encoder start " " tail | |
573 | (concat eword "\n" crest))) | |
574 | (t | |
575 | (let ((index 0) | |
576 | (limit (1- (length string))) | |
577 | (prev "") | |
578 | next len) | |
579 | (while (and prev | |
580 | (<= index limit)) | |
581 | (setq next (concat start | |
582 | (funcall encoder | |
583 | (if cs | |
584 | (mm-encode-coding-string | |
585 | (substring string 0 (1+ index)) | |
586 | cs) | |
587 | (substring string 0 (1+ index)))) | |
588 | "?=") | |
589 | len (+ column (length next))) | |
590 | (if (> len rfc2047-encode-max-chars) | |
591 | (setq next prev | |
592 | prev nil) | |
593 | (if (or (< index limit) | |
594 | (<= (+ len (or (string-match "\n" tail) | |
595 | (length tail))) | |
596 | rfc2047-encode-max-chars)) | |
597 | (setq prev next | |
598 | index (1+ index)) | |
599 | (if (string-match "\\`)+" tail) | |
600 | ;; Break the line after the close parenthesis. | |
601 | (setq tail (concat (substring tail 0 (match-end 0)) | |
602 | "\n " | |
603 | (substring tail (match-end 0))) | |
604 | prev next | |
605 | index (1+ index)) | |
606 | (setq next prev | |
607 | prev nil))))) | |
608 | (if (> index limit) | |
609 | (concat eword next tail) | |
610 | (if (= 0 index) | |
611 | (if (and eword | |
612 | (string-match "(+\\'" eword)) | |
613 | (setq crest (concat crest (match-string 0 eword)) | |
614 | eword (substring eword 0 (match-beginning 0))) | |
615 | (setq eword (concat eword next))) | |
616 | (setq crest " " | |
617 | eword (concat eword next))) | |
618 | (when (string-match "\n[ \t]+\\'" eword) | |
619 | ;; Reomove a superfluous empty line. | |
620 | (setq eword (substring eword 0 (match-beginning 0)))) | |
621 | (rfc2047-encode-1 (length crest) (substring string index) | |
622 | cs encoder start " " tail | |
623 | (concat eword "\n" crest))))))) | |
624 | ||
7f0321ff DL |
625 | (defun rfc2047-encode (b e) |
626 | "Encode the word(s) in the region B to E. | |
10ace8ea MB |
627 | Point moves to the end of the region." |
628 | (let ((mime-charset (or (mm-find-mime-charset-region b e) (list 'us-ascii))) | |
629 | cs encoding tail crest eword) | |
630 | (cond ((> (length mime-charset) 1) | |
631 | (error "Can't rfc2047-encode `%s'" | |
632 | (buffer-substring-no-properties b e))) | |
633 | ((= (length mime-charset) 1) | |
634 | (setq mime-charset (car mime-charset) | |
635 | cs (mm-charset-to-coding-system mime-charset)) | |
636 | (unless (and (mm-multibyte-p) | |
637 | (mm-coding-system-p cs)) | |
638 | (setq cs nil)) | |
639 | (save-restriction | |
640 | (narrow-to-region b e) | |
641 | (setq encoding | |
642 | (or (cdr (assq mime-charset | |
c113de23 | 643 | rfc2047-charset-encoding-alist)) |
23f87bed MB |
644 | ;; For the charsets that don't have a preferred |
645 | ;; encoding, choose the one that's shorter. | |
10ace8ea MB |
646 | (if (eq (rfc2047-qp-or-base64) 'base64) |
647 | 'B | |
648 | 'Q))) | |
649 | (widen) | |
650 | (goto-char e) | |
651 | (skip-chars-forward "^ \t\n") | |
652 | ;; `tail' may contain a close parenthesis. | |
653 | (setq tail (buffer-substring-no-properties e (point))) | |
654 | (goto-char b) | |
655 | (setq b (point-marker) | |
656 | e (set-marker (make-marker) e)) | |
657 | (rfc2047-fold-region (rfc2047-point-at-bol) b) | |
658 | (goto-char b) | |
659 | (skip-chars-backward "^ \t\n") | |
660 | (unless (= 0 (skip-chars-backward " \t")) | |
661 | ;; `crest' may contain whitespace and an open parenthesis. | |
662 | (setq crest (buffer-substring-no-properties (point) b))) | |
663 | (setq eword (rfc2047-encode-1 | |
664 | (- b (rfc2047-point-at-bol)) | |
665 | (mm-replace-in-string | |
666 | (buffer-substring-no-properties b e) | |
667 | "\n\\([ \t]?\\)" "\\1") | |
668 | cs | |
669 | (or (cdr (assq encoding | |
670 | rfc2047-encode-function-alist)) | |
671 | 'identity) | |
672 | (concat "=?" (downcase (symbol-name mime-charset)) | |
673 | "?" (upcase (symbol-name encoding)) "?") | |
674 | (or crest " ") | |
675 | tail)) | |
676 | (delete-region (if (eq (aref eword 0) ?\n) | |
677 | (if (bolp) | |
678 | ;; The line was folded before encoding. | |
679 | (1- (point)) | |
680 | (point)) | |
681 | (goto-char b)) | |
682 | (+ e (length tail))) | |
683 | ;; `eword' contains `crest' and `tail'. | |
684 | (insert eword) | |
685 | (set-marker b nil) | |
686 | (set-marker e nil) | |
687 | (unless (or (/= 0 (length tail)) | |
688 | (eobp) | |
689 | (looking-at "[ \t\n)]")) | |
690 | (insert " ")))) | |
691 | (t | |
692 | (goto-char e))))) | |
c113de23 | 693 | |
23f87bed MB |
694 | (defun rfc2047-fold-field () |
695 | "Fold the current header field." | |
696 | (save-excursion | |
697 | (save-restriction | |
698 | (rfc2047-narrow-to-field) | |
699 | (rfc2047-fold-region (point-min) (point-max))))) | |
700 | ||
c113de23 | 701 | (defun rfc2047-fold-region (b e) |
a553a9f5 | 702 | "Fold long lines in region B to E." |
c113de23 GM |
703 | (save-restriction |
704 | (narrow-to-region b e) | |
705 | (goto-char (point-min)) | |
f2307f18 DL |
706 | (let ((break nil) |
707 | (qword-break nil) | |
c6e26ce2 | 708 | (first t) |
f2307f18 DL |
709 | (bol (save-restriction |
710 | (widen) | |
23f87bed | 711 | (rfc2047-point-at-bol)))) |
c113de23 | 712 | (while (not (eobp)) |
23f87bed MB |
713 | (when (and (or break qword-break) |
714 | (> (- (point) bol) 76)) | |
f2307f18 DL |
715 | (goto-char (or break qword-break)) |
716 | (setq break nil | |
717 | qword-break nil) | |
10ace8ea | 718 | (skip-chars-backward " \t") |
c6e26ce2 | 719 | (if (looking-at "[ \t]") |
7f0321ff | 720 | (insert ?\n) |
619ac84f | 721 | (insert "\n ")) |
f2307f18 DL |
722 | (setq bol (1- (point))) |
723 | ;; Don't break before the first non-LWSP characters. | |
724 | (skip-chars-forward " \t") | |
23f87bed MB |
725 | (unless (eobp) |
726 | (forward-char 1))) | |
c113de23 | 727 | (cond |
f2307f18 DL |
728 | ((eq (char-after) ?\n) |
729 | (forward-char 1) | |
730 | (setq bol (point) | |
731 | break nil | |
732 | qword-break nil) | |
733 | (skip-chars-forward " \t") | |
734 | (unless (or (eobp) (eq (char-after) ?\n)) | |
735 | (forward-char 1))) | |
736 | ((eq (char-after) ?\r) | |
737 | (forward-char 1)) | |
c113de23 | 738 | ((memq (char-after) '(? ?\t)) |
f2307f18 | 739 | (skip-chars-forward " \t") |
10ace8ea MB |
740 | (unless first ;; Don't break just after the header name. |
741 | (setq break (point)))) | |
f2307f18 DL |
742 | ((not break) |
743 | (if (not (looking-at "=\\?[^=]")) | |
744 | (if (eq (char-after) ?=) | |
745 | (forward-char 1) | |
746 | (skip-chars-forward "^ \t\n\r=")) | |
23f87bed MB |
747 | ;; Don't break at the start of the field. |
748 | (unless (= (point) b) | |
749 | (setq qword-break (point))) | |
f2307f18 DL |
750 | (skip-chars-forward "^ \t\n\r"))) |
751 | (t | |
10ace8ea MB |
752 | (skip-chars-forward "^ \t\n\r"))) |
753 | (setq first nil)) | |
23f87bed MB |
754 | (when (and (or break qword-break) |
755 | (> (- (point) bol) 76)) | |
f2307f18 DL |
756 | (goto-char (or break qword-break)) |
757 | (setq break nil | |
758 | qword-break nil) | |
10ace8ea MB |
759 | (if (or (> 0 (skip-chars-backward " \t")) |
760 | (looking-at "[ \t]")) | |
761 | (insert ?\n) | |
762 | (insert "\n ")) | |
f2307f18 DL |
763 | (setq bol (1- (point))) |
764 | ;; Don't break before the first non-LWSP characters. | |
765 | (skip-chars-forward " \t") | |
23f87bed MB |
766 | (unless (eobp) |
767 | (forward-char 1)))))) | |
768 | ||
769 | (defun rfc2047-unfold-field () | |
770 | "Fold the current line." | |
771 | (save-excursion | |
772 | (save-restriction | |
773 | (rfc2047-narrow-to-field) | |
774 | (rfc2047-unfold-region (point-min) (point-max))))) | |
f2307f18 DL |
775 | |
776 | (defun rfc2047-unfold-region (b e) | |
a553a9f5 | 777 | "Unfold lines in region B to E." |
f2307f18 DL |
778 | (save-restriction |
779 | (narrow-to-region b e) | |
780 | (goto-char (point-min)) | |
781 | (let ((bol (save-restriction | |
782 | (widen) | |
23f87bed MB |
783 | (rfc2047-point-at-bol))) |
784 | (eol (rfc2047-point-at-eol))) | |
f2307f18 DL |
785 | (forward-line 1) |
786 | (while (not (eobp)) | |
c6e26ce2 | 787 | (if (and (looking-at "[ \t]") |
23f87bed | 788 | (< (- (rfc2047-point-at-eol) bol) 76)) |
c6e26ce2 DL |
789 | (delete-region eol (progn |
790 | (goto-char eol) | |
791 | (skip-chars-forward "\r\n") | |
792 | (point))) | |
23f87bed MB |
793 | (setq bol (rfc2047-point-at-bol))) |
794 | (setq eol (rfc2047-point-at-eol)) | |
f2307f18 | 795 | (forward-line 1))))) |
c113de23 | 796 | |
10ace8ea MB |
797 | (defun rfc2047-b-encode-string (string) |
798 | "Base64-encode the header contained in STRING." | |
799 | (base64-encode-string string t)) | |
800 | ||
801 | (defun rfc2047-q-encode-string (string) | |
802 | "Quoted-printable-encode the header in STRING." | |
803 | (mm-with-unibyte-buffer | |
804 | (insert string) | |
805 | (quoted-printable-encode-region | |
806 | (point-min) (point-max) nil | |
807 | ;; = (\075), _ (\137), ? (\077) are used in the encoded word. | |
808 | ;; Avoid using 8bit characters. | |
809 | ;; This list excludes `especials' (see the RFC2047 syntax), | |
810 | ;; meaning that some characters in non-structured fields will | |
811 | ;; get encoded when they con't need to be. The following is | |
812 | ;; what it used to be. | |
813 | ;;; ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?" | |
814 | ;;; "\010\012\014\040-\074\076\100-\136\140-\177") | |
815 | "-\b\n\f !#-'*+0-9A-Z\\^`-~\d") | |
816 | (subst-char-in-region (point-min) (point-max) ? ?_) | |
817 | (buffer-string))) | |
818 | ||
819 | (defun rfc2047-encode-parameter (param value) | |
820 | "Return and PARAM=VALUE string encoded in the RFC2047-like style. | |
821 | This is a replacement for the `rfc2231-encode-string' function. | |
822 | ||
823 | When attaching files as MIME parts, we should use the RFC2231 encoding | |
824 | to specify the file names containing non-ASCII characters. However, | |
825 | many mail softwares don't support it in practice and recipients won't | |
826 | be able to extract files with correct names. Instead, the RFC2047-like | |
827 | encoding is acceptable generally. This function provides the very | |
828 | RFC2047-like encoding, resigning to such a regrettable trend. To use | |
829 | it, put the following line in your ~/.gnus.el file: | |
830 | ||
831 | \(defalias 'mail-header-encode-parameter 'rfc2047-encode-parameter) | |
832 | " | |
833 | (let* ((rfc2047-encoding-type 'mime) | |
834 | (rfc2047-encode-max-chars nil) | |
835 | (string (rfc2047-encode-string value))) | |
836 | (if (string-match (concat "[" ietf-drums-tspecials "]") string) | |
837 | (format "%s=%S" param string) | |
838 | (concat param "=" string)))) | |
c113de23 GM |
839 | |
840 | ;;; | |
841 | ;;; Functions for decoding RFC2047 messages | |
842 | ;;; | |
843 | ||
23f87bed MB |
844 | (eval-and-compile |
845 | (defconst rfc2047-encoded-word-regexp | |
10ace8ea MB |
846 | "=\\?\\([^][\000-\040()<>@,\;:*\\\"/?.=]+\\)\\(\\*[^?]+\\)?\ |
847 | \\?\\(B\\|Q\\)\\?\\([!->@-~ ]*\\)\\?=")) | |
23f87bed | 848 | |
270a576a MB |
849 | (defvar rfc2047-quote-decoded-words-containing-tspecials nil |
850 | "If non-nil, quote decoded words containing special characters.") | |
851 | ||
292f71fe MB |
852 | (defvar rfc2047-allow-incomplete-encoded-text t |
853 | "*Non-nil means allow incomplete encoded-text in successive encoded-words. | |
854 | Dividing of encoded-text in the place other than character boundaries | |
855 | violates RFC2047 section 5, while we have a capability to decode it. | |
856 | If it is non-nil, the decoder will decode B- or Q-encoding in each | |
857 | encoded-word, concatenate them, and decode it by charset. Otherwise, | |
858 | the decoder will fully decode each encoded-word before concatenating | |
859 | them.") | |
860 | ||
343d6628 | 861 | (defun rfc2047-strip-backslashes-in-quoted-strings () |
d7093904 | 862 | "Strip backslashes in quoted strings. `\\\"' remains." |
343d6628 MB |
863 | (goto-char (point-min)) |
864 | (let (beg) | |
865 | (with-syntax-table (standard-syntax-table) | |
866 | (while (search-forward "\"" nil t) | |
867 | (unless (eq (char-before) ?\\) | |
868 | (setq beg (match-end 0)) | |
869 | (goto-char (match-beginning 0)) | |
870 | (condition-case nil | |
871 | (progn | |
872 | (forward-sexp) | |
873 | (save-restriction | |
874 | (narrow-to-region beg (1- (point))) | |
875 | (goto-char beg) | |
876 | (while (search-forward "\\" nil 'move) | |
d7093904 | 877 | (unless (memq (char-after) '(?\")) |
343d6628 MB |
878 | (delete-backward-char 1)) |
879 | (forward-char))) | |
880 | (forward-char)) | |
881 | (error | |
882 | (goto-char beg)))))))) | |
883 | ||
292f71fe MB |
884 | (defun rfc2047-charset-to-coding-system (charset) |
885 | "Return coding-system corresponding to MIME CHARSET. | |
886 | If your Emacs implementation can't decode CHARSET, return nil." | |
887 | (when (stringp charset) | |
888 | (setq charset (intern (downcase charset)))) | |
889 | (when (or (not charset) | |
890 | (eq 'gnus-all mail-parse-ignored-charsets) | |
891 | (memq 'gnus-all mail-parse-ignored-charsets) | |
892 | (memq charset mail-parse-ignored-charsets)) | |
893 | (setq charset mail-parse-charset)) | |
3031d8b0 | 894 | (let ((cs (mm-charset-to-coding-system charset))) |
292f71fe MB |
895 | (cond ((eq cs 'ascii) |
896 | (setq cs (or (mm-charset-to-coding-system mail-parse-charset) | |
897 | 'raw-text))) | |
e13be7a2 | 898 | ((mm-coding-system-p cs)) |
292f71fe MB |
899 | ((and charset |
900 | (listp mail-parse-ignored-charsets) | |
901 | (memq 'gnus-unknown mail-parse-ignored-charsets)) | |
902 | (setq cs (mm-charset-to-coding-system mail-parse-charset)))) | |
903 | (if (eq cs 'ascii) | |
904 | 'raw-text | |
905 | cs))) | |
906 | ||
907 | (defun rfc2047-decode-encoded-words (words) | |
908 | "Decode successive encoded-words in WORDS and return a decoded string. | |
909 | Each element of WORDS looks like (CHARSET ENCODING ENCODED-TEXT | |
910 | ENCODED-WORD)." | |
911 | (let (word charset cs encoding text rest) | |
912 | (while words | |
913 | (setq word (pop words)) | |
14e6dc54 MB |
914 | (if (and (setq cs (rfc2047-charset-to-coding-system |
915 | (setq charset (car word)))) | |
292f71fe MB |
916 | (condition-case code |
917 | (cond ((char-equal ?B (nth 1 word)) | |
918 | (setq text (base64-decode-string | |
919 | (rfc2047-pad-base64 (nth 2 word))))) | |
920 | ((char-equal ?Q (nth 1 word)) | |
921 | (setq text (quoted-printable-decode-string | |
922 | (mm-subst-char-in-string | |
923 | ?_ ? (nth 2 word) t))))) | |
924 | (error | |
925 | (message "%s" (error-message-string code)) | |
926 | nil))) | |
927 | (if (and rfc2047-allow-incomplete-encoded-text | |
928 | (eq cs (caar rest))) | |
929 | ;; Concatenate text of which the charset is the same. | |
930 | (setcdr (car rest) (concat (cdar rest) text)) | |
931 | (push (cons cs text) rest)) | |
932 | ;; Don't decode encoded-word. | |
933 | (push (cons nil (nth 3 word)) rest))) | |
934 | (while rest | |
935 | (setq words (concat | |
936 | (or (and (setq cs (caar rest)) | |
937 | (condition-case code | |
938 | (mm-decode-coding-string (cdar rest) cs) | |
939 | (error | |
940 | (message "%s" (error-message-string code)) | |
941 | nil))) | |
942 | (concat (when (cdr rest) " ") | |
943 | (cdar rest) | |
944 | (when (and words | |
945 | (not (eq (string-to-char words) ? ))) | |
946 | " "))) | |
947 | words) | |
948 | rest (cdr rest))) | |
949 | words)) | |
950 | ||
23f87bed MB |
951 | ;; Fixme: This should decode in place, not cons intermediate strings. |
952 | ;; Also check whether it needs to worry about delimiting fields like | |
953 | ;; encoding. | |
954 | ||
955 | ;; In fact it's reported that (invalid) encoding of mailboxes in | |
956 | ;; addr-specs is in use, so delimiting fields might help. Probably | |
957 | ;; not decoding a word which isn't properly delimited is good enough | |
958 | ;; and worthwhile (is it more correct or not?), e.g. something like | |
959 | ;; `=?iso-8859-1?q?foo?=@'. | |
c113de23 | 960 | |
343d6628 MB |
961 | (defun rfc2047-decode-region (start end &optional address-mime) |
962 | "Decode MIME-encoded words in region between START and END. | |
963 | If ADDRESS-MIME is non-nil, strip backslashes which precede characters | |
964 | other than `\"' and `\\' in quoted strings." | |
c113de23 GM |
965 | (interactive "r") |
966 | (let ((case-fold-search t) | |
292f71fe MB |
967 | (eword-regexp (eval-when-compile |
968 | ;; Ignore whitespace between encoded-words. | |
969 | (concat "[\n\t ]*\\(" rfc2047-encoded-word-regexp | |
970 | "\\)"))) | |
971 | b e match words) | |
652dbc07 DL |
972 | (save-excursion |
973 | (save-restriction | |
974 | (narrow-to-region start end) | |
343d6628 MB |
975 | (when address-mime |
976 | (rfc2047-strip-backslashes-in-quoted-strings)) | |
292f71fe MB |
977 | (goto-char (setq b start)) |
978 | ;; Look for the encoded-words. | |
979 | (while (setq match (re-search-forward eword-regexp nil t)) | |
980 | (setq e (match-beginning 1) | |
981 | end (match-end 0) | |
982 | words nil) | |
983 | (while match | |
984 | (push (list (match-string 2) ;; charset | |
985 | (char-after (match-beginning 4)) ;; encoding | |
986 | (match-string 5) ;; encoded-text | |
987 | (match-string 1)) ;; encoded-word | |
988 | words) | |
989 | ;; Look for the subsequent encoded-words. | |
990 | (when (setq match (looking-at eword-regexp)) | |
991 | (goto-char (setq end (match-end 0))))) | |
992 | ;; Replace the encoded-words with the decoded one. | |
993 | (delete-region e end) | |
994 | (insert (rfc2047-decode-encoded-words (nreverse words))) | |
23f87bed MB |
995 | (save-restriction |
996 | (narrow-to-region e (point)) | |
997 | (goto-char e) | |
270a576a MB |
998 | ;; Remove newlines between decoded words, though such |
999 | ;; things essentially must not be there. | |
23f87bed MB |
1000 | (while (re-search-forward "[\n\r]+" nil t) |
1001 | (replace-match " ")) | |
270a576a MB |
1002 | ;; Quote decoded words if there are special characters |
1003 | ;; which might violate RFC2822. | |
1004 | (when (and rfc2047-quote-decoded-words-containing-tspecials | |
1005 | (let ((regexp (car (rassq | |
1006 | 'address-mime | |
1007 | rfc2047-header-encoding-alist)))) | |
1008 | (when regexp | |
1009 | (save-restriction | |
1010 | (widen) | |
1011 | (beginning-of-line) | |
1012 | (while (and (memq (char-after) '(? ?\t)) | |
1013 | (zerop (forward-line -1)))) | |
1014 | (looking-at regexp))))) | |
1015 | (let (quoted) | |
1016 | (goto-char e) | |
1017 | (skip-chars-forward " \t") | |
1018 | (setq start (point)) | |
1019 | (setq quoted (eq (char-after) ?\")) | |
1020 | (goto-char (point-max)) | |
1021 | (skip-chars-backward " \t") | |
1022 | (if (setq quoted (and quoted | |
1023 | (> (point) (1+ start)) | |
1024 | (eq (char-before) ?\"))) | |
1025 | (progn | |
1026 | (backward-char) | |
1027 | (setq start (1+ start) | |
1028 | end (point-marker))) | |
1029 | (setq end (point-marker))) | |
1030 | (goto-char start) | |
1031 | (while (search-forward "\"" end t) | |
1032 | (when (prog2 | |
1033 | (backward-char) | |
1034 | (zerop (% (skip-chars-backward "\\\\") 2)) | |
1035 | (goto-char (match-beginning 0))) | |
1036 | (insert "\\")) | |
1037 | (forward-char)) | |
1038 | (when (and (not quoted) | |
1039 | (progn | |
1040 | (goto-char start) | |
1041 | (re-search-forward | |
1042 | (concat "[" ietf-drums-tspecials "]") | |
1043 | end t))) | |
1044 | (goto-char start) | |
1045 | (insert "\"") | |
1046 | (goto-char end) | |
1047 | (insert "\"")) | |
1048 | (set-marker end nil))) | |
23f87bed | 1049 | (goto-char (point-max))) |
652dbc07 DL |
1050 | (when (and (mm-multibyte-p) |
1051 | mail-parse-charset | |
23f87bed | 1052 | (not (eq mail-parse-charset 'us-ascii)) |
652dbc07 DL |
1053 | (not (eq mail-parse-charset 'gnus-decoded))) |
1054 | (mm-decode-coding-region b e mail-parse-charset)) | |
1055 | (setq b (point))) | |
1056 | (when (and (mm-multibyte-p) | |
1057 | mail-parse-charset | |
1058 | (not (eq mail-parse-charset 'us-ascii)) | |
1059 | (not (eq mail-parse-charset 'gnus-decoded))) | |
23f87bed | 1060 | (mm-decode-coding-region b (point-max) mail-parse-charset)))))) |
c113de23 | 1061 | |
343d6628 MB |
1062 | (defun rfc2047-decode-address-region (start end) |
1063 | "Decode MIME-encoded words in region between START and END. | |
1064 | Backslashes which precede characters other than `\"' and `\\' in quoted | |
1065 | strings are stripped." | |
1066 | (rfc2047-decode-region start end t)) | |
1067 | ||
1068 | (defun rfc2047-decode-string (string &optional address-mime) | |
1069 | "Decode MIME-encoded STRING and return the result. | |
1070 | If ADDRESS-MIME is non-nil, strip backslashes which precede characters | |
1071 | other than `\"' and `\\' in quoted strings." | |
c113de23 | 1072 | (let ((m (mm-multibyte-p))) |
23f87bed MB |
1073 | (if (string-match "=\\?" string) |
1074 | (with-temp-buffer | |
1075 | ;; Fixme: This logic is wrong, but seems to be required by | |
1076 | ;; Gnus summary buffer generation. The value of `m' depends | |
1077 | ;; on the current buffer, not global multibyteness or that | |
1078 | ;; of the string. Also the string returned should always be | |
1079 | ;; multibyte in a multibyte session, i.e. the buffer should | |
1080 | ;; be multibyte before `buffer-string' is called. | |
1081 | (when m | |
1082 | (mm-enable-multibyte)) | |
1083 | (insert string) | |
1084 | (inline | |
343d6628 | 1085 | (rfc2047-decode-region (point-min) (point-max) address-mime)) |
23f87bed | 1086 | (buffer-string)) |
343d6628 MB |
1087 | (when address-mime |
1088 | (setq string | |
1089 | (with-temp-buffer | |
1090 | (when (mm-multibyte-string-p string) | |
1091 | (mm-enable-multibyte)) | |
1092 | (insert string) | |
1093 | (rfc2047-strip-backslashes-in-quoted-strings) | |
1094 | (buffer-string)))) | |
23f87bed MB |
1095 | ;; Fixme: As above, `m' here is inappropriate. |
1096 | (if (and m | |
1097 | mail-parse-charset | |
1098 | (not (eq mail-parse-charset 'us-ascii)) | |
1099 | (not (eq mail-parse-charset 'gnus-decoded))) | |
10ace8ea MB |
1100 | ;; `decode-coding-string' in Emacs offers a third optional |
1101 | ;; arg NOCOPY to avoid consing a new string if the decoding | |
1102 | ;; is "trivial". Unfortunately it currently doesn't | |
1103 | ;; consider anything else than a `nil' coding system | |
1104 | ;; trivial. | |
1105 | ;; `rfc2047-decode-string' is called multiple times for each | |
1106 | ;; article during summary buffer generation, and we really | |
1107 | ;; want to avoid unnecessary consing. So we bypass | |
1108 | ;; `decode-coding-string' if the string is purely ASCII. | |
1109 | (if (and (fboundp 'detect-coding-string) | |
1110 | ;; string is purely ASCII | |
1111 | (eq (detect-coding-string string t) 'undecided)) | |
1112 | string | |
1113 | (mm-decode-coding-string string mail-parse-charset)) | |
23f87bed | 1114 | (mm-string-as-multibyte string))))) |
c113de23 | 1115 | |
343d6628 MB |
1116 | (defun rfc2047-decode-address-string (string) |
1117 | "Decode MIME-encoded STRING and return the result. | |
1118 | Backslashes which precede characters other than `\"' and `\\' in quoted | |
1119 | strings are stripped." | |
1120 | (rfc2047-decode-string string t)) | |
1121 | ||
23f87bed MB |
1122 | (defun rfc2047-pad-base64 (string) |
1123 | "Pad STRING to quartets." | |
1124 | ;; Be more liberal to accept buggy base64 strings. If | |
1125 | ;; base64-decode-string accepts buggy strings, this function could | |
1126 | ;; be aliased to identity. | |
f4dd4ae8 MB |
1127 | (if (= 0 (mod (length string) 4)) |
1128 | string | |
1129 | (when (string-match "=+$" string) | |
1130 | (setq string (substring string 0 (match-beginning 0)))) | |
1131 | (case (mod (length string) 4) | |
1132 | (0 string) | |
1133 | (1 string) ;; Error, don't pad it. | |
1134 | (2 (concat string "==")) | |
1135 | (3 (concat string "="))))) | |
652dbc07 | 1136 | |
c113de23 GM |
1137 | (provide 'rfc2047) |
1138 | ||
ab5796a9 | 1139 | ;;; arch-tag: a07fe3d4-22b5-4c4a-bd89-b1f82d5d36f6 |
c113de23 | 1140 | ;;; rfc2047.el ends here |