Commit | Line | Data |
---|---|---|
715a2ca2 | 1 | ;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages |
270a576a | 2 | |
2e62b574 GM |
3 | ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, |
4 | ;; 2007, 2008, 2009 Free Software Foundation, Inc. | |
c113de23 GM |
5 | |
6 | ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | |
7 | ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> | |
8 | ;; This file is part of GNU Emacs. | |
9 | ||
5e809f55 | 10 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
c113de23 | 11 | ;; it under the terms of the GNU General Public License as published by |
5e809f55 GM |
12 | ;; the Free Software Foundation, either version 3 of the License, or |
13 | ;; (at your option) any later version. | |
c113de23 GM |
14 | |
15 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
5e809f55 | 17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
c113de23 GM |
18 | ;; GNU General Public License for more details. |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
5e809f55 | 21 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
c113de23 GM |
22 | |
23 | ;;; Commentary: | |
24 | ||
d49a4975 DL |
25 | ;; RFC 2047 is "MIME (Multipurpose Internet Mail Extensions) Part |
26 | ;; Three: Message Header Extensions for Non-ASCII Text". | |
27 | ||
c113de23 GM |
28 | ;;; Code: |
29 | ||
7f0321ff | 30 | (eval-when-compile |
9efa445f DN |
31 | (require 'cl)) |
32 | (defvar message-posting-charset) | |
c113de23 GM |
33 | |
34 | (require 'qp) | |
35 | (require 'mm-util) | |
270a576a | 36 | (require 'ietf-drums) |
7f0321ff | 37 | ;; Fixme: Avoid this (used for mail-parse-charset) mm dependence on gnus. |
c113de23 | 38 | (require 'mail-prsvr) |
82fe1aed | 39 | (require 'rfc2045) ;; rfc2045-encode-string |
f2307f18 | 40 | (autoload 'mm-body-7-or-8 "mm-bodies") |
1c33719f | 41 | |
c113de23 | 42 | (defvar rfc2047-header-encoding-alist |
23f87bed MB |
43 | '(("Newsgroups" . nil) |
44 | ("Followup-To" . nil) | |
c113de23 | 45 | ("Message-ID" . nil) |
ad136a7c | 46 | ("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|\\(In-\\)?Reply-To\\|Sender\ |
23f87bed | 47 | \\|Mail-Followup-To\\|Mail-Copies-To\\|Approved\\)" . address-mime) |
c113de23 GM |
48 | (t . mime)) |
49 | "*Header/encoding method alist. | |
50 | The list is traversed sequentially. The keys can either be | |
f2307f18 | 51 | header regexps or t. |
c113de23 GM |
52 | |
53 | The values can be: | |
54 | ||
55 | 1) nil, in which case no encoding is done; | |
56 | 2) `mime', in which case the header will be encoded according to RFC2047; | |
7f0321ff DL |
57 | 3) `address-mime', like `mime', but takes account of the rules for address |
58 | fields (where quoted strings and comments must be treated separately); | |
59 | 4) a charset, in which case it will be encoded as that charset; | |
60 | 5) `default', in which case the field will be encoded as the rest | |
c113de23 GM |
61 | of the article.") |
62 | ||
63 | (defvar rfc2047-charset-encoding-alist | |
64 | '((us-ascii . nil) | |
65 | (iso-8859-1 . Q) | |
66 | (iso-8859-2 . Q) | |
67 | (iso-8859-3 . Q) | |
68 | (iso-8859-4 . Q) | |
69 | (iso-8859-5 . B) | |
70 | (koi8-r . B) | |
7f0321ff DL |
71 | (iso-8859-7 . B) |
72 | (iso-8859-8 . B) | |
c113de23 | 73 | (iso-8859-9 . Q) |
f2307f18 DL |
74 | (iso-8859-14 . Q) |
75 | (iso-8859-15 . Q) | |
c113de23 GM |
76 | (iso-2022-jp . B) |
77 | (iso-2022-kr . B) | |
78 | (gb2312 . B) | |
650c9e21 KH |
79 | (gbk . B) |
80 | (gb18030 . B) | |
676a7cc9 SZ |
81 | (big5 . B) |
82 | (cn-big5 . B) | |
c113de23 GM |
83 | (cn-gb . B) |
84 | (cn-gb-2312 . B) | |
85 | (euc-kr . B) | |
86 | (iso-2022-jp-2 . B) | |
23f87bed MB |
87 | (iso-2022-int-1 . B) |
88 | (viscii . Q)) | |
c113de23 | 89 | "Alist of MIME charsets to RFC2047 encodings. |
7f0321ff DL |
90 | Valid encodings are nil, `Q' and `B'. These indicate binary (no) encoding, |
91 | quoted-printable and base64 respectively.") | |
c113de23 | 92 | |
10ace8ea MB |
93 | (defvar rfc2047-encode-function-alist |
94 | '((Q . rfc2047-q-encode-string) | |
95 | (B . rfc2047-b-encode-string) | |
96 | (nil . identity)) | |
c113de23 GM |
97 | "Alist of RFC2047 encodings to encoding functions.") |
98 | ||
10ace8ea MB |
99 | (defvar rfc2047-encode-encoded-words t |
100 | "Whether encoded words should be encoded again.") | |
101 | ||
b890d447 MB |
102 | (defvar rfc2047-allow-irregular-q-encoded-words t |
103 | "*Whether to decode irregular Q-encoded words.") | |
104 | ||
105 | (eval-and-compile ;; Necessary to hard code them in `rfc2047-decode-region'. | |
106 | (defconst rfc2047-encoded-word-regexp | |
107 | "=\\?\\([^][\000-\040()<>@,\;:*\\\"/?.=]+\\)\\(?:\\*[^?]+\\)?\\?\ | |
108 | \\(B\\?[+/0-9A-Za-z]*=*\ | |
109 | \\|Q\\?[ ->@-~]*\ | |
110 | \\)\\?=" | |
111 | "Regexp that matches encoded word." | |
112 | ;; The patterns for the B encoding and the Q encoding, i.e. the ones | |
113 | ;; beginning with "B" and "Q" respectively, are restricted into only | |
114 | ;; the characters that those encodings may generally use. | |
115 | ) | |
116 | (defconst rfc2047-encoded-word-regexp-loose | |
117 | "=\\?\\([^][\000-\040()<>@,\;:*\\\"/?.=]+\\)\\(?:\\*[^?]+\\)?\\?\ | |
118 | \\(B\\?[+/0-9A-Za-z]*=*\ | |
119 | \\|Q\\?\\(?:\\?+[ -<>@-~]\\)?\\(?:[ ->@-~]+\\?+[ -<>@-~]\\)*[ ->@-~]*\\?*\ | |
120 | \\)\\?=" | |
121 | "Regexp that matches encoded word allowing loose Q encoding." | |
122 | ;; The pattern for the Q encoding, i.e. the one beginning with "Q", | |
123 | ;; is similar to: | |
124 | ;; "Q\\?\\(\\?+[^\n=?]\\)?\\([^\n?]+\\?+[^\n=?]\\)*[^\n?]*\\?*" | |
125 | ;; <--------1-------><----------2,3----------><--4--><-5-> | |
126 | ;; They mean: | |
127 | ;; 1. After "Q?", allow "?"s that follow a character other than "=". | |
128 | ;; 2. Allow "=" after "Q?"; it isn't regarded as the terminator. | |
129 | ;; 3. In the middle of an encoded word, allow "?"s that follow a | |
130 | ;; character other than "=". | |
131 | ;; 4. Allow any characters other than "?" in the middle of an | |
132 | ;; encoded word. | |
133 | ;; 5. At the end, allow "?"s. | |
134 | )) | |
135 | ||
c113de23 GM |
136 | ;;; |
137 | ;;; Functions for encoding RFC2047 messages | |
138 | ;;; | |
139 | ||
f4dd4ae8 MB |
140 | (defun rfc2047-qp-or-base64 () |
141 | "Return the type with which to encode the buffer. | |
142 | This is either `base64' or `quoted-printable'." | |
143 | (save-excursion | |
144 | (let ((limit (min (point-max) (+ 2000 (point-min)))) | |
145 | (n8bit 0)) | |
146 | (goto-char (point-min)) | |
147 | (skip-chars-forward "\x20-\x7f\r\n\t" limit) | |
148 | (while (< (point) limit) | |
149 | (incf n8bit) | |
150 | (forward-char 1) | |
151 | (skip-chars-forward "\x20-\x7f\r\n\t" limit)) | |
152 | (if (or (< (* 6 n8bit) (- limit (point-min))) | |
153 | ;; Don't base64, say, a short line with a single | |
154 | ;; non-ASCII char when splitting parts by charset. | |
155 | (= n8bit 1)) | |
156 | 'quoted-printable | |
157 | 'base64)))) | |
158 | ||
c113de23 GM |
159 | (defun rfc2047-narrow-to-field () |
160 | "Narrow the buffer to the header on the current line." | |
161 | (beginning-of-line) | |
162 | (narrow-to-region | |
163 | (point) | |
164 | (progn | |
165 | (forward-line 1) | |
166 | (if (re-search-forward "^[^ \n\t]" nil t) | |
01c52d31 | 167 | (point-at-bol) |
c113de23 GM |
168 | (point-max)))) |
169 | (goto-char (point-min))) | |
170 | ||
23f87bed MB |
171 | (defun rfc2047-field-value () |
172 | "Return the value of the field at point." | |
173 | (save-excursion | |
174 | (save-restriction | |
175 | (rfc2047-narrow-to-field) | |
176 | (re-search-forward ":[ \t\n]*" nil t) | |
10ace8ea | 177 | (buffer-substring-no-properties (point) (point-max))))) |
23f87bed | 178 | |
343d6628 MB |
179 | (defun rfc2047-quote-special-characters-in-quoted-strings (&optional |
180 | encodable-regexp) | |
181 | "Quote special characters with `\\'s in quoted strings. | |
182 | Quoting will not be done in a quoted string if it contains characters | |
01c52d31 | 183 | matching ENCODABLE-REGEXP or it is within parentheses." |
343d6628 MB |
184 | (goto-char (point-min)) |
185 | (let ((tspecials (concat "[" ietf-drums-tspecials "]")) | |
01c52d31 | 186 | (start (point)) |
b110774a | 187 | beg end) |
343d6628 | 188 | (with-syntax-table (standard-syntax-table) |
01c52d31 MB |
189 | (while (not (eobp)) |
190 | (if (ignore-errors | |
191 | (forward-list 1) | |
192 | (eq (char-before) ?\))) | |
193 | (forward-list -1) | |
194 | (goto-char (point-max))) | |
195 | (save-restriction | |
196 | (narrow-to-region start (point)) | |
197 | (goto-char start) | |
198 | (while (search-forward "\"" nil t) | |
199 | (setq beg (match-beginning 0)) | |
200 | (unless (eq (char-before beg) ?\\) | |
201 | (goto-char beg) | |
202 | (setq beg (1+ beg)) | |
203 | (condition-case nil | |
204 | (progn | |
205 | (forward-sexp) | |
206 | (setq end (1- (point))) | |
207 | (goto-char beg) | |
208 | (if (and encodable-regexp | |
209 | (re-search-forward encodable-regexp end t)) | |
210 | (goto-char (1+ end)) | |
211 | (save-restriction | |
212 | (narrow-to-region beg end) | |
213 | (while (re-search-forward tspecials nil 'move) | |
214 | (if (eq (char-before) ?\\) | |
215 | (if (looking-at tspecials) ;; Already quoted. | |
216 | (forward-char) | |
217 | (insert "\\")) | |
218 | (goto-char (match-beginning 0)) | |
219 | (insert "\\") | |
220 | (forward-char)))) | |
221 | (forward-char))) | |
222 | (error | |
223 | (goto-char beg))))) | |
224 | (goto-char (point-max))) | |
225 | (forward-list 1) | |
226 | (setq start (point)))))) | |
343d6628 | 227 | |
7f0321ff DL |
228 | (defvar rfc2047-encoding-type 'address-mime |
229 | "The type of encoding done by `rfc2047-encode-region'. | |
230 | This should be dynamically bound around calls to | |
231 | `rfc2047-encode-region' to either `mime' or `address-mime'. See | |
232 | `rfc2047-header-encoding-alist', for definitions.") | |
233 | ||
c113de23 GM |
234 | (defun rfc2047-encode-message-header () |
235 | "Encode the message header according to `rfc2047-header-encoding-alist'. | |
236 | Should be called narrowed to the head of the message." | |
237 | (interactive "*") | |
238 | (save-excursion | |
239 | (goto-char (point-min)) | |
240 | (let (alist elem method) | |
241 | (while (not (eobp)) | |
242 | (save-restriction | |
243 | (rfc2047-narrow-to-field) | |
343d6628 MB |
244 | (setq method nil |
245 | alist rfc2047-header-encoding-alist) | |
246 | (while (setq elem (pop alist)) | |
247 | (when (or (and (stringp (car elem)) | |
248 | (looking-at (car elem))) | |
249 | (eq (car elem) t)) | |
250 | (setq alist nil | |
251 | method (cdr elem)))) | |
c113de23 | 252 | (if (not (rfc2047-encodable-p)) |
343d6628 MB |
253 | (prog2 |
254 | (when (eq method 'address-mime) | |
255 | (rfc2047-quote-special-characters-in-quoted-strings)) | |
10ace8ea MB |
256 | (if (and (eq (mm-body-7-or-8) '8bit) |
257 | (mm-multibyte-p) | |
258 | (mm-coding-system-p | |
259 | (car message-posting-charset))) | |
260 | ;; 8 bit must be decoded. | |
261 | (mm-encode-coding-region | |
262 | (point-min) (point-max) | |
263 | (mm-charset-to-coding-system | |
264 | (car message-posting-charset)))) | |
c6e26ce2 | 265 | ;; No encoding necessary, but folding is nice |
10ace8ea MB |
266 | (when nil |
267 | (rfc2047-fold-region | |
268 | (save-excursion | |
269 | (goto-char (point-min)) | |
270 | (skip-chars-forward "^:") | |
271 | (when (looking-at ": ") | |
272 | (forward-char 2)) | |
273 | (point)) | |
274 | (point-max)))) | |
c113de23 | 275 | ;; We found something that may perhaps be encoded. |
7f0321ff | 276 | (re-search-forward "^[^:]+: *" nil t) |
c113de23 | 277 | (cond |
7f0321ff DL |
278 | ((eq method 'address-mime) |
279 | (rfc2047-encode-region (point) (point-max))) | |
c113de23 | 280 | ((eq method 'mime) |
23f87bed | 281 | (let ((rfc2047-encoding-type 'mime)) |
7f0321ff | 282 | (rfc2047-encode-region (point) (point-max)))) |
c113de23 GM |
283 | ((eq method 'default) |
284 | (if (and (featurep 'mule) | |
2e62b574 GM |
285 | (if (boundp 'enable-multibyte-characters) |
286 | (default-value 'enable-multibyte-characters)) | |
c113de23 | 287 | mail-parse-charset) |
7f0321ff | 288 | (mm-encode-coding-region (point) (point-max) |
c113de23 | 289 | mail-parse-charset))) |
23f87bed MB |
290 | ;; We get this when CC'ing messsages to newsgroups with |
291 | ;; 8-bit names. The group name mail copy just got | |
292 | ;; unconditionally encoded. Previously, it would ask | |
293 | ;; whether to encode, which was quite confusing for the | |
fffa137c | 294 | ;; user. If the new behavior is wrong, tell me. I have |
23f87bed MB |
295 | ;; left the old code commented out below. |
296 | ;; -- Per Abrahamsen <abraham@dina.kvl.dk> Date: 2001-10-07. | |
297 | ;; Modified by Dave Love, with the commented-out code changed | |
298 | ;; in accordance with changes elsewhere. | |
299 | ((null method) | |
300 | (rfc2047-encode-region (point) (point-max))) | |
301 | ;;; ((null method) | |
302 | ;;; (if (or (message-options-get | |
303 | ;;; 'rfc2047-encode-message-header-encode-any) | |
304 | ;;; (message-options-set | |
305 | ;;; 'rfc2047-encode-message-header-encode-any | |
306 | ;;; (y-or-n-p | |
307 | ;;; "Some texts are not encoded. Encode anyway?"))) | |
308 | ;;; (rfc2047-encode-region (point-min) (point-max)) | |
309 | ;;; (error "Cannot send unencoded text"))) | |
c113de23 | 310 | ((mm-coding-system-p method) |
01c52d31 | 311 | (if (or (and (featurep 'mule) |
2e62b574 GM |
312 | (if (boundp 'enable-multibyte-characters) |
313 | (default-value 'enable-multibyte-characters))) | |
01c52d31 | 314 | (featurep 'file-coding)) |
7f0321ff | 315 | (mm-encode-coding-region (point) (point-max) method))) |
c113de23 GM |
316 | ;; Hm. |
317 | (t))) | |
318 | (goto-char (point-max))))))) | |
319 | ||
a553a9f5 DL |
320 | ;; Fixme: This, and the require below may not be the Right Thing, but |
321 | ;; should be safe just before release. -- fx 2001-02-08 | |
a553a9f5 | 322 | |
f2307f18 DL |
323 | (defun rfc2047-encodable-p () |
324 | "Return non-nil if any characters in current buffer need encoding in headers. | |
325 | The buffer may be narrowed." | |
a553a9f5 | 326 | (require 'message) ; for message-posting-charset |
c113de23 | 327 | (let ((charsets |
7f0321ff | 328 | (mm-find-mime-charset-region (point-min) (point-max)))) |
10ace8ea MB |
329 | (goto-char (point-min)) |
330 | (or (and rfc2047-encode-encoded-words | |
331 | (prog1 | |
b890d447 | 332 | (re-search-forward rfc2047-encoded-word-regexp nil t) |
10ace8ea MB |
333 | (goto-char (point-min)))) |
334 | (and charsets | |
335 | (not (equal charsets (list (car message-posting-charset)))))))) | |
7f0321ff DL |
336 | |
337 | ;; Use this syntax table when parsing into regions that may need | |
338 | ;; encoding. Double quotes are string delimiters, backslash is | |
339 | ;; character quoting, and all other RFC 2822 special characters are | |
340 | ;; treated as punctuation so we can use forward-sexp/forward-word to | |
341 | ;; skip to the end of regions appropriately. Nb. ietf-drums does | |
342 | ;; things differently. | |
343 | (defconst rfc2047-syntax-table | |
23f87bed MB |
344 | ;; (make-char-table 'syntax-table '(2)) only works in Emacs. |
345 | (let ((table (make-syntax-table))) | |
346 | ;; The following is done to work for setting all elements of the table | |
8589dc17 | 347 | ;; in Emacs 21-23 and XEmacs; it appears to be the cleanest way. |
23f87bed MB |
348 | ;; Play safe and don't assume the form of the word syntax entry -- |
349 | ;; copy it from ?a. | |
350 | (if (fboundp 'set-char-table-range) ; Emacs | |
351 | (funcall (intern "set-char-table-range") | |
352 | table t (aref (standard-syntax-table) ?a)) | |
353 | (if (fboundp 'put-char-table) | |
354 | (if (fboundp 'get-char-table) ; warning avoidance | |
355 | (put-char-table t (get-char-table ?a (standard-syntax-table)) | |
356 | table)))) | |
7f0321ff DL |
357 | (modify-syntax-entry ?\\ "\\" table) |
358 | (modify-syntax-entry ?\" "\"" table) | |
10ace8ea MB |
359 | (modify-syntax-entry ?\( "(" table) |
360 | (modify-syntax-entry ?\) ")" table) | |
7f0321ff DL |
361 | (modify-syntax-entry ?\< "." table) |
362 | (modify-syntax-entry ?\> "." table) | |
363 | (modify-syntax-entry ?\[ "." table) | |
364 | (modify-syntax-entry ?\] "." table) | |
365 | (modify-syntax-entry ?: "." table) | |
366 | (modify-syntax-entry ?\; "." table) | |
367 | (modify-syntax-entry ?, "." table) | |
368 | (modify-syntax-entry ?@ "." table) | |
369 | table)) | |
c113de23 GM |
370 | |
371 | (defun rfc2047-encode-region (b e) | |
7f0321ff DL |
372 | "Encode words in region B to E that need encoding. |
373 | By default, the region is treated as containing RFC2822 addresses. | |
374 | Dynamically bind `rfc2047-encoding-type' to change that." | |
375 | (save-restriction | |
376 | (narrow-to-region b e) | |
10ace8ea MB |
377 | (let ((encodable-regexp (if rfc2047-encode-encoded-words |
378 | "[^\000-\177]+\\|=\\?" | |
379 | "[^\000-\177]+")) | |
380 | start ; start of current token | |
381 | end begin csyntax | |
382 | ;; Whether there's an encoded word before the current token, | |
383 | ;; either immediately or separated by space. | |
384 | last-encoded | |
385 | (orig-text (buffer-substring-no-properties b e))) | |
386 | (if (eq 'mime rfc2047-encoding-type) | |
387 | ;; Simple case. Continuous words in which all those contain | |
388 | ;; non-ASCII characters are encoded collectively. Encoding | |
389 | ;; ASCII words, including `Re:' used in Subject headers, is | |
390 | ;; avoided for interoperability with non-MIME clients and | |
391 | ;; for making it easy to find keywords. | |
392 | (progn | |
393 | (goto-char (point-min)) | |
394 | (while (progn (skip-chars-forward " \t\n") | |
395 | (not (eobp))) | |
396 | (setq start (point)) | |
397 | (while (and (looking-at "[ \t\n]*\\([^ \t\n]+\\)") | |
398 | (progn | |
399 | (setq end (match-end 0)) | |
400 | (re-search-forward encodable-regexp end t))) | |
401 | (goto-char end)) | |
402 | (if (> (point) start) | |
403 | (rfc2047-encode start (point)) | |
404 | (goto-char end)))) | |
405 | ;; `address-mime' case -- take care of quoted words, comments. | |
343d6628 | 406 | (rfc2047-quote-special-characters-in-quoted-strings encodable-regexp) |
10ace8ea | 407 | (with-syntax-table rfc2047-syntax-table |
7f0321ff | 408 | (goto-char (point-min)) |
10ace8ea | 409 | (condition-case err ; in case of unbalanced quotes |
7f0321ff DL |
410 | ;; Look for rfc2822-style: sequences of atoms, quoted |
411 | ;; strings, specials, whitespace. (Specials mustn't be | |
412 | ;; encoded.) | |
413 | (while (not (eobp)) | |
7f0321ff | 414 | ;; Skip whitespace. |
10ace8ea MB |
415 | (skip-chars-forward " \t\n") |
416 | (setq start (point)) | |
7f0321ff DL |
417 | (cond |
418 | ((not (char-after))) ; eob | |
419 | ;; else token start | |
10ace8ea | 420 | ((eq ?\" (setq csyntax (char-syntax (char-after)))) |
7f0321ff DL |
421 | ;; Quoted word. |
422 | (forward-sexp) | |
423 | (setq end (point)) | |
424 | ;; Does it need encoding? | |
425 | (goto-char start) | |
10ace8ea MB |
426 | (if (re-search-forward encodable-regexp end 'move) |
427 | ;; It needs encoding. Strip the quotes first, | |
428 | ;; since encoded words can't occur in quotes. | |
429 | (progn | |
430 | (goto-char end) | |
431 | (delete-backward-char 1) | |
432 | (goto-char start) | |
433 | (delete-char 1) | |
434 | (when last-encoded | |
435 | ;; There was a preceding quoted word. We need | |
436 | ;; to include any separating whitespace in this | |
437 | ;; word to avoid it getting lost. | |
438 | (skip-chars-backward " \t") | |
439 | ;; A space is needed between the encoded words. | |
440 | (insert ? ) | |
441 | (setq start (point) | |
442 | end (1+ end))) | |
443 | ;; Adjust the end position for the deleted quotes. | |
444 | (rfc2047-encode start (- end 2)) | |
445 | (setq last-encoded t)) ; record that it was encoded | |
446 | (setq last-encoded nil))) | |
447 | ((eq ?. csyntax) | |
7f0321ff DL |
448 | ;; Skip other delimiters, but record that they've |
449 | ;; potentially separated quoted words. | |
450 | (forward-char) | |
451 | (setq last-encoded nil)) | |
10ace8ea MB |
452 | ((eq ?\) csyntax) |
453 | (error "Unbalanced parentheses")) | |
454 | ((eq ?\( csyntax) | |
455 | ;; Look for the end of parentheses. | |
456 | (forward-list) | |
457 | ;; Encode text as an unstructured field. | |
458 | (let ((rfc2047-encoding-type 'mime)) | |
459 | (rfc2047-encode-region (1+ start) (1- (point)))) | |
460 | (skip-chars-forward ")")) | |
7f0321ff DL |
461 | (t ; normal token/whitespace sequence |
462 | ;; Find the end. | |
10ace8ea MB |
463 | ;; Skip one ASCII word, or encode continuous words |
464 | ;; in which all those contain non-ASCII characters. | |
465 | (setq end nil) | |
466 | (while (not (or end (eobp))) | |
467 | (when (looking-at "[\000-\177]+") | |
468 | (setq begin (point) | |
469 | end (match-end 0)) | |
470 | (when (progn | |
471 | (while (and (or (re-search-forward | |
472 | "[ \t\n]\\|\\Sw" end 'move) | |
473 | (setq end nil)) | |
474 | (eq ?\\ (char-syntax (char-before)))) | |
475 | ;; Skip backslash-quoted characters. | |
476 | (forward-char)) | |
477 | end) | |
478 | (setq end (match-beginning 0)) | |
479 | (if rfc2047-encode-encoded-words | |
480 | (progn | |
481 | (goto-char begin) | |
482 | (when (search-forward "=?" end 'move) | |
483 | (goto-char (match-beginning 0)) | |
484 | (setq end nil))) | |
485 | (goto-char end)))) | |
486 | ;; Where the value nil of `end' means there may be | |
487 | ;; text to have to be encoded following the point. | |
488 | ;; Otherwise, the point reached to the end of ASCII | |
489 | ;; words separated by whitespace or a special char. | |
490 | (unless end | |
491 | (when (looking-at encodable-regexp) | |
492 | (goto-char (setq begin (match-end 0))) | |
493 | (while (and (looking-at "[ \t\n]+\\([^ \t\n]+\\)") | |
494 | (setq end (match-end 0)) | |
495 | (progn | |
496 | (while (re-search-forward | |
497 | encodable-regexp end t)) | |
498 | (< begin (point))) | |
499 | (goto-char begin) | |
500 | (or (not (re-search-forward "\\Sw" end t)) | |
501 | (progn | |
502 | (goto-char (match-beginning 0)) | |
503 | nil))) | |
504 | (goto-char end)) | |
505 | (when (looking-at "[^ \t\n]+") | |
506 | (setq end (match-end 0)) | |
507 | (if (re-search-forward "\\Sw+" end t) | |
508 | ;; There are special characters better | |
509 | ;; to be encoded so that MTAs may parse | |
510 | ;; them safely. | |
511 | (cond ((= end (point))) | |
512 | ((looking-at (concat "\\sw*\\(" | |
513 | encodable-regexp | |
514 | "\\)")) | |
515 | (setq end nil)) | |
516 | (t | |
517 | (goto-char (1- (match-end 0))) | |
518 | (unless (= (point) (match-beginning 0)) | |
519 | ;; Separate encodable text and | |
520 | ;; delimiter. | |
521 | (insert " ")))) | |
522 | (goto-char end) | |
523 | (skip-chars-forward " \t\n") | |
524 | (if (and (looking-at "[^ \t\n]+") | |
525 | (string-match encodable-regexp | |
526 | (match-string 0))) | |
527 | (setq end nil) | |
528 | (goto-char end))))))) | |
529 | (skip-chars-backward " \t\n") | |
7f0321ff | 530 | (setq end (point)) |
7f0321ff | 531 | (goto-char start) |
10ace8ea MB |
532 | (if (re-search-forward encodable-regexp end 'move) |
533 | (progn | |
534 | (unless (memq (char-before start) '(nil ?\t ? )) | |
535 | (if (progn | |
536 | (goto-char start) | |
537 | (skip-chars-backward "^ \t\n") | |
538 | (and (looking-at "\\Sw+") | |
539 | (= (match-end 0) start))) | |
540 | ;; Also encode bogus delimiters. | |
541 | (setq start (point)) | |
542 | ;; Separate encodable text and delimiter. | |
543 | (goto-char start) | |
544 | (insert " ") | |
545 | (setq start (1+ start) | |
546 | end (1+ end)))) | |
547 | (rfc2047-encode start end) | |
548 | (setq last-encoded t)) | |
549 | (setq last-encoded nil))))) | |
23f87bed | 550 | (error |
10ace8ea MB |
551 | (if (or debug-on-quit debug-on-error) |
552 | (signal (car err) (cdr err)) | |
553 | (error "Invalid data for rfc2047 encoding: %s" | |
554 | (mm-replace-in-string orig-text "[ \t\n]+" " ")))))))) | |
555 | (rfc2047-fold-region b (point)) | |
556 | (goto-char (point-max)))) | |
c113de23 GM |
557 | |
558 | (defun rfc2047-encode-string (string) | |
7f0321ff DL |
559 | "Encode words in STRING. |
560 | By default, the string is treated as containing addresses (see | |
23f87bed | 561 | `rfc2047-encoding-type')." |
10ace8ea | 562 | (mm-with-multibyte-buffer |
c113de23 GM |
563 | (insert string) |
564 | (rfc2047-encode-region (point-min) (point-max)) | |
565 | (buffer-string))) | |
566 | ||
b890d447 MB |
567 | ;; From RFC 2047: |
568 | ;; 2. Syntax of encoded-words | |
569 | ;; [...] | |
570 | ;; While there is no limit to the length of a multiple-line header | |
571 | ;; field, each line of a header field that contains one or more | |
572 | ;; 'encoded-word's is limited to 76 characters. | |
573 | ;; | |
574 | ;; In `rfc2047-encode-parameter' it is bound to nil, so don't defconst it. | |
10ace8ea MB |
575 | (defvar rfc2047-encode-max-chars 76 |
576 | "Maximum characters of each header line that contain encoded-words. | |
b890d447 MB |
577 | According to RFC 2047, it is 76. If it is nil, encoded-words |
578 | will not be folded. Too small value may cause an error. You | |
579 | should not change this value.") | |
10ace8ea MB |
580 | |
581 | (defun rfc2047-encode-1 (column string cs encoder start crest tail | |
582 | &optional eword) | |
583 | "Subroutine used by `rfc2047-encode'." | |
584 | (cond ((string-equal string "") | |
585 | (or eword "")) | |
586 | ((not rfc2047-encode-max-chars) | |
587 | (concat start | |
588 | (funcall encoder (if cs | |
589 | (mm-encode-coding-string string cs) | |
590 | string)) | |
591 | "?=")) | |
592 | ((>= column rfc2047-encode-max-chars) | |
593 | (when eword | |
594 | (cond ((string-match "\n[ \t]+\\'" eword) | |
595 | ;; Reomove a superfluous empty line. | |
596 | (setq eword (substring eword 0 (match-beginning 0)))) | |
597 | ((string-match "(+\\'" eword) | |
598 | ;; Break the line before the open parenthesis. | |
599 | (setq crest (concat crest (match-string 0 eword)) | |
600 | eword (substring eword 0 (match-beginning 0)))))) | |
601 | (rfc2047-encode-1 (length crest) string cs encoder start " " tail | |
602 | (concat eword "\n" crest))) | |
603 | (t | |
604 | (let ((index 0) | |
605 | (limit (1- (length string))) | |
606 | (prev "") | |
607 | next len) | |
608 | (while (and prev | |
609 | (<= index limit)) | |
610 | (setq next (concat start | |
611 | (funcall encoder | |
612 | (if cs | |
613 | (mm-encode-coding-string | |
614 | (substring string 0 (1+ index)) | |
615 | cs) | |
616 | (substring string 0 (1+ index)))) | |
617 | "?=") | |
618 | len (+ column (length next))) | |
619 | (if (> len rfc2047-encode-max-chars) | |
620 | (setq next prev | |
621 | prev nil) | |
622 | (if (or (< index limit) | |
623 | (<= (+ len (or (string-match "\n" tail) | |
624 | (length tail))) | |
625 | rfc2047-encode-max-chars)) | |
626 | (setq prev next | |
627 | index (1+ index)) | |
628 | (if (string-match "\\`)+" tail) | |
629 | ;; Break the line after the close parenthesis. | |
630 | (setq tail (concat (substring tail 0 (match-end 0)) | |
631 | "\n " | |
632 | (substring tail (match-end 0))) | |
633 | prev next | |
634 | index (1+ index)) | |
635 | (setq next prev | |
636 | prev nil))))) | |
637 | (if (> index limit) | |
638 | (concat eword next tail) | |
639 | (if (= 0 index) | |
640 | (if (and eword | |
641 | (string-match "(+\\'" eword)) | |
642 | (setq crest (concat crest (match-string 0 eword)) | |
643 | eword (substring eword 0 (match-beginning 0))) | |
644 | (setq eword (concat eword next))) | |
645 | (setq crest " " | |
646 | eword (concat eword next))) | |
647 | (when (string-match "\n[ \t]+\\'" eword) | |
648 | ;; Reomove a superfluous empty line. | |
649 | (setq eword (substring eword 0 (match-beginning 0)))) | |
650 | (rfc2047-encode-1 (length crest) (substring string index) | |
651 | cs encoder start " " tail | |
652 | (concat eword "\n" crest))))))) | |
653 | ||
7f0321ff DL |
654 | (defun rfc2047-encode (b e) |
655 | "Encode the word(s) in the region B to E. | |
10ace8ea MB |
656 | Point moves to the end of the region." |
657 | (let ((mime-charset (or (mm-find-mime-charset-region b e) (list 'us-ascii))) | |
658 | cs encoding tail crest eword) | |
659 | (cond ((> (length mime-charset) 1) | |
660 | (error "Can't rfc2047-encode `%s'" | |
661 | (buffer-substring-no-properties b e))) | |
662 | ((= (length mime-charset) 1) | |
663 | (setq mime-charset (car mime-charset) | |
664 | cs (mm-charset-to-coding-system mime-charset)) | |
665 | (unless (and (mm-multibyte-p) | |
666 | (mm-coding-system-p cs)) | |
667 | (setq cs nil)) | |
668 | (save-restriction | |
669 | (narrow-to-region b e) | |
670 | (setq encoding | |
671 | (or (cdr (assq mime-charset | |
c113de23 | 672 | rfc2047-charset-encoding-alist)) |
23f87bed MB |
673 | ;; For the charsets that don't have a preferred |
674 | ;; encoding, choose the one that's shorter. | |
10ace8ea MB |
675 | (if (eq (rfc2047-qp-or-base64) 'base64) |
676 | 'B | |
677 | 'Q))) | |
678 | (widen) | |
679 | (goto-char e) | |
680 | (skip-chars-forward "^ \t\n") | |
681 | ;; `tail' may contain a close parenthesis. | |
682 | (setq tail (buffer-substring-no-properties e (point))) | |
683 | (goto-char b) | |
684 | (setq b (point-marker) | |
685 | e (set-marker (make-marker) e)) | |
01c52d31 | 686 | (rfc2047-fold-region (point-at-bol) b) |
10ace8ea MB |
687 | (goto-char b) |
688 | (skip-chars-backward "^ \t\n") | |
689 | (unless (= 0 (skip-chars-backward " \t")) | |
690 | ;; `crest' may contain whitespace and an open parenthesis. | |
691 | (setq crest (buffer-substring-no-properties (point) b))) | |
692 | (setq eword (rfc2047-encode-1 | |
01c52d31 | 693 | (- b (point-at-bol)) |
10ace8ea MB |
694 | (mm-replace-in-string |
695 | (buffer-substring-no-properties b e) | |
696 | "\n\\([ \t]?\\)" "\\1") | |
697 | cs | |
698 | (or (cdr (assq encoding | |
699 | rfc2047-encode-function-alist)) | |
700 | 'identity) | |
701 | (concat "=?" (downcase (symbol-name mime-charset)) | |
702 | "?" (upcase (symbol-name encoding)) "?") | |
703 | (or crest " ") | |
704 | tail)) | |
705 | (delete-region (if (eq (aref eword 0) ?\n) | |
706 | (if (bolp) | |
707 | ;; The line was folded before encoding. | |
708 | (1- (point)) | |
709 | (point)) | |
710 | (goto-char b)) | |
711 | (+ e (length tail))) | |
712 | ;; `eword' contains `crest' and `tail'. | |
713 | (insert eword) | |
714 | (set-marker b nil) | |
715 | (set-marker e nil) | |
716 | (unless (or (/= 0 (length tail)) | |
717 | (eobp) | |
718 | (looking-at "[ \t\n)]")) | |
719 | (insert " ")))) | |
720 | (t | |
721 | (goto-char e))))) | |
c113de23 | 722 | |
23f87bed MB |
723 | (defun rfc2047-fold-field () |
724 | "Fold the current header field." | |
725 | (save-excursion | |
726 | (save-restriction | |
727 | (rfc2047-narrow-to-field) | |
728 | (rfc2047-fold-region (point-min) (point-max))))) | |
729 | ||
c113de23 | 730 | (defun rfc2047-fold-region (b e) |
a553a9f5 | 731 | "Fold long lines in region B to E." |
c113de23 GM |
732 | (save-restriction |
733 | (narrow-to-region b e) | |
734 | (goto-char (point-min)) | |
f2307f18 DL |
735 | (let ((break nil) |
736 | (qword-break nil) | |
c6e26ce2 | 737 | (first t) |
f2307f18 DL |
738 | (bol (save-restriction |
739 | (widen) | |
01c52d31 | 740 | (point-at-bol)))) |
c113de23 | 741 | (while (not (eobp)) |
23f87bed MB |
742 | (when (and (or break qword-break) |
743 | (> (- (point) bol) 76)) | |
f2307f18 DL |
744 | (goto-char (or break qword-break)) |
745 | (setq break nil | |
746 | qword-break nil) | |
10ace8ea | 747 | (skip-chars-backward " \t") |
c6e26ce2 | 748 | (if (looking-at "[ \t]") |
7f0321ff | 749 | (insert ?\n) |
619ac84f | 750 | (insert "\n ")) |
f2307f18 DL |
751 | (setq bol (1- (point))) |
752 | ;; Don't break before the first non-LWSP characters. | |
753 | (skip-chars-forward " \t") | |
23f87bed MB |
754 | (unless (eobp) |
755 | (forward-char 1))) | |
c113de23 | 756 | (cond |
f2307f18 DL |
757 | ((eq (char-after) ?\n) |
758 | (forward-char 1) | |
759 | (setq bol (point) | |
760 | break nil | |
761 | qword-break nil) | |
762 | (skip-chars-forward " \t") | |
763 | (unless (or (eobp) (eq (char-after) ?\n)) | |
764 | (forward-char 1))) | |
765 | ((eq (char-after) ?\r) | |
766 | (forward-char 1)) | |
c113de23 | 767 | ((memq (char-after) '(? ?\t)) |
f2307f18 | 768 | (skip-chars-forward " \t") |
10ace8ea MB |
769 | (unless first ;; Don't break just after the header name. |
770 | (setq break (point)))) | |
f2307f18 DL |
771 | ((not break) |
772 | (if (not (looking-at "=\\?[^=]")) | |
773 | (if (eq (char-after) ?=) | |
774 | (forward-char 1) | |
775 | (skip-chars-forward "^ \t\n\r=")) | |
23f87bed MB |
776 | ;; Don't break at the start of the field. |
777 | (unless (= (point) b) | |
778 | (setq qword-break (point))) | |
f2307f18 DL |
779 | (skip-chars-forward "^ \t\n\r"))) |
780 | (t | |
10ace8ea MB |
781 | (skip-chars-forward "^ \t\n\r"))) |
782 | (setq first nil)) | |
23f87bed MB |
783 | (when (and (or break qword-break) |
784 | (> (- (point) bol) 76)) | |
f2307f18 DL |
785 | (goto-char (or break qword-break)) |
786 | (setq break nil | |
787 | qword-break nil) | |
10ace8ea MB |
788 | (if (or (> 0 (skip-chars-backward " \t")) |
789 | (looking-at "[ \t]")) | |
790 | (insert ?\n) | |
791 | (insert "\n ")) | |
f2307f18 DL |
792 | (setq bol (1- (point))) |
793 | ;; Don't break before the first non-LWSP characters. | |
794 | (skip-chars-forward " \t") | |
23f87bed MB |
795 | (unless (eobp) |
796 | (forward-char 1)))))) | |
797 | ||
798 | (defun rfc2047-unfold-field () | |
799 | "Fold the current line." | |
800 | (save-excursion | |
801 | (save-restriction | |
802 | (rfc2047-narrow-to-field) | |
803 | (rfc2047-unfold-region (point-min) (point-max))))) | |
f2307f18 DL |
804 | |
805 | (defun rfc2047-unfold-region (b e) | |
a553a9f5 | 806 | "Unfold lines in region B to E." |
f2307f18 DL |
807 | (save-restriction |
808 | (narrow-to-region b e) | |
809 | (goto-char (point-min)) | |
810 | (let ((bol (save-restriction | |
811 | (widen) | |
01c52d31 MB |
812 | (point-at-bol))) |
813 | (eol (point-at-eol))) | |
f2307f18 DL |
814 | (forward-line 1) |
815 | (while (not (eobp)) | |
c6e26ce2 | 816 | (if (and (looking-at "[ \t]") |
01c52d31 | 817 | (< (- (point-at-eol) bol) 76)) |
c6e26ce2 DL |
818 | (delete-region eol (progn |
819 | (goto-char eol) | |
820 | (skip-chars-forward "\r\n") | |
821 | (point))) | |
01c52d31 MB |
822 | (setq bol (point-at-bol))) |
823 | (setq eol (point-at-eol)) | |
f2307f18 | 824 | (forward-line 1))))) |
c113de23 | 825 | |
10ace8ea MB |
826 | (defun rfc2047-b-encode-string (string) |
827 | "Base64-encode the header contained in STRING." | |
828 | (base64-encode-string string t)) | |
829 | ||
830 | (defun rfc2047-q-encode-string (string) | |
831 | "Quoted-printable-encode the header in STRING." | |
832 | (mm-with-unibyte-buffer | |
833 | (insert string) | |
834 | (quoted-printable-encode-region | |
835 | (point-min) (point-max) nil | |
836 | ;; = (\075), _ (\137), ? (\077) are used in the encoded word. | |
837 | ;; Avoid using 8bit characters. | |
838 | ;; This list excludes `especials' (see the RFC2047 syntax), | |
839 | ;; meaning that some characters in non-structured fields will | |
840 | ;; get encoded when they con't need to be. The following is | |
841 | ;; what it used to be. | |
842 | ;;; ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?" | |
843 | ;;; "\010\012\014\040-\074\076\100-\136\140-\177") | |
844 | "-\b\n\f !#-'*+0-9A-Z\\^`-~\d") | |
845 | (subst-char-in-region (point-min) (point-max) ? ?_) | |
846 | (buffer-string))) | |
847 | ||
848 | (defun rfc2047-encode-parameter (param value) | |
849 | "Return and PARAM=VALUE string encoded in the RFC2047-like style. | |
850 | This is a replacement for the `rfc2231-encode-string' function. | |
851 | ||
852 | When attaching files as MIME parts, we should use the RFC2231 encoding | |
853 | to specify the file names containing non-ASCII characters. However, | |
854 | many mail softwares don't support it in practice and recipients won't | |
855 | be able to extract files with correct names. Instead, the RFC2047-like | |
856 | encoding is acceptable generally. This function provides the very | |
857 | RFC2047-like encoding, resigning to such a regrettable trend. To use | |
858 | it, put the following line in your ~/.gnus.el file: | |
859 | ||
860 | \(defalias 'mail-header-encode-parameter 'rfc2047-encode-parameter) | |
861 | " | |
82fe1aed MB |
862 | (let ((rfc2047-encoding-type 'mime) |
863 | (rfc2047-encode-max-chars nil)) | |
864 | (rfc2045-encode-string param (rfc2047-encode-string value)))) | |
c113de23 GM |
865 | |
866 | ;;; | |
867 | ;;; Functions for decoding RFC2047 messages | |
868 | ;;; | |
869 | ||
270a576a MB |
870 | (defvar rfc2047-quote-decoded-words-containing-tspecials nil |
871 | "If non-nil, quote decoded words containing special characters.") | |
872 | ||
292f71fe MB |
873 | (defvar rfc2047-allow-incomplete-encoded-text t |
874 | "*Non-nil means allow incomplete encoded-text in successive encoded-words. | |
875 | Dividing of encoded-text in the place other than character boundaries | |
876 | violates RFC2047 section 5, while we have a capability to decode it. | |
877 | If it is non-nil, the decoder will decode B- or Q-encoding in each | |
878 | encoded-word, concatenate them, and decode it by charset. Otherwise, | |
879 | the decoder will fully decode each encoded-word before concatenating | |
880 | them.") | |
881 | ||
343d6628 | 882 | (defun rfc2047-strip-backslashes-in-quoted-strings () |
d7093904 | 883 | "Strip backslashes in quoted strings. `\\\"' remains." |
343d6628 MB |
884 | (goto-char (point-min)) |
885 | (let (beg) | |
886 | (with-syntax-table (standard-syntax-table) | |
887 | (while (search-forward "\"" nil t) | |
888 | (unless (eq (char-before) ?\\) | |
889 | (setq beg (match-end 0)) | |
890 | (goto-char (match-beginning 0)) | |
891 | (condition-case nil | |
892 | (progn | |
893 | (forward-sexp) | |
894 | (save-restriction | |
895 | (narrow-to-region beg (1- (point))) | |
896 | (goto-char beg) | |
897 | (while (search-forward "\\" nil 'move) | |
d7093904 | 898 | (unless (memq (char-after) '(?\")) |
343d6628 MB |
899 | (delete-backward-char 1)) |
900 | (forward-char))) | |
901 | (forward-char)) | |
902 | (error | |
903 | (goto-char beg)))))))) | |
904 | ||
b6b8f5fd | 905 | (defun rfc2047-charset-to-coding-system (charset &optional allow-override) |
292f71fe | 906 | "Return coding-system corresponding to MIME CHARSET. |
b6b8f5fd KY |
907 | If your Emacs implementation can't decode CHARSET, return nil. |
908 | ||
909 | If allow-override is given, use `mm-charset-override-alist' to | |
910 | map undesired charset names to their replacement. This should | |
911 | only be used for decoding, not for encoding." | |
292f71fe MB |
912 | (when (stringp charset) |
913 | (setq charset (intern (downcase charset)))) | |
914 | (when (or (not charset) | |
915 | (eq 'gnus-all mail-parse-ignored-charsets) | |
916 | (memq 'gnus-all mail-parse-ignored-charsets) | |
917 | (memq charset mail-parse-ignored-charsets)) | |
918 | (setq charset mail-parse-charset)) | |
b6b8f5fd | 919 | (let ((cs (mm-charset-to-coding-system charset nil allow-override))) |
292f71fe MB |
920 | (cond ((eq cs 'ascii) |
921 | (setq cs (or (mm-charset-to-coding-system mail-parse-charset) | |
922 | 'raw-text))) | |
e13be7a2 | 923 | ((mm-coding-system-p cs)) |
292f71fe MB |
924 | ((and charset |
925 | (listp mail-parse-ignored-charsets) | |
926 | (memq 'gnus-unknown mail-parse-ignored-charsets)) | |
927 | (setq cs (mm-charset-to-coding-system mail-parse-charset)))) | |
928 | (if (eq cs 'ascii) | |
929 | 'raw-text | |
930 | cs))) | |
931 | ||
932 | (defun rfc2047-decode-encoded-words (words) | |
933 | "Decode successive encoded-words in WORDS and return a decoded string. | |
934 | Each element of WORDS looks like (CHARSET ENCODING ENCODED-TEXT | |
935 | ENCODED-WORD)." | |
936 | (let (word charset cs encoding text rest) | |
937 | (while words | |
938 | (setq word (pop words)) | |
14e6dc54 | 939 | (if (and (setq cs (rfc2047-charset-to-coding-system |
b6b8f5fd | 940 | (setq charset (car word)) t)) |
292f71fe MB |
941 | (condition-case code |
942 | (cond ((char-equal ?B (nth 1 word)) | |
943 | (setq text (base64-decode-string | |
944 | (rfc2047-pad-base64 (nth 2 word))))) | |
945 | ((char-equal ?Q (nth 1 word)) | |
946 | (setq text (quoted-printable-decode-string | |
947 | (mm-subst-char-in-string | |
948 | ?_ ? (nth 2 word) t))))) | |
949 | (error | |
950 | (message "%s" (error-message-string code)) | |
951 | nil))) | |
952 | (if (and rfc2047-allow-incomplete-encoded-text | |
953 | (eq cs (caar rest))) | |
954 | ;; Concatenate text of which the charset is the same. | |
955 | (setcdr (car rest) (concat (cdar rest) text)) | |
956 | (push (cons cs text) rest)) | |
957 | ;; Don't decode encoded-word. | |
958 | (push (cons nil (nth 3 word)) rest))) | |
959 | (while rest | |
960 | (setq words (concat | |
961 | (or (and (setq cs (caar rest)) | |
962 | (condition-case code | |
963 | (mm-decode-coding-string (cdar rest) cs) | |
964 | (error | |
965 | (message "%s" (error-message-string code)) | |
966 | nil))) | |
967 | (concat (when (cdr rest) " ") | |
968 | (cdar rest) | |
969 | (when (and words | |
970 | (not (eq (string-to-char words) ? ))) | |
971 | " "))) | |
972 | words) | |
973 | rest (cdr rest))) | |
974 | words)) | |
975 | ||
23f87bed MB |
976 | ;; Fixme: This should decode in place, not cons intermediate strings. |
977 | ;; Also check whether it needs to worry about delimiting fields like | |
978 | ;; encoding. | |
979 | ||
980 | ;; In fact it's reported that (invalid) encoding of mailboxes in | |
981 | ;; addr-specs is in use, so delimiting fields might help. Probably | |
982 | ;; not decoding a word which isn't properly delimited is good enough | |
983 | ;; and worthwhile (is it more correct or not?), e.g. something like | |
984 | ;; `=?iso-8859-1?q?foo?=@'. | |
c113de23 | 985 | |
343d6628 MB |
986 | (defun rfc2047-decode-region (start end &optional address-mime) |
987 | "Decode MIME-encoded words in region between START and END. | |
988 | If ADDRESS-MIME is non-nil, strip backslashes which precede characters | |
989 | other than `\"' and `\\' in quoted strings." | |
c113de23 GM |
990 | (interactive "r") |
991 | (let ((case-fold-search t) | |
b890d447 MB |
992 | (eword-regexp |
993 | (if rfc2047-allow-irregular-q-encoded-words | |
994 | (eval-when-compile | |
995 | (concat "[\n\t ]*\\(" rfc2047-encoded-word-regexp-loose "\\)")) | |
996 | (eval-when-compile | |
997 | (concat "[\n\t ]*\\(" rfc2047-encoded-word-regexp "\\)")))) | |
292f71fe | 998 | b e match words) |
652dbc07 DL |
999 | (save-excursion |
1000 | (save-restriction | |
1001 | (narrow-to-region start end) | |
343d6628 MB |
1002 | (when address-mime |
1003 | (rfc2047-strip-backslashes-in-quoted-strings)) | |
292f71fe MB |
1004 | (goto-char (setq b start)) |
1005 | ;; Look for the encoded-words. | |
1006 | (while (setq match (re-search-forward eword-regexp nil t)) | |
1007 | (setq e (match-beginning 1) | |
1008 | end (match-end 0) | |
1009 | words nil) | |
1010 | (while match | |
1011 | (push (list (match-string 2) ;; charset | |
01c52d31 | 1012 | (char-after (match-beginning 3)) ;; encoding |
b890d447 | 1013 | (substring (match-string 3) 2) ;; encoded-text |
292f71fe MB |
1014 | (match-string 1)) ;; encoded-word |
1015 | words) | |
1016 | ;; Look for the subsequent encoded-words. | |
1017 | (when (setq match (looking-at eword-regexp)) | |
1018 | (goto-char (setq end (match-end 0))))) | |
1019 | ;; Replace the encoded-words with the decoded one. | |
1020 | (delete-region e end) | |
1021 | (insert (rfc2047-decode-encoded-words (nreverse words))) | |
23f87bed MB |
1022 | (save-restriction |
1023 | (narrow-to-region e (point)) | |
1024 | (goto-char e) | |
270a576a MB |
1025 | ;; Remove newlines between decoded words, though such |
1026 | ;; things essentially must not be there. | |
23f87bed MB |
1027 | (while (re-search-forward "[\n\r]+" nil t) |
1028 | (replace-match " ")) | |
270a576a MB |
1029 | ;; Quote decoded words if there are special characters |
1030 | ;; which might violate RFC2822. | |
1031 | (when (and rfc2047-quote-decoded-words-containing-tspecials | |
1032 | (let ((regexp (car (rassq | |
1033 | 'address-mime | |
1034 | rfc2047-header-encoding-alist)))) | |
1035 | (when regexp | |
1036 | (save-restriction | |
1037 | (widen) | |
1038 | (beginning-of-line) | |
1039 | (while (and (memq (char-after) '(? ?\t)) | |
1040 | (zerop (forward-line -1)))) | |
1041 | (looking-at regexp))))) | |
1042 | (let (quoted) | |
1043 | (goto-char e) | |
1044 | (skip-chars-forward " \t") | |
1045 | (setq start (point)) | |
1046 | (setq quoted (eq (char-after) ?\")) | |
1047 | (goto-char (point-max)) | |
1dbc941a | 1048 | (skip-chars-backward " \t" start) |
270a576a MB |
1049 | (if (setq quoted (and quoted |
1050 | (> (point) (1+ start)) | |
1051 | (eq (char-before) ?\"))) | |
1052 | (progn | |
1053 | (backward-char) | |
1054 | (setq start (1+ start) | |
1055 | end (point-marker))) | |
1056 | (setq end (point-marker))) | |
1057 | (goto-char start) | |
1058 | (while (search-forward "\"" end t) | |
1059 | (when (prog2 | |
1060 | (backward-char) | |
1061 | (zerop (% (skip-chars-backward "\\\\") 2)) | |
1062 | (goto-char (match-beginning 0))) | |
1063 | (insert "\\")) | |
1064 | (forward-char)) | |
1065 | (when (and (not quoted) | |
1066 | (progn | |
1067 | (goto-char start) | |
1068 | (re-search-forward | |
1069 | (concat "[" ietf-drums-tspecials "]") | |
1070 | end t))) | |
1071 | (goto-char start) | |
1072 | (insert "\"") | |
1073 | (goto-char end) | |
1074 | (insert "\"")) | |
1075 | (set-marker end nil))) | |
23f87bed | 1076 | (goto-char (point-max))) |
652dbc07 DL |
1077 | (when (and (mm-multibyte-p) |
1078 | mail-parse-charset | |
23f87bed | 1079 | (not (eq mail-parse-charset 'us-ascii)) |
652dbc07 DL |
1080 | (not (eq mail-parse-charset 'gnus-decoded))) |
1081 | (mm-decode-coding-region b e mail-parse-charset)) | |
1082 | (setq b (point))) | |
1083 | (when (and (mm-multibyte-p) | |
1084 | mail-parse-charset | |
1085 | (not (eq mail-parse-charset 'us-ascii)) | |
1086 | (not (eq mail-parse-charset 'gnus-decoded))) | |
23f87bed | 1087 | (mm-decode-coding-region b (point-max) mail-parse-charset)))))) |
c113de23 | 1088 | |
343d6628 MB |
1089 | (defun rfc2047-decode-address-region (start end) |
1090 | "Decode MIME-encoded words in region between START and END. | |
1091 | Backslashes which precede characters other than `\"' and `\\' in quoted | |
1092 | strings are stripped." | |
1093 | (rfc2047-decode-region start end t)) | |
1094 | ||
1095 | (defun rfc2047-decode-string (string &optional address-mime) | |
1096 | "Decode MIME-encoded STRING and return the result. | |
1097 | If ADDRESS-MIME is non-nil, strip backslashes which precede characters | |
1098 | other than `\"' and `\\' in quoted strings." | |
f15a9fec | 1099 | ;; (let ((m (mm-multibyte-p))) |
23f87bed MB |
1100 | (if (string-match "=\\?" string) |
1101 | (with-temp-buffer | |
f15a9fec SM |
1102 | ;; We used to only call mm-enable-multibyte if `m' is non-nil, |
1103 | ;; but this can't be the right criterion. Don't just revert this | |
1104 | ;; change if it encounters a bug. Please help me fix it | |
1105 | ;; right instead. --Stef | |
1106 | ;; The string returned should always be multibyte in a multibyte | |
1107 | ;; session, i.e. the buffer should be multibyte before | |
1108 | ;; `buffer-string' is called. | |
1109 | (mm-enable-multibyte) | |
23f87bed MB |
1110 | (insert string) |
1111 | (inline | |
343d6628 | 1112 | (rfc2047-decode-region (point-min) (point-max) address-mime)) |
23f87bed | 1113 | (buffer-string)) |
343d6628 MB |
1114 | (when address-mime |
1115 | (setq string | |
1116 | (with-temp-buffer | |
1117 | (when (mm-multibyte-string-p string) | |
1118 | (mm-enable-multibyte)) | |
1119 | (insert string) | |
1120 | (rfc2047-strip-backslashes-in-quoted-strings) | |
1121 | (buffer-string)))) | |
23f87bed | 1122 | ;; Fixme: As above, `m' here is inappropriate. |
f15a9fec | 1123 | (if (and ;; m |
23f87bed MB |
1124 | mail-parse-charset |
1125 | (not (eq mail-parse-charset 'us-ascii)) | |
1126 | (not (eq mail-parse-charset 'gnus-decoded))) | |
10ace8ea MB |
1127 | ;; `decode-coding-string' in Emacs offers a third optional |
1128 | ;; arg NOCOPY to avoid consing a new string if the decoding | |
1129 | ;; is "trivial". Unfortunately it currently doesn't | |
1130 | ;; consider anything else than a `nil' coding system | |
1131 | ;; trivial. | |
1132 | ;; `rfc2047-decode-string' is called multiple times for each | |
1133 | ;; article during summary buffer generation, and we really | |
1134 | ;; want to avoid unnecessary consing. So we bypass | |
1135 | ;; `decode-coding-string' if the string is purely ASCII. | |
1136 | (if (and (fboundp 'detect-coding-string) | |
1137 | ;; string is purely ASCII | |
1138 | (eq (detect-coding-string string t) 'undecided)) | |
f15a9fec SM |
1139 | string |
1140 | (mm-decode-coding-string string mail-parse-charset)) | |
1141 | (mm-string-to-multibyte string)))) ;; ) | |
c113de23 | 1142 | |
343d6628 MB |
1143 | (defun rfc2047-decode-address-string (string) |
1144 | "Decode MIME-encoded STRING and return the result. | |
1145 | Backslashes which precede characters other than `\"' and `\\' in quoted | |
1146 | strings are stripped." | |
1147 | (rfc2047-decode-string string t)) | |
1148 | ||
23f87bed MB |
1149 | (defun rfc2047-pad-base64 (string) |
1150 | "Pad STRING to quartets." | |
1151 | ;; Be more liberal to accept buggy base64 strings. If | |
1152 | ;; base64-decode-string accepts buggy strings, this function could | |
1153 | ;; be aliased to identity. | |
f4dd4ae8 MB |
1154 | (if (= 0 (mod (length string) 4)) |
1155 | string | |
1156 | (when (string-match "=+$" string) | |
1157 | (setq string (substring string 0 (match-beginning 0)))) | |
1158 | (case (mod (length string) 4) | |
1159 | (0 string) | |
1160 | (1 string) ;; Error, don't pad it. | |
1161 | (2 (concat string "==")) | |
1162 | (3 (concat string "="))))) | |
652dbc07 | 1163 | |
c113de23 GM |
1164 | (provide 'rfc2047) |
1165 | ||
f15a9fec | 1166 | ;; arch-tag: a07fe3d4-22b5-4c4a-bd89-b1f82d5d36f6 |
c113de23 | 1167 | ;;; rfc2047.el ends here |