Commit | Line | Data |
---|---|---|
715a2ca2 | 1 | ;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages |
270a576a | 2 | |
ab422c4d | 3 | ;; Copyright (C) 1998-2013 Free Software Foundation, Inc. |
c113de23 GM |
4 | |
5 | ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | |
6 | ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> | |
7 | ;; This file is part of GNU Emacs. | |
8 | ||
5e809f55 | 9 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
c113de23 | 10 | ;; it under the terms of the GNU General Public License as published by |
5e809f55 GM |
11 | ;; the Free Software Foundation, either version 3 of the License, or |
12 | ;; (at your option) any later version. | |
c113de23 GM |
13 | |
14 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
15 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
5e809f55 | 16 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
c113de23 GM |
17 | ;; GNU General Public License for more details. |
18 | ||
19 | ;; You should have received a copy of the GNU General Public License | |
5e809f55 | 20 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
c113de23 GM |
21 | |
22 | ;;; Commentary: | |
23 | ||
d49a4975 DL |
24 | ;; RFC 2047 is "MIME (Multipurpose Internet Mail Extensions) Part |
25 | ;; Three: Message Header Extensions for Non-ASCII Text". | |
26 | ||
c113de23 GM |
27 | ;;; Code: |
28 | ||
7f0321ff | 29 | (eval-when-compile |
9efa445f DN |
30 | (require 'cl)) |
31 | (defvar message-posting-charset) | |
c113de23 | 32 | |
c113de23 | 33 | (require 'mm-util) |
270a576a | 34 | (require 'ietf-drums) |
7f0321ff | 35 | ;; Fixme: Avoid this (used for mail-parse-charset) mm dependence on gnus. |
c113de23 | 36 | (require 'mail-prsvr) |
82fe1aed | 37 | (require 'rfc2045) ;; rfc2045-encode-string |
f2307f18 | 38 | (autoload 'mm-body-7-or-8 "mm-bodies") |
1c33719f | 39 | |
c113de23 | 40 | (defvar rfc2047-header-encoding-alist |
23f87bed MB |
41 | '(("Newsgroups" . nil) |
42 | ("Followup-To" . nil) | |
c113de23 | 43 | ("Message-ID" . nil) |
ad136a7c | 44 | ("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|\\(In-\\)?Reply-To\\|Sender\ |
23f87bed | 45 | \\|Mail-Followup-To\\|Mail-Copies-To\\|Approved\\)" . address-mime) |
c113de23 GM |
46 | (t . mime)) |
47 | "*Header/encoding method alist. | |
48 | The list is traversed sequentially. The keys can either be | |
f2307f18 | 49 | header regexps or t. |
c113de23 GM |
50 | |
51 | The values can be: | |
52 | ||
53 | 1) nil, in which case no encoding is done; | |
54 | 2) `mime', in which case the header will be encoded according to RFC2047; | |
7f0321ff DL |
55 | 3) `address-mime', like `mime', but takes account of the rules for address |
56 | fields (where quoted strings and comments must be treated separately); | |
57 | 4) a charset, in which case it will be encoded as that charset; | |
58 | 5) `default', in which case the field will be encoded as the rest | |
c113de23 GM |
59 | of the article.") |
60 | ||
61 | (defvar rfc2047-charset-encoding-alist | |
62 | '((us-ascii . nil) | |
63 | (iso-8859-1 . Q) | |
64 | (iso-8859-2 . Q) | |
65 | (iso-8859-3 . Q) | |
66 | (iso-8859-4 . Q) | |
67 | (iso-8859-5 . B) | |
68 | (koi8-r . B) | |
7f0321ff DL |
69 | (iso-8859-7 . B) |
70 | (iso-8859-8 . B) | |
c113de23 | 71 | (iso-8859-9 . Q) |
f2307f18 DL |
72 | (iso-8859-14 . Q) |
73 | (iso-8859-15 . Q) | |
c113de23 GM |
74 | (iso-2022-jp . B) |
75 | (iso-2022-kr . B) | |
76 | (gb2312 . B) | |
650c9e21 KH |
77 | (gbk . B) |
78 | (gb18030 . B) | |
676a7cc9 SZ |
79 | (big5 . B) |
80 | (cn-big5 . B) | |
c113de23 GM |
81 | (cn-gb . B) |
82 | (cn-gb-2312 . B) | |
83 | (euc-kr . B) | |
84 | (iso-2022-jp-2 . B) | |
23f87bed MB |
85 | (iso-2022-int-1 . B) |
86 | (viscii . Q)) | |
c113de23 | 87 | "Alist of MIME charsets to RFC2047 encodings. |
7f0321ff DL |
88 | Valid encodings are nil, `Q' and `B'. These indicate binary (no) encoding, |
89 | quoted-printable and base64 respectively.") | |
c113de23 | 90 | |
10ace8ea MB |
91 | (defvar rfc2047-encode-function-alist |
92 | '((Q . rfc2047-q-encode-string) | |
93 | (B . rfc2047-b-encode-string) | |
94 | (nil . identity)) | |
c113de23 GM |
95 | "Alist of RFC2047 encodings to encoding functions.") |
96 | ||
10ace8ea MB |
97 | (defvar rfc2047-encode-encoded-words t |
98 | "Whether encoded words should be encoded again.") | |
99 | ||
b890d447 MB |
100 | (defvar rfc2047-allow-irregular-q-encoded-words t |
101 | "*Whether to decode irregular Q-encoded words.") | |
102 | ||
103 | (eval-and-compile ;; Necessary to hard code them in `rfc2047-decode-region'. | |
104 | (defconst rfc2047-encoded-word-regexp | |
105 | "=\\?\\([^][\000-\040()<>@,\;:*\\\"/?.=]+\\)\\(?:\\*[^?]+\\)?\\?\ | |
106 | \\(B\\?[+/0-9A-Za-z]*=*\ | |
107 | \\|Q\\?[ ->@-~]*\ | |
108 | \\)\\?=" | |
109 | "Regexp that matches encoded word." | |
110 | ;; The patterns for the B encoding and the Q encoding, i.e. the ones | |
111 | ;; beginning with "B" and "Q" respectively, are restricted into only | |
112 | ;; the characters that those encodings may generally use. | |
113 | ) | |
114 | (defconst rfc2047-encoded-word-regexp-loose | |
115 | "=\\?\\([^][\000-\040()<>@,\;:*\\\"/?.=]+\\)\\(?:\\*[^?]+\\)?\\?\ | |
116 | \\(B\\?[+/0-9A-Za-z]*=*\ | |
117 | \\|Q\\?\\(?:\\?+[ -<>@-~]\\)?\\(?:[ ->@-~]+\\?+[ -<>@-~]\\)*[ ->@-~]*\\?*\ | |
118 | \\)\\?=" | |
119 | "Regexp that matches encoded word allowing loose Q encoding." | |
120 | ;; The pattern for the Q encoding, i.e. the one beginning with "Q", | |
121 | ;; is similar to: | |
122 | ;; "Q\\?\\(\\?+[^\n=?]\\)?\\([^\n?]+\\?+[^\n=?]\\)*[^\n?]*\\?*" | |
123 | ;; <--------1-------><----------2,3----------><--4--><-5-> | |
124 | ;; They mean: | |
125 | ;; 1. After "Q?", allow "?"s that follow a character other than "=". | |
126 | ;; 2. Allow "=" after "Q?"; it isn't regarded as the terminator. | |
127 | ;; 3. In the middle of an encoded word, allow "?"s that follow a | |
128 | ;; character other than "=". | |
129 | ;; 4. Allow any characters other than "?" in the middle of an | |
130 | ;; encoded word. | |
131 | ;; 5. At the end, allow "?"s. | |
132 | )) | |
133 | ||
c113de23 GM |
134 | ;;; |
135 | ;;; Functions for encoding RFC2047 messages | |
136 | ;;; | |
137 | ||
f4dd4ae8 MB |
138 | (defun rfc2047-qp-or-base64 () |
139 | "Return the type with which to encode the buffer. | |
140 | This is either `base64' or `quoted-printable'." | |
141 | (save-excursion | |
142 | (let ((limit (min (point-max) (+ 2000 (point-min)))) | |
143 | (n8bit 0)) | |
144 | (goto-char (point-min)) | |
145 | (skip-chars-forward "\x20-\x7f\r\n\t" limit) | |
146 | (while (< (point) limit) | |
147 | (incf n8bit) | |
148 | (forward-char 1) | |
149 | (skip-chars-forward "\x20-\x7f\r\n\t" limit)) | |
150 | (if (or (< (* 6 n8bit) (- limit (point-min))) | |
151 | ;; Don't base64, say, a short line with a single | |
152 | ;; non-ASCII char when splitting parts by charset. | |
153 | (= n8bit 1)) | |
154 | 'quoted-printable | |
155 | 'base64)))) | |
156 | ||
c113de23 GM |
157 | (defun rfc2047-narrow-to-field () |
158 | "Narrow the buffer to the header on the current line." | |
159 | (beginning-of-line) | |
160 | (narrow-to-region | |
161 | (point) | |
162 | (progn | |
163 | (forward-line 1) | |
164 | (if (re-search-forward "^[^ \n\t]" nil t) | |
01c52d31 | 165 | (point-at-bol) |
c113de23 GM |
166 | (point-max)))) |
167 | (goto-char (point-min))) | |
168 | ||
23f87bed MB |
169 | (defun rfc2047-field-value () |
170 | "Return the value of the field at point." | |
171 | (save-excursion | |
172 | (save-restriction | |
173 | (rfc2047-narrow-to-field) | |
174 | (re-search-forward ":[ \t\n]*" nil t) | |
10ace8ea | 175 | (buffer-substring-no-properties (point) (point-max))))) |
23f87bed | 176 | |
343d6628 MB |
177 | (defun rfc2047-quote-special-characters-in-quoted-strings (&optional |
178 | encodable-regexp) | |
179 | "Quote special characters with `\\'s in quoted strings. | |
180 | Quoting will not be done in a quoted string if it contains characters | |
01c52d31 | 181 | matching ENCODABLE-REGEXP or it is within parentheses." |
343d6628 MB |
182 | (goto-char (point-min)) |
183 | (let ((tspecials (concat "[" ietf-drums-tspecials "]")) | |
01c52d31 | 184 | (start (point)) |
b110774a | 185 | beg end) |
343d6628 | 186 | (with-syntax-table (standard-syntax-table) |
01c52d31 MB |
187 | (while (not (eobp)) |
188 | (if (ignore-errors | |
189 | (forward-list 1) | |
190 | (eq (char-before) ?\))) | |
191 | (forward-list -1) | |
192 | (goto-char (point-max))) | |
193 | (save-restriction | |
194 | (narrow-to-region start (point)) | |
195 | (goto-char start) | |
196 | (while (search-forward "\"" nil t) | |
197 | (setq beg (match-beginning 0)) | |
198 | (unless (eq (char-before beg) ?\\) | |
199 | (goto-char beg) | |
200 | (setq beg (1+ beg)) | |
201 | (condition-case nil | |
202 | (progn | |
203 | (forward-sexp) | |
204 | (setq end (1- (point))) | |
205 | (goto-char beg) | |
206 | (if (and encodable-regexp | |
207 | (re-search-forward encodable-regexp end t)) | |
208 | (goto-char (1+ end)) | |
209 | (save-restriction | |
210 | (narrow-to-region beg end) | |
211 | (while (re-search-forward tspecials nil 'move) | |
212 | (if (eq (char-before) ?\\) | |
213 | (if (looking-at tspecials) ;; Already quoted. | |
214 | (forward-char) | |
215 | (insert "\\")) | |
216 | (goto-char (match-beginning 0)) | |
217 | (insert "\\") | |
218 | (forward-char)))) | |
219 | (forward-char))) | |
220 | (error | |
221 | (goto-char beg))))) | |
222 | (goto-char (point-max))) | |
223 | (forward-list 1) | |
224 | (setq start (point)))))) | |
343d6628 | 225 | |
7f0321ff DL |
226 | (defvar rfc2047-encoding-type 'address-mime |
227 | "The type of encoding done by `rfc2047-encode-region'. | |
228 | This should be dynamically bound around calls to | |
229 | `rfc2047-encode-region' to either `mime' or `address-mime'. See | |
230 | `rfc2047-header-encoding-alist', for definitions.") | |
231 | ||
c113de23 GM |
232 | (defun rfc2047-encode-message-header () |
233 | "Encode the message header according to `rfc2047-header-encoding-alist'. | |
234 | Should be called narrowed to the head of the message." | |
235 | (interactive "*") | |
236 | (save-excursion | |
237 | (goto-char (point-min)) | |
238 | (let (alist elem method) | |
239 | (while (not (eobp)) | |
240 | (save-restriction | |
241 | (rfc2047-narrow-to-field) | |
343d6628 MB |
242 | (setq method nil |
243 | alist rfc2047-header-encoding-alist) | |
244 | (while (setq elem (pop alist)) | |
245 | (when (or (and (stringp (car elem)) | |
246 | (looking-at (car elem))) | |
247 | (eq (car elem) t)) | |
248 | (setq alist nil | |
249 | method (cdr elem)))) | |
c113de23 | 250 | (if (not (rfc2047-encodable-p)) |
343d6628 MB |
251 | (prog2 |
252 | (when (eq method 'address-mime) | |
253 | (rfc2047-quote-special-characters-in-quoted-strings)) | |
10ace8ea MB |
254 | (if (and (eq (mm-body-7-or-8) '8bit) |
255 | (mm-multibyte-p) | |
256 | (mm-coding-system-p | |
257 | (car message-posting-charset))) | |
258 | ;; 8 bit must be decoded. | |
259 | (mm-encode-coding-region | |
260 | (point-min) (point-max) | |
261 | (mm-charset-to-coding-system | |
262 | (car message-posting-charset)))) | |
c6e26ce2 | 263 | ;; No encoding necessary, but folding is nice |
10ace8ea MB |
264 | (when nil |
265 | (rfc2047-fold-region | |
266 | (save-excursion | |
267 | (goto-char (point-min)) | |
268 | (skip-chars-forward "^:") | |
269 | (when (looking-at ": ") | |
270 | (forward-char 2)) | |
271 | (point)) | |
272 | (point-max)))) | |
c113de23 | 273 | ;; We found something that may perhaps be encoded. |
7f0321ff | 274 | (re-search-forward "^[^:]+: *" nil t) |
c113de23 | 275 | (cond |
7f0321ff DL |
276 | ((eq method 'address-mime) |
277 | (rfc2047-encode-region (point) (point-max))) | |
c113de23 | 278 | ((eq method 'mime) |
23f87bed | 279 | (let ((rfc2047-encoding-type 'mime)) |
7f0321ff | 280 | (rfc2047-encode-region (point) (point-max)))) |
c113de23 GM |
281 | ((eq method 'default) |
282 | (if (and (featurep 'mule) | |
2e62b574 GM |
283 | (if (boundp 'enable-multibyte-characters) |
284 | (default-value 'enable-multibyte-characters)) | |
c113de23 | 285 | mail-parse-charset) |
7f0321ff | 286 | (mm-encode-coding-region (point) (point-max) |
c113de23 | 287 | mail-parse-charset))) |
c7015153 | 288 | ;; We get this when CC'ing messages to newsgroups with |
23f87bed MB |
289 | ;; 8-bit names. The group name mail copy just got |
290 | ;; unconditionally encoded. Previously, it would ask | |
291 | ;; whether to encode, which was quite confusing for the | |
c7015153 | 292 | ;; user. If the new behavior is wrong, tell me. I have |
23f87bed MB |
293 | ;; left the old code commented out below. |
294 | ;; -- Per Abrahamsen <abraham@dina.kvl.dk> Date: 2001-10-07. | |
295 | ;; Modified by Dave Love, with the commented-out code changed | |
296 | ;; in accordance with changes elsewhere. | |
297 | ((null method) | |
298 | (rfc2047-encode-region (point) (point-max))) | |
299 | ;;; ((null method) | |
300 | ;;; (if (or (message-options-get | |
301 | ;;; 'rfc2047-encode-message-header-encode-any) | |
302 | ;;; (message-options-set | |
303 | ;;; 'rfc2047-encode-message-header-encode-any | |
304 | ;;; (y-or-n-p | |
305 | ;;; "Some texts are not encoded. Encode anyway?"))) | |
306 | ;;; (rfc2047-encode-region (point-min) (point-max)) | |
307 | ;;; (error "Cannot send unencoded text"))) | |
c113de23 | 308 | ((mm-coding-system-p method) |
01c52d31 | 309 | (if (or (and (featurep 'mule) |
2e62b574 GM |
310 | (if (boundp 'enable-multibyte-characters) |
311 | (default-value 'enable-multibyte-characters))) | |
01c52d31 | 312 | (featurep 'file-coding)) |
7f0321ff | 313 | (mm-encode-coding-region (point) (point-max) method))) |
c113de23 GM |
314 | ;; Hm. |
315 | (t))) | |
316 | (goto-char (point-max))))))) | |
317 | ||
a553a9f5 DL |
318 | ;; Fixme: This, and the require below may not be the Right Thing, but |
319 | ;; should be safe just before release. -- fx 2001-02-08 | |
a553a9f5 | 320 | |
f2307f18 DL |
321 | (defun rfc2047-encodable-p () |
322 | "Return non-nil if any characters in current buffer need encoding in headers. | |
323 | The buffer may be narrowed." | |
a553a9f5 | 324 | (require 'message) ; for message-posting-charset |
c113de23 | 325 | (let ((charsets |
7f0321ff | 326 | (mm-find-mime-charset-region (point-min) (point-max)))) |
10ace8ea MB |
327 | (goto-char (point-min)) |
328 | (or (and rfc2047-encode-encoded-words | |
329 | (prog1 | |
b890d447 | 330 | (re-search-forward rfc2047-encoded-word-regexp nil t) |
10ace8ea MB |
331 | (goto-char (point-min)))) |
332 | (and charsets | |
333 | (not (equal charsets (list (car message-posting-charset)))))))) | |
7f0321ff DL |
334 | |
335 | ;; Use this syntax table when parsing into regions that may need | |
336 | ;; encoding. Double quotes are string delimiters, backslash is | |
337 | ;; character quoting, and all other RFC 2822 special characters are | |
338 | ;; treated as punctuation so we can use forward-sexp/forward-word to | |
339 | ;; skip to the end of regions appropriately. Nb. ietf-drums does | |
340 | ;; things differently. | |
341 | (defconst rfc2047-syntax-table | |
23f87bed MB |
342 | ;; (make-char-table 'syntax-table '(2)) only works in Emacs. |
343 | (let ((table (make-syntax-table))) | |
0c43b6f8 KY |
344 | ;; The following is done to work for setting all elements of the table; |
345 | ;; it appears to be the cleanest way. | |
23f87bed MB |
346 | ;; Play safe and don't assume the form of the word syntax entry -- |
347 | ;; copy it from ?a. | |
3d492670 KY |
348 | (if (featurep 'xemacs) |
349 | (put-char-table t (get-char-table ?a (standard-syntax-table)) table) | |
350 | (set-char-table-range table t (aref (standard-syntax-table) ?a))) | |
7f0321ff DL |
351 | (modify-syntax-entry ?\\ "\\" table) |
352 | (modify-syntax-entry ?\" "\"" table) | |
10ace8ea MB |
353 | (modify-syntax-entry ?\( "(" table) |
354 | (modify-syntax-entry ?\) ")" table) | |
7f0321ff DL |
355 | (modify-syntax-entry ?\< "." table) |
356 | (modify-syntax-entry ?\> "." table) | |
357 | (modify-syntax-entry ?\[ "." table) | |
358 | (modify-syntax-entry ?\] "." table) | |
359 | (modify-syntax-entry ?: "." table) | |
360 | (modify-syntax-entry ?\; "." table) | |
361 | (modify-syntax-entry ?, "." table) | |
362 | (modify-syntax-entry ?@ "." table) | |
363 | table)) | |
c113de23 | 364 | |
ba775afe | 365 | (defun rfc2047-encode-region (b e &optional dont-fold) |
7f0321ff DL |
366 | "Encode words in region B to E that need encoding. |
367 | By default, the region is treated as containing RFC2822 addresses. | |
368 | Dynamically bind `rfc2047-encoding-type' to change that." | |
369 | (save-restriction | |
370 | (narrow-to-region b e) | |
10ace8ea MB |
371 | (let ((encodable-regexp (if rfc2047-encode-encoded-words |
372 | "[^\000-\177]+\\|=\\?" | |
373 | "[^\000-\177]+")) | |
374 | start ; start of current token | |
375 | end begin csyntax | |
376 | ;; Whether there's an encoded word before the current token, | |
377 | ;; either immediately or separated by space. | |
378 | last-encoded | |
379 | (orig-text (buffer-substring-no-properties b e))) | |
380 | (if (eq 'mime rfc2047-encoding-type) | |
381 | ;; Simple case. Continuous words in which all those contain | |
382 | ;; non-ASCII characters are encoded collectively. Encoding | |
383 | ;; ASCII words, including `Re:' used in Subject headers, is | |
384 | ;; avoided for interoperability with non-MIME clients and | |
385 | ;; for making it easy to find keywords. | |
386 | (progn | |
387 | (goto-char (point-min)) | |
388 | (while (progn (skip-chars-forward " \t\n") | |
389 | (not (eobp))) | |
390 | (setq start (point)) | |
391 | (while (and (looking-at "[ \t\n]*\\([^ \t\n]+\\)") | |
392 | (progn | |
393 | (setq end (match-end 0)) | |
394 | (re-search-forward encodable-regexp end t))) | |
395 | (goto-char end)) | |
396 | (if (> (point) start) | |
397 | (rfc2047-encode start (point)) | |
398 | (goto-char end)))) | |
399 | ;; `address-mime' case -- take care of quoted words, comments. | |
343d6628 | 400 | (rfc2047-quote-special-characters-in-quoted-strings encodable-regexp) |
10ace8ea | 401 | (with-syntax-table rfc2047-syntax-table |
7f0321ff | 402 | (goto-char (point-min)) |
10ace8ea | 403 | (condition-case err ; in case of unbalanced quotes |
7f0321ff DL |
404 | ;; Look for rfc2822-style: sequences of atoms, quoted |
405 | ;; strings, specials, whitespace. (Specials mustn't be | |
406 | ;; encoded.) | |
407 | (while (not (eobp)) | |
7f0321ff | 408 | ;; Skip whitespace. |
10ace8ea MB |
409 | (skip-chars-forward " \t\n") |
410 | (setq start (point)) | |
7f0321ff DL |
411 | (cond |
412 | ((not (char-after))) ; eob | |
413 | ;; else token start | |
10ace8ea | 414 | ((eq ?\" (setq csyntax (char-syntax (char-after)))) |
7f0321ff DL |
415 | ;; Quoted word. |
416 | (forward-sexp) | |
417 | (setq end (point)) | |
418 | ;; Does it need encoding? | |
419 | (goto-char start) | |
10ace8ea MB |
420 | (if (re-search-forward encodable-regexp end 'move) |
421 | ;; It needs encoding. Strip the quotes first, | |
422 | ;; since encoded words can't occur in quotes. | |
423 | (progn | |
424 | (goto-char end) | |
d355a0b7 | 425 | (delete-char -1) |
10ace8ea MB |
426 | (goto-char start) |
427 | (delete-char 1) | |
428 | (when last-encoded | |
429 | ;; There was a preceding quoted word. We need | |
430 | ;; to include any separating whitespace in this | |
431 | ;; word to avoid it getting lost. | |
432 | (skip-chars-backward " \t") | |
433 | ;; A space is needed between the encoded words. | |
434 | (insert ? ) | |
435 | (setq start (point) | |
436 | end (1+ end))) | |
437 | ;; Adjust the end position for the deleted quotes. | |
438 | (rfc2047-encode start (- end 2)) | |
439 | (setq last-encoded t)) ; record that it was encoded | |
440 | (setq last-encoded nil))) | |
441 | ((eq ?. csyntax) | |
7f0321ff DL |
442 | ;; Skip other delimiters, but record that they've |
443 | ;; potentially separated quoted words. | |
444 | (forward-char) | |
445 | (setq last-encoded nil)) | |
10ace8ea MB |
446 | ((eq ?\) csyntax) |
447 | (error "Unbalanced parentheses")) | |
448 | ((eq ?\( csyntax) | |
449 | ;; Look for the end of parentheses. | |
450 | (forward-list) | |
451 | ;; Encode text as an unstructured field. | |
452 | (let ((rfc2047-encoding-type 'mime)) | |
453 | (rfc2047-encode-region (1+ start) (1- (point)))) | |
454 | (skip-chars-forward ")")) | |
7f0321ff DL |
455 | (t ; normal token/whitespace sequence |
456 | ;; Find the end. | |
10ace8ea MB |
457 | ;; Skip one ASCII word, or encode continuous words |
458 | ;; in which all those contain non-ASCII characters. | |
459 | (setq end nil) | |
460 | (while (not (or end (eobp))) | |
461 | (when (looking-at "[\000-\177]+") | |
462 | (setq begin (point) | |
463 | end (match-end 0)) | |
464 | (when (progn | |
465 | (while (and (or (re-search-forward | |
466 | "[ \t\n]\\|\\Sw" end 'move) | |
467 | (setq end nil)) | |
468 | (eq ?\\ (char-syntax (char-before)))) | |
469 | ;; Skip backslash-quoted characters. | |
470 | (forward-char)) | |
471 | end) | |
472 | (setq end (match-beginning 0)) | |
473 | (if rfc2047-encode-encoded-words | |
474 | (progn | |
475 | (goto-char begin) | |
476 | (when (search-forward "=?" end 'move) | |
477 | (goto-char (match-beginning 0)) | |
478 | (setq end nil))) | |
479 | (goto-char end)))) | |
480 | ;; Where the value nil of `end' means there may be | |
481 | ;; text to have to be encoded following the point. | |
482 | ;; Otherwise, the point reached to the end of ASCII | |
483 | ;; words separated by whitespace or a special char. | |
484 | (unless end | |
485 | (when (looking-at encodable-regexp) | |
486 | (goto-char (setq begin (match-end 0))) | |
487 | (while (and (looking-at "[ \t\n]+\\([^ \t\n]+\\)") | |
488 | (setq end (match-end 0)) | |
489 | (progn | |
490 | (while (re-search-forward | |
491 | encodable-regexp end t)) | |
492 | (< begin (point))) | |
493 | (goto-char begin) | |
494 | (or (not (re-search-forward "\\Sw" end t)) | |
495 | (progn | |
496 | (goto-char (match-beginning 0)) | |
497 | nil))) | |
498 | (goto-char end)) | |
499 | (when (looking-at "[^ \t\n]+") | |
500 | (setq end (match-end 0)) | |
501 | (if (re-search-forward "\\Sw+" end t) | |
502 | ;; There are special characters better | |
503 | ;; to be encoded so that MTAs may parse | |
504 | ;; them safely. | |
505 | (cond ((= end (point))) | |
506 | ((looking-at (concat "\\sw*\\(" | |
507 | encodable-regexp | |
508 | "\\)")) | |
509 | (setq end nil)) | |
510 | (t | |
511 | (goto-char (1- (match-end 0))) | |
512 | (unless (= (point) (match-beginning 0)) | |
513 | ;; Separate encodable text and | |
514 | ;; delimiter. | |
515 | (insert " ")))) | |
516 | (goto-char end) | |
517 | (skip-chars-forward " \t\n") | |
518 | (if (and (looking-at "[^ \t\n]+") | |
519 | (string-match encodable-regexp | |
520 | (match-string 0))) | |
521 | (setq end nil) | |
522 | (goto-char end))))))) | |
523 | (skip-chars-backward " \t\n") | |
7f0321ff | 524 | (setq end (point)) |
7f0321ff | 525 | (goto-char start) |
10ace8ea MB |
526 | (if (re-search-forward encodable-regexp end 'move) |
527 | (progn | |
528 | (unless (memq (char-before start) '(nil ?\t ? )) | |
529 | (if (progn | |
530 | (goto-char start) | |
531 | (skip-chars-backward "^ \t\n") | |
532 | (and (looking-at "\\Sw+") | |
533 | (= (match-end 0) start))) | |
534 | ;; Also encode bogus delimiters. | |
535 | (setq start (point)) | |
536 | ;; Separate encodable text and delimiter. | |
537 | (goto-char start) | |
538 | (insert " ") | |
539 | (setq start (1+ start) | |
540 | end (1+ end)))) | |
541 | (rfc2047-encode start end) | |
542 | (setq last-encoded t)) | |
543 | (setq last-encoded nil))))) | |
23f87bed | 544 | (error |
10ace8ea MB |
545 | (if (or debug-on-quit debug-on-error) |
546 | (signal (car err) (cdr err)) | |
547 | (error "Invalid data for rfc2047 encoding: %s" | |
548 | (mm-replace-in-string orig-text "[ \t\n]+" " ")))))))) | |
ba775afe G |
549 | (unless dont-fold |
550 | (rfc2047-fold-region b (point))) | |
10ace8ea | 551 | (goto-char (point-max)))) |
c113de23 | 552 | |
ba775afe | 553 | (defun rfc2047-encode-string (string &optional dont-fold) |
7f0321ff DL |
554 | "Encode words in STRING. |
555 | By default, the string is treated as containing addresses (see | |
23f87bed | 556 | `rfc2047-encoding-type')." |
10ace8ea | 557 | (mm-with-multibyte-buffer |
c113de23 | 558 | (insert string) |
ba775afe | 559 | (rfc2047-encode-region (point-min) (point-max) dont-fold) |
c113de23 GM |
560 | (buffer-string))) |
561 | ||
b890d447 MB |
562 | ;; From RFC 2047: |
563 | ;; 2. Syntax of encoded-words | |
564 | ;; [...] | |
565 | ;; While there is no limit to the length of a multiple-line header | |
566 | ;; field, each line of a header field that contains one or more | |
567 | ;; 'encoded-word's is limited to 76 characters. | |
568 | ;; | |
569 | ;; In `rfc2047-encode-parameter' it is bound to nil, so don't defconst it. | |
10ace8ea MB |
570 | (defvar rfc2047-encode-max-chars 76 |
571 | "Maximum characters of each header line that contain encoded-words. | |
b890d447 MB |
572 | According to RFC 2047, it is 76. If it is nil, encoded-words |
573 | will not be folded. Too small value may cause an error. You | |
574 | should not change this value.") | |
10ace8ea MB |
575 | |
576 | (defun rfc2047-encode-1 (column string cs encoder start crest tail | |
577 | &optional eword) | |
578 | "Subroutine used by `rfc2047-encode'." | |
579 | (cond ((string-equal string "") | |
580 | (or eword "")) | |
581 | ((not rfc2047-encode-max-chars) | |
582 | (concat start | |
583 | (funcall encoder (if cs | |
584 | (mm-encode-coding-string string cs) | |
585 | string)) | |
586 | "?=")) | |
587 | ((>= column rfc2047-encode-max-chars) | |
588 | (when eword | |
589 | (cond ((string-match "\n[ \t]+\\'" eword) | |
e4920bc9 | 590 | ;; Remove a superfluous empty line. |
10ace8ea MB |
591 | (setq eword (substring eword 0 (match-beginning 0)))) |
592 | ((string-match "(+\\'" eword) | |
593 | ;; Break the line before the open parenthesis. | |
594 | (setq crest (concat crest (match-string 0 eword)) | |
595 | eword (substring eword 0 (match-beginning 0)))))) | |
596 | (rfc2047-encode-1 (length crest) string cs encoder start " " tail | |
597 | (concat eword "\n" crest))) | |
598 | (t | |
599 | (let ((index 0) | |
600 | (limit (1- (length string))) | |
601 | (prev "") | |
602 | next len) | |
603 | (while (and prev | |
604 | (<= index limit)) | |
605 | (setq next (concat start | |
606 | (funcall encoder | |
607 | (if cs | |
608 | (mm-encode-coding-string | |
609 | (substring string 0 (1+ index)) | |
610 | cs) | |
611 | (substring string 0 (1+ index)))) | |
612 | "?=") | |
613 | len (+ column (length next))) | |
614 | (if (> len rfc2047-encode-max-chars) | |
615 | (setq next prev | |
616 | prev nil) | |
617 | (if (or (< index limit) | |
618 | (<= (+ len (or (string-match "\n" tail) | |
619 | (length tail))) | |
620 | rfc2047-encode-max-chars)) | |
621 | (setq prev next | |
622 | index (1+ index)) | |
623 | (if (string-match "\\`)+" tail) | |
624 | ;; Break the line after the close parenthesis. | |
625 | (setq tail (concat (substring tail 0 (match-end 0)) | |
626 | "\n " | |
627 | (substring tail (match-end 0))) | |
628 | prev next | |
629 | index (1+ index)) | |
630 | (setq next prev | |
631 | prev nil))))) | |
632 | (if (> index limit) | |
633 | (concat eword next tail) | |
634 | (if (= 0 index) | |
635 | (if (and eword | |
636 | (string-match "(+\\'" eword)) | |
637 | (setq crest (concat crest (match-string 0 eword)) | |
638 | eword (substring eword 0 (match-beginning 0))) | |
639 | (setq eword (concat eword next))) | |
640 | (setq crest " " | |
641 | eword (concat eword next))) | |
642 | (when (string-match "\n[ \t]+\\'" eword) | |
e4920bc9 | 643 | ;; Remove a superfluous empty line. |
10ace8ea MB |
644 | (setq eword (substring eword 0 (match-beginning 0)))) |
645 | (rfc2047-encode-1 (length crest) (substring string index) | |
646 | cs encoder start " " tail | |
647 | (concat eword "\n" crest))))))) | |
648 | ||
7f0321ff DL |
649 | (defun rfc2047-encode (b e) |
650 | "Encode the word(s) in the region B to E. | |
10ace8ea MB |
651 | Point moves to the end of the region." |
652 | (let ((mime-charset (or (mm-find-mime-charset-region b e) (list 'us-ascii))) | |
653 | cs encoding tail crest eword) | |
cbfb9427 KY |
654 | ;; Use utf-8 as a last resort if determining charset of text fails. |
655 | (if (memq nil mime-charset) | |
656 | (setq mime-charset (list 'utf-8))) | |
10ace8ea MB |
657 | (cond ((> (length mime-charset) 1) |
658 | (error "Can't rfc2047-encode `%s'" | |
659 | (buffer-substring-no-properties b e))) | |
660 | ((= (length mime-charset) 1) | |
661 | (setq mime-charset (car mime-charset) | |
662 | cs (mm-charset-to-coding-system mime-charset)) | |
663 | (unless (and (mm-multibyte-p) | |
664 | (mm-coding-system-p cs)) | |
665 | (setq cs nil)) | |
666 | (save-restriction | |
667 | (narrow-to-region b e) | |
668 | (setq encoding | |
669 | (or (cdr (assq mime-charset | |
c113de23 | 670 | rfc2047-charset-encoding-alist)) |
23f87bed MB |
671 | ;; For the charsets that don't have a preferred |
672 | ;; encoding, choose the one that's shorter. | |
10ace8ea MB |
673 | (if (eq (rfc2047-qp-or-base64) 'base64) |
674 | 'B | |
675 | 'Q))) | |
676 | (widen) | |
677 | (goto-char e) | |
678 | (skip-chars-forward "^ \t\n") | |
679 | ;; `tail' may contain a close parenthesis. | |
680 | (setq tail (buffer-substring-no-properties e (point))) | |
681 | (goto-char b) | |
682 | (setq b (point-marker) | |
683 | e (set-marker (make-marker) e)) | |
01c52d31 | 684 | (rfc2047-fold-region (point-at-bol) b) |
10ace8ea MB |
685 | (goto-char b) |
686 | (skip-chars-backward "^ \t\n") | |
687 | (unless (= 0 (skip-chars-backward " \t")) | |
688 | ;; `crest' may contain whitespace and an open parenthesis. | |
689 | (setq crest (buffer-substring-no-properties (point) b))) | |
690 | (setq eword (rfc2047-encode-1 | |
01c52d31 | 691 | (- b (point-at-bol)) |
10ace8ea MB |
692 | (mm-replace-in-string |
693 | (buffer-substring-no-properties b e) | |
694 | "\n\\([ \t]?\\)" "\\1") | |
695 | cs | |
696 | (or (cdr (assq encoding | |
697 | rfc2047-encode-function-alist)) | |
698 | 'identity) | |
699 | (concat "=?" (downcase (symbol-name mime-charset)) | |
700 | "?" (upcase (symbol-name encoding)) "?") | |
701 | (or crest " ") | |
702 | tail)) | |
703 | (delete-region (if (eq (aref eword 0) ?\n) | |
704 | (if (bolp) | |
705 | ;; The line was folded before encoding. | |
706 | (1- (point)) | |
707 | (point)) | |
708 | (goto-char b)) | |
709 | (+ e (length tail))) | |
710 | ;; `eword' contains `crest' and `tail'. | |
711 | (insert eword) | |
712 | (set-marker b nil) | |
713 | (set-marker e nil) | |
714 | (unless (or (/= 0 (length tail)) | |
715 | (eobp) | |
716 | (looking-at "[ \t\n)]")) | |
717 | (insert " ")))) | |
718 | (t | |
719 | (goto-char e))))) | |
c113de23 | 720 | |
23f87bed MB |
721 | (defun rfc2047-fold-field () |
722 | "Fold the current header field." | |
723 | (save-excursion | |
724 | (save-restriction | |
725 | (rfc2047-narrow-to-field) | |
726 | (rfc2047-fold-region (point-min) (point-max))))) | |
727 | ||
c113de23 | 728 | (defun rfc2047-fold-region (b e) |
a553a9f5 | 729 | "Fold long lines in region B to E." |
c113de23 GM |
730 | (save-restriction |
731 | (narrow-to-region b e) | |
732 | (goto-char (point-min)) | |
f2307f18 DL |
733 | (let ((break nil) |
734 | (qword-break nil) | |
c6e26ce2 | 735 | (first t) |
f2307f18 DL |
736 | (bol (save-restriction |
737 | (widen) | |
01c52d31 | 738 | (point-at-bol)))) |
c113de23 | 739 | (while (not (eobp)) |
23f87bed MB |
740 | (when (and (or break qword-break) |
741 | (> (- (point) bol) 76)) | |
f2307f18 DL |
742 | (goto-char (or break qword-break)) |
743 | (setq break nil | |
744 | qword-break nil) | |
10ace8ea | 745 | (skip-chars-backward " \t") |
c6e26ce2 | 746 | (if (looking-at "[ \t]") |
7f0321ff | 747 | (insert ?\n) |
619ac84f | 748 | (insert "\n ")) |
f2307f18 DL |
749 | (setq bol (1- (point))) |
750 | ;; Don't break before the first non-LWSP characters. | |
751 | (skip-chars-forward " \t") | |
23f87bed MB |
752 | (unless (eobp) |
753 | (forward-char 1))) | |
c113de23 | 754 | (cond |
f2307f18 DL |
755 | ((eq (char-after) ?\n) |
756 | (forward-char 1) | |
757 | (setq bol (point) | |
758 | break nil | |
759 | qword-break nil) | |
760 | (skip-chars-forward " \t") | |
761 | (unless (or (eobp) (eq (char-after) ?\n)) | |
762 | (forward-char 1))) | |
763 | ((eq (char-after) ?\r) | |
764 | (forward-char 1)) | |
c113de23 | 765 | ((memq (char-after) '(? ?\t)) |
f2307f18 | 766 | (skip-chars-forward " \t") |
10ace8ea MB |
767 | (unless first ;; Don't break just after the header name. |
768 | (setq break (point)))) | |
f2307f18 DL |
769 | ((not break) |
770 | (if (not (looking-at "=\\?[^=]")) | |
771 | (if (eq (char-after) ?=) | |
772 | (forward-char 1) | |
773 | (skip-chars-forward "^ \t\n\r=")) | |
23f87bed MB |
774 | ;; Don't break at the start of the field. |
775 | (unless (= (point) b) | |
776 | (setq qword-break (point))) | |
f2307f18 DL |
777 | (skip-chars-forward "^ \t\n\r"))) |
778 | (t | |
10ace8ea MB |
779 | (skip-chars-forward "^ \t\n\r"))) |
780 | (setq first nil)) | |
23f87bed MB |
781 | (when (and (or break qword-break) |
782 | (> (- (point) bol) 76)) | |
f2307f18 DL |
783 | (goto-char (or break qword-break)) |
784 | (setq break nil | |
785 | qword-break nil) | |
10ace8ea MB |
786 | (if (or (> 0 (skip-chars-backward " \t")) |
787 | (looking-at "[ \t]")) | |
788 | (insert ?\n) | |
789 | (insert "\n ")) | |
f2307f18 DL |
790 | (setq bol (1- (point))) |
791 | ;; Don't break before the first non-LWSP characters. | |
792 | (skip-chars-forward " \t") | |
23f87bed MB |
793 | (unless (eobp) |
794 | (forward-char 1)))))) | |
795 | ||
796 | (defun rfc2047-unfold-field () | |
797 | "Fold the current line." | |
798 | (save-excursion | |
799 | (save-restriction | |
800 | (rfc2047-narrow-to-field) | |
801 | (rfc2047-unfold-region (point-min) (point-max))))) | |
f2307f18 DL |
802 | |
803 | (defun rfc2047-unfold-region (b e) | |
a553a9f5 | 804 | "Unfold lines in region B to E." |
f2307f18 DL |
805 | (save-restriction |
806 | (narrow-to-region b e) | |
807 | (goto-char (point-min)) | |
808 | (let ((bol (save-restriction | |
809 | (widen) | |
01c52d31 MB |
810 | (point-at-bol))) |
811 | (eol (point-at-eol))) | |
f2307f18 DL |
812 | (forward-line 1) |
813 | (while (not (eobp)) | |
c6e26ce2 | 814 | (if (and (looking-at "[ \t]") |
01c52d31 | 815 | (< (- (point-at-eol) bol) 76)) |
c6e26ce2 DL |
816 | (delete-region eol (progn |
817 | (goto-char eol) | |
818 | (skip-chars-forward "\r\n") | |
819 | (point))) | |
01c52d31 MB |
820 | (setq bol (point-at-bol))) |
821 | (setq eol (point-at-eol)) | |
f2307f18 | 822 | (forward-line 1))))) |
c113de23 | 823 | |
10ace8ea MB |
824 | (defun rfc2047-b-encode-string (string) |
825 | "Base64-encode the header contained in STRING." | |
826 | (base64-encode-string string t)) | |
827 | ||
aa8f8277 GM |
828 | (autoload 'quoted-printable-encode-region "qp") |
829 | ||
10ace8ea MB |
830 | (defun rfc2047-q-encode-string (string) |
831 | "Quoted-printable-encode the header in STRING." | |
832 | (mm-with-unibyte-buffer | |
833 | (insert string) | |
834 | (quoted-printable-encode-region | |
835 | (point-min) (point-max) nil | |
836 | ;; = (\075), _ (\137), ? (\077) are used in the encoded word. | |
837 | ;; Avoid using 8bit characters. | |
838 | ;; This list excludes `especials' (see the RFC2047 syntax), | |
839 | ;; meaning that some characters in non-structured fields will | |
840 | ;; get encoded when they con't need to be. The following is | |
841 | ;; what it used to be. | |
842 | ;;; ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?" | |
843 | ;;; "\010\012\014\040-\074\076\100-\136\140-\177") | |
844 | "-\b\n\f !#-'*+0-9A-Z\\^`-~\d") | |
845 | (subst-char-in-region (point-min) (point-max) ? ?_) | |
846 | (buffer-string))) | |
847 | ||
848 | (defun rfc2047-encode-parameter (param value) | |
849 | "Return and PARAM=VALUE string encoded in the RFC2047-like style. | |
e4e22d29 KY |
850 | This is a substitution for the `rfc2231-encode-string' function, that |
851 | is the standard but many mailers don't support it." | |
82fe1aed MB |
852 | (let ((rfc2047-encoding-type 'mime) |
853 | (rfc2047-encode-max-chars nil)) | |
ba775afe | 854 | (rfc2045-encode-string param (rfc2047-encode-string value t)))) |
c113de23 GM |
855 | |
856 | ;;; | |
857 | ;;; Functions for decoding RFC2047 messages | |
858 | ;;; | |
859 | ||
270a576a MB |
860 | (defvar rfc2047-quote-decoded-words-containing-tspecials nil |
861 | "If non-nil, quote decoded words containing special characters.") | |
862 | ||
292f71fe MB |
863 | (defvar rfc2047-allow-incomplete-encoded-text t |
864 | "*Non-nil means allow incomplete encoded-text in successive encoded-words. | |
865 | Dividing of encoded-text in the place other than character boundaries | |
866 | violates RFC2047 section 5, while we have a capability to decode it. | |
867 | If it is non-nil, the decoder will decode B- or Q-encoding in each | |
868 | encoded-word, concatenate them, and decode it by charset. Otherwise, | |
869 | the decoder will fully decode each encoded-word before concatenating | |
870 | them.") | |
871 | ||
343d6628 | 872 | (defun rfc2047-strip-backslashes-in-quoted-strings () |
d7093904 | 873 | "Strip backslashes in quoted strings. `\\\"' remains." |
343d6628 MB |
874 | (goto-char (point-min)) |
875 | (let (beg) | |
876 | (with-syntax-table (standard-syntax-table) | |
877 | (while (search-forward "\"" nil t) | |
878 | (unless (eq (char-before) ?\\) | |
879 | (setq beg (match-end 0)) | |
880 | (goto-char (match-beginning 0)) | |
881 | (condition-case nil | |
882 | (progn | |
883 | (forward-sexp) | |
884 | (save-restriction | |
885 | (narrow-to-region beg (1- (point))) | |
886 | (goto-char beg) | |
887 | (while (search-forward "\\" nil 'move) | |
d7093904 | 888 | (unless (memq (char-after) '(?\")) |
d355a0b7 | 889 | (delete-char -1)) |
343d6628 MB |
890 | (forward-char))) |
891 | (forward-char)) | |
892 | (error | |
893 | (goto-char beg)))))))) | |
894 | ||
b6b8f5fd | 895 | (defun rfc2047-charset-to-coding-system (charset &optional allow-override) |
292f71fe | 896 | "Return coding-system corresponding to MIME CHARSET. |
b6b8f5fd KY |
897 | If your Emacs implementation can't decode CHARSET, return nil. |
898 | ||
899 | If allow-override is given, use `mm-charset-override-alist' to | |
900 | map undesired charset names to their replacement. This should | |
901 | only be used for decoding, not for encoding." | |
292f71fe MB |
902 | (when (stringp charset) |
903 | (setq charset (intern (downcase charset)))) | |
904 | (when (or (not charset) | |
905 | (eq 'gnus-all mail-parse-ignored-charsets) | |
906 | (memq 'gnus-all mail-parse-ignored-charsets) | |
907 | (memq charset mail-parse-ignored-charsets)) | |
908 | (setq charset mail-parse-charset)) | |
b6b8f5fd | 909 | (let ((cs (mm-charset-to-coding-system charset nil allow-override))) |
292f71fe MB |
910 | (cond ((eq cs 'ascii) |
911 | (setq cs (or (mm-charset-to-coding-system mail-parse-charset) | |
912 | 'raw-text))) | |
e13be7a2 | 913 | ((mm-coding-system-p cs)) |
292f71fe MB |
914 | ((and charset |
915 | (listp mail-parse-ignored-charsets) | |
916 | (memq 'gnus-unknown mail-parse-ignored-charsets)) | |
917 | (setq cs (mm-charset-to-coding-system mail-parse-charset)))) | |
918 | (if (eq cs 'ascii) | |
919 | 'raw-text | |
920 | cs))) | |
921 | ||
aa8f8277 GM |
922 | (autoload 'quoted-printable-decode-string "qp") |
923 | ||
292f71fe MB |
924 | (defun rfc2047-decode-encoded-words (words) |
925 | "Decode successive encoded-words in WORDS and return a decoded string. | |
926 | Each element of WORDS looks like (CHARSET ENCODING ENCODED-TEXT | |
927 | ENCODED-WORD)." | |
928 | (let (word charset cs encoding text rest) | |
929 | (while words | |
930 | (setq word (pop words)) | |
14e6dc54 | 931 | (if (and (setq cs (rfc2047-charset-to-coding-system |
b6b8f5fd | 932 | (setq charset (car word)) t)) |
292f71fe MB |
933 | (condition-case code |
934 | (cond ((char-equal ?B (nth 1 word)) | |
935 | (setq text (base64-decode-string | |
936 | (rfc2047-pad-base64 (nth 2 word))))) | |
937 | ((char-equal ?Q (nth 1 word)) | |
938 | (setq text (quoted-printable-decode-string | |
939 | (mm-subst-char-in-string | |
940 | ?_ ? (nth 2 word) t))))) | |
941 | (error | |
942 | (message "%s" (error-message-string code)) | |
943 | nil))) | |
944 | (if (and rfc2047-allow-incomplete-encoded-text | |
945 | (eq cs (caar rest))) | |
946 | ;; Concatenate text of which the charset is the same. | |
947 | (setcdr (car rest) (concat (cdar rest) text)) | |
948 | (push (cons cs text) rest)) | |
949 | ;; Don't decode encoded-word. | |
950 | (push (cons nil (nth 3 word)) rest))) | |
951 | (while rest | |
952 | (setq words (concat | |
953 | (or (and (setq cs (caar rest)) | |
954 | (condition-case code | |
955 | (mm-decode-coding-string (cdar rest) cs) | |
956 | (error | |
957 | (message "%s" (error-message-string code)) | |
958 | nil))) | |
959 | (concat (when (cdr rest) " ") | |
960 | (cdar rest) | |
961 | (when (and words | |
962 | (not (eq (string-to-char words) ? ))) | |
963 | " "))) | |
964 | words) | |
965 | rest (cdr rest))) | |
966 | words)) | |
967 | ||
23f87bed MB |
968 | ;; Fixme: This should decode in place, not cons intermediate strings. |
969 | ;; Also check whether it needs to worry about delimiting fields like | |
970 | ;; encoding. | |
971 | ||
972 | ;; In fact it's reported that (invalid) encoding of mailboxes in | |
973 | ;; addr-specs is in use, so delimiting fields might help. Probably | |
974 | ;; not decoding a word which isn't properly delimited is good enough | |
975 | ;; and worthwhile (is it more correct or not?), e.g. something like | |
976 | ;; `=?iso-8859-1?q?foo?=@'. | |
c113de23 | 977 | |
343d6628 MB |
978 | (defun rfc2047-decode-region (start end &optional address-mime) |
979 | "Decode MIME-encoded words in region between START and END. | |
980 | If ADDRESS-MIME is non-nil, strip backslashes which precede characters | |
981 | other than `\"' and `\\' in quoted strings." | |
c113de23 GM |
982 | (interactive "r") |
983 | (let ((case-fold-search t) | |
b890d447 MB |
984 | (eword-regexp |
985 | (if rfc2047-allow-irregular-q-encoded-words | |
986 | (eval-when-compile | |
987 | (concat "[\n\t ]*\\(" rfc2047-encoded-word-regexp-loose "\\)")) | |
988 | (eval-when-compile | |
989 | (concat "[\n\t ]*\\(" rfc2047-encoded-word-regexp "\\)")))) | |
292f71fe | 990 | b e match words) |
652dbc07 DL |
991 | (save-excursion |
992 | (save-restriction | |
993 | (narrow-to-region start end) | |
343d6628 MB |
994 | (when address-mime |
995 | (rfc2047-strip-backslashes-in-quoted-strings)) | |
292f71fe MB |
996 | (goto-char (setq b start)) |
997 | ;; Look for the encoded-words. | |
998 | (while (setq match (re-search-forward eword-regexp nil t)) | |
999 | (setq e (match-beginning 1) | |
1000 | end (match-end 0) | |
1001 | words nil) | |
1002 | (while match | |
1003 | (push (list (match-string 2) ;; charset | |
01c52d31 | 1004 | (char-after (match-beginning 3)) ;; encoding |
b890d447 | 1005 | (substring (match-string 3) 2) ;; encoded-text |
292f71fe MB |
1006 | (match-string 1)) ;; encoded-word |
1007 | words) | |
1008 | ;; Look for the subsequent encoded-words. | |
1009 | (when (setq match (looking-at eword-regexp)) | |
1010 | (goto-char (setq end (match-end 0))))) | |
1011 | ;; Replace the encoded-words with the decoded one. | |
1012 | (delete-region e end) | |
1013 | (insert (rfc2047-decode-encoded-words (nreverse words))) | |
23f87bed MB |
1014 | (save-restriction |
1015 | (narrow-to-region e (point)) | |
1016 | (goto-char e) | |
270a576a MB |
1017 | ;; Remove newlines between decoded words, though such |
1018 | ;; things essentially must not be there. | |
23f87bed MB |
1019 | (while (re-search-forward "[\n\r]+" nil t) |
1020 | (replace-match " ")) | |
30123838 | 1021 | (setq end (point-max)) |
270a576a MB |
1022 | ;; Quote decoded words if there are special characters |
1023 | ;; which might violate RFC2822. | |
1024 | (when (and rfc2047-quote-decoded-words-containing-tspecials | |
1025 | (let ((regexp (car (rassq | |
1026 | 'address-mime | |
1027 | rfc2047-header-encoding-alist)))) | |
1028 | (when regexp | |
1029 | (save-restriction | |
1030 | (widen) | |
30123838 KY |
1031 | (and |
1032 | ;; Don't quote words if already quoted. | |
1033 | (not (and (eq (char-before e) ?\") | |
1034 | (eq (char-after end) ?\"))) | |
1035 | (progn | |
1036 | (beginning-of-line) | |
1037 | (while (and (memq (char-after) '(? ?\t)) | |
1038 | (zerop (forward-line -1)))) | |
1039 | (looking-at regexp))))))) | |
270a576a MB |
1040 | (let (quoted) |
1041 | (goto-char e) | |
1042 | (skip-chars-forward " \t") | |
1043 | (setq start (point)) | |
1044 | (setq quoted (eq (char-after) ?\")) | |
1045 | (goto-char (point-max)) | |
1dbc941a | 1046 | (skip-chars-backward " \t" start) |
270a576a MB |
1047 | (if (setq quoted (and quoted |
1048 | (> (point) (1+ start)) | |
1049 | (eq (char-before) ?\"))) | |
1050 | (progn | |
1051 | (backward-char) | |
1052 | (setq start (1+ start) | |
1053 | end (point-marker))) | |
1054 | (setq end (point-marker))) | |
1055 | (goto-char start) | |
1056 | (while (search-forward "\"" end t) | |
1057 | (when (prog2 | |
1058 | (backward-char) | |
1059 | (zerop (% (skip-chars-backward "\\\\") 2)) | |
1060 | (goto-char (match-beginning 0))) | |
1061 | (insert "\\")) | |
1062 | (forward-char)) | |
1063 | (when (and (not quoted) | |
1064 | (progn | |
1065 | (goto-char start) | |
1066 | (re-search-forward | |
1067 | (concat "[" ietf-drums-tspecials "]") | |
1068 | end t))) | |
1069 | (goto-char start) | |
1070 | (insert "\"") | |
1071 | (goto-char end) | |
1072 | (insert "\"")) | |
1073 | (set-marker end nil))) | |
23f87bed | 1074 | (goto-char (point-max))) |
652dbc07 DL |
1075 | (when (and (mm-multibyte-p) |
1076 | mail-parse-charset | |
23f87bed | 1077 | (not (eq mail-parse-charset 'us-ascii)) |
652dbc07 DL |
1078 | (not (eq mail-parse-charset 'gnus-decoded))) |
1079 | (mm-decode-coding-region b e mail-parse-charset)) | |
1080 | (setq b (point))) | |
1081 | (when (and (mm-multibyte-p) | |
1082 | mail-parse-charset | |
1083 | (not (eq mail-parse-charset 'us-ascii)) | |
1084 | (not (eq mail-parse-charset 'gnus-decoded))) | |
23f87bed | 1085 | (mm-decode-coding-region b (point-max) mail-parse-charset)))))) |
c113de23 | 1086 | |
343d6628 MB |
1087 | (defun rfc2047-decode-address-region (start end) |
1088 | "Decode MIME-encoded words in region between START and END. | |
1089 | Backslashes which precede characters other than `\"' and `\\' in quoted | |
1090 | strings are stripped." | |
1091 | (rfc2047-decode-region start end t)) | |
1092 | ||
1093 | (defun rfc2047-decode-string (string &optional address-mime) | |
1094 | "Decode MIME-encoded STRING and return the result. | |
1095 | If ADDRESS-MIME is non-nil, strip backslashes which precede characters | |
1096 | other than `\"' and `\\' in quoted strings." | |
f15a9fec | 1097 | ;; (let ((m (mm-multibyte-p))) |
23f87bed MB |
1098 | (if (string-match "=\\?" string) |
1099 | (with-temp-buffer | |
f15a9fec SM |
1100 | ;; We used to only call mm-enable-multibyte if `m' is non-nil, |
1101 | ;; but this can't be the right criterion. Don't just revert this | |
1102 | ;; change if it encounters a bug. Please help me fix it | |
1103 | ;; right instead. --Stef | |
1104 | ;; The string returned should always be multibyte in a multibyte | |
1105 | ;; session, i.e. the buffer should be multibyte before | |
1106 | ;; `buffer-string' is called. | |
1107 | (mm-enable-multibyte) | |
23f87bed MB |
1108 | (insert string) |
1109 | (inline | |
343d6628 | 1110 | (rfc2047-decode-region (point-min) (point-max) address-mime)) |
23f87bed | 1111 | (buffer-string)) |
343d6628 MB |
1112 | (when address-mime |
1113 | (setq string | |
1114 | (with-temp-buffer | |
1115 | (when (mm-multibyte-string-p string) | |
1116 | (mm-enable-multibyte)) | |
1117 | (insert string) | |
1118 | (rfc2047-strip-backslashes-in-quoted-strings) | |
1119 | (buffer-string)))) | |
23f87bed | 1120 | ;; Fixme: As above, `m' here is inappropriate. |
f15a9fec | 1121 | (if (and ;; m |
23f87bed MB |
1122 | mail-parse-charset |
1123 | (not (eq mail-parse-charset 'us-ascii)) | |
1124 | (not (eq mail-parse-charset 'gnus-decoded))) | |
10ace8ea MB |
1125 | ;; `decode-coding-string' in Emacs offers a third optional |
1126 | ;; arg NOCOPY to avoid consing a new string if the decoding | |
1127 | ;; is "trivial". Unfortunately it currently doesn't | |
1128 | ;; consider anything else than a `nil' coding system | |
1129 | ;; trivial. | |
1130 | ;; `rfc2047-decode-string' is called multiple times for each | |
1131 | ;; article during summary buffer generation, and we really | |
1132 | ;; want to avoid unnecessary consing. So we bypass | |
1133 | ;; `decode-coding-string' if the string is purely ASCII. | |
1134 | (if (and (fboundp 'detect-coding-string) | |
1135 | ;; string is purely ASCII | |
1136 | (eq (detect-coding-string string t) 'undecided)) | |
f15a9fec SM |
1137 | string |
1138 | (mm-decode-coding-string string mail-parse-charset)) | |
1139 | (mm-string-to-multibyte string)))) ;; ) | |
c113de23 | 1140 | |
343d6628 MB |
1141 | (defun rfc2047-decode-address-string (string) |
1142 | "Decode MIME-encoded STRING and return the result. | |
1143 | Backslashes which precede characters other than `\"' and `\\' in quoted | |
1144 | strings are stripped." | |
1145 | (rfc2047-decode-string string t)) | |
1146 | ||
23f87bed MB |
1147 | (defun rfc2047-pad-base64 (string) |
1148 | "Pad STRING to quartets." | |
1149 | ;; Be more liberal to accept buggy base64 strings. If | |
1150 | ;; base64-decode-string accepts buggy strings, this function could | |
1151 | ;; be aliased to identity. | |
f4dd4ae8 MB |
1152 | (if (= 0 (mod (length string) 4)) |
1153 | string | |
1154 | (when (string-match "=+$" string) | |
1155 | (setq string (substring string 0 (match-beginning 0)))) | |
1156 | (case (mod (length string) 4) | |
1157 | (0 string) | |
1158 | (1 string) ;; Error, don't pad it. | |
1159 | (2 (concat string "==")) | |
1160 | (3 (concat string "="))))) | |
652dbc07 | 1161 | |
c113de23 GM |
1162 | (provide 'rfc2047) |
1163 | ||
1164 | ;;; rfc2047.el ends here |