Commit | Line | Data |
---|---|---|
715a2ca2 | 1 | ;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages |
23f87bed | 2 | ;; Copyright (C) 1998, 1999, 2000, 2002, 2003 Free Software Foundation, Inc. |
c113de23 GM |
3 | |
4 | ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | |
5 | ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> | |
6 | ;; This file is part of GNU Emacs. | |
7 | ||
8 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
9 | ;; it under the terms of the GNU General Public License as published by | |
10 | ;; the Free Software Foundation; either version 2, or (at your option) | |
11 | ;; any later version. | |
12 | ||
13 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
14 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | ;; GNU General Public License for more details. | |
17 | ||
18 | ;; You should have received a copy of the GNU General Public License | |
19 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
20 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
21 | ;; Boston, MA 02111-1307, USA. | |
22 | ||
23 | ;;; Commentary: | |
24 | ||
d49a4975 DL |
25 | ;; RFC 2047 is "MIME (Multipurpose Internet Mail Extensions) Part |
26 | ;; Three: Message Header Extensions for Non-ASCII Text". | |
27 | ||
c113de23 GM |
28 | ;;; Code: |
29 | ||
7f0321ff DL |
(eval-when-compile
  (require 'cl)
  (defvar message-posting-charset)
  ;; Emacs 20 has no `with-syntax-table'; supply an equivalent there.
  (if (not (fboundp 'with-syntax-table))
      (defmacro with-syntax-table (table &rest body)
        "Run BODY with TABLE installed as the current buffer's syntax table.
The buffer that is current on entry and its syntax table are
remembered; the table is put back in that buffer when BODY finishes,
whether normally or through a non-local exit.
The value is that of the last form in BODY."
        (let ((saved-table (make-symbol "table"))
              (saved-buffer (make-symbol "buffer")))
          `(let ((,saved-table (syntax-table))
                 (,saved-buffer (current-buffer)))
             (unwind-protect
                 (progn
                   (set-syntax-table ,table)
                   ,@body)
               ;; BODY may have switched buffers, so restore the table
               ;; in the buffer we started in.
               (with-current-buffer ,saved-buffer
                 (set-syntax-table ,saved-table))))))))
c113de23 GM |
50 | |
51 | (require 'qp) | |
52 | (require 'mm-util) | |
7f0321ff | 53 | ;; Fixme: Avoid this (used for mail-parse-charset) mm dependence on gnus. |
c113de23 | 54 | (require 'mail-prsvr) |
f2307f18 | 55 | (require 'base64) |
f2307f18 | 56 | (autoload 'mm-body-7-or-8 "mm-bodies") |
1c33719f | 57 | |
23f87bed MB |
(eval-and-compile
  ;; Local names for the line-boundary functions, so that this file
  ;; does not have to pull in gnus-util for them.
  (defalias 'rfc2047-point-at-bol
    (cond ((fboundp 'point-at-bol) 'point-at-bol)
          (t 'line-beginning-position)))

  (defalias 'rfc2047-point-at-eol
    (cond ((fboundp 'point-at-eol) 'point-at-eol)
          (t 'line-end-position))))
69 | ||
(defvar rfc2047-header-encoding-alist
  '(("Newsgroups")
    ("Followup-To")
    ("Message-ID")
    ("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\
\\|Mail-Followup-To\\|Mail-Copies-To\\|Approved\\)" . address-mime)
    (t . mime))
  "*Alist mapping header names to the way their contents are encoded.
Each element is (KEY . METHOD).  KEY is either a regexp that is
matched against the start of the header, or t, which matches any
header.  The elements are tried in order and the first match wins.

METHOD is one of:

nil            -- the header is not given a specific encoding method;
`mime'         -- encode the header according to RFC2047;
`address-mime' -- like `mime', but obey the rules for address fields,
                  where quoted strings and comments are handled
                  separately;
a charset      -- encode the header in that charset;
`default'      -- encode the header the same way as the rest of the
                  article.")
90 | ||
(defvar rfc2047-charset-encoding-alist
  '((us-ascii)
    ;; Latin-script charsets.
    (iso-8859-1 . Q)
    (iso-8859-2 . Q)
    (iso-8859-3 . Q)
    (iso-8859-4 . Q)
    ;; Cyrillic, Greek and Hebrew charsets.
    (iso-8859-5 . B)
    (koi8-r . B)
    (iso-8859-7 . B)
    (iso-8859-8 . B)
    (iso-8859-9 . Q)
    (iso-8859-14 . Q)
    (iso-8859-15 . Q)
    ;; Japanese, Korean and Chinese charsets.
    (iso-2022-jp . B)
    (iso-2022-kr . B)
    (gb2312 . B)
    (big5 . B)
    (cn-big5 . B)
    (cn-gb . B)
    (cn-gb-2312 . B)
    (euc-kr . B)
    (iso-2022-jp-2 . B)
    (iso-2022-int-1 . B)
    (viscii . Q))
  "Alist associating MIME charsets with their preferred RFC2047 encoding.
The encoding is nil, `Q' or `B', meaning respectively binary (that is,
no) encoding, quoted-printable and base64.")
c113de23 GM |
118 | |
(defvar rfc2047-encoding-function-alist
  (list (cons 'Q 'rfc2047-q-encode-region)
        (cons 'B 'rfc2047-b-encode-region)
        (cons nil 'ignore))
  "Alist mapping each RFC2047 encoding to the function that performs it.
The keys are the encodings `Q', `B' and nil; each function is called
with the start and end of the region to encode.")
124 | ||
c113de23 GM |
125 | ;;; |
126 | ;;; Functions for encoding RFC2047 messages | |
127 | ;;; | |
128 | ||
f4dd4ae8 MB |
(defun rfc2047-qp-or-base64 ()
  "Choose between `quoted-printable' and `base64' for the current buffer.
At most the first 2000 characters of the accessible portion are
examined.  The result is `quoted-printable' when fewer than one
sixth of them fall outside the range \\x20-\\x7f plus CR, LF and TAB,
or when there is exactly one such character; otherwise it is `base64'.
Point is left where it was."
  (save-excursion
    (let* ((start (point-min))
           (limit (min (point-max) (+ 2000 start)))
           (non-ascii 0))
      (goto-char start)
      ;; Hop from one character needing encoding to the next, counting
      ;; them as we go.
      (while (progn
               (skip-chars-forward "\x20-\x7f\r\n\t" limit)
               (< (point) limit))
        (setq non-ascii (1+ non-ascii))
        (forward-char 1))
      (cond
       ;; A short line with a single non-ASCII character (as happens
       ;; when parts are split by charset) is not worth base64.
       ((= non-ascii 1) 'quoted-printable)
       ((< (* 6 non-ascii) (- limit start)) 'quoted-printable)
       (t 'base64)))))
147 | ||
c113de23 GM |
(defun rfc2047-narrow-to-field ()
  "Narrow the buffer to the header field that starts on the current line.
The field extends up to the next line that begins with something other
than a space, a tab or a newline, or to the end of the buffer when
there is no such line.  Point is left at the start of the field."
  (beginning-of-line)
  (let ((field-start (point))
        field-end)
    (forward-line 1)
    (setq field-end (if (re-search-forward "^[^ \n\t]" nil t)
                        (rfc2047-point-at-bol)
                      (point-max)))
    (narrow-to-region field-start field-end))
  (goto-char (point-min)))
159 | ||
23f87bed MB |
(defun rfc2047-field-value ()
  "Return the value of the header field at point as a string.
The value is the text of the field after the first colon and any
spaces, tabs or newlines that follow it.  Point and the narrowing are
left unchanged."
  (save-excursion
    (save-restriction
      (rfc2047-narrow-to-field)
      (let ((value-start (progn
                           (re-search-forward ":[ \t\n]*" nil t)
                           (point))))
        (buffer-substring value-start (point-max))))))
167 | ||
7f0321ff DL |
(defvar rfc2047-encoding-type 'address-mime
  "How `rfc2047-encode-region' treats the text it encodes.
The value is either `mime' or `address-mime'; bind it dynamically
around calls to `rfc2047-encode-region' to select one.  The two
methods are described in `rfc2047-header-encoding-alist'.")
173 | ||
c113de23 GM |
(defun rfc2047-encode-message-header ()
  "Encode the message header according to `rfc2047-header-encoding-alist'.
Should be called narrowed to the head of the message.

The header is processed one field at a time.  A field for which
`rfc2047-encodable-p' returns nil is only folded (after possibly being
encoded to the posting charset); any other field is encoded with the
method selected for it by the first matching element of
`rfc2047-header-encoding-alist'."
  (interactive "*")
  (save-excursion
    (goto-char (point-min))
    (let (alist elem method)
      (while (not (eobp))
        (save-restriction
          ;; Work on a single field; point is at its start afterwards.
          (rfc2047-narrow-to-field)
          (if (not (rfc2047-encodable-p))
              (prog1
                  (if (and (eq (mm-body-7-or-8) '8bit)
                           (mm-multibyte-p)
                           (mm-coding-system-p
                            (car message-posting-charset)))
                      ;; The field holds 8-bit text: encode it to the
                      ;; coding system of the posting charset.
                      (mm-encode-coding-region
                       (point-min) (point-max)
                       (mm-charset-to-coding-system
                        (car message-posting-charset))))
                ;; No encoding necessary, but folding is nice.  Fold
                ;; from just after the "NAME: " prefix to the end of
                ;; the field.
                (rfc2047-fold-region
                 (save-excursion
                   (goto-char (point-min))
                   (skip-chars-forward "^:")
                   (when (looking-at ": ")
                     (forward-char 2))
                   (point))
                 (point-max)))
            ;; We found something that may perhaps be encoded.
            (setq method nil
                  alist rfc2047-header-encoding-alist)
            ;; Take the method of the first element whose key is t or
            ;; a regexp matching at point (the start of the field);
            ;; clearing ALIST ends the search.
            (while (setq elem (pop alist))
              (when (or (and (stringp (car elem))
                             (looking-at (car elem)))
                        (eq (car elem) t))
                (setq alist nil
                      method (cdr elem))))
            ;; Move past the field name so that only the body is
            ;; encoded.
            (goto-char (point-min))
            (re-search-forward "^[^:]+: *" nil t)
            (cond
             ((eq method 'address-mime)
              (rfc2047-encode-region (point) (point-max)))
             ((eq method 'mime)
              (let ((rfc2047-encoding-type 'mime))
                (rfc2047-encode-region (point) (point-max))))
             ((eq method 'default)
              (if (and (featurep 'mule)
                       (if (boundp 'default-enable-multibyte-characters)
                           default-enable-multibyte-characters)
                       mail-parse-charset)
                  (mm-encode-coding-region (point) (point-max)
                                           mail-parse-charset)))
             ;; We get this when CC'ing messages to newsgroups with
             ;; 8-bit names.  The group name mail copy just got
             ;; unconditionally encoded.  Previously, it would ask
             ;; whether to encode, which was quite confusing for the
             ;; user.  If the new behaviour is wrong, tell me.  I have
             ;; left the old code commented out below.
             ;; -- Per Abrahamsen <abraham@dina.kvl.dk> Date: 2001-10-07.
             ;; Modified by Dave Love, with the commented-out code changed
             ;; in accordance with changes elsewhere.
             ((null method)
              (rfc2047-encode-region (point) (point-max)))
;;;          ((null method)
;;;           (if (or (message-options-get
;;;                    'rfc2047-encode-message-header-encode-any)
;;;                   (message-options-set
;;;                    'rfc2047-encode-message-header-encode-any
;;;                    (y-or-n-p
;;;                     "Some texts are not encoded. Encode anyway?")))
;;;               (rfc2047-encode-region (point-min) (point-max))
;;;             (error "Cannot send unencoded text")))
             ;; METHOD names a coding system: encode the body with it.
             ((mm-coding-system-p method)
              (if (and (featurep 'mule)
                       (if (boundp 'default-enable-multibyte-characters)
                           default-enable-multibyte-characters))
                  (mm-encode-coding-region (point) (point-max) method)))
             ;; Hm.  Any other method leaves the field untouched.
             (t)))
          ;; Go to the end of this field so that, once the restriction
          ;; is lifted, the next iteration starts at the next field.
          (goto-char (point-max)))))))
256 | ||
a553a9f5 DL |
;; Fixme: Both this declaration and the `require' inside the function
;; below may not be the Right Thing, but they should be safe just
;; before release.  -- fx 2001-02-08
(eval-when-compile (defvar message-posting-charset))

(defun rfc2047-encodable-p ()
  "Return non-nil if the current buffer has characters that need encoding.
Only the accessible portion of the buffer is examined, so the buffer
may be narrowed.  The answer is nil when the text has no MIME charsets
at all, or when its charsets consist of exactly the first element of
`message-posting-charset'."
  (require 'message)                    ; defines message-posting-charset
  (let ((found (mm-find-mime-charset-region (point-min) (point-max))))
    (cond
     ((null found) nil)
     ((equal found (list (car message-posting-charset))) nil)
     (t t))))
7f0321ff DL |
269 | |
;; Syntax table for splitting a header into the regions that may need
;; encoding.  Double quote delimits strings, backslash quotes the next
;; character, and every other RFC 2822 special character is
;; punctuation, so that forward-sexp/forward-word stop at the ends of
;; the regions we care about.  Nb. ietf-drums does things differently.
(defconst rfc2047-syntax-table
  ;; (make-char-table 'syntax-table '(2)) only works in Emacs.
  (let ((table (make-syntax-table)))
    ;; First give every character word syntax.  This way of filling
    ;; the whole table works in Emacs 21 and 22 and in XEmacs, and
    ;; appears to be the cleanest.  The word syntax entry is copied
    ;; from ?a rather than assuming what it looks like.
    (cond
     ((fboundp 'set-char-table-range)   ; Emacs
      (funcall (intern "set-char-table-range")
               table t (aref (standard-syntax-table) ?a)))
     ((and (fboundp 'put-char-table)
           (fboundp 'get-char-table))   ; warning avoidance
      (put-char-table t (get-char-table ?a (standard-syntax-table))
                      table)))
    (modify-syntax-entry ?\\ "\\" table)
    (modify-syntax-entry ?\" "\"" table)
    ;; The remaining specials all become punctuation.
    (let ((specials '(?\( ?\) ?\< ?\> ?\[ ?\] ?: ?\; ?, ?@)))
      (while specials
        (modify-syntax-entry (car specials) "." table)
        (setq specials (cdr specials))))
    table))
c113de23 GM |
303 | |
(defun rfc2047-encode-region (b e)
  "Encode words in region B to E that need encoding.
By default, the region is treated as containing RFC2822 addresses.
Dynamically bind `rfc2047-encoding-type' to change that.

With type `mime', everything from the first space-delimited word that
contains a non-ASCII character to the last such word is encoded as a
unit.  Otherwise the region is scanned token by token using
`rfc2047-syntax-table', and each quoted string or word containing a
non-ASCII character is encoded on its own.  Finally the text from B
to point is folded with `rfc2047-fold-region'.

Signals an error if scanning the tokens fails."
  (save-restriction
    (narrow-to-region b e)
    (if (eq 'mime rfc2047-encoding-type)
        ;; Simple case.  Treat as single word after any initial ASCII
        ;; part and before any tailing ASCII part.  The leading ASCII
        ;; is relevant for instance in Subject headers with `Re:' for
        ;; interoperability with non-MIME clients, and we might as
        ;; well avoid the tail too.
        (progn
          (goto-char (point-min))
          ;; Does it need encoding?
          (skip-chars-forward "\000-\177")
          (unless (eobp)
            (skip-chars-backward "^ \n") ; beginning of space-delimited word
            (rfc2047-encode (point) (progn
                                      (goto-char e)
                                      (skip-chars-backward "\000-\177")
                                      (skip-chars-forward "^ \n")
                                      ;; end of space-delimited word
                                      (point)))))
      ;; `address-mime' case -- take care of quoted words, comments.
      (with-syntax-table rfc2047-syntax-table
        (let ((start)                   ; start of current token
              end                       ; end of current token
              ;; Whether there's an encoded word before the current
              ;; token, either immediately or separated by space.
              last-encoded)
          (goto-char (point-min))
          (condition-case nil           ; in case of unbalanced quotes
              ;; Look for rfc2822-style: sequences of atoms, quoted
              ;; strings, specials, whitespace.  (Specials mustn't be
              ;; encoded.)
              (while (not (eobp))
                (setq start (point))
                ;; Skip whitespace.
                (unless (= 0 (skip-chars-forward " \t\n"))
                  (setq start (point)))
                (cond
                 ((not (char-after)))   ; eob
                 ;; else token start
                 ((eq ?\" (char-syntax (char-after)))
                  ;; Quoted word.
                  (forward-sexp)
                  (setq end (point))
                  ;; Does it need encoding?
                  (goto-char start)
                  (skip-chars-forward "\000-\177" end)
                  (if (= end (point))
                      (setq last-encoded nil)
                    ;; It needs encoding.  Strip the quotes first,
                    ;; since encoded words can't occur in quotes.
                    (goto-char end)
                    (delete-backward-char 1)
                    (goto-char start)
                    (delete-char 1)
                    (when last-encoded
                      ;; There was a preceding quoted word.  We need
                      ;; to include any separating whitespace in this
                      ;; word to avoid it getting lost.
                      (skip-chars-backward " \t")
                      ;; A space is needed between the encoded words.
                      (insert ? )
                      ;; The inserted space shifts the token by one.
                      (setq start (point)
                            end (1+ end)))
                    ;; Adjust the end position for the deleted quotes.
                    (rfc2047-encode start (- end 2))
                    (setq last-encoded t))) ; record that it was encoded
                 ((eq ?. (char-syntax (char-after)))
                  ;; Skip other delimiters, but record that they've
                  ;; potentially separated quoted words.
                  (forward-char)
                  (setq last-encoded nil))
                 (t                     ; normal token/whitespace sequence
                  ;; Find the end.
                  (forward-word 1)
                  (skip-chars-backward " \t")
                  (setq end (point))
                  ;; Deal with encoding and leading space as for
                  ;; quoted words.
                  (goto-char start)
                  (skip-chars-forward "\000-\177" end)
                  (if (= end (point))
                      (setq last-encoded nil)
                    (when last-encoded
                      (goto-char start)
                      (skip-chars-backward " \t")
                      (insert ? )
                      (setq start (point)
                            end (1+ end)))
                    (rfc2047-encode start end)
                    (setq last-encoded t)))))
            ;; Any error raised while scanning is reported as invalid
            ;; input, quoting the text between B and E.
            (error
             (error "Invalid data for rfc2047 encoding: %s"
                    (buffer-substring b e)))))))
    ;; Fold whatever now lies between B and point.
    (rfc2047-fold-region b (point))))
c113de23 GM |
403 | |
(defun rfc2047-encode-string (string)
  "Return a copy of STRING with the words that need it RFC2047-encoded.
STRING is treated as containing addresses unless
`rfc2047-encoding-type' says otherwise."
  (with-temp-buffer
    ;; Encode in a scratch buffer and hand back its whole contents.
    (insert string)
    (let ((from (point-min))
          (to (point-max)))
      (rfc2047-encode-region from to))
    (buffer-string)))
412 | ||
7f0321ff DL |
(defun rfc2047-encode (b e)
  "Encode the word(s) in the region B to E.
By default, the region is treated as containing addresses (see
`rfc2047-encoding-type').

The text is replaced by one or more encoded words of the form
=?CHARSET?ENCODING?TEXT?=, separated by a newline and a space.
Signals an error if the region contains more than one MIME charset."
  (let* ((mime-charset (mm-find-mime-charset-region b e))
         ;; Note that computing CS also replaces MIME-CHARSET (a list
         ;; up to here) by its single element; the bindings below
         ;; rely on that.
         (cs (if (> (length mime-charset) 1)
                 ;; Fixme: Instead of this, try to break region into
                 ;; parts that can be encoded separately.
                 (error "Can't rfc2047-encode `%s'"
                        (buffer-substring b e))
               (setq mime-charset (car mime-charset))
               (mm-charset-to-coding-system mime-charset)))
         ;; Fixme: Better, calculate the number of non-ASCII
         ;; characters, at least for 8-bit charsets.
         (encoding (or (cdr (assq mime-charset
                                  rfc2047-charset-encoding-alist))
                       ;; For the charsets that don't have a preferred
                       ;; encoding, choose the one that's shorter.
                       (save-restriction
                         (narrow-to-region b e)
                         (if (eq (rfc2047-qp-or-base64) 'base64)
                             'B
                           'Q))))
         ;; The "=?charset?encoding?" prefix of every encoded word.
         (start (concat
                 "=?" (downcase (symbol-name mime-charset)) "?"
                 (downcase (symbol-name encoding)) "?"))
         ;; Assumed number of bytes each character encodes to in this
         ;; charset.
         (factor (case mime-charset
                   ((iso-8859-5 iso-8859-7 iso-8859-8 koi8-r) 1)
                   ((big5 gb2312 euc-kr) 2)
                   (utf-8 4)
                   (t 8)))
         ;; Distance from the start of the line to B.
         ;; NOTE(review): the line start is taken at point, not at B
         ;; -- confirm that callers always leave point on B's line.
         (pre (- b (save-restriction
                     (widen)
                     (rfc2047-point-at-bol))))
         ;; encoded-words must not be longer than 75 characters,
         ;; including charset, encoding etc.  This leaves us with
         ;; 75 - (length start) - 2 - 2 characters.  The last 2 is for
         ;; possible base64 padding.  In the worst case (iso-2022-*)
         ;; each character expands to 8 bytes which is expanded by a
         ;; factor of 4/3 by base64 encoding.
         (length (floor (- 75 (length start) 4) (* factor (/ 4.0 3.0))))
         ;; Limit line length to 76 characters.  This is the chunk
         ;; size for the first word, which shares its line with the
         ;; PRE characters in front of it; it is never less than 1.
         (length1 (max 1 (floor (- 76 (length start) 4 pre)
                                (* factor (/ 4.0 3.0)))))
         (first t))
    (if mime-charset
        (save-restriction
          (narrow-to-region b e)
          (when (eq encoding 'B)
            ;; break into lines before encoding
            (goto-char (point-min))
            (while (not (eobp))
              ;; The first chunk is LENGTH1 characters long, every
              ;; later one LENGTH characters.
              (if first
                  (progn
                    (goto-char (min (point-max) (+ length1 (point))))
                    (setq first nil))
                (goto-char (min (point-max) (+ length (point)))))
              (unless (eobp)
                (insert ?\n)))
            ;; FIRST is reused by the wrapping loop below.
            (setq first t))
          (if (and (mm-multibyte-p)
                   (mm-coding-system-p cs))
              (mm-encode-coding-region (point-min) (point-max) cs))
          ;; Apply the Q or B encoder registered for ENCODING.
          (funcall (cdr (assq encoding rfc2047-encoding-function-alist))
                   (point-min) (point-max))
          ;; Wrap every line in "=?charset?enc?" ... "?=", putting a
          ;; space in front of each line but the first.
          (goto-char (point-min))
          (while (not (eobp))
            (unless first
              (insert ? ))
            (setq first nil)
            (insert start)
            (end-of-line)
            (insert "?=")
            (forward-line 1))))))
c113de23 | 487 | |
23f87bed MB |
(defun rfc2047-fold-field ()
  "Fold the header field that point is in.
Point and the narrowing are left unchanged."
  (save-excursion
    (save-restriction
      (rfc2047-narrow-to-field)
      ;; The field is now the whole accessible portion; fold all of it.
      (let ((from (point-min))
            (to (point-max)))
        (rfc2047-fold-region from to)))))
494 | ||
(defun rfc2047-fold-region (b e)
  "Fold long lines in region B to E.
While scanning forward, the most recent candidate break position is
remembered: either a run of whitespace or the start of an encoded
word (text beginning with \"=?\").  Whenever point gets more than 76
characters past the start of the current line and a candidate is
known, a newline is inserted there, followed by a space unless the
break position is already followed by whitespace."
  (save-restriction
    (narrow-to-region b e)
    (goto-char (point-min))
    (let ((break nil)                   ; last whitespace break candidate
          (qword-break nil)             ; last encoded-word break candidate
          (first t)                     ; no whitespace run seen yet
          ;; Start of the current line, measured in the widened
          ;; buffer so that text in front of B on the same line counts.
          (bol (save-restriction
                 (widen)
                 (rfc2047-point-at-bol))))
      (while (not (eobp))
        ;; Too far into the line and a break candidate is known: fold.
        (when (and (or break qword-break)
                   (> (- (point) bol) 76))
          (goto-char (or break qword-break))
          (setq break nil
                qword-break nil)
          ;; Existing whitespace serves as the continuation-line
          ;; indentation; otherwise a space is added.
          (if (looking-at "[ \t]")
              (insert ?\n)
            (insert "\n "))
          (setq bol (1- (point)))
          ;; Don't break before the first non-LWSP characters.
          (skip-chars-forward " \t")
          (unless (eobp)
            (forward-char 1)))
        (cond
         ;; An existing line break starts a new line: forget the
         ;; candidates and step over the next line's leading
         ;; whitespace plus one character.
         ((eq (char-after) ?\n)
          (forward-char 1)
          (setq bol (point)
                break nil
                qword-break nil)
          (skip-chars-forward " \t")
          (unless (or (eobp) (eq (char-after) ?\n))
            (forward-char 1)))
         ((eq (char-after) ?\r)
          (forward-char 1))
         ;; Whitespace: record its last character as a candidate.
         ((memq (char-after) '(?  ?\t))
          (skip-chars-forward " \t")
          (if first
              ;; Don't break just after the header name.
              (setq first nil)
            (setq break (1- (point)))))
         ;; No whitespace candidate yet: also consider the start of
         ;; an encoded word.
         ((not break)
          (if (not (looking-at "=\\?[^=]"))
              (if (eq (char-after) ?=)
                  (forward-char 1)
                (skip-chars-forward "^ \t\n\r="))
            ;; Don't break at the start of the field.
            (unless (= (point) b)
              (setq qword-break (point)))
            (skip-chars-forward "^ \t\n\r")))
         (t
          (skip-chars-forward "^ \t\n\r"))))
      ;; The same check once more for the text scanned last.
      (when (and (or break qword-break)
                 (> (- (point) bol) 76))
        (goto-char (or break qword-break))
        (setq break nil
              qword-break nil)
        (if (looking-at "[ \t]")
            (insert ?\n)
          (insert "\n "))
        (setq bol (1- (point)))
        ;; Don't break before the first non-LWSP characters.
        (skip-chars-forward " \t")
        (unless (eobp)
          (forward-char 1))))))
561 | ||
(defun rfc2047-unfold-field ()
  "Unfold the current header field.
The field point is in is narrowed to and its folded lines are joined
by `rfc2047-unfold-region'.  Point and the narrowing are left
unchanged."
  (save-excursion
    (save-restriction
      (rfc2047-narrow-to-field)
      (rfc2047-unfold-region (point-min) (point-max)))))
f2307f18 DL |
568 | |
(defun rfc2047-unfold-region (b e)
  "Join folded continuation lines in the region B to E.
A line that begins with a space or a tab is joined to the line before
it, by deleting the line break between them, provided that the end of
the line is less than 76 characters from the start of the unfolded
line it continues.  The whitespace at the start of the continuation
line is kept."
  (save-restriction
    (narrow-to-region b e)
    (goto-char (point-min))
    (let ((line-start (save-restriction
                        (widen)
                        (rfc2047-point-at-bol)))
          (prev-eol (rfc2047-point-at-eol)))
      (forward-line 1)
      (while (not (eobp))
        (cond
         ((and (looking-at "[ \t]")
               (< (- (rfc2047-point-at-eol) line-start) 76))
          ;; Remove the CR/LF characters that end the previous line.
          (goto-char prev-eol)
          (skip-chars-forward "\r\n")
          (delete-region prev-eol (point)))
         (t
          ;; Not joined, so measure from the start of this line now.
          (setq line-start (rfc2047-point-at-bol))))
        (setq prev-eol (rfc2047-point-at-eol))
        (forward-line 1)))))
c113de23 GM |
589 | |
(defun rfc2047-b-encode-region (b e)
  "Base64-encode the header contained in region B to E.
Each line of the region is encoded separately and without line breaks
inside the encoded text.  An empty line is removed by joining it to
the line before it; an empty first line is left alone.
The buffer is narrowed to the region only for the duration of the
call, and point ends up at the end of the encoded text."
  (save-restriction
    (narrow-to-region (goto-char b) e)
    (while (not (eobp))
      (base64-encode-region (point) (progn (end-of-line) (point)) t)
      ;; Point is still at the start of the line only if the line was
      ;; empty.  Drop the newline in front of it, unless there is none
      ;; because this is the very first line: deleting backwards at the
      ;; start of the narrowed buffer would signal an error.
      (if (and (bolp) (eolp) (not (bobp)))
          (delete-char -1))
      (forward-line))))
599 | ||
(defun rfc2047-q-encode-region (b e)
  "Quoted-printable-encode the header in region B to E.
After encoding, every space is replaced by an underscore.  If the
result is 56 characters or longer, newlines are inserted to split it
into several lines, taking care not to split a \"=XX\" escape.
Point and the narrowing are left unchanged."
  (save-excursion
    (save-restriction
      (narrow-to-region (goto-char b) e)
      ;; Start of the line containing B, measured in the widened buffer.
      (let ((bol (save-restriction
                   (widen)
                   (rfc2047-point-at-bol))))
        (quoted-printable-encode-region
         b e nil
         ;; = (\075), _ (\137), ? (\077) are used in the encoded word.
         ;; Avoid using 8bit characters.
         ;; This list excludes `especials' (see the RFC2047 syntax),
         ;; meaning that some characters in non-structured fields will
         ;; get encoded when they don't need to be.  The following is
         ;; what it used to be.
;;;      ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?"
;;;      "\010\012\014\040-\074\076\100-\136\140-\177")
         "-\b\n\f !#-'*+0-9A-Z\\^`-~\d")
        (subst-char-in-region (point-min) (point-max) ?  ?_)
        ;; The size of QP encapsulation is about 20, so set limit to
        ;; 56=76-20.
        (unless (< (- (point-max) (point-min)) 56)
          ;; Don't break if it could fit in one line.
          ;; Let rfc2047-encode-region break it later.
          (goto-char (1+ (point-min)))
          (while (and (not (bobp)) (not (eobp)))
            (goto-char (min (point-max) (+ 56 bol)))
            ;; If a "=" lies within the two characters before point,
            ;; back up to it so that a "=XX" escape stays in one piece.
            (search-backward "=" (- (point) 2) t)
            (unless (or (bobp) (eobp))
              (insert ?\n)
              ;; The next chunk is measured from the new line start.
              (setq bol (point)))))))))
c113de23 GM |
632 | |
633 | ;;; | |
634 | ;;; Functions for decoding RFC2047 messages | |
635 | ;;; | |
636 | ||
23f87bed MB |
(eval-and-compile
  ;; Regexp matching a single encoded word, =?CHARSET?ENCODING?TEXT?=.
  ;; Group 1 is the charset, group 2 the encoding letter (B or Q) and
  ;; group 3 the encoded text.  It is defined at compile time as well
  ;; because `rfc2047-decode-region' builds a larger regexp from it
  ;; inside `eval-when-compile'.
  (defconst rfc2047-encoded-word-regexp
    "=\\?\\([^][\000-\040()<>@,\;:\\\"/?.=]+\\)\\?\\(B\\|Q\\)\
\\?\\([!->@-~ +]*\\)\\?="))
641 | ||
642 | ;; Fixme: This should decode in place, not cons intermediate strings. | |
643 | ;; Also check whether it needs to worry about delimiting fields like | |
644 | ;; encoding. | |
645 | ||
646 | ;; In fact it's reported that (invalid) encoding of mailboxes in | |
647 | ;; addr-specs is in use, so delimiting fields might help. Probably | |
648 | ;; not decoding a word which isn't properly delimited is good enough | |
649 | ;; and worthwhile (is it more correct or not?), e.g. something like | |
650 | ;; `=?iso-8859-1?q?foo?=@'. | |
c113de23 GM |
651 | |
(defun rfc2047-decode-region (start end)
  "Decode MIME-encoded words in region between START and END.
Whitespace that separates two adjacent encoded words is removed
first.  Each encoded word is then replaced by its decoded text, and
the text lying between encoded words is decoded using
`mail-parse-charset' when that is set to something other than
`us-ascii' or `gnus-decoded' and the buffer is multibyte.
Matching is done with `case-fold-search' bound to t."
  (interactive "r")
  (let ((case-fold-search t)
        ;; B and E delimit the not yet decoded text in front of the
        ;; encoded word currently being handled.
        b e)
    (save-excursion
      (save-restriction
        (narrow-to-region start end)
        (goto-char (point-min))
        ;; Remove whitespace between encoded words.  Groups 1 and 6
        ;; are the two encoded words; groups 2-4 belong to the first
        ;; word and group 5 is the whitespace.  Point is left at the
        ;; end of the first word, so the second word can in turn be
        ;; the first word of the next match.
        (while (re-search-forward
                (eval-when-compile
                  (concat "\\(" rfc2047-encoded-word-regexp "\\)"
                          "\\(\n?[ \t]\\)+"
                          "\\(" rfc2047-encoded-word-regexp "\\)"))
                nil t)
          (delete-region (goto-char (match-end 1)) (match-beginning 6)))
        ;; Decode the encoded words.
        (setq b (goto-char (point-min)))
        (while (re-search-forward rfc2047-encoded-word-regexp nil t)
          (setq e (match-beginning 0))
          ;; Swap the encoded word for its decoded text; point ends up
          ;; after the inserted text.
          (insert (rfc2047-parse-and-decode
                   (prog1
                       (match-string 0)
                     (delete-region (match-beginning 0) (match-end 0)))))
          ;; Remove newlines between decoded words, though such things
          ;; essentially must not be there.  Each run of CR/LF in the
          ;; text just inserted becomes a single space.
          (save-restriction
            (narrow-to-region e (point))
            (goto-char e)
            (while (re-search-forward "[\n\r]+" nil t)
              (replace-match " "))
            (goto-char (point-max)))
          ;; Decode the plain text that preceded this encoded word.
          (when (and (mm-multibyte-p)
                     mail-parse-charset
                     (not (eq mail-parse-charset 'us-ascii))
                     (not (eq mail-parse-charset 'gnus-decoded)))
            (mm-decode-coding-region b e mail-parse-charset))
          (setq b (point)))
        ;; Decode the plain text after the last encoded word.
        (when (and (mm-multibyte-p)
                   mail-parse-charset
                   (not (eq mail-parse-charset 'us-ascii))
                   (not (eq mail-parse-charset 'gnus-decoded)))
          (mm-decode-coding-region b (point-max) mail-parse-charset))))))
c113de23 GM |
696 | |
(defun rfc2047-decode-string (string)
  "Decode the RFC2047 encoded words in STRING and return the result.
If STRING contains \"=?\", it is decoded with `rfc2047-decode-region'
in a temporary buffer.  Otherwise it contains no encoded words: it is
then decoded with `mail-parse-charset' if that is set to something
other than `us-ascii' or `gnus-decoded' and the current buffer is
multibyte, and simply converted to a multibyte string if not."
  (let ((m (mm-multibyte-p)))
    (if (string-match "=\\?" string)
        (with-temp-buffer
          ;; Fixme: This logic is wrong, but seems to be required by
          ;; Gnus summary buffer generation.  The value of `m' depends
          ;; on the current buffer, not global multibyteness or that
          ;; of the string.  Also the string returned should always be
          ;; multibyte in a multibyte session, i.e. the buffer should
          ;; be multibyte before `buffer-string' is called.
          (when m
            (mm-enable-multibyte))
          (insert string)
          (inline
            (rfc2047-decode-region (point-min) (point-max)))
          (buffer-string))
      ;; Fixme: As above, `m' here is inappropriate.
      (if (and m
               mail-parse-charset
               (not (eq mail-parse-charset 'us-ascii))
               (not (eq mail-parse-charset 'gnus-decoded)))
          (mm-decode-coding-string string mail-parse-charset)
        (mm-string-as-multibyte string)))))
c113de23 | 721 | |
(defun rfc2047-parse-and-decode (word)
  "Decode WORD and return it if it is an encoded word.
Return WORD unchanged if it is not an encoded word, if the charset
isn't decodable, or if decoding signals an error."
  (or
   ;; Yields nil when WORD does not look like an encoded word and when
   ;; `rfc2047-decode' returns nil; a decoding error also yields nil.
   (and (string-match rfc2047-encoded-word-regexp word)
        (condition-case nil
            (rfc2047-decode
             (match-string 1 word)          ; charset
             (upcase (match-string 2 word)) ; encoding, "B" or "Q"
             (match-string 3 word))         ; encoded text
          (error nil)))
   ;; Not decoded: hand back the original.
   word))
736 | ||
(defun rfc2047-pad-base64 (string)
  "Return STRING padded with \"=\" to a multiple of four characters.
STRING is returned as is when its length is already a multiple of
four.  Otherwise any trailing \"=\" characters are removed and the
right number of them is added back.  A string that would need three
padding characters is invalid base64 and is returned without padding."
  ;; This is deliberately liberal, so that buggy base64 strings are
  ;; accepted.  Were `base64-decode-string' to accept such strings
  ;; itself, this function could be an alias for `identity'.
  (let ((remainder (% (length string) 4)))
    (if (zerop remainder)
        string
      (let* ((bare (if (string-match "=+$" string)
                       (substring string 0 (match-beginning 0))
                     string))
             (leftover (% (length bare) 4)))
        (cond
         ((= leftover 2) (concat bare "=="))
         ((= leftover 3) (concat bare "="))
         ;; 0 needs no padding; 1 is an error, so don't pad it.
         (t bare))))))
652dbc07 DL |
751 | |
(defun rfc2047-decode (charset encoding string)
  "Decode STRING, which is in MIME CHARSET and encoded with ENCODING.
ENCODING must be \"B\" or \"Q\"; anything else signals an error.
CHARSET may be a string or a symbol.  `mail-parse-charset' is used in
its place when CHARSET is nil or is excluded by
`mail-parse-ignored-charsets'.
Return nil if no coding system can be found for the charset."
  (when (stringp charset)
    (setq charset (intern (downcase charset))))
  (when (or (not charset)
            (eq 'gnus-all mail-parse-ignored-charsets)
            (memq 'gnus-all mail-parse-ignored-charsets)
            (memq charset mail-parse-ignored-charsets))
    (setq charset mail-parse-charset))
  (let ((coding-system (mm-charset-to-coding-system charset)))
    ;; An unknown charset falls back to `mail-parse-charset' if
    ;; `gnus-unknown' is among the ignored charsets.
    (when (and (not coding-system) charset
               (listp mail-parse-ignored-charsets)
               (memq 'gnus-unknown mail-parse-ignored-charsets))
      (setq coding-system (mm-charset-to-coding-system mail-parse-charset)))
    (when coding-system
      (when (and (eq coding-system 'ascii)
                 mail-parse-charset)
        (setq coding-system mail-parse-charset))
      ;; First undo the transfer encoding, then decode the bytes.
      (let ((raw (cond
                  ((equal "B" encoding)
                   (base64-decode-string
                    (rfc2047-pad-base64 string)))
                  ((equal "Q" encoding)
                   ;; In the Q encoding an underscore stands for a space.
                   (quoted-printable-decode-string
                    (mm-replace-chars-in-string string ?_ ? )))
                  (t (error "Invalid encoding: %s" encoding)))))
        (mm-decode-coding-string raw coding-system)))))
c113de23 GM |
782 | |
783 | (provide 'rfc2047) | |
784 | ||
ab5796a9 | 785 | ;;; arch-tag: a07fe3d4-22b5-4c4a-bd89-b1f82d5d36f6 |
c113de23 | 786 | ;;; rfc2047.el ends here |