Commit | Line | Data |
---|---|---|
c113de23 GM |
1 | ;;; rfc2047.el --- Functions for encoding and decoding rfc2047 messages |
2 | ;; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. | |
3 | ||
4 | ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | |
5 | ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> | |
6 | ;; This file is part of GNU Emacs. | |
7 | ||
8 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
9 | ;; it under the terms of the GNU General Public License as published by | |
10 | ;; the Free Software Foundation; either version 2, or (at your option) | |
11 | ;; any later version. | |
12 | ||
13 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
14 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | ;; GNU General Public License for more details. | |
17 | ||
18 | ;; You should have received a copy of the GNU General Public License | |
19 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
20 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
21 | ;; Boston, MA 02111-1307, USA. | |
22 | ||
23 | ;;; Commentary: | |
24 | ||
25 | ;;; Code: | |
26 | ||
f2307f18 | 27 | (eval-when-compile (require 'cl)) |
c113de23 GM |
28 | |
29 | (require 'qp) | |
30 | (require 'mm-util) | |
31 | (require 'ietf-drums) | |
32 | (require 'mail-prsvr) | |
f2307f18 DL |
33 | (require 'base64) |
34 | ;; Fixme: Avoid this (for gnus-point-at-...) mm dependence on gnus. | |
35 | (require 'gnus-util) | |
36 | (autoload 'mm-body-7-or-8 "mm-bodies") | |
1c33719f | 37 | |
c113de23 GM |
(defvar rfc2047-header-encoding-alist
  '(("Newsgroups" . nil)
    ("Message-ID" . nil)
    (t . mime))
  "*Header/encoding method alist.
The list is traversed sequentially, and the first entry whose key
matches decides how the header is encoded.  The keys can either be
header regexps (matched at the start of the header field) or t,
which matches every header.

The values can be:

1) nil, in which case no encoding is done;
2) `mime', in which case the header will be encoded according to RFC2047;
3) a charset, in which case it will be encoded as that charset;
4) `default', in which case the field will be encoded as the rest
   of the article.")
53 | ||
(defvar rfc2047-charset-encoding-alist
  '((us-ascii . nil)
    (iso-8859-1 . Q)
    (iso-8859-2 . Q)
    (iso-8859-3 . Q)
    (iso-8859-4 . Q)
    (iso-8859-5 . B)
    (koi8-r . B)
    (iso-8859-7 . Q)
    (iso-8859-8 . Q)
    (iso-8859-9 . Q)
    (iso-8859-14 . Q)
    (iso-8859-15 . Q)
    (iso-2022-jp . B)
    (iso-2022-kr . B)
    (gb2312 . B)
    (cn-gb . B)
    (cn-gb-2312 . B)
    (euc-kr . B)
    (iso-2022-jp-2 . B)
    (iso-2022-int-1 . B))
  "Alist of MIME charsets to RFC2047 encodings.
Valid encodings are nil, `Q' and `B'.
`rfc2047-encode' looks the MIME charset up here with `assq' and
falls back to `B' whenever the lookup yields nil.")
77 | ||
(defvar rfc2047-encoding-function-alist
  '((Q . rfc2047-q-encode-region)
    (B . rfc2047-b-encode-region)
    (nil . ignore))
  "Alist of RFC2047 encodings to encoding functions.
The keys are the encoding symbols used as values in
`rfc2047-charset-encoding-alist'.  `rfc2047-encode' calls the
selected function with two arguments, the start and the end of
the region to encode.")
83 | ||
(defvar rfc2047-q-encoding-alist
  ;; Address headers: only a conservative set of characters may stay
  ;; unencoded inside a Q encoded word.  Every alternative must be
  ;; separated by `\\|'; a lone `\|' in a Lisp string is just a literal
  ;; `|' and would silently disable the alternatives around it.
  '(("\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\):" . "-A-Za-z0-9!*+/")
    ;; = (\075), _ (\137), ? (\077) are used in the encoded word.
    ;; Avoid using 8bit characters.  Some versions of Emacs have a bug!
    ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?"
    ("." . "\010\012\014\040-\074\076\100-\136\140-\177"))
  "Alist of header regexps and valid Q characters.
Each element has the form (REGEXP . CLASS).  `rfc2047-q-encode-region'
walks the list in order and uses the CLASS of the first element whose
REGEXP matches at the start of the text being encoded; CLASS is passed
to `quoted-printable-encode-region' as the set of characters that do
not need to be encoded.")
91 | ||
92 | ;;; | |
93 | ;;; Functions for encoding RFC2047 messages | |
94 | ;;; | |
95 | ||
(defun rfc2047-narrow-to-field ()
  "Narrow the buffer to the header field that contains point.
The field starts at the beginning of the current line and extends
up to the next line that does not begin with a space, a tab or a
newline, or to the end of the buffer when there is no such line.
Point is left at the beginning of the narrowed region."
  (beginning-of-line)
  (let ((field-start (point))
        field-end)
    (forward-line 1)
    (setq field-end
          (cond
           ;; Continuation lines start with whitespace; the first line
           ;; that does not is the start of the next field.
           ((re-search-forward "^[^ \n\t]" nil t)
            (beginning-of-line)
            (point))
           (t
            (point-max))))
    (narrow-to-region field-start field-end))
  (goto-char (point-min)))
109 | ||
(defun rfc2047-encode-message-header ()
  "Encode the message header according to `rfc2047-header-encoding-alist'.
Should be called narrowed to the head of the message.

The header is processed one field at a time.  A field for which
`rfc2047-encodable-p' returns nil is left alone, except that it is
encoded with the car of `message-posting-charset' when the field is
8bit, the buffer is multibyte and that charset is a coding system.
For any other field the first matching entry of
`rfc2047-header-encoding-alist' selects the encoding method."
  (interactive "*")
  (save-excursion
    (goto-char (point-min))
    (let (alist elem method)
      ;; Each iteration handles one header field and then moves past it.
      (while (not (eobp))
        (save-restriction
          (rfc2047-narrow-to-field)
          (if (not (rfc2047-encodable-p))
              (if (and (eq (mm-body-7-or-8) '8bit)
                       (mm-multibyte-p)
                       (mm-coding-system-p
                        (car message-posting-charset)))
                  ;; 8 bit must be decoded.
                  ;; Is message-posting-charset a coding system?
                  (mm-encode-coding-region
                   (point-min) (point-max)
                   (car message-posting-charset)))
            ;; We found something that may perhaps be encoded.
            (setq method nil
                  alist rfc2047-header-encoding-alist)
            ;; Find the first alist entry that applies to this field;
            ;; clearing `alist' ends the search.
            (while (setq elem (pop alist))
              (when (or (and (stringp (car elem))
                             (looking-at (car elem)))
                        (eq (car elem) t))
                (setq alist nil
                      method (cdr elem))))
            (cond
             ;; RFC 2047 encoded words.
             ((eq method 'mime)
              (rfc2047-encode-region (point-min) (point-max)))
             ;; Same charset as the rest of the article.
             ((eq method 'default)
              (if (and (featurep 'mule)
                       (if (boundp 'default-enable-multibyte-characters)
                           default-enable-multibyte-characters)
                       mail-parse-charset)
                  (mm-encode-coding-region (point-min) (point-max)
                                           mail-parse-charset)))
             ;; An explicit charset that is a known coding system.
             ((mm-coding-system-p method)
              (if (and (featurep 'mule)
                       (if (boundp 'default-enable-multibyte-characters)
                           default-enable-multibyte-characters))
                  (mm-encode-coding-region (point-min) (point-max) method)))
             ;; Hm.
             ;; A nil or unrecognized method: leave the field untouched.
             (t)))
          ;; Move to the end of this field so that, once the restriction
          ;; is restored, point sits at the start of the next field.
          (goto-char (point-max)))))))
157 | ||
f2307f18 DL |
(defun rfc2047-encodable-p ()
  "Return non-nil if some text in the current buffer needs header encoding.
The accessible portion of the buffer is examined, so the buffer may
be narrowed.  Text needs encoding when its MIME charset is neither
`us-ascii' nor the car of `message-posting-charset'."
  (let ((mime-charsets
         (mapcar 'mm-mime-charset
                 (mm-find-charset-region (point-min) (point-max))))
        (harmless (list 'us-ascii (car message-posting-charset)))
        (needs-encoding nil))
    (dolist (charset mime-charsets)
      (or (memq charset harmless)
          (setq needs-encoding t)))
    needs-encoding))
171 | ||
(defun rfc2047-dissect-region (b e)
  "Dissect the region between B and E into words.
Return a list of (TEXT . CHARSET) pairs in buffer order.  CHARSET is
nil for stretches made only of characters in the range \\000-\\177,
which need no encoding; otherwise it is the value `mm-charset-after'
returned for the first character outside that range in TEXT."
  (let ((word-chars "-A-Za-z0-9!*+/")
        ;; Not using ietf-drums-specials-token makes life simple.
        ;; NOTE(review): `mail-parse-mule-charset' is bound to nil here,
        ;; presumably to influence `mm-charset-after' -- confirm.
        mail-parse-mule-charset
        words point current
        result word)
    (save-restriction
      (narrow-to-region b e)
      (goto-char (point-min))
      ;; Stop at the first character outside \000-\177.
      (skip-chars-forward "\000-\177")
      (while (not (eobp))
        (setq point (point))
        ;; Back up over word characters so they become part of the
        ;; word to encode; whatever precedes them is plain text.
        (skip-chars-backward word-chars b)
        (unless (eq b (point))
          (push (cons (buffer-substring b (point)) nil) words))
        (setq b (point))
        (goto-char point)
        (setq current (mm-charset-after))
        (forward-char 1)
        (skip-chars-forward word-chars)
        ;; Extend the word while the next character has the same charset.
        (while (and (not (eobp))
                    (eq (mm-charset-after) current))
          (forward-char 1)
          (skip-chars-forward word-chars))
        (unless (eq b (point))
          (push (cons (buffer-substring b (point)) current) words))
        (setq b (point))
        (skip-chars-forward "\000-\177"))
      ;; Trailing plain text after the last word to encode.
      (unless (eq b (point))
        (push (cons (buffer-substring b (point)) nil) words)))
    ;; merge adjacent words
    ;; `words' is in reverse buffer order, so (car words) is the piece
    ;; that precedes `word' in the buffer and (nth 1 words) the piece
    ;; before that.  Pushing onto `result' restores buffer order.
    (setq word (pop words))
    (while word
      (if (and (cdr word)
               (caar words)
               (not (cdar words))
               (not (string-match "[^ \t]" (caar words))))
          ;; `word' is to be encoded and is preceded by plain text
          ;; consisting only of spaces and tabs.
          (if (eq (cdr (nth 1 words)) (cdr word))
              ;; The piece before the whitespace has the same charset:
              ;; join all three into one word and examine it again.
              (progn
                (setq word (cons (concat
                                  (car (nth 1 words)) (caar words)
                                  (car word))
                                 (cdr word)))
                (pop words)
                (pop words))
            ;; Otherwise attach the whitespace to the front of `word'.
            (push (cons (concat (caar words) (car word)) (cdr word))
                  result)
            (pop words)
            (setq word (pop words)))
        (push word result)
        (setq word (pop words))))
    result))
c113de23 GM |
225 | |
(defun rfc2047-encode-region (b e)
  "Encode all encodable words in the region between B and E.
The region is split with `rfc2047-dissect-region', emptied, and
rebuilt piece by piece: plain pieces are reinserted verbatim and
the others are encoded with `rfc2047-encode'.  Long lines are
folded with `rfc2047-fold-region'."
  (let ((pieces (rfc2047-dissect-region b e))
        piece)
    (save-restriction
      (narrow-to-region b e)
      (delete-region (point-min) (point-max))
      (while (setq piece (pop pieces))
        (cond
         ((null (cdr piece))
          ;; No charset: plain text, reinsert as is.
          (insert (car piece)))
         (t
          (rfc2047-fold-region (gnus-point-at-bol) (point))
          (goto-char (point-max))
          ;; Start a continuation line when the current line, measured
          ;; in the widened buffer, is already too long.
          (when (> (- (point) (save-restriction
                                (widen)
                                (gnus-point-at-bol)))
                   76)
            (insert "\n "))
          ;; Insert blank between encoded words
          (when (eq (char-before) ?=)
            (insert " "))
          (let ((word-start (point)))
            (insert (car piece))
            (rfc2047-encode word-start (point) (cdr piece))))))
      (rfc2047-fold-region (point-min) (point-max)))))
c113de23 GM |
247 | |
(defun rfc2047-encode-string (string)
  "Return STRING with its encodable words encoded.
The work is done by `rfc2047-encode-region' in a temporary buffer."
  (with-temp-buffer
    (insert string)
    (let ((start (point-min))
          (end (point-max)))
      (rfc2047-encode-region start end))
    (buffer-string)))
254 | ||
(defun rfc2047-encode (b e charset)
  "Encode the word in the region B to E with CHARSET.
The encoding is looked up in `rfc2047-charset-encoding-alist' and
defaults to `B'.  Every resulting line is wrapped in
\"=?CHARSET?ENCODING?\" and \"?=\", and every line but the first is
preceded by a space."
  (let* ((mime-charset (mm-mime-charset charset))
         (encoding (or (cdr (assq mime-charset
                                  rfc2047-charset-encoding-alist))
                       'B))
         (prefix (concat "=?" (downcase (symbol-name mime-charset))
                         "?" (downcase (symbol-name encoding)) "?"))
         (at-first-line t))
    (save-restriction
      (narrow-to-region b e)
      (when (eq encoding 'B)
        ;; break into lines before encoding
        ;; Each line holds at most 15 characters.
        (goto-char (point-min))
        (while (not (eobp))
          (goto-char (min (point-max) (+ 15 (point))))
          (or (eobp)
              (insert "\n"))))
      (when (and (mm-multibyte-p)
                 (mm-coding-system-p mime-charset))
        (mm-encode-coding-region (point-min) (point-max) mime-charset))
      (funcall (cdr (assq encoding rfc2047-encoding-function-alist))
               (point-min) (point-max))
      ;; Turn every line into an encoded word of its own.
      (goto-char (point-min))
      (while (not (eobp))
        (if at-first-line
            (setq at-first-line nil)
          (insert " "))
        (insert prefix)
        (end-of-line)
        (insert "?=")
        (forward-line 1)))))
288 | ||
(defun rfc2047-fold-region (b e)
  "Fold long lines in the region between B and E.
While scanning, remember the most recent place where a line may be
broken.  Once point is more than 76 characters past the start of the
current line, insert a newline followed by a space at the remembered
place.  A break is not inserted when no such place has been seen."
  (save-restriction
    (narrow-to-region b e)
    (goto-char (point-min))
    ;; `break' is the position of the last character of the most recent
    ;; run of spaces/tabs.  `qword-break' is the start of the most recent
    ;; \"=?\" sequence seen while `break' was nil.  `bol' is the start of
    ;; the current line, initially taken from the widened buffer.
    (let ((break nil)
          (qword-break nil)
          (bol (save-restriction
                 (widen)
                 (gnus-point-at-bol))))
      (while (not (eobp))
        ;; Line too long and a break point is known: fold there.
        (when (and (or break qword-break) (> (- (point) bol) 76))
          (goto-char (or break qword-break))
          (setq break nil
                qword-break nil)
          (insert "\n ")
          ;; The new line starts at the space that was just inserted.
          (setq bol (1- (point)))
          ;; Don't break before the first non-LWSP characters.
          (skip-chars-forward " \t")
          (forward-char 1))
        (cond
         ;; Existing newline: a new line starts, forget break points.
         ((eq (char-after) ?\n)
          (forward-char 1)
          (setq bol (point)
                break nil
                qword-break nil)
          (skip-chars-forward " \t")
          (unless (or (eobp) (eq (char-after) ?\n))
            (forward-char 1)))
         ;; Carriage return: step over it.
         ((eq (char-after) ?\r)
          (forward-char 1))
         ;; Whitespace: remember the last character of the run.
         ((memq (char-after) '(? ?\t))
          (skip-chars-forward " \t")
          (setq break (1- (point))))
         ;; No whitespace break known yet: watch for \"=?\" sequences.
         ((not break)
          (if (not (looking-at "=\\?[^=]"))
              (if (eq (char-after) ?=)
                  (forward-char 1)
                (skip-chars-forward "^ \t\n\r="))
            (setq qword-break (point))
            (skip-chars-forward "^ \t\n\r")))
         ;; Otherwise skip to the next whitespace or line end.
         (t
          (skip-chars-forward "^ \t\n\r"))))
      ;; Same check once more for the text scanned last.
      (when (and (or break qword-break) (> (- (point) bol) 76))
        (goto-char (or break qword-break))
        (setq break nil
              qword-break nil)
        (insert "\n ")
        (setq bol (1- (point)))
        ;; Don't break before the first non-LWSP characters.
        (skip-chars-forward " \t")
        (forward-char 1)))))
341 | ||
(defun rfc2047-unfold-region (b e)
  "Unfold lines in the region between B and E.
A line is joined to the previous one when the joined line, not
counting the leading whitespace of the second line, stays shorter
than 76 characters.  The line break and the leading whitespace are
replaced by the last whitespace character of that run."
  (save-restriction
    (narrow-to-region b e)
    (goto-char (point-min))
    ;; `bol' is the start of the line being built up (taken from the
    ;; widened buffer for the first line), `eol' the end of the
    ;; previous line, `leading' the amount of leading whitespace on
    ;; the line at point.
    (let ((bol (save-restriction
                 (widen)
                 (gnus-point-at-bol)))
          (eol (gnus-point-at-eol))
          leading)
      (forward-line 1)
      (while (not (eobp))
        (looking-at "[ \t]*")
        (setq leading (- (match-end 0) (match-beginning 0)))
        (if (< (- (gnus-point-at-eol) bol leading) 76)
            (progn
              (goto-char eol)
              ;; `skip-chars-forward' takes a character set, not a
              ;; regexp: writing \"[ \\t\\n\\r]+\" here would also skip
              ;; over `[', `]' and `+' in the header text and delete
              ;; the separating whitespace before them.
              (delete-region eol (progn
                                   (skip-chars-forward " \t\n\r")
                                   ;; Keep one whitespace character.
                                   (1- (point)))))
          ;; Too long to join: the next line starts a new unit.
          (setq bol (gnus-point-at-bol)))
        (setq eol (gnus-point-at-eol))
        (forward-line 1)))))
c113de23 GM |
365 | |
(defun rfc2047-b-encode-region (b e)
  "Base64-encode the header contained in region B to E.
Each line of the region is encoded separately with
`base64-encode-region', without line breaks in the output."
  (goto-char b)
  (save-restriction
    (narrow-to-region b e)
    (while (not (eobp))
      (let ((line-start (point)))
        (end-of-line)
        (base64-encode-region line-start (point) t))
      ;; An empty line: remove the newline in front of it.
      (when (and (bolp) (eolp))
        (delete-backward-char 1))
      (forward-line))))
375 | ||
(defun rfc2047-q-encode-region (b e)
  "Quoted-printable-encode the header in region B to E.
The set of characters left unencoded comes from the first entry of
`rfc2047-q-encoding-alist' whose regexp matches at B.  Spaces are
then replaced by underscores.  If the result is 56 characters or
longer, newlines are inserted to split it into shorter pieces."
  (save-excursion
    (save-restriction
      (narrow-to-region (goto-char b) e)
      ;; `bol' is the start of the line containing B in the widened
      ;; buffer; later it is the start of the piece being measured.
      (let ((alist rfc2047-q-encoding-alist)
            (bol (save-restriction
                   (widen)
                   (gnus-point-at-bol))))
        ;; Encode with the first matching entry; setting `alist' to nil
        ;; ends the loop (popping nil is harmless).
        (while alist
          (when (looking-at (caar alist))
            (quoted-printable-encode-region b e nil (cdar alist))
            (subst-char-in-region (point-min) (point-max) ? ?_)
            (setq alist nil))
          (pop alist))
        ;; The size of QP encapsulation is about 20, so set limit to
        ;; 56=76-20.
        (unless (< (- (point-max) (point-min)) 56)
          ;; Don't break if it could fit in one line.
          ;; Let rfc2047-encode-region break it later.
          (goto-char (1+ (point-min)))
          (while (and (not (bobp)) (not (eobp)))
            (goto-char (min (point-max) (+ 56 bol)))
            ;; If a \"=\" lies within the two preceding characters, move
            ;; back to it -- presumably so that a \"=XX\" escape is not
            ;; split across lines.
            (search-backward "=" (- (point) 2) t)
            (unless (or (bobp) (eobp))
              (insert "\n")
              (setq bol (point)))))))))
c113de23 GM |
403 | |
404 | ;;; | |
405 | ;;; Functions for decoding RFC2047 messages | |
406 | ;;; | |
407 | ||
;; Regexp matching one encoded word of the form
;; "=?CHARSET?ENCODING?TEXT?=".  As used by `rfc2047-parse-and-decode',
;; group 1 is the charset, group 2 the encoding letter (B or Q) and
;; group 3 the encoded text.
(defvar rfc2047-encoded-word-regexp
  "=\\?\\([^][\000-\040()<>@,\;:\\\"/?.=]+\\)\\?\\(B\\|Q\\)\\?\\([!->@-~ +]+\\)\\?=")
410 | ||
(defun rfc2047-decode-region (start end)
  "Decode MIME-encoded words in region between START and END.
Whitespace that separates two adjacent encoded words is removed,
every encoded word is replaced by its decoded text, and the result
is unfolded with `rfc2047-unfold-region'.  In a multibyte session
the text outside encoded words is decoded with `mail-parse-charset',
unless that is nil, `us-ascii' or `gnus-decoded'."
  (interactive "r")
  (let ((case-fold-search t)
        b e)
    (save-excursion
      (save-restriction
        (narrow-to-region start end)
        (goto-char (point-min))
        ;; Remove whitespace between encoded words.
        ;; Group 1 is the first encoded word and group 6 the second
        ;; one; the groups in between belong to the first word and to
        ;; the whitespace.  Point is left at the start of the second
        ;; word so that it can pair up with a third one.
        (while (re-search-forward
                (concat "\\(" rfc2047-encoded-word-regexp "\\)"
                        "\\(\n?[ \t]\\)+"
                        "\\(" rfc2047-encoded-word-regexp "\\)")
                nil t)
          (delete-region (goto-char (match-end 1)) (match-beginning 6)))
        ;; Decode the encoded words.
        ;; B is the start of the not yet processed plain text, E the
        ;; start of the encoded word that ends it.
        (setq b (goto-char (point-min)))
        (while (re-search-forward rfc2047-encoded-word-regexp nil t)
          (setq e (match-beginning 0))
          (insert (rfc2047-parse-and-decode
                   (prog1
                       (match-string 0)
                     (delete-region (match-beginning 0) (match-end 0)))))
          ;; Decode the plain text in front of the word.  The test must
          ;; be the same as the one used for the trailing text below,
          ;; otherwise the two stretches are treated differently when
          ;; `mail-parse-charset' is `us-ascii'.
          (when (and (mm-multibyte-p)
                     mail-parse-charset
                     (not (eq mail-parse-charset 'us-ascii))
                     (not (eq mail-parse-charset 'gnus-decoded)))
            (mm-decode-coding-region b e mail-parse-charset))
          (setq b (point)))
        ;; Decode the plain text after the last encoded word.
        (when (and (mm-multibyte-p)
                   mail-parse-charset
                   (not (eq mail-parse-charset 'us-ascii))
                   (not (eq mail-parse-charset 'gnus-decoded)))
          (mm-decode-coding-region b (point-max) mail-parse-charset))
        (rfc2047-unfold-region (point-min) (point-max))))))
c113de23 GM |
446 | |
(defun rfc2047-decode-string (string)
  "Decode the encoded words in STRING and return the result.
The work is done by `rfc2047-decode-region' in a temporary buffer,
which is made multibyte when `mm-multibyte-p' returns non-nil in
the calling context."
  (let ((multibyte-p (mm-multibyte-p)))
    (with-temp-buffer
      (if multibyte-p
          (mm-enable-multibyte))
      (insert string)
      (inline
        (rfc2047-decode-region (point-min) (point-max)))
      (buffer-string))))
457 | ||
(defun rfc2047-parse-and-decode (word)
  "Decode WORD and return it if it is an encoded word.
Return WORD unchanged if it does not match
`rfc2047-encoded-word-regexp', if decoding signals an error, or if
`rfc2047-decode' returns nil."
  (or (and (string-match rfc2047-encoded-word-regexp word)
           (condition-case nil
               (rfc2047-decode (match-string 1 word)
                               (upcase (match-string 2 word))
                               (match-string 3 word))
             ;; Treat an error like an undecodable word.
             (error nil)))
      word))
471 | ||
(defun rfc2047-decode (charset encoding string)
  "Decode STRING from the given MIME CHARSET in the given ENCODING.
Valid ENCODINGs are \"B\" and \"Q\"; any other value signals an error.
CHARSET may be a string or a symbol.  `mail-parse-charset' is used
instead when CHARSET is nil or is ignored according to
`mail-parse-ignored-charsets'.
If your Emacs implementation can't decode CHARSET, return nil."
  (when (stringp charset)
    (setq charset (intern (downcase charset))))
  (when (or (null charset)
            (eq 'gnus-all mail-parse-ignored-charsets)
            (memq 'gnus-all mail-parse-ignored-charsets)
            (memq charset mail-parse-ignored-charsets))
    (setq charset mail-parse-charset))
  (let ((coding (mm-charset-to-coding-system charset)))
    ;; Unknown charset: fall back to `mail-parse-charset' when asked to.
    (when (and (null coding)
               charset
               (listp mail-parse-ignored-charsets)
               (memq 'gnus-unknown mail-parse-ignored-charsets))
      (setq coding (mm-charset-to-coding-system mail-parse-charset)))
    (when coding
      (if (and (eq coding 'ascii)
               mail-parse-charset)
          (setq coding mail-parse-charset))
      ;; Ensure unibyte result in Emacs 20.
      (let (default-enable-multibyte-characters)
        (with-temp-buffer
          (let ((raw (cond
                      ((equal "B" encoding)
                       (base64-decode-string string))
                      ((equal "Q" encoding)
                       ;; Underscores stand for spaces in Q encoding.
                       (quoted-printable-decode-string
                        (mm-replace-chars-in-string string ?_ ? )))
                      (t
                       (error "Invalid encoding: %s" encoding)))))
            (mm-decode-coding-string raw coding)))))))
c113de23 GM |
504 | |
505 | (provide 'rfc2047) | |
506 | ||
507 | ;;; rfc2047.el ends here |