Commit | Line | Data |
---|---|---|
23f87bed MB |
1 | ;;; mm-bodies.el --- Functions for decoding MIME things |
2 | ||
e84b4b86 | 3 | ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
5df4f04c | 4 | ;; 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. |
c113de23 GM |
5 | |
6 | ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | |
7 | ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> | |
8 | ;; This file is part of GNU Emacs. | |
9 | ||
5e809f55 | 10 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
c113de23 | 11 | ;; it under the terms of the GNU General Public License as published by |
5e809f55 GM |
12 | ;; the Free Software Foundation, either version 3 of the License, or |
13 | ;; (at your option) any later version. | |
c113de23 GM |
14 | |
15 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
5e809f55 | 17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
c113de23 GM |
18 | ;; GNU General Public License for more details. |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
5e809f55 | 21 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
c113de23 GM |
22 | |
23 | ;;; Commentary: | |
24 | ||
25 | ;;; Code: | |
26 | ||
498fc2ae GM |
27 | ;; For Emacs < 22.2. |
28 | (eval-and-compile | |
29 | (unless (fboundp 'declare-function) (defmacro declare-function (&rest r)))) | |
30 | ||
c113de23 GM |
31 | (require 'mm-util) |
32 | (require 'rfc2047) | |
058635a9 | 33 | (require 'mm-encode) |
c113de23 | 34 | |
9efa445f DN |
35 | (defvar mm-uu-yenc-decode-function) |
36 | (defvar mm-uu-decode-function) | |
37 | (defvar mm-uu-binhex-decode-function) | |
38 | ||
f4dd4ae8 | 39 | ;; 8bit treatment gets any char except: 0x32 - 0x7f, LF, TAB, BEL, |
c113de23 | 40 | ;; BS, vertical TAB, form feed, and ^_ |
f4dd4ae8 MB |
41 | ;; |
42 | ;; Note that CR is *not* included, as that would allow a non-paired CR | |
43 | ;; in the body contrary to RFC 2822: | |
44 | ;; | |
45 | ;; - CR and LF MUST only occur together as CRLF; they MUST NOT | |
46 | ;; appear independently in the body. | |
47 | ||
48 | (defvar mm-7bit-chars "\x20-\x7f\n\t\x7\x8\xb\xc\x1f") | |
c113de23 GM |
49 | |
50 | (defcustom mm-body-charset-encoding-alist | |
51 | '((iso-2022-jp . 7bit) | |
23f87bed MB |
52 | (iso-2022-jp-2 . 7bit) |
53 | ;; We MUST encode UTF-16 because it can contain \0's which is | |
54 | ;; known to break servers. | |
55 | ;; Note: UTF-16 variants are invalid for text parts [RFC 2781], | |
56 | ;; so this can't happen :-/. | |
bd29ba20 RS |
57 | ;; PPS: Yes, it can happen if the user specifies UTF-16 in the MML |
58 | ;; markup. - jh. | |
23f87bed MB |
59 | (utf-16 . base64) |
60 | (utf-16be . base64) | |
61 | (utf-16le . base64)) | |
c113de23 GM |
62 | "Alist of MIME charsets to encodings. |
63 | Valid encodings are `7bit', `8bit', `quoted-printable' and `base64'." | |
64 | :type '(repeat (cons (symbol :tag "charset") | |
65 | (choice :tag "encoding" | |
66 | (const 7bit) | |
67 | (const 8bit) | |
68 | (const quoted-printable) | |
69 | (const base64)))) | |
70 | :group 'mime) | |
71 | ||
498fc2ae GM |
72 | (autoload 'message-options-get "message") |
73 | (declare-function message-options-set "message" (symbol value)) | |
74 | ||
23f87bed | 75 | (defun mm-encode-body (&optional charset) |
c113de23 GM |
76 | "Encode a body. |
77 | Should be called narrowed to the body that is to be encoded. | |
23f87bed MB |
78 | If there is more than one non-ASCII MULE charset in the body, then the |
79 | list of MULE charsets found is returned. | |
80 | If CHARSET is non-nil, it is used as the MIME charset to encode the body. | |
c113de23 GM |
81 | If successful, the MIME charset is returned. |
82 | If no encoding was done, nil is returned." | |
def5ff36 | 83 | (if (not (mm-multibyte-p)) |
c113de23 GM |
84 | ;; In the non-Mule case, we search for non-ASCII chars and |
85 | ;; return the value of `mail-parse-charset' if any are found. | |
23f87bed MB |
86 | (or charset |
87 | (save-excursion | |
88 | (goto-char (point-min)) | |
89 | (if (re-search-forward "[^\x0-\x7f]" nil t) | |
90 | (or mail-parse-charset | |
91 | (message-options-get 'mm-encody-body-charset) | |
92 | (message-options-set | |
93 | 'mm-encody-body-charset | |
94 | (mm-read-coding-system "Charset used in the article: "))) | |
95 | ;; The logic in `mml-generate-mime-1' confirms that it's OK | |
96 | ;; to return nil here. | |
97 | nil))) | |
c113de23 | 98 | (save-excursion |
23f87bed MB |
99 | (if charset |
100 | (progn | |
11e22c4a MB |
101 | (mm-encode-coding-region (point-min) (point-max) |
102 | (mm-charset-to-coding-system charset)) | |
23f87bed MB |
103 | charset) |
104 | (goto-char (point-min)) | |
f5490ddc MB |
105 | (let ((charsets (mm-find-mime-charset-region (point-min) (point-max) |
106 | mm-hack-charsets))) | |
23f87bed MB |
107 | (cond |
108 | ;; No encoding. | |
109 | ((null charsets) | |
110 | nil) | |
111 | ;; Too many charsets. | |
112 | ((> (length charsets) 1) | |
113 | charsets) | |
114 | ;; We encode. | |
115 | (t | |
116 | (prog1 | |
117 | (setq charset (car charsets)) | |
118 | (mm-encode-coding-region (point-min) (point-max) | |
119 | (mm-charset-to-coding-system charset)))) | |
120 | )))))) | |
121 | ||
122 | (defun mm-long-lines-p (length) | |
123 | "Say whether any of the lines in the buffer is longer than LENGTH." | |
124 | (save-excursion | |
125 | (goto-char (point-min)) | |
126 | (end-of-line) | |
127 | (while (and (not (eobp)) | |
128 | (not (> (current-column) length))) | |
129 | (forward-line 1) | |
130 | (end-of-line)) | |
131 | (and (> (current-column) length) | |
132 | (current-column)))) | |
133 | ||
134 | (defvar message-posting-charset) | |
058635a9 | 135 | |
c113de23 GM |
136 | (defun mm-body-encoding (charset &optional encoding) |
137 | "Do Content-Transfer-Encoding and return the encoding of the current buffer." | |
23f87bed MB |
138 | (when (stringp encoding) |
139 | (setq encoding (intern (downcase encoding)))) | |
140 | (let ((bits (mm-body-7-or-8)) | |
141 | (longp (mm-long-lines-p 1000))) | |
058635a9 | 142 | (require 'message) |
c113de23 | 143 | (cond |
23f87bed MB |
144 | ((and (not longp) |
145 | (not (and mm-use-ultra-safe-encoding | |
2f7717f6 MB |
146 | (or (save-excursion (re-search-forward " $" nil t)) |
147 | (save-excursion (re-search-forward "^From " nil t))))) | |
23f87bed | 148 | (eq bits '7bit)) |
c113de23 GM |
149 | bits) |
150 | ((and (not mm-use-ultra-safe-encoding) | |
23f87bed MB |
151 | (not longp) |
152 | (not (cdr (assq charset mm-body-charset-encoding-alist))) | |
c113de23 GM |
153 | (or (eq t (cdr message-posting-charset)) |
154 | (memq charset (cdr message-posting-charset)) | |
155 | (eq charset mail-parse-charset))) | |
156 | bits) | |
157 | (t | |
158 | (let ((encoding (or encoding | |
159 | (cdr (assq charset mm-body-charset-encoding-alist)) | |
160 | (mm-qp-or-base64)))) | |
161 | (when mm-use-ultra-safe-encoding | |
162 | (setq encoding (mm-safer-encoding encoding))) | |
163 | (mm-encode-content-transfer-encoding encoding "text/plain") | |
164 | encoding))))) | |
165 | ||
166 | (defun mm-body-7-or-8 () | |
167 | "Say whether the body is 7bit or 8bit." | |
5d54c59d DL |
168 | (if (save-excursion |
169 | (goto-char (point-min)) | |
170 | (skip-chars-forward mm-7bit-chars) | |
171 | (eobp)) | |
172 | '7bit | |
173 | '8bit)) | |
c113de23 GM |
174 | |
175 | ;;; | |
176 | ;;; Functions for decoding | |
177 | ;;; | |
178 | ||
179 | (defun mm-decode-content-transfer-encoding (encoding &optional type) | |
23f87bed MB |
180 | "Decodes buffer encoded with ENCODING, returning success status. |
181 | If TYPE is `text/plain' CRLF->LF translation may occur." | |
c113de23 GM |
182 | (prog1 |
183 | (condition-case error | |
184 | (cond | |
185 | ((eq encoding 'quoted-printable) | |
23f87bed MB |
186 | (quoted-printable-decode-region (point-min) (point-max)) |
187 | t) | |
c113de23 GM |
188 | ((eq encoding 'base64) |
189 | (base64-decode-region | |
190 | (point-min) | |
191 | ;; Some mailers insert whitespace | |
192 | ;; junk at the end which | |
193 | ;; base64-decode-region dislikes. | |
194 | ;; Also remove possible junk which could | |
195 | ;; have been added by mailing list software. | |
196 | (save-excursion | |
197 | (goto-char (point-min)) | |
198 | (while (re-search-forward "^[\t ]*\r?\n" nil t) | |
199 | (delete-region (match-beginning 0) (match-end 0))) | |
200 | (goto-char (point-max)) | |
201 | (when (re-search-backward "^[A-Za-z0-9+/]+=*[\t ]*$" nil t) | |
23f87bed MB |
202 | (forward-line)) |
203 | (point)))) | |
a88fd51a | 204 | ((memq encoding '(nil 7bit 8bit binary)) |
c113de23 | 205 | ;; Do nothing. |
23f87bed | 206 | t) |
c113de23 | 207 | ((memq encoding '(x-uuencode x-uue)) |
058635a9 | 208 | (require 'mm-uu) |
23f87bed MB |
209 | (funcall mm-uu-decode-function (point-min) (point-max)) |
210 | t) | |
c113de23 | 211 | ((eq encoding 'x-binhex) |
058635a9 | 212 | (require 'mm-uu) |
23f87bed MB |
213 | (funcall mm-uu-binhex-decode-function (point-min) (point-max)) |
214 | t) | |
215 | ((eq encoding 'x-yenc) | |
216 | (require 'mm-uu) | |
217 | (funcall mm-uu-yenc-decode-function (point-min) (point-max)) | |
218 | ) | |
c113de23 | 219 | ((functionp encoding) |
23f87bed MB |
220 | (funcall encoding (point-min) (point-max)) |
221 | t) | |
c113de23 GM |
222 | (t |
223 | (message "Unknown encoding %s; defaulting to 8bit" encoding))) | |
224 | (error | |
225 | (message "Error while decoding: %s" error) | |
226 | nil)) | |
227 | (when (and | |
3031d8b0 | 228 | type |
23f87bed | 229 | (memq encoding '(base64 x-uuencode x-uue x-binhex x-yenc)) |
4a2358e9 | 230 | (string-match "\\`text/" type)) |
c113de23 GM |
231 | (goto-char (point-min)) |
232 | (while (search-forward "\r\n" nil t) | |
233 | (replace-match "\n" t t))))) | |
234 | ||
235 | (defun mm-decode-body (charset &optional encoding type) | |
23f87bed MB |
236 | "Decode the current article that has been encoded with ENCODING to CHARSET. |
237 | ENCODING is a MIME content transfer encoding. | |
238 | CHARSET is the MIME charset with which to decode the data after transfer | |
239 | decoding. If it is nil, default to `mail-parse-charset'." | |
240 | (when (stringp charset) | |
241 | (setq charset (intern (downcase charset)))) | |
242 | (when (or (not charset) | |
243 | (eq 'gnus-all mail-parse-ignored-charsets) | |
244 | (memq 'gnus-all mail-parse-ignored-charsets) | |
245 | (memq charset mail-parse-ignored-charsets)) | |
246 | (setq charset mail-parse-charset)) | |
c113de23 GM |
247 | (save-excursion |
248 | (when encoding | |
249 | (mm-decode-content-transfer-encoding encoding type)) | |
e499bc94 MB |
250 | (when (and (featurep 'mule) ;; Fixme: Wrong test for unibyte session. |
251 | (not (eq charset 'gnus-decoded))) | |
bd29ba20 RS |
252 | (let ((coding-system (mm-charset-to-coding-system |
253 | ;; Allow overwrite using | |
254 | ;; `mm-charset-override-alist'. | |
255 | charset nil t))) | |
058635a9 | 256 | (if (and (not coding-system) |
c113de23 GM |
257 | (listp mail-parse-ignored-charsets) |
258 | (memq 'gnus-unknown mail-parse-ignored-charsets)) | |
5d54c59d | 259 | (setq coding-system |
c113de23 | 260 | (mm-charset-to-coding-system mail-parse-charset))) |
058635a9 | 261 | (when (and charset coding-system |
c113de23 GM |
262 | ;; buffer-file-coding-system |
263 | ;;Article buffer is nil coding system | |
264 | ;;in XEmacs | |
265 | (mm-multibyte-p) | |
058635a9 | 266 | (or (not (eq coding-system 'ascii)) |
e499bc94 | 267 | (setq coding-system mail-parse-charset))) |
23f87bed MB |
268 | (mm-decode-coding-region (point-min) (point-max) |
269 | coding-system)) | |
270 | (setq buffer-file-coding-system | |
271 | (if (boundp 'last-coding-system-used) | |
272 | (symbol-value 'last-coding-system-used) | |
273 | coding-system)))))) | |
c113de23 GM |
274 | |
275 | (defun mm-decode-string (string charset) | |
276 | "Decode STRING with CHARSET." | |
277 | (when (stringp charset) | |
278 | (setq charset (intern (downcase charset)))) | |
5d54c59d | 279 | (when (or (not charset) |
c113de23 GM |
280 | (eq 'gnus-all mail-parse-ignored-charsets) |
281 | (memq 'gnus-all mail-parse-ignored-charsets) | |
282 | (memq charset mail-parse-ignored-charsets)) | |
283 | (setq charset mail-parse-charset)) | |
284 | (or | |
285 | (when (featurep 'mule) | |
bd29ba20 RS |
286 | (let ((coding-system (mm-charset-to-coding-system |
287 | charset | |
288 | ;; Allow overwrite using | |
289 | ;; `mm-charset-override-alist'. | |
290 | nil t))) | |
058635a9 | 291 | (if (and (not coding-system) |
c113de23 GM |
292 | (listp mail-parse-ignored-charsets) |
293 | (memq 'gnus-unknown mail-parse-ignored-charsets)) | |
5d54c59d | 294 | (setq coding-system |
c113de23 | 295 | (mm-charset-to-coding-system mail-parse-charset))) |
058635a9 | 296 | (when (and charset coding-system |
c113de23 | 297 | (mm-multibyte-p) |
058635a9 DL |
298 | (or (not (eq coding-system 'ascii)) |
299 | (setq coding-system mail-parse-charset))) | |
300 | (mm-decode-coding-string string coding-system)))) | |
c113de23 GM |
301 | string)) |
302 | ||
303 | (provide 'mm-bodies) | |
304 | ||
a88fd51a | 305 | ;; arch-tag: 41104bb6-4443-4ca9-8d5c-ff87ecf27d8d |
715a2ca2 | 306 | ;;; mm-bodies.el ends here |