Commit | Line | Data |
---|---|---|
c113de23 GM |
1 | ;;; mm-bodies.el --- Functions for decoding MIME things |
2 | ;; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. | |
3 | ||
4 | ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | |
5 | ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> | |
6 | ;; This file is part of GNU Emacs. | |
7 | ||
8 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
9 | ;; it under the terms of the GNU General Public License as published by | |
10 | ;; the Free Software Foundation; either version 2, or (at your option) | |
11 | ;; any later version. | |
12 | ||
13 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
14 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | ;; GNU General Public License for more details. | |
17 | ||
18 | ;; You should have received a copy of the GNU General Public License | |
19 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
20 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
21 | ;; Boston, MA 02111-1307, USA. | |
22 | ||
23 | ;;; Commentary: | |
24 | ||
25 | ;;; Code: | |
26 | ||
27 | (eval-and-compile | |
28 | (or (fboundp 'base64-decode-region) | |
058635a9 | 29 | (require 'base64))) |
8f716686 | 30 | |
776ca44f DL |
31 | (eval-when-compile |
32 | (defvar mm-uu-decode-function) | |
33 | (defvar mm-uu-binhex-decode-function)) | |
c113de23 GM |
34 | |
35 | (require 'mm-util) | |
36 | (require 'rfc2047) | |
058635a9 | 37 | (require 'mm-encode) |
c113de23 GM |
38 | |
39 | ;; 8bit treatment gets any char except: 0x32 - 0x7f, CR, LF, TAB, BEL, | |
40 | ;; BS, vertical TAB, form feed, and ^_ | |
41 | (defvar mm-7bit-chars "\x20-\x7f\r\n\t\x7\x8\xb\xc\x1f") | |
42 | ||
43 | (defcustom mm-body-charset-encoding-alist | |
44 | '((iso-2022-jp . 7bit) | |
45 | (iso-2022-jp-2 . 7bit)) | |
46 | "Alist of MIME charsets to encodings. | |
47 | Valid encodings are `7bit', `8bit', `quoted-printable' and `base64'." | |
48 | :type '(repeat (cons (symbol :tag "charset") | |
49 | (choice :tag "encoding" | |
50 | (const 7bit) | |
51 | (const 8bit) | |
52 | (const quoted-printable) | |
53 | (const base64)))) | |
54 | :group 'mime) | |
55 | ||
56 | (defun mm-encode-body () | |
57 | "Encode a body. | |
58 | Should be called narrowed to the body that is to be encoded. | |
59 | If there is more than one non-ASCII MULE charset, then list of found | |
60 | MULE charsets are returned. | |
61 | If successful, the MIME charset is returned. | |
62 | If no encoding was done, nil is returned." | |
def5ff36 | 63 | (if (not (mm-multibyte-p)) |
c113de23 GM |
64 | ;; In the non-Mule case, we search for non-ASCII chars and |
65 | ;; return the value of `mail-parse-charset' if any are found. | |
66 | (save-excursion | |
67 | (goto-char (point-min)) | |
68 | (if (re-search-forward "[^\x0-\x7f]" nil t) | |
69 | (or mail-parse-charset | |
70 | (mm-read-charset "Charset used in the article: ")) | |
71 | ;; The logic in `mml-generate-mime-1' confirms that it's OK | |
72 | ;; to return nil here. | |
73 | nil)) | |
74 | (save-excursion | |
75 | (goto-char (point-min)) | |
76 | (let ((charsets (mm-find-mime-charset-region (point-min) (point-max))) | |
77 | charset) | |
78 | (cond | |
79 | ;; No encoding. | |
80 | ((null charsets) | |
81 | nil) | |
82 | ;; Too many charsets. | |
83 | ((> (length charsets) 1) | |
84 | charsets) | |
85 | ;; We encode. | |
86 | (t | |
87 | (let ((charset (car charsets)) | |
88 | start) | |
89 | (when (or t | |
90 | ;; We always decode. | |
91 | (not (mm-coding-system-equal | |
92 | charset buffer-file-coding-system))) | |
93 | (while (not (eobp)) | |
94 | (if (eq (mm-charset-after) 'ascii) | |
95 | (when start | |
96 | (save-restriction | |
97 | (narrow-to-region start (point)) | |
058635a9 DL |
98 | (mm-encode-coding-region |
99 | start (point) (mm-charset-to-coding-system charset)) | |
c113de23 GM |
100 | (goto-char (point-max))) |
101 | (setq start nil)) | |
102 | (unless start | |
103 | (setq start (point)))) | |
104 | (forward-char 1)) | |
105 | (when start | |
058635a9 DL |
106 | (mm-encode-coding-region start (point) |
107 | (mm-charset-to-coding-system charset)) | |
c113de23 GM |
108 | (setq start nil))) |
109 | charset))))))) | |
110 | ||
058635a9 DL |
111 | (eval-when-compile (defvar message-posting-charset)) |
112 | ||
c113de23 GM |
113 | (defun mm-body-encoding (charset &optional encoding) |
114 | "Do Content-Transfer-Encoding and return the encoding of the current buffer." | |
115 | (let ((bits (mm-body-7-or-8))) | |
058635a9 | 116 | (require 'message) |
c113de23 GM |
117 | (cond |
118 | ((and (not mm-use-ultra-safe-encoding) (eq bits '7bit)) | |
119 | bits) | |
120 | ((and (not mm-use-ultra-safe-encoding) | |
121 | (or (eq t (cdr message-posting-charset)) | |
122 | (memq charset (cdr message-posting-charset)) | |
123 | (eq charset mail-parse-charset))) | |
124 | bits) | |
125 | (t | |
126 | (let ((encoding (or encoding | |
127 | (cdr (assq charset mm-body-charset-encoding-alist)) | |
128 | (mm-qp-or-base64)))) | |
129 | (when mm-use-ultra-safe-encoding | |
130 | (setq encoding (mm-safer-encoding encoding))) | |
131 | (mm-encode-content-transfer-encoding encoding "text/plain") | |
132 | encoding))))) | |
133 | ||
134 | (defun mm-body-7-or-8 () | |
135 | "Say whether the body is 7bit or 8bit." | |
136 | (cond | |
137 | ((not (featurep 'mule)) | |
138 | (if (save-excursion | |
139 | (goto-char (point-min)) | |
140 | (skip-chars-forward mm-7bit-chars) | |
141 | (eobp)) | |
142 | '7bit | |
143 | '8bit)) | |
144 | (t | |
145 | ;; Mule version | |
146 | (if (and (null (delq 'ascii | |
147 | (mm-find-charset-region (point-min) (point-max)))) | |
148 | ;;!!!The following is necessary because the function | |
149 | ;;!!!above seems to return the wrong result under | |
150 | ;;!!!Emacs 20.3. Sometimes. | |
151 | (save-excursion | |
152 | (goto-char (point-min)) | |
153 | (skip-chars-forward mm-7bit-chars) | |
154 | (eobp))) | |
155 | '7bit | |
156 | '8bit)))) | |
157 | ||
158 | ;;; | |
159 | ;;; Functions for decoding | |
160 | ;;; | |
161 | ||
162 | (defun mm-decode-content-transfer-encoding (encoding &optional type) | |
163 | (prog1 | |
164 | (condition-case error | |
165 | (cond | |
166 | ((eq encoding 'quoted-printable) | |
167 | (quoted-printable-decode-region (point-min) (point-max))) | |
168 | ((eq encoding 'base64) | |
169 | (base64-decode-region | |
170 | (point-min) | |
171 | ;; Some mailers insert whitespace | |
172 | ;; junk at the end which | |
173 | ;; base64-decode-region dislikes. | |
174 | ;; Also remove possible junk which could | |
175 | ;; have been added by mailing list software. | |
176 | (save-excursion | |
177 | (goto-char (point-min)) | |
178 | (while (re-search-forward "^[\t ]*\r?\n" nil t) | |
179 | (delete-region (match-beginning 0) (match-end 0))) | |
180 | (goto-char (point-max)) | |
181 | (when (re-search-backward "^[A-Za-z0-9+/]+=*[\t ]*$" nil t) | |
182 | (forward-line) | |
183 | (delete-region (point) (point-max))) | |
184 | (point-max)))) | |
185 | ((memq encoding '(7bit 8bit binary)) | |
186 | ;; Do nothing. | |
187 | ) | |
188 | ((null encoding) | |
189 | ;; Do nothing. | |
190 | ) | |
191 | ((memq encoding '(x-uuencode x-uue)) | |
058635a9 | 192 | (require 'mm-uu) |
c113de23 GM |
193 | (funcall mm-uu-decode-function (point-min) (point-max))) |
194 | ((eq encoding 'x-binhex) | |
058635a9 | 195 | (require 'mm-uu) |
c113de23 GM |
196 | (funcall mm-uu-binhex-decode-function (point-min) (point-max))) |
197 | ((functionp encoding) | |
198 | (funcall encoding (point-min) (point-max))) | |
199 | (t | |
200 | (message "Unknown encoding %s; defaulting to 8bit" encoding))) | |
201 | (error | |
202 | (message "Error while decoding: %s" error) | |
203 | nil)) | |
204 | (when (and | |
205 | (memq encoding '(base64 x-uuencode x-uue x-binhex)) | |
206 | (equal type "text/plain")) | |
207 | (goto-char (point-min)) | |
208 | (while (search-forward "\r\n" nil t) | |
209 | (replace-match "\n" t t))))) | |
210 | ||
211 | (defun mm-decode-body (charset &optional encoding type) | |
212 | "Decode the current article that has been encoded with ENCODING. | |
213 | The characters in CHARSET should then be decoded." | |
214 | (if (stringp charset) | |
215 | (setq charset (intern (downcase charset)))) | |
216 | (if (or (not charset) | |
217 | (eq 'gnus-all mail-parse-ignored-charsets) | |
218 | (memq 'gnus-all mail-parse-ignored-charsets) | |
219 | (memq charset mail-parse-ignored-charsets)) | |
220 | (setq charset mail-parse-charset)) | |
221 | (save-excursion | |
222 | (when encoding | |
223 | (mm-decode-content-transfer-encoding encoding type)) | |
224 | (when (featurep 'mule) | |
058635a9 DL |
225 | (let ((coding-system (mm-charset-to-coding-system charset))) |
226 | (if (and (not coding-system) | |
c113de23 GM |
227 | (listp mail-parse-ignored-charsets) |
228 | (memq 'gnus-unknown mail-parse-ignored-charsets)) | |
058635a9 | 229 | (setq coding-system |
c113de23 | 230 | (mm-charset-to-coding-system mail-parse-charset))) |
058635a9 | 231 | (when (and charset coding-system |
c113de23 GM |
232 | ;; buffer-file-coding-system |
233 | ;;Article buffer is nil coding system | |
234 | ;;in XEmacs | |
235 | (mm-multibyte-p) | |
058635a9 DL |
236 | (or (not (eq coding-system 'ascii)) |
237 | (setq coding-system mail-parse-charset)) | |
238 | (not (eq coding-system 'gnus-decoded))) | |
239 | (mm-decode-coding-region (point-min) (point-max) coding-system)))))) | |
c113de23 GM |
240 | |
241 | (defun mm-decode-string (string charset) | |
242 | "Decode STRING with CHARSET." | |
243 | (when (stringp charset) | |
244 | (setq charset (intern (downcase charset)))) | |
245 | (when (or (not charset) | |
246 | (eq 'gnus-all mail-parse-ignored-charsets) | |
247 | (memq 'gnus-all mail-parse-ignored-charsets) | |
248 | (memq charset mail-parse-ignored-charsets)) | |
249 | (setq charset mail-parse-charset)) | |
250 | (or | |
251 | (when (featurep 'mule) | |
058635a9 DL |
252 | (let ((coding-system (mm-charset-to-coding-system charset))) |
253 | (if (and (not coding-system) | |
c113de23 GM |
254 | (listp mail-parse-ignored-charsets) |
255 | (memq 'gnus-unknown mail-parse-ignored-charsets)) | |
058635a9 | 256 | (setq coding-system |
c113de23 | 257 | (mm-charset-to-coding-system mail-parse-charset))) |
058635a9 | 258 | (when (and charset coding-system |
c113de23 | 259 | (mm-multibyte-p) |
058635a9 DL |
260 | (or (not (eq coding-system 'ascii)) |
261 | (setq coding-system mail-parse-charset))) | |
262 | (mm-decode-coding-string string coding-system)))) | |
c113de23 GM |
263 | string)) |
264 | ||
265 | (provide 'mm-bodies) | |
266 | ||
267 | ;; mm-bodies.el ends here |