Merge from emacs--rel--22
[bpt/emacs.git] / lisp / gnus / mm-bodies.el
CommitLineData
23f87bed
MB
1;;; mm-bodies.el --- Functions for decoding MIME things
2
e84b4b86 3;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004,
d7a0267c 4;; 2005, 2006, 2007 Free Software Foundation, Inc.
c113de23
GM
5
6;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
7;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to the
3a35cf56
LK
22;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23;; Boston, MA 02110-1301, USA.
c113de23
GM
24
25;;; Commentary:
26
27;;; Code:
28
29(eval-and-compile
30 (or (fboundp 'base64-decode-region)
058635a9 31 (require 'base64)))
8f716686 32
776ca44f
DL
33(eval-when-compile
34 (defvar mm-uu-decode-function)
35 (defvar mm-uu-binhex-decode-function))
c113de23
GM
36
37(require 'mm-util)
38(require 'rfc2047)
058635a9 39(require 'mm-encode)
c113de23 40
f4dd4ae8 41;; 8bit treatment gets any char except: 0x32 - 0x7f, LF, TAB, BEL,
c113de23 42;; BS, vertical TAB, form feed, and ^_
f4dd4ae8
MB
43;;
44;; Note that CR is *not* included, as that would allow a non-paired CR
45;; in the body contrary to RFC 2822:
46;;
47;; - CR and LF MUST only occur together as CRLF; they MUST NOT
48;; appear independently in the body.
49
50(defvar mm-7bit-chars "\x20-\x7f\n\t\x7\x8\xb\xc\x1f")
c113de23
GM
51
52(defcustom mm-body-charset-encoding-alist
53 '((iso-2022-jp . 7bit)
23f87bed
MB
54 (iso-2022-jp-2 . 7bit)
55 ;; We MUST encode UTF-16 because it can contain \0's which is
56 ;; known to break servers.
57 ;; Note: UTF-16 variants are invalid for text parts [RFC 2781],
58 ;; so this can't happen :-/.
bd29ba20
RS
59 ;; PPS: Yes, it can happen if the user specifies UTF-16 in the MML
60 ;; markup. - jh.
23f87bed
MB
61 (utf-16 . base64)
62 (utf-16be . base64)
63 (utf-16le . base64))
c113de23
GM
64 "Alist of MIME charsets to encodings.
65Valid encodings are `7bit', `8bit', `quoted-printable' and `base64'."
66 :type '(repeat (cons (symbol :tag "charset")
67 (choice :tag "encoding"
68 (const 7bit)
69 (const 8bit)
70 (const quoted-printable)
71 (const base64))))
72 :group 'mime)
73
23f87bed 74(defun mm-encode-body (&optional charset)
c113de23
GM
75 "Encode a body.
76Should be called narrowed to the body that is to be encoded.
23f87bed
MB
77If there is more than one non-ASCII MULE charset in the body, then the
78list of MULE charsets found is returned.
79If CHARSET is non-nil, it is used as the MIME charset to encode the body.
c113de23
GM
80If successful, the MIME charset is returned.
81If no encoding was done, nil is returned."
def5ff36 82 (if (not (mm-multibyte-p))
c113de23
GM
83 ;; In the non-Mule case, we search for non-ASCII chars and
84 ;; return the value of `mail-parse-charset' if any are found.
23f87bed
MB
85 (or charset
86 (save-excursion
87 (goto-char (point-min))
88 (if (re-search-forward "[^\x0-\x7f]" nil t)
89 (or mail-parse-charset
90 (message-options-get 'mm-encody-body-charset)
91 (message-options-set
92 'mm-encody-body-charset
93 (mm-read-coding-system "Charset used in the article: ")))
94 ;; The logic in `mml-generate-mime-1' confirms that it's OK
95 ;; to return nil here.
96 nil)))
c113de23 97 (save-excursion
23f87bed
MB
98 (if charset
99 (progn
11e22c4a
MB
100 (mm-encode-coding-region (point-min) (point-max)
101 (mm-charset-to-coding-system charset))
23f87bed
MB
102 charset)
103 (goto-char (point-min))
104 (let ((charsets (mm-find-mime-charset-region (point-min) (point-max)
105 mm-hack-charsets)))
106 (cond
107 ;; No encoding.
108 ((null charsets)
109 nil)
110 ;; Too many charsets.
111 ((> (length charsets) 1)
112 charsets)
113 ;; We encode.
114 (t
115 (prog1
116 (setq charset (car charsets))
117 (mm-encode-coding-region (point-min) (point-max)
118 (mm-charset-to-coding-system charset))))
119 ))))))
120
121(defun mm-long-lines-p (length)
122 "Say whether any of the lines in the buffer is longer than LENGTH."
123 (save-excursion
124 (goto-char (point-min))
125 (end-of-line)
126 (while (and (not (eobp))
127 (not (> (current-column) length)))
128 (forward-line 1)
129 (end-of-line))
130 (and (> (current-column) length)
131 (current-column))))
132
133(defvar message-posting-charset)
058635a9 134
c113de23
GM
135(defun mm-body-encoding (charset &optional encoding)
136 "Do Content-Transfer-Encoding and return the encoding of the current buffer."
23f87bed
MB
137 (when (stringp encoding)
138 (setq encoding (intern (downcase encoding))))
139 (let ((bits (mm-body-7-or-8))
140 (longp (mm-long-lines-p 1000)))
058635a9 141 (require 'message)
c113de23 142 (cond
23f87bed
MB
143 ((and (not longp)
144 (not (and mm-use-ultra-safe-encoding
2f7717f6
MB
145 (or (save-excursion (re-search-forward " $" nil t))
146 (save-excursion (re-search-forward "^From " nil t)))))
23f87bed 147 (eq bits '7bit))
c113de23
GM
148 bits)
149 ((and (not mm-use-ultra-safe-encoding)
23f87bed
MB
150 (not longp)
151 (not (cdr (assq charset mm-body-charset-encoding-alist)))
c113de23
GM
152 (or (eq t (cdr message-posting-charset))
153 (memq charset (cdr message-posting-charset))
154 (eq charset mail-parse-charset)))
155 bits)
156 (t
157 (let ((encoding (or encoding
158 (cdr (assq charset mm-body-charset-encoding-alist))
159 (mm-qp-or-base64))))
160 (when mm-use-ultra-safe-encoding
161 (setq encoding (mm-safer-encoding encoding)))
162 (mm-encode-content-transfer-encoding encoding "text/plain")
163 encoding)))))
164
165(defun mm-body-7-or-8 ()
166 "Say whether the body is 7bit or 8bit."
5d54c59d
DL
167 (if (save-excursion
168 (goto-char (point-min))
169 (skip-chars-forward mm-7bit-chars)
170 (eobp))
171 '7bit
172 '8bit))
c113de23
GM
173
174;;;
175;;; Functions for decoding
176;;;
177
23f87bed
MB
178(eval-when-compile (defvar mm-uu-yenc-decode-function))
179
c113de23 180(defun mm-decode-content-transfer-encoding (encoding &optional type)
23f87bed
MB
181 "Decodes buffer encoded with ENCODING, returning success status.
182If TYPE is `text/plain' CRLF->LF translation may occur."
c113de23
GM
183 (prog1
184 (condition-case error
185 (cond
186 ((eq encoding 'quoted-printable)
23f87bed
MB
187 (quoted-printable-decode-region (point-min) (point-max))
188 t)
c113de23
GM
189 ((eq encoding 'base64)
190 (base64-decode-region
191 (point-min)
192 ;; Some mailers insert whitespace
193 ;; junk at the end which
194 ;; base64-decode-region dislikes.
195 ;; Also remove possible junk which could
196 ;; have been added by mailing list software.
197 (save-excursion
198 (goto-char (point-min))
199 (while (re-search-forward "^[\t ]*\r?\n" nil t)
200 (delete-region (match-beginning 0) (match-end 0)))
201 (goto-char (point-max))
202 (when (re-search-backward "^[A-Za-z0-9+/]+=*[\t ]*$" nil t)
23f87bed
MB
203 (forward-line))
204 (point))))
c113de23
GM
205 ((memq encoding '(7bit 8bit binary))
206 ;; Do nothing.
23f87bed 207 t)
c113de23
GM
208 ((null encoding)
209 ;; Do nothing.
23f87bed 210 t)
c113de23 211 ((memq encoding '(x-uuencode x-uue))
058635a9 212 (require 'mm-uu)
23f87bed
MB
213 (funcall mm-uu-decode-function (point-min) (point-max))
214 t)
c113de23 215 ((eq encoding 'x-binhex)
058635a9 216 (require 'mm-uu)
23f87bed
MB
217 (funcall mm-uu-binhex-decode-function (point-min) (point-max))
218 t)
219 ((eq encoding 'x-yenc)
220 (require 'mm-uu)
221 (funcall mm-uu-yenc-decode-function (point-min) (point-max))
222 )
c113de23 223 ((functionp encoding)
23f87bed
MB
224 (funcall encoding (point-min) (point-max))
225 t)
c113de23
GM
226 (t
227 (message "Unknown encoding %s; defaulting to 8bit" encoding)))
228 (error
229 (message "Error while decoding: %s" error)
230 nil))
231 (when (and
3031d8b0 232 type
23f87bed 233 (memq encoding '(base64 x-uuencode x-uue x-binhex x-yenc))
4a2358e9 234 (string-match "\\`text/" type))
c113de23
GM
235 (goto-char (point-min))
236 (while (search-forward "\r\n" nil t)
237 (replace-match "\n" t t)))))
238
239(defun mm-decode-body (charset &optional encoding type)
23f87bed
MB
240 "Decode the current article that has been encoded with ENCODING to CHARSET.
241ENCODING is a MIME content transfer encoding.
242CHARSET is the MIME charset with which to decode the data after transfer
243decoding. If it is nil, default to `mail-parse-charset'."
244 (when (stringp charset)
245 (setq charset (intern (downcase charset))))
246 (when (or (not charset)
247 (eq 'gnus-all mail-parse-ignored-charsets)
248 (memq 'gnus-all mail-parse-ignored-charsets)
249 (memq charset mail-parse-ignored-charsets))
250 (setq charset mail-parse-charset))
c113de23
GM
251 (save-excursion
252 (when encoding
253 (mm-decode-content-transfer-encoding encoding type))
e499bc94
MB
254 (when (and (featurep 'mule) ;; Fixme: Wrong test for unibyte session.
255 (not (eq charset 'gnus-decoded)))
bd29ba20
RS
256 (let ((coding-system (mm-charset-to-coding-system
257 ;; Allow overwrite using
258 ;; `mm-charset-override-alist'.
259 charset nil t)))
058635a9 260 (if (and (not coding-system)
c113de23
GM
261 (listp mail-parse-ignored-charsets)
262 (memq 'gnus-unknown mail-parse-ignored-charsets))
5d54c59d 263 (setq coding-system
c113de23 264 (mm-charset-to-coding-system mail-parse-charset)))
058635a9 265 (when (and charset coding-system
c113de23
GM
266 ;; buffer-file-coding-system
267 ;;Article buffer is nil coding system
268 ;;in XEmacs
269 (mm-multibyte-p)
058635a9 270 (or (not (eq coding-system 'ascii))
e499bc94 271 (setq coding-system mail-parse-charset)))
23f87bed
MB
272 (mm-decode-coding-region (point-min) (point-max)
273 coding-system))
274 (setq buffer-file-coding-system
275 (if (boundp 'last-coding-system-used)
276 (symbol-value 'last-coding-system-used)
277 coding-system))))))
c113de23
GM
278
279(defun mm-decode-string (string charset)
280 "Decode STRING with CHARSET."
281 (when (stringp charset)
282 (setq charset (intern (downcase charset))))
5d54c59d 283 (when (or (not charset)
c113de23
GM
284 (eq 'gnus-all mail-parse-ignored-charsets)
285 (memq 'gnus-all mail-parse-ignored-charsets)
286 (memq charset mail-parse-ignored-charsets))
287 (setq charset mail-parse-charset))
288 (or
289 (when (featurep 'mule)
bd29ba20
RS
290 (let ((coding-system (mm-charset-to-coding-system
291 charset
292 ;; Allow overwrite using
293 ;; `mm-charset-override-alist'.
294 nil t)))
058635a9 295 (if (and (not coding-system)
c113de23
GM
296 (listp mail-parse-ignored-charsets)
297 (memq 'gnus-unknown mail-parse-ignored-charsets))
5d54c59d 298 (setq coding-system
c113de23 299 (mm-charset-to-coding-system mail-parse-charset)))
058635a9 300 (when (and charset coding-system
c113de23 301 (mm-multibyte-p)
058635a9
DL
302 (or (not (eq coding-system 'ascii))
303 (setq coding-system mail-parse-charset)))
304 (mm-decode-coding-string string coding-system))))
c113de23
GM
305 string))
306
307(provide 'mm-bodies)
308
ab5796a9 309;;; arch-tag: 41104bb6-4443-4ca9-8d5c-ff87ecf27d8d
715a2ca2 310;;; mm-bodies.el ends here