Update Gnus to No Gnus 0.7 from the Gnus CVS trunk
[bpt/emacs.git] / lisp / gnus / mm-bodies.el
CommitLineData
23f87bed
MB
1;;; mm-bodies.el --- Functions for decoding MIME things
2
e84b4b86 3;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004,
d7a0267c 4;; 2005, 2006, 2007 Free Software Foundation, Inc.
c113de23
GM
5
6;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
7;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
5a9dffec 12;; the Free Software Foundation; either version 3, or (at your option)
c113de23
GM
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to the
3a35cf56
LK
22;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23;; Boston, MA 02110-1301, USA.
c113de23
GM
24
25;;; Commentary:
26
27;;; Code:
28
776ca44f
DL
29(eval-when-compile
30 (defvar mm-uu-decode-function)
31 (defvar mm-uu-binhex-decode-function))
c113de23
GM
32
33(require 'mm-util)
34(require 'rfc2047)
058635a9 35(require 'mm-encode)
c113de23 36
f4dd4ae8 37;; 8bit treatment gets any char except: 0x32 - 0x7f, LF, TAB, BEL,
c113de23 38;; BS, vertical TAB, form feed, and ^_
f4dd4ae8
MB
39;;
40;; Note that CR is *not* included, as that would allow a non-paired CR
41;; in the body contrary to RFC 2822:
42;;
43;; - CR and LF MUST only occur together as CRLF; they MUST NOT
44;; appear independently in the body.
45
46(defvar mm-7bit-chars "\x20-\x7f\n\t\x7\x8\xb\xc\x1f")
c113de23
GM
47
48(defcustom mm-body-charset-encoding-alist
49 '((iso-2022-jp . 7bit)
23f87bed
MB
50 (iso-2022-jp-2 . 7bit)
51 ;; We MUST encode UTF-16 because it can contain \0's which is
52 ;; known to break servers.
53 ;; Note: UTF-16 variants are invalid for text parts [RFC 2781],
54 ;; so this can't happen :-/.
bd29ba20
RS
55 ;; PPS: Yes, it can happen if the user specifies UTF-16 in the MML
56 ;; markup. - jh.
23f87bed
MB
57 (utf-16 . base64)
58 (utf-16be . base64)
59 (utf-16le . base64))
c113de23
GM
60 "Alist of MIME charsets to encodings.
61Valid encodings are `7bit', `8bit', `quoted-printable' and `base64'."
62 :type '(repeat (cons (symbol :tag "charset")
63 (choice :tag "encoding"
64 (const 7bit)
65 (const 8bit)
66 (const quoted-printable)
67 (const base64))))
68 :group 'mime)
69
23f87bed 70(defun mm-encode-body (&optional charset)
c113de23
GM
71 "Encode a body.
72Should be called narrowed to the body that is to be encoded.
23f87bed
MB
73If there is more than one non-ASCII MULE charset in the body, then the
74list of MULE charsets found is returned.
75If CHARSET is non-nil, it is used as the MIME charset to encode the body.
c113de23
GM
76If successful, the MIME charset is returned.
77If no encoding was done, nil is returned."
def5ff36 78 (if (not (mm-multibyte-p))
c113de23
GM
79 ;; In the non-Mule case, we search for non-ASCII chars and
80 ;; return the value of `mail-parse-charset' if any are found.
23f87bed
MB
81 (or charset
82 (save-excursion
83 (goto-char (point-min))
84 (if (re-search-forward "[^\x0-\x7f]" nil t)
85 (or mail-parse-charset
86 (message-options-get 'mm-encody-body-charset)
87 (message-options-set
88 'mm-encody-body-charset
89 (mm-read-coding-system "Charset used in the article: ")))
90 ;; The logic in `mml-generate-mime-1' confirms that it's OK
91 ;; to return nil here.
92 nil)))
c113de23 93 (save-excursion
23f87bed
MB
94 (if charset
95 (progn
11e22c4a
MB
96 (mm-encode-coding-region (point-min) (point-max)
97 (mm-charset-to-coding-system charset))
23f87bed
MB
98 charset)
99 (goto-char (point-min))
100 (let ((charsets (mm-find-mime-charset-region (point-min) (point-max)
101 mm-hack-charsets)))
102 (cond
103 ;; No encoding.
104 ((null charsets)
105 nil)
106 ;; Too many charsets.
107 ((> (length charsets) 1)
108 charsets)
109 ;; We encode.
110 (t
111 (prog1
112 (setq charset (car charsets))
113 (mm-encode-coding-region (point-min) (point-max)
114 (mm-charset-to-coding-system charset))))
115 ))))))
116
117(defun mm-long-lines-p (length)
118 "Say whether any of the lines in the buffer is longer than LENGTH."
119 (save-excursion
120 (goto-char (point-min))
121 (end-of-line)
122 (while (and (not (eobp))
123 (not (> (current-column) length)))
124 (forward-line 1)
125 (end-of-line))
126 (and (> (current-column) length)
127 (current-column))))
128
129(defvar message-posting-charset)
058635a9 130
c113de23
GM
131(defun mm-body-encoding (charset &optional encoding)
132 "Do Content-Transfer-Encoding and return the encoding of the current buffer."
23f87bed
MB
133 (when (stringp encoding)
134 (setq encoding (intern (downcase encoding))))
135 (let ((bits (mm-body-7-or-8))
136 (longp (mm-long-lines-p 1000)))
058635a9 137 (require 'message)
c113de23 138 (cond
23f87bed
MB
139 ((and (not longp)
140 (not (and mm-use-ultra-safe-encoding
2f7717f6
MB
141 (or (save-excursion (re-search-forward " $" nil t))
142 (save-excursion (re-search-forward "^From " nil t)))))
23f87bed 143 (eq bits '7bit))
c113de23
GM
144 bits)
145 ((and (not mm-use-ultra-safe-encoding)
23f87bed
MB
146 (not longp)
147 (not (cdr (assq charset mm-body-charset-encoding-alist)))
c113de23
GM
148 (or (eq t (cdr message-posting-charset))
149 (memq charset (cdr message-posting-charset))
150 (eq charset mail-parse-charset)))
151 bits)
152 (t
153 (let ((encoding (or encoding
154 (cdr (assq charset mm-body-charset-encoding-alist))
155 (mm-qp-or-base64))))
156 (when mm-use-ultra-safe-encoding
157 (setq encoding (mm-safer-encoding encoding)))
158 (mm-encode-content-transfer-encoding encoding "text/plain")
159 encoding)))))
160
161(defun mm-body-7-or-8 ()
162 "Say whether the body is 7bit or 8bit."
5d54c59d
DL
163 (if (save-excursion
164 (goto-char (point-min))
165 (skip-chars-forward mm-7bit-chars)
166 (eobp))
167 '7bit
168 '8bit))
c113de23
GM
169
170;;;
171;;; Functions for decoding
172;;;
173
23f87bed
MB
174(eval-when-compile (defvar mm-uu-yenc-decode-function))
175
c113de23 176(defun mm-decode-content-transfer-encoding (encoding &optional type)
23f87bed
MB
177 "Decodes buffer encoded with ENCODING, returning success status.
178If TYPE is `text/plain' CRLF->LF translation may occur."
c113de23
GM
179 (prog1
180 (condition-case error
181 (cond
182 ((eq encoding 'quoted-printable)
23f87bed
MB
183 (quoted-printable-decode-region (point-min) (point-max))
184 t)
c113de23
GM
185 ((eq encoding 'base64)
186 (base64-decode-region
187 (point-min)
188 ;; Some mailers insert whitespace
189 ;; junk at the end which
190 ;; base64-decode-region dislikes.
191 ;; Also remove possible junk which could
192 ;; have been added by mailing list software.
193 (save-excursion
194 (goto-char (point-min))
195 (while (re-search-forward "^[\t ]*\r?\n" nil t)
196 (delete-region (match-beginning 0) (match-end 0)))
197 (goto-char (point-max))
198 (when (re-search-backward "^[A-Za-z0-9+/]+=*[\t ]*$" nil t)
23f87bed
MB
199 (forward-line))
200 (point))))
c113de23
GM
201 ((memq encoding '(7bit 8bit binary))
202 ;; Do nothing.
23f87bed 203 t)
c113de23
GM
204 ((null encoding)
205 ;; Do nothing.
23f87bed 206 t)
c113de23 207 ((memq encoding '(x-uuencode x-uue))
058635a9 208 (require 'mm-uu)
23f87bed
MB
209 (funcall mm-uu-decode-function (point-min) (point-max))
210 t)
c113de23 211 ((eq encoding 'x-binhex)
058635a9 212 (require 'mm-uu)
23f87bed
MB
213 (funcall mm-uu-binhex-decode-function (point-min) (point-max))
214 t)
215 ((eq encoding 'x-yenc)
216 (require 'mm-uu)
217 (funcall mm-uu-yenc-decode-function (point-min) (point-max))
218 )
c113de23 219 ((functionp encoding)
23f87bed
MB
220 (funcall encoding (point-min) (point-max))
221 t)
c113de23
GM
222 (t
223 (message "Unknown encoding %s; defaulting to 8bit" encoding)))
224 (error
225 (message "Error while decoding: %s" error)
226 nil))
227 (when (and
3031d8b0 228 type
23f87bed 229 (memq encoding '(base64 x-uuencode x-uue x-binhex x-yenc))
4a2358e9 230 (string-match "\\`text/" type))
c113de23
GM
231 (goto-char (point-min))
232 (while (search-forward "\r\n" nil t)
233 (replace-match "\n" t t)))))
234
235(defun mm-decode-body (charset &optional encoding type)
23f87bed
MB
236 "Decode the current article that has been encoded with ENCODING to CHARSET.
237ENCODING is a MIME content transfer encoding.
238CHARSET is the MIME charset with which to decode the data after transfer
239decoding. If it is nil, default to `mail-parse-charset'."
240 (when (stringp charset)
241 (setq charset (intern (downcase charset))))
242 (when (or (not charset)
243 (eq 'gnus-all mail-parse-ignored-charsets)
244 (memq 'gnus-all mail-parse-ignored-charsets)
245 (memq charset mail-parse-ignored-charsets))
246 (setq charset mail-parse-charset))
c113de23
GM
247 (save-excursion
248 (when encoding
249 (mm-decode-content-transfer-encoding encoding type))
e499bc94
MB
250 (when (and (featurep 'mule) ;; Fixme: Wrong test for unibyte session.
251 (not (eq charset 'gnus-decoded)))
bd29ba20
RS
252 (let ((coding-system (mm-charset-to-coding-system
253 ;; Allow overwrite using
254 ;; `mm-charset-override-alist'.
255 charset nil t)))
058635a9 256 (if (and (not coding-system)
c113de23
GM
257 (listp mail-parse-ignored-charsets)
258 (memq 'gnus-unknown mail-parse-ignored-charsets))
5d54c59d 259 (setq coding-system
c113de23 260 (mm-charset-to-coding-system mail-parse-charset)))
058635a9 261 (when (and charset coding-system
c113de23
GM
262 ;; buffer-file-coding-system
263 ;;Article buffer is nil coding system
264 ;;in XEmacs
265 (mm-multibyte-p)
058635a9 266 (or (not (eq coding-system 'ascii))
e499bc94 267 (setq coding-system mail-parse-charset)))
23f87bed
MB
268 (mm-decode-coding-region (point-min) (point-max)
269 coding-system))
270 (setq buffer-file-coding-system
271 (if (boundp 'last-coding-system-used)
272 (symbol-value 'last-coding-system-used)
273 coding-system))))))
c113de23
GM
274
275(defun mm-decode-string (string charset)
276 "Decode STRING with CHARSET."
277 (when (stringp charset)
278 (setq charset (intern (downcase charset))))
5d54c59d 279 (when (or (not charset)
c113de23
GM
280 (eq 'gnus-all mail-parse-ignored-charsets)
281 (memq 'gnus-all mail-parse-ignored-charsets)
282 (memq charset mail-parse-ignored-charsets))
283 (setq charset mail-parse-charset))
284 (or
285 (when (featurep 'mule)
bd29ba20
RS
286 (let ((coding-system (mm-charset-to-coding-system
287 charset
288 ;; Allow overwrite using
289 ;; `mm-charset-override-alist'.
290 nil t)))
058635a9 291 (if (and (not coding-system)
c113de23
GM
292 (listp mail-parse-ignored-charsets)
293 (memq 'gnus-unknown mail-parse-ignored-charsets))
5d54c59d 294 (setq coding-system
c113de23 295 (mm-charset-to-coding-system mail-parse-charset)))
058635a9 296 (when (and charset coding-system
c113de23 297 (mm-multibyte-p)
058635a9
DL
298 (or (not (eq coding-system 'ascii))
299 (setq coding-system mail-parse-charset)))
300 (mm-decode-coding-string string coding-system))))
c113de23
GM
301 string))
302
303(provide 'mm-bodies)
304
ab5796a9 305;;; arch-tag: 41104bb6-4443-4ca9-8d5c-ff87ecf27d8d
715a2ca2 306;;; mm-bodies.el ends here