Add 2011 to FSF/AIST copyright years.
[bpt/emacs.git] / lisp / gnus / mm-bodies.el
CommitLineData
23f87bed
MB
1;;; mm-bodies.el --- Functions for decoding MIME things
2
e84b4b86 3;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5df4f04c 4;; 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
c113de23
GM
5
6;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
7;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
8;; This file is part of GNU Emacs.
9
5e809f55 10;; GNU Emacs is free software: you can redistribute it and/or modify
c113de23 11;; it under the terms of the GNU General Public License as published by
5e809f55
GM
12;; the Free Software Foundation, either version 3 of the License, or
13;; (at your option) any later version.
c113de23
GM
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
5e809f55 17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
c113de23
GM
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
5e809f55 21;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
c113de23
GM
22
23;;; Commentary:
24
25;;; Code:
26
498fc2ae
GM
27;; For Emacs < 22.2.
28(eval-and-compile
29 (unless (fboundp 'declare-function) (defmacro declare-function (&rest r))))
30
c113de23
GM
31(require 'mm-util)
32(require 'rfc2047)
058635a9 33(require 'mm-encode)
c113de23 34
9efa445f
DN
35(defvar mm-uu-yenc-decode-function)
36(defvar mm-uu-decode-function)
37(defvar mm-uu-binhex-decode-function)
38
f4dd4ae8 39;; 8bit treatment gets any char except: 0x32 - 0x7f, LF, TAB, BEL,
c113de23 40;; BS, vertical TAB, form feed, and ^_
f4dd4ae8
MB
41;;
42;; Note that CR is *not* included, as that would allow a non-paired CR
43;; in the body contrary to RFC 2822:
44;;
45;; - CR and LF MUST only occur together as CRLF; they MUST NOT
46;; appear independently in the body.
47
48(defvar mm-7bit-chars "\x20-\x7f\n\t\x7\x8\xb\xc\x1f")
c113de23
GM
49
50(defcustom mm-body-charset-encoding-alist
51 '((iso-2022-jp . 7bit)
23f87bed
MB
52 (iso-2022-jp-2 . 7bit)
53 ;; We MUST encode UTF-16 because it can contain \0's which is
54 ;; known to break servers.
55 ;; Note: UTF-16 variants are invalid for text parts [RFC 2781],
56 ;; so this can't happen :-/.
bd29ba20
RS
57 ;; PPS: Yes, it can happen if the user specifies UTF-16 in the MML
58 ;; markup. - jh.
23f87bed
MB
59 (utf-16 . base64)
60 (utf-16be . base64)
61 (utf-16le . base64))
c113de23
GM
62 "Alist of MIME charsets to encodings.
63Valid encodings are `7bit', `8bit', `quoted-printable' and `base64'."
64 :type '(repeat (cons (symbol :tag "charset")
65 (choice :tag "encoding"
66 (const 7bit)
67 (const 8bit)
68 (const quoted-printable)
69 (const base64))))
70 :group 'mime)
71
498fc2ae
GM
72(autoload 'message-options-get "message")
73(declare-function message-options-set "message" (symbol value))
74
23f87bed 75(defun mm-encode-body (&optional charset)
c113de23
GM
76 "Encode a body.
77Should be called narrowed to the body that is to be encoded.
23f87bed
MB
78If there is more than one non-ASCII MULE charset in the body, then the
79list of MULE charsets found is returned.
80If CHARSET is non-nil, it is used as the MIME charset to encode the body.
c113de23
GM
81If successful, the MIME charset is returned.
82If no encoding was done, nil is returned."
def5ff36 83 (if (not (mm-multibyte-p))
c113de23
GM
84 ;; In the non-Mule case, we search for non-ASCII chars and
85 ;; return the value of `mail-parse-charset' if any are found.
23f87bed
MB
86 (or charset
87 (save-excursion
88 (goto-char (point-min))
89 (if (re-search-forward "[^\x0-\x7f]" nil t)
90 (or mail-parse-charset
91 (message-options-get 'mm-encody-body-charset)
92 (message-options-set
93 'mm-encody-body-charset
94 (mm-read-coding-system "Charset used in the article: ")))
95 ;; The logic in `mml-generate-mime-1' confirms that it's OK
96 ;; to return nil here.
97 nil)))
c113de23 98 (save-excursion
23f87bed
MB
99 (if charset
100 (progn
11e22c4a
MB
101 (mm-encode-coding-region (point-min) (point-max)
102 (mm-charset-to-coding-system charset))
23f87bed
MB
103 charset)
104 (goto-char (point-min))
f5490ddc
MB
105 (let ((charsets (mm-find-mime-charset-region (point-min) (point-max)
106 mm-hack-charsets)))
23f87bed
MB
107 (cond
108 ;; No encoding.
109 ((null charsets)
110 nil)
111 ;; Too many charsets.
112 ((> (length charsets) 1)
113 charsets)
114 ;; We encode.
115 (t
116 (prog1
117 (setq charset (car charsets))
118 (mm-encode-coding-region (point-min) (point-max)
119 (mm-charset-to-coding-system charset))))
120 ))))))
121
122(defun mm-long-lines-p (length)
123 "Say whether any of the lines in the buffer is longer than LENGTH."
124 (save-excursion
125 (goto-char (point-min))
126 (end-of-line)
127 (while (and (not (eobp))
128 (not (> (current-column) length)))
129 (forward-line 1)
130 (end-of-line))
131 (and (> (current-column) length)
132 (current-column))))
133
134(defvar message-posting-charset)
058635a9 135
c113de23
GM
136(defun mm-body-encoding (charset &optional encoding)
137 "Do Content-Transfer-Encoding and return the encoding of the current buffer."
23f87bed
MB
138 (when (stringp encoding)
139 (setq encoding (intern (downcase encoding))))
140 (let ((bits (mm-body-7-or-8))
141 (longp (mm-long-lines-p 1000)))
058635a9 142 (require 'message)
c113de23 143 (cond
23f87bed
MB
144 ((and (not longp)
145 (not (and mm-use-ultra-safe-encoding
2f7717f6
MB
146 (or (save-excursion (re-search-forward " $" nil t))
147 (save-excursion (re-search-forward "^From " nil t)))))
23f87bed 148 (eq bits '7bit))
c113de23
GM
149 bits)
150 ((and (not mm-use-ultra-safe-encoding)
23f87bed
MB
151 (not longp)
152 (not (cdr (assq charset mm-body-charset-encoding-alist)))
c113de23
GM
153 (or (eq t (cdr message-posting-charset))
154 (memq charset (cdr message-posting-charset))
155 (eq charset mail-parse-charset)))
156 bits)
157 (t
158 (let ((encoding (or encoding
159 (cdr (assq charset mm-body-charset-encoding-alist))
160 (mm-qp-or-base64))))
161 (when mm-use-ultra-safe-encoding
162 (setq encoding (mm-safer-encoding encoding)))
163 (mm-encode-content-transfer-encoding encoding "text/plain")
164 encoding)))))
165
166(defun mm-body-7-or-8 ()
167 "Say whether the body is 7bit or 8bit."
5d54c59d
DL
168 (if (save-excursion
169 (goto-char (point-min))
170 (skip-chars-forward mm-7bit-chars)
171 (eobp))
172 '7bit
173 '8bit))
c113de23
GM
174
175;;;
176;;; Functions for decoding
177;;;
178
179(defun mm-decode-content-transfer-encoding (encoding &optional type)
23f87bed
MB
180 "Decodes buffer encoded with ENCODING, returning success status.
181If TYPE is `text/plain' CRLF->LF translation may occur."
c113de23
GM
182 (prog1
183 (condition-case error
184 (cond
185 ((eq encoding 'quoted-printable)
23f87bed
MB
186 (quoted-printable-decode-region (point-min) (point-max))
187 t)
c113de23
GM
188 ((eq encoding 'base64)
189 (base64-decode-region
190 (point-min)
191 ;; Some mailers insert whitespace
192 ;; junk at the end which
193 ;; base64-decode-region dislikes.
194 ;; Also remove possible junk which could
195 ;; have been added by mailing list software.
196 (save-excursion
197 (goto-char (point-min))
198 (while (re-search-forward "^[\t ]*\r?\n" nil t)
199 (delete-region (match-beginning 0) (match-end 0)))
200 (goto-char (point-max))
201 (when (re-search-backward "^[A-Za-z0-9+/]+=*[\t ]*$" nil t)
23f87bed
MB
202 (forward-line))
203 (point))))
a88fd51a 204 ((memq encoding '(nil 7bit 8bit binary))
c113de23 205 ;; Do nothing.
23f87bed 206 t)
c113de23 207 ((memq encoding '(x-uuencode x-uue))
058635a9 208 (require 'mm-uu)
23f87bed
MB
209 (funcall mm-uu-decode-function (point-min) (point-max))
210 t)
c113de23 211 ((eq encoding 'x-binhex)
058635a9 212 (require 'mm-uu)
23f87bed
MB
213 (funcall mm-uu-binhex-decode-function (point-min) (point-max))
214 t)
215 ((eq encoding 'x-yenc)
216 (require 'mm-uu)
217 (funcall mm-uu-yenc-decode-function (point-min) (point-max))
218 )
c113de23 219 ((functionp encoding)
23f87bed
MB
220 (funcall encoding (point-min) (point-max))
221 t)
c113de23
GM
222 (t
223 (message "Unknown encoding %s; defaulting to 8bit" encoding)))
224 (error
225 (message "Error while decoding: %s" error)
226 nil))
227 (when (and
3031d8b0 228 type
23f87bed 229 (memq encoding '(base64 x-uuencode x-uue x-binhex x-yenc))
4a2358e9 230 (string-match "\\`text/" type))
c113de23
GM
231 (goto-char (point-min))
232 (while (search-forward "\r\n" nil t)
233 (replace-match "\n" t t)))))
234
235(defun mm-decode-body (charset &optional encoding type)
23f87bed
MB
236 "Decode the current article that has been encoded with ENCODING to CHARSET.
237ENCODING is a MIME content transfer encoding.
238CHARSET is the MIME charset with which to decode the data after transfer
239decoding. If it is nil, default to `mail-parse-charset'."
240 (when (stringp charset)
241 (setq charset (intern (downcase charset))))
242 (when (or (not charset)
243 (eq 'gnus-all mail-parse-ignored-charsets)
244 (memq 'gnus-all mail-parse-ignored-charsets)
245 (memq charset mail-parse-ignored-charsets))
246 (setq charset mail-parse-charset))
c113de23
GM
247 (save-excursion
248 (when encoding
249 (mm-decode-content-transfer-encoding encoding type))
e499bc94
MB
250 (when (and (featurep 'mule) ;; Fixme: Wrong test for unibyte session.
251 (not (eq charset 'gnus-decoded)))
bd29ba20
RS
252 (let ((coding-system (mm-charset-to-coding-system
253 ;; Allow overwrite using
254 ;; `mm-charset-override-alist'.
255 charset nil t)))
058635a9 256 (if (and (not coding-system)
c113de23
GM
257 (listp mail-parse-ignored-charsets)
258 (memq 'gnus-unknown mail-parse-ignored-charsets))
5d54c59d 259 (setq coding-system
c113de23 260 (mm-charset-to-coding-system mail-parse-charset)))
058635a9 261 (when (and charset coding-system
c113de23
GM
262 ;; buffer-file-coding-system
263 ;;Article buffer is nil coding system
264 ;;in XEmacs
265 (mm-multibyte-p)
058635a9 266 (or (not (eq coding-system 'ascii))
e499bc94 267 (setq coding-system mail-parse-charset)))
23f87bed
MB
268 (mm-decode-coding-region (point-min) (point-max)
269 coding-system))
270 (setq buffer-file-coding-system
271 (if (boundp 'last-coding-system-used)
272 (symbol-value 'last-coding-system-used)
273 coding-system))))))
c113de23
GM
274
275(defun mm-decode-string (string charset)
276 "Decode STRING with CHARSET."
277 (when (stringp charset)
278 (setq charset (intern (downcase charset))))
5d54c59d 279 (when (or (not charset)
c113de23
GM
280 (eq 'gnus-all mail-parse-ignored-charsets)
281 (memq 'gnus-all mail-parse-ignored-charsets)
282 (memq charset mail-parse-ignored-charsets))
283 (setq charset mail-parse-charset))
284 (or
285 (when (featurep 'mule)
bd29ba20
RS
286 (let ((coding-system (mm-charset-to-coding-system
287 charset
288 ;; Allow overwrite using
289 ;; `mm-charset-override-alist'.
290 nil t)))
058635a9 291 (if (and (not coding-system)
c113de23
GM
292 (listp mail-parse-ignored-charsets)
293 (memq 'gnus-unknown mail-parse-ignored-charsets))
5d54c59d 294 (setq coding-system
c113de23 295 (mm-charset-to-coding-system mail-parse-charset)))
058635a9 296 (when (and charset coding-system
c113de23 297 (mm-multibyte-p)
058635a9
DL
298 (or (not (eq coding-system 'ascii))
299 (setq coding-system mail-parse-charset)))
300 (mm-decode-coding-string string coding-system))))
c113de23
GM
301 string))
302
303(provide 'mm-bodies)
304
a88fd51a 305;; arch-tag: 41104bb6-4443-4ca9-8d5c-ff87ecf27d8d
715a2ca2 306;;; mm-bodies.el ends here