Merge from emacs-24; up to 2014-03-24T03:06:35Z!dancol@dancol.org
[bpt/emacs.git] / lisp / gnus / mm-bodies.el
CommitLineData
23f87bed
MB
1;;; mm-bodies.el --- Functions for decoding MIME things
2
ba318903 3;; Copyright (C) 1998-2014 Free Software Foundation, Inc.
c113de23
GM
4
5;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
6;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
7;; This file is part of GNU Emacs.
8
5e809f55 9;; GNU Emacs is free software: you can redistribute it and/or modify
c113de23 10;; it under the terms of the GNU General Public License as published by
5e809f55
GM
11;; the Free Software Foundation, either version 3 of the License, or
12;; (at your option) any later version.
c113de23
GM
13
14;; GNU Emacs is distributed in the hope that it will be useful,
15;; but WITHOUT ANY WARRANTY; without even the implied warranty of
5e809f55 16;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
c113de23
GM
17;; GNU General Public License for more details.
18
19;; You should have received a copy of the GNU General Public License
5e809f55 20;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
c113de23
GM
21
22;;; Commentary:
23
24;;; Code:
25
c113de23
GM
26(require 'mm-util)
27(require 'rfc2047)
058635a9 28(require 'mm-encode)
c113de23 29
9efa445f
DN
30(defvar mm-uu-yenc-decode-function)
31(defvar mm-uu-decode-function)
32(defvar mm-uu-binhex-decode-function)
33
f4dd4ae8 34;; 8bit treatment gets any char except: 0x32 - 0x7f, LF, TAB, BEL,
c113de23 35;; BS, vertical TAB, form feed, and ^_
f4dd4ae8
MB
36;;
37;; Note that CR is *not* included, as that would allow a non-paired CR
38;; in the body contrary to RFC 2822:
39;;
40;; - CR and LF MUST only occur together as CRLF; they MUST NOT
41;; appear independently in the body.
42
43(defvar mm-7bit-chars "\x20-\x7f\n\t\x7\x8\xb\xc\x1f")
c113de23
GM
44
45(defcustom mm-body-charset-encoding-alist
46 '((iso-2022-jp . 7bit)
23f87bed
MB
47 (iso-2022-jp-2 . 7bit)
48 ;; We MUST encode UTF-16 because it can contain \0's which is
49 ;; known to break servers.
50 ;; Note: UTF-16 variants are invalid for text parts [RFC 2781],
51 ;; so this can't happen :-/.
bd29ba20
RS
52 ;; PPS: Yes, it can happen if the user specifies UTF-16 in the MML
53 ;; markup. - jh.
23f87bed
MB
54 (utf-16 . base64)
55 (utf-16be . base64)
56 (utf-16le . base64))
c113de23
GM
57 "Alist of MIME charsets to encodings.
58Valid encodings are `7bit', `8bit', `quoted-printable' and `base64'."
59 :type '(repeat (cons (symbol :tag "charset")
60 (choice :tag "encoding"
61 (const 7bit)
62 (const 8bit)
63 (const quoted-printable)
64 (const base64))))
65 :group 'mime)
66
498fc2ae
GM
67(autoload 'message-options-get "message")
68(declare-function message-options-set "message" (symbol value))
69
23f87bed 70(defun mm-encode-body (&optional charset)
c113de23
GM
71 "Encode a body.
72Should be called narrowed to the body that is to be encoded.
23f87bed
MB
73If there is more than one non-ASCII MULE charset in the body, then the
74list of MULE charsets found is returned.
75If CHARSET is non-nil, it is used as the MIME charset to encode the body.
c113de23
GM
76If successful, the MIME charset is returned.
77If no encoding was done, nil is returned."
def5ff36 78 (if (not (mm-multibyte-p))
c113de23
GM
79 ;; In the non-Mule case, we search for non-ASCII chars and
80 ;; return the value of `mail-parse-charset' if any are found.
23f87bed
MB
81 (or charset
82 (save-excursion
83 (goto-char (point-min))
84 (if (re-search-forward "[^\x0-\x7f]" nil t)
85 (or mail-parse-charset
e4769531 86 (message-options-get 'mm-body-charset-encoding-alist)
23f87bed 87 (message-options-set
e4769531 88 'mm-body-charset-encoding-alist
23f87bed
MB
89 (mm-read-coding-system "Charset used in the article: ")))
90 ;; The logic in `mml-generate-mime-1' confirms that it's OK
91 ;; to return nil here.
92 nil)))
c113de23 93 (save-excursion
23f87bed
MB
94 (if charset
95 (progn
11e22c4a
MB
96 (mm-encode-coding-region (point-min) (point-max)
97 (mm-charset-to-coding-system charset))
23f87bed
MB
98 charset)
99 (goto-char (point-min))
f5490ddc
MB
100 (let ((charsets (mm-find-mime-charset-region (point-min) (point-max)
101 mm-hack-charsets)))
23f87bed
MB
102 (cond
103 ;; No encoding.
104 ((null charsets)
105 nil)
106 ;; Too many charsets.
107 ((> (length charsets) 1)
108 charsets)
109 ;; We encode.
110 (t
111 (prog1
112 (setq charset (car charsets))
113 (mm-encode-coding-region (point-min) (point-max)
114 (mm-charset-to-coding-system charset))))
115 ))))))
116
117(defun mm-long-lines-p (length)
118 "Say whether any of the lines in the buffer is longer than LENGTH."
119 (save-excursion
120 (goto-char (point-min))
121 (end-of-line)
122 (while (and (not (eobp))
123 (not (> (current-column) length)))
124 (forward-line 1)
125 (end-of-line))
126 (and (> (current-column) length)
127 (current-column))))
128
129(defvar message-posting-charset)
058635a9 130
c113de23
GM
131(defun mm-body-encoding (charset &optional encoding)
132 "Do Content-Transfer-Encoding and return the encoding of the current buffer."
23f87bed
MB
133 (when (stringp encoding)
134 (setq encoding (intern (downcase encoding))))
135 (let ((bits (mm-body-7-or-8))
136 (longp (mm-long-lines-p 1000)))
058635a9 137 (require 'message)
c113de23 138 (cond
23f87bed
MB
139 ((and (not longp)
140 (not (and mm-use-ultra-safe-encoding
2f7717f6
MB
141 (or (save-excursion (re-search-forward " $" nil t))
142 (save-excursion (re-search-forward "^From " nil t)))))
23f87bed 143 (eq bits '7bit))
c113de23
GM
144 bits)
145 ((and (not mm-use-ultra-safe-encoding)
23f87bed
MB
146 (not longp)
147 (not (cdr (assq charset mm-body-charset-encoding-alist)))
c113de23
GM
148 (or (eq t (cdr message-posting-charset))
149 (memq charset (cdr message-posting-charset))
150 (eq charset mail-parse-charset)))
151 bits)
152 (t
153 (let ((encoding (or encoding
154 (cdr (assq charset mm-body-charset-encoding-alist))
155 (mm-qp-or-base64))))
156 (when mm-use-ultra-safe-encoding
157 (setq encoding (mm-safer-encoding encoding)))
158 (mm-encode-content-transfer-encoding encoding "text/plain")
159 encoding)))))
160
161(defun mm-body-7-or-8 ()
162 "Say whether the body is 7bit or 8bit."
5d54c59d
DL
163 (if (save-excursion
164 (goto-char (point-min))
165 (skip-chars-forward mm-7bit-chars)
166 (eobp))
167 '7bit
168 '8bit))
c113de23
GM
169
170;;;
171;;; Functions for decoding
172;;;
173
174(defun mm-decode-content-transfer-encoding (encoding &optional type)
23f87bed
MB
175 "Decodes buffer encoded with ENCODING, returning success status.
176If TYPE is `text/plain' CRLF->LF translation may occur."
c113de23
GM
177 (prog1
178 (condition-case error
179 (cond
180 ((eq encoding 'quoted-printable)
23f87bed
MB
181 (quoted-printable-decode-region (point-min) (point-max))
182 t)
c113de23
GM
183 ((eq encoding 'base64)
184 (base64-decode-region
185 (point-min)
186 ;; Some mailers insert whitespace
187 ;; junk at the end which
188 ;; base64-decode-region dislikes.
189 ;; Also remove possible junk which could
190 ;; have been added by mailing list software.
191 (save-excursion
192 (goto-char (point-min))
193 (while (re-search-forward "^[\t ]*\r?\n" nil t)
194 (delete-region (match-beginning 0) (match-end 0)))
195 (goto-char (point-max))
cfe397c6
KY
196 (when (re-search-backward "^[\t ]*[A-Za-z0-9+/]+=*[\t ]*$"
197 nil t)
23f87bed
MB
198 (forward-line))
199 (point))))
a88fd51a 200 ((memq encoding '(nil 7bit 8bit binary))
c113de23 201 ;; Do nothing.
23f87bed 202 t)
c113de23 203 ((memq encoding '(x-uuencode x-uue))
058635a9 204 (require 'mm-uu)
23f87bed
MB
205 (funcall mm-uu-decode-function (point-min) (point-max))
206 t)
c113de23 207 ((eq encoding 'x-binhex)
058635a9 208 (require 'mm-uu)
23f87bed
MB
209 (funcall mm-uu-binhex-decode-function (point-min) (point-max))
210 t)
211 ((eq encoding 'x-yenc)
212 (require 'mm-uu)
213 (funcall mm-uu-yenc-decode-function (point-min) (point-max))
214 )
c113de23 215 ((functionp encoding)
23f87bed
MB
216 (funcall encoding (point-min) (point-max))
217 t)
c113de23
GM
218 (t
219 (message "Unknown encoding %s; defaulting to 8bit" encoding)))
220 (error
221 (message "Error while decoding: %s" error)
222 nil))
223 (when (and
3031d8b0 224 type
23f87bed 225 (memq encoding '(base64 x-uuencode x-uue x-binhex x-yenc))
4a2358e9 226 (string-match "\\`text/" type))
c113de23
GM
227 (goto-char (point-min))
228 (while (search-forward "\r\n" nil t)
229 (replace-match "\n" t t)))))
230
231(defun mm-decode-body (charset &optional encoding type)
23f87bed
MB
232 "Decode the current article that has been encoded with ENCODING to CHARSET.
233ENCODING is a MIME content transfer encoding.
234CHARSET is the MIME charset with which to decode the data after transfer
235decoding. If it is nil, default to `mail-parse-charset'."
236 (when (stringp charset)
237 (setq charset (intern (downcase charset))))
238 (when (or (not charset)
239 (eq 'gnus-all mail-parse-ignored-charsets)
240 (memq 'gnus-all mail-parse-ignored-charsets)
241 (memq charset mail-parse-ignored-charsets))
242 (setq charset mail-parse-charset))
c113de23
GM
243 (save-excursion
244 (when encoding
245 (mm-decode-content-transfer-encoding encoding type))
e499bc94
MB
246 (when (and (featurep 'mule) ;; Fixme: Wrong test for unibyte session.
247 (not (eq charset 'gnus-decoded)))
bd29ba20
RS
248 (let ((coding-system (mm-charset-to-coding-system
249 ;; Allow overwrite using
250 ;; `mm-charset-override-alist'.
251 charset nil t)))
058635a9 252 (if (and (not coding-system)
c113de23
GM
253 (listp mail-parse-ignored-charsets)
254 (memq 'gnus-unknown mail-parse-ignored-charsets))
5d54c59d 255 (setq coding-system
c113de23 256 (mm-charset-to-coding-system mail-parse-charset)))
058635a9 257 (when (and charset coding-system
c113de23
GM
258 ;; buffer-file-coding-system
259 ;;Article buffer is nil coding system
260 ;;in XEmacs
261 (mm-multibyte-p)
058635a9 262 (or (not (eq coding-system 'ascii))
e499bc94 263 (setq coding-system mail-parse-charset)))
23f87bed
MB
264 (mm-decode-coding-region (point-min) (point-max)
265 coding-system))
266 (setq buffer-file-coding-system
267 (if (boundp 'last-coding-system-used)
268 (symbol-value 'last-coding-system-used)
269 coding-system))))))
c113de23
GM
270
271(defun mm-decode-string (string charset)
272 "Decode STRING with CHARSET."
273 (when (stringp charset)
274 (setq charset (intern (downcase charset))))
5d54c59d 275 (when (or (not charset)
c113de23
GM
276 (eq 'gnus-all mail-parse-ignored-charsets)
277 (memq 'gnus-all mail-parse-ignored-charsets)
278 (memq charset mail-parse-ignored-charsets))
279 (setq charset mail-parse-charset))
280 (or
281 (when (featurep 'mule)
bd29ba20
RS
282 (let ((coding-system (mm-charset-to-coding-system
283 charset
284 ;; Allow overwrite using
285 ;; `mm-charset-override-alist'.
286 nil t)))
058635a9 287 (if (and (not coding-system)
c113de23
GM
288 (listp mail-parse-ignored-charsets)
289 (memq 'gnus-unknown mail-parse-ignored-charsets))
5d54c59d 290 (setq coding-system
c113de23 291 (mm-charset-to-coding-system mail-parse-charset)))
058635a9 292 (when (and charset coding-system
c113de23 293 (mm-multibyte-p)
058635a9
DL
294 (or (not (eq coding-system 'ascii))
295 (setq coding-system mail-parse-charset)))
296 (mm-decode-coding-string string coding-system))))
c113de23
GM
297 string))
298
299(provide 'mm-bodies)
300
715a2ca2 301;;; mm-bodies.el ends here