Regenerate ldefs-boot.el
[bpt/emacs.git] / lisp / gnus / mm-bodies.el
CommitLineData
23f87bed
MB
1;;; mm-bodies.el --- Functions for decoding MIME things
2
ba318903 3;; Copyright (C) 1998-2014 Free Software Foundation, Inc.
c113de23
GM
4
5;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
6;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
7;; This file is part of GNU Emacs.
8
5e809f55 9;; GNU Emacs is free software: you can redistribute it and/or modify
c113de23 10;; it under the terms of the GNU General Public License as published by
5e809f55
GM
11;; the Free Software Foundation, either version 3 of the License, or
12;; (at your option) any later version.
c113de23
GM
13
14;; GNU Emacs is distributed in the hope that it will be useful,
15;; but WITHOUT ANY WARRANTY; without even the implied warranty of
5e809f55 16;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
c113de23
GM
17;; GNU General Public License for more details.
18
19;; You should have received a copy of the GNU General Public License
5e809f55 20;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
c113de23
GM
21
22;;; Commentary:
23
24;;; Code:
25
f0b7f5a8 26;; For Emacs <22.2 and XEmacs.
498fc2ae
GM
27(eval-and-compile
28 (unless (fboundp 'declare-function) (defmacro declare-function (&rest r))))
29
c113de23
GM
30(require 'mm-util)
31(require 'rfc2047)
058635a9 32(require 'mm-encode)
c113de23 33
9efa445f
DN
34(defvar mm-uu-yenc-decode-function)
35(defvar mm-uu-decode-function)
36(defvar mm-uu-binhex-decode-function)
37
f4dd4ae8 38;; 8bit treatment gets any char except: 0x32 - 0x7f, LF, TAB, BEL,
c113de23 39;; BS, vertical TAB, form feed, and ^_
f4dd4ae8
MB
40;;
41;; Note that CR is *not* included, as that would allow a non-paired CR
42;; in the body contrary to RFC 2822:
43;;
44;; - CR and LF MUST only occur together as CRLF; they MUST NOT
45;; appear independently in the body.
46
47(defvar mm-7bit-chars "\x20-\x7f\n\t\x7\x8\xb\xc\x1f")
c113de23
GM
48
49(defcustom mm-body-charset-encoding-alist
50 '((iso-2022-jp . 7bit)
23f87bed
MB
51 (iso-2022-jp-2 . 7bit)
52 ;; We MUST encode UTF-16 because it can contain \0's which is
53 ;; known to break servers.
54 ;; Note: UTF-16 variants are invalid for text parts [RFC 2781],
55 ;; so this can't happen :-/.
bd29ba20
RS
56 ;; PPS: Yes, it can happen if the user specifies UTF-16 in the MML
57 ;; markup. - jh.
23f87bed
MB
58 (utf-16 . base64)
59 (utf-16be . base64)
60 (utf-16le . base64))
c113de23
GM
61 "Alist of MIME charsets to encodings.
62Valid encodings are `7bit', `8bit', `quoted-printable' and `base64'."
63 :type '(repeat (cons (symbol :tag "charset")
64 (choice :tag "encoding"
65 (const 7bit)
66 (const 8bit)
67 (const quoted-printable)
68 (const base64))))
69 :group 'mime)
70
498fc2ae
GM
71(autoload 'message-options-get "message")
72(declare-function message-options-set "message" (symbol value))
73
23f87bed 74(defun mm-encode-body (&optional charset)
c113de23
GM
75 "Encode a body.
76Should be called narrowed to the body that is to be encoded.
23f87bed
MB
77If there is more than one non-ASCII MULE charset in the body, then the
78list of MULE charsets found is returned.
79If CHARSET is non-nil, it is used as the MIME charset to encode the body.
c113de23
GM
80If successful, the MIME charset is returned.
81If no encoding was done, nil is returned."
def5ff36 82 (if (not (mm-multibyte-p))
c113de23
GM
83 ;; In the non-Mule case, we search for non-ASCII chars and
84 ;; return the value of `mail-parse-charset' if any are found.
23f87bed
MB
85 (or charset
86 (save-excursion
87 (goto-char (point-min))
88 (if (re-search-forward "[^\x0-\x7f]" nil t)
89 (or mail-parse-charset
e4769531 90 (message-options-get 'mm-body-charset-encoding-alist)
23f87bed 91 (message-options-set
e4769531 92 'mm-body-charset-encoding-alist
23f87bed
MB
93 (mm-read-coding-system "Charset used in the article: ")))
94 ;; The logic in `mml-generate-mime-1' confirms that it's OK
95 ;; to return nil here.
96 nil)))
c113de23 97 (save-excursion
23f87bed
MB
98 (if charset
99 (progn
11e22c4a
MB
100 (mm-encode-coding-region (point-min) (point-max)
101 (mm-charset-to-coding-system charset))
23f87bed
MB
102 charset)
103 (goto-char (point-min))
f5490ddc
MB
104 (let ((charsets (mm-find-mime-charset-region (point-min) (point-max)
105 mm-hack-charsets)))
23f87bed
MB
106 (cond
107 ;; No encoding.
108 ((null charsets)
109 nil)
110 ;; Too many charsets.
111 ((> (length charsets) 1)
112 charsets)
113 ;; We encode.
114 (t
115 (prog1
116 (setq charset (car charsets))
117 (mm-encode-coding-region (point-min) (point-max)
118 (mm-charset-to-coding-system charset))))
119 ))))))
120
121(defun mm-long-lines-p (length)
122 "Say whether any of the lines in the buffer is longer than LENGTH."
123 (save-excursion
124 (goto-char (point-min))
125 (end-of-line)
126 (while (and (not (eobp))
127 (not (> (current-column) length)))
128 (forward-line 1)
129 (end-of-line))
130 (and (> (current-column) length)
131 (current-column))))
132
133(defvar message-posting-charset)
058635a9 134
c113de23
GM
135(defun mm-body-encoding (charset &optional encoding)
136 "Do Content-Transfer-Encoding and return the encoding of the current buffer."
23f87bed
MB
137 (when (stringp encoding)
138 (setq encoding (intern (downcase encoding))))
139 (let ((bits (mm-body-7-or-8))
140 (longp (mm-long-lines-p 1000)))
058635a9 141 (require 'message)
c113de23 142 (cond
23f87bed
MB
143 ((and (not longp)
144 (not (and mm-use-ultra-safe-encoding
2f7717f6
MB
145 (or (save-excursion (re-search-forward " $" nil t))
146 (save-excursion (re-search-forward "^From " nil t)))))
23f87bed 147 (eq bits '7bit))
c113de23
GM
148 bits)
149 ((and (not mm-use-ultra-safe-encoding)
23f87bed
MB
150 (not longp)
151 (not (cdr (assq charset mm-body-charset-encoding-alist)))
c113de23
GM
152 (or (eq t (cdr message-posting-charset))
153 (memq charset (cdr message-posting-charset))
154 (eq charset mail-parse-charset)))
155 bits)
156 (t
157 (let ((encoding (or encoding
158 (cdr (assq charset mm-body-charset-encoding-alist))
159 (mm-qp-or-base64))))
160 (when mm-use-ultra-safe-encoding
161 (setq encoding (mm-safer-encoding encoding)))
162 (mm-encode-content-transfer-encoding encoding "text/plain")
163 encoding)))))
164
165(defun mm-body-7-or-8 ()
166 "Say whether the body is 7bit or 8bit."
5d54c59d
DL
167 (if (save-excursion
168 (goto-char (point-min))
169 (skip-chars-forward mm-7bit-chars)
170 (eobp))
171 '7bit
172 '8bit))
c113de23
GM
173
174;;;
175;;; Functions for decoding
176;;;
177
178(defun mm-decode-content-transfer-encoding (encoding &optional type)
23f87bed
MB
179 "Decodes buffer encoded with ENCODING, returning success status.
180If TYPE is `text/plain' CRLF->LF translation may occur."
c113de23
GM
181 (prog1
182 (condition-case error
183 (cond
184 ((eq encoding 'quoted-printable)
23f87bed
MB
185 (quoted-printable-decode-region (point-min) (point-max))
186 t)
c113de23
GM
187 ((eq encoding 'base64)
188 (base64-decode-region
189 (point-min)
190 ;; Some mailers insert whitespace
191 ;; junk at the end which
192 ;; base64-decode-region dislikes.
193 ;; Also remove possible junk which could
194 ;; have been added by mailing list software.
195 (save-excursion
196 (goto-char (point-min))
197 (while (re-search-forward "^[\t ]*\r?\n" nil t)
198 (delete-region (match-beginning 0) (match-end 0)))
199 (goto-char (point-max))
cfe397c6
KY
200 (when (re-search-backward "^[\t ]*[A-Za-z0-9+/]+=*[\t ]*$"
201 nil t)
23f87bed
MB
202 (forward-line))
203 (point))))
a88fd51a 204 ((memq encoding '(nil 7bit 8bit binary))
c113de23 205 ;; Do nothing.
23f87bed 206 t)
c113de23 207 ((memq encoding '(x-uuencode x-uue))
058635a9 208 (require 'mm-uu)
23f87bed
MB
209 (funcall mm-uu-decode-function (point-min) (point-max))
210 t)
c113de23 211 ((eq encoding 'x-binhex)
058635a9 212 (require 'mm-uu)
23f87bed
MB
213 (funcall mm-uu-binhex-decode-function (point-min) (point-max))
214 t)
215 ((eq encoding 'x-yenc)
216 (require 'mm-uu)
217 (funcall mm-uu-yenc-decode-function (point-min) (point-max))
218 )
c113de23 219 ((functionp encoding)
23f87bed
MB
220 (funcall encoding (point-min) (point-max))
221 t)
c113de23
GM
222 (t
223 (message "Unknown encoding %s; defaulting to 8bit" encoding)))
224 (error
225 (message "Error while decoding: %s" error)
226 nil))
227 (when (and
3031d8b0 228 type
23f87bed 229 (memq encoding '(base64 x-uuencode x-uue x-binhex x-yenc))
4a2358e9 230 (string-match "\\`text/" type))
c113de23
GM
231 (goto-char (point-min))
232 (while (search-forward "\r\n" nil t)
233 (replace-match "\n" t t)))))
234
235(defun mm-decode-body (charset &optional encoding type)
23f87bed
MB
236 "Decode the current article that has been encoded with ENCODING to CHARSET.
237ENCODING is a MIME content transfer encoding.
238CHARSET is the MIME charset with which to decode the data after transfer
239decoding. If it is nil, default to `mail-parse-charset'."
240 (when (stringp charset)
241 (setq charset (intern (downcase charset))))
242 (when (or (not charset)
243 (eq 'gnus-all mail-parse-ignored-charsets)
244 (memq 'gnus-all mail-parse-ignored-charsets)
245 (memq charset mail-parse-ignored-charsets))
246 (setq charset mail-parse-charset))
c113de23
GM
247 (save-excursion
248 (when encoding
249 (mm-decode-content-transfer-encoding encoding type))
e499bc94
MB
250 (when (and (featurep 'mule) ;; Fixme: Wrong test for unibyte session.
251 (not (eq charset 'gnus-decoded)))
bd29ba20
RS
252 (let ((coding-system (mm-charset-to-coding-system
253 ;; Allow overwrite using
254 ;; `mm-charset-override-alist'.
255 charset nil t)))
058635a9 256 (if (and (not coding-system)
c113de23
GM
257 (listp mail-parse-ignored-charsets)
258 (memq 'gnus-unknown mail-parse-ignored-charsets))
5d54c59d 259 (setq coding-system
c113de23 260 (mm-charset-to-coding-system mail-parse-charset)))
058635a9 261 (when (and charset coding-system
c113de23
GM
262 ;; buffer-file-coding-system
263 ;;Article buffer is nil coding system
264 ;;in XEmacs
265 (mm-multibyte-p)
058635a9 266 (or (not (eq coding-system 'ascii))
e499bc94 267 (setq coding-system mail-parse-charset)))
23f87bed
MB
268 (mm-decode-coding-region (point-min) (point-max)
269 coding-system))
270 (setq buffer-file-coding-system
271 (if (boundp 'last-coding-system-used)
272 (symbol-value 'last-coding-system-used)
273 coding-system))))))
c113de23
GM
274
275(defun mm-decode-string (string charset)
276 "Decode STRING with CHARSET."
277 (when (stringp charset)
278 (setq charset (intern (downcase charset))))
5d54c59d 279 (when (or (not charset)
c113de23
GM
280 (eq 'gnus-all mail-parse-ignored-charsets)
281 (memq 'gnus-all mail-parse-ignored-charsets)
282 (memq charset mail-parse-ignored-charsets))
283 (setq charset mail-parse-charset))
284 (or
285 (when (featurep 'mule)
bd29ba20
RS
286 (let ((coding-system (mm-charset-to-coding-system
287 charset
288 ;; Allow overwrite using
289 ;; `mm-charset-override-alist'.
290 nil t)))
058635a9 291 (if (and (not coding-system)
c113de23
GM
292 (listp mail-parse-ignored-charsets)
293 (memq 'gnus-unknown mail-parse-ignored-charsets))
5d54c59d 294 (setq coding-system
c113de23 295 (mm-charset-to-coding-system mail-parse-charset)))
058635a9 296 (when (and charset coding-system
c113de23 297 (mm-multibyte-p)
058635a9
DL
298 (or (not (eq coding-system 'ascii))
299 (setq coding-system mail-parse-charset)))
300 (mm-decode-coding-string string coding-system))))
c113de23
GM
301 string))
302
303(provide 'mm-bodies)
304
715a2ca2 305;;; mm-bodies.el ends here