Revision: miles@gnu.org--gnu-2004/emacs--unicode--0--patch-75
[bpt/emacs.git] / lisp / gnus / mm-bodies.el
CommitLineData
23f87bed
MB
1;;; mm-bodies.el --- Functions for decoding MIME things
2
503815da 3;; Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004
23f87bed 4;; Free Software Foundation, Inc.
c113de23
GM
5
6;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
7;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
12;; the Free Software Foundation; either version 2, or (at your option)
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to the
22;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23;; Boston, MA 02111-1307, USA.
24
25;;; Commentary:
26
27;;; Code:
28
29(eval-and-compile
30 (or (fboundp 'base64-decode-region)
058635a9 31 (require 'base64)))
8f716686 32
776ca44f
DL
33(eval-when-compile
34 (defvar mm-uu-decode-function)
35 (defvar mm-uu-binhex-decode-function))
c113de23
GM
36
37(require 'mm-util)
38(require 'rfc2047)
058635a9 39(require 'mm-encode)
c113de23 40
f4dd4ae8 41;; 8bit treatment gets any char except: 0x32 - 0x7f, LF, TAB, BEL,
c113de23 42;; BS, vertical TAB, form feed, and ^_
f4dd4ae8
MB
43;;
44;; Note that CR is *not* included, as that would allow a non-paired CR
45;; in the body contrary to RFC 2822:
46;;
47;; - CR and LF MUST only occur together as CRLF; they MUST NOT
48;; appear independently in the body.
49
50(defvar mm-7bit-chars "\x20-\x7f\n\t\x7\x8\xb\xc\x1f")
c113de23
GM
51
52(defcustom mm-body-charset-encoding-alist
53 '((iso-2022-jp . 7bit)
23f87bed
MB
54 (iso-2022-jp-2 . 7bit)
55 ;; We MUST encode UTF-16 because it can contain \0's which is
56 ;; known to break servers.
57 ;; Note: UTF-16 variants are invalid for text parts [RFC 2781],
58 ;; so this can't happen :-/.
59 (utf-16 . base64)
60 (utf-16be . base64)
61 (utf-16le . base64))
c113de23
GM
62 "Alist of MIME charsets to encodings.
63Valid encodings are `7bit', `8bit', `quoted-printable' and `base64'."
64 :type '(repeat (cons (symbol :tag "charset")
65 (choice :tag "encoding"
66 (const 7bit)
67 (const 8bit)
68 (const quoted-printable)
69 (const base64))))
70 :group 'mime)
71
23f87bed 72(defun mm-encode-body (&optional charset)
c113de23
GM
73 "Encode a body.
74Should be called narrowed to the body that is to be encoded.
23f87bed
MB
75If there is more than one non-ASCII MULE charset in the body, then the
76list of MULE charsets found is returned.
77If CHARSET is non-nil, it is used as the MIME charset to encode the body.
c113de23
GM
78If successful, the MIME charset is returned.
79If no encoding was done, nil is returned."
def5ff36 80 (if (not (mm-multibyte-p))
c113de23
GM
81 ;; In the non-Mule case, we search for non-ASCII chars and
82 ;; return the value of `mail-parse-charset' if any are found.
23f87bed
MB
83 (or charset
84 (save-excursion
85 (goto-char (point-min))
86 (if (re-search-forward "[^\x0-\x7f]" nil t)
87 (or mail-parse-charset
88 (message-options-get 'mm-encody-body-charset)
89 (message-options-set
90 'mm-encody-body-charset
91 (mm-read-coding-system "Charset used in the article: ")))
92 ;; The logic in `mml-generate-mime-1' confirms that it's OK
93 ;; to return nil here.
94 nil)))
c113de23 95 (save-excursion
23f87bed
MB
96 (if charset
97 (progn
98 (mm-encode-coding-region (point-min) (point-max) charset)
99 charset)
100 (goto-char (point-min))
503815da 101 (let ((charsets (mm-find-mime-charset-region (point-min) (point-max))))
23f87bed
MB
102 (cond
103 ;; No encoding.
104 ((null charsets)
105 nil)
106 ;; Too many charsets.
107 ((> (length charsets) 1)
108 charsets)
109 ;; We encode.
110 (t
111 (prog1
112 (setq charset (car charsets))
113 (mm-encode-coding-region (point-min) (point-max)
114 (mm-charset-to-coding-system charset))))
115 ))))))
116
117(defun mm-long-lines-p (length)
118 "Say whether any of the lines in the buffer is longer than LENGTH."
119 (save-excursion
120 (goto-char (point-min))
121 (end-of-line)
122 (while (and (not (eobp))
123 (not (> (current-column) length)))
124 (forward-line 1)
125 (end-of-line))
126 (and (> (current-column) length)
127 (current-column))))
128
129(defvar message-posting-charset)
058635a9 130
c113de23
GM
131(defun mm-body-encoding (charset &optional encoding)
132 "Do Content-Transfer-Encoding and return the encoding of the current buffer."
23f87bed
MB
133 (when (stringp encoding)
134 (setq encoding (intern (downcase encoding))))
135 (let ((bits (mm-body-7-or-8))
136 (longp (mm-long-lines-p 1000)))
058635a9 137 (require 'message)
c113de23 138 (cond
23f87bed
MB
139 ((and (not longp)
140 (not (and mm-use-ultra-safe-encoding
141 (save-excursion (re-search-forward "^From " nil t))))
142 (eq bits '7bit))
c113de23
GM
143 bits)
144 ((and (not mm-use-ultra-safe-encoding)
23f87bed
MB
145 (not longp)
146 (not (cdr (assq charset mm-body-charset-encoding-alist)))
c113de23
GM
147 (or (eq t (cdr message-posting-charset))
148 (memq charset (cdr message-posting-charset))
149 (eq charset mail-parse-charset)))
150 bits)
151 (t
152 (let ((encoding (or encoding
153 (cdr (assq charset mm-body-charset-encoding-alist))
154 (mm-qp-or-base64))))
155 (when mm-use-ultra-safe-encoding
156 (setq encoding (mm-safer-encoding encoding)))
157 (mm-encode-content-transfer-encoding encoding "text/plain")
158 encoding)))))
159
160(defun mm-body-7-or-8 ()
161 "Say whether the body is 7bit or 8bit."
5d54c59d
DL
162 (if (save-excursion
163 (goto-char (point-min))
164 (skip-chars-forward mm-7bit-chars)
165 (eobp))
166 '7bit
167 '8bit))
c113de23
GM
168
169;;;
170;;; Functions for decoding
171;;;
172
23f87bed
MB
173(eval-when-compile (defvar mm-uu-yenc-decode-function))
174
c113de23 175(defun mm-decode-content-transfer-encoding (encoding &optional type)
23f87bed
MB
176 "Decodes buffer encoded with ENCODING, returning success status.
177If TYPE is `text/plain' CRLF->LF translation may occur."
c113de23
GM
178 (prog1
179 (condition-case error
180 (cond
181 ((eq encoding 'quoted-printable)
23f87bed
MB
182 (quoted-printable-decode-region (point-min) (point-max))
183 t)
c113de23
GM
184 ((eq encoding 'base64)
185 (base64-decode-region
186 (point-min)
187 ;; Some mailers insert whitespace
188 ;; junk at the end which
189 ;; base64-decode-region dislikes.
190 ;; Also remove possible junk which could
191 ;; have been added by mailing list software.
192 (save-excursion
193 (goto-char (point-min))
194 (while (re-search-forward "^[\t ]*\r?\n" nil t)
195 (delete-region (match-beginning 0) (match-end 0)))
196 (goto-char (point-max))
197 (when (re-search-backward "^[A-Za-z0-9+/]+=*[\t ]*$" nil t)
23f87bed
MB
198 (forward-line))
199 (point))))
c113de23
GM
200 ((memq encoding '(7bit 8bit binary))
201 ;; Do nothing.
23f87bed 202 t)
c113de23
GM
203 ((null encoding)
204 ;; Do nothing.
23f87bed 205 t)
c113de23 206 ((memq encoding '(x-uuencode x-uue))
058635a9 207 (require 'mm-uu)
23f87bed
MB
208 (funcall mm-uu-decode-function (point-min) (point-max))
209 t)
c113de23 210 ((eq encoding 'x-binhex)
058635a9 211 (require 'mm-uu)
23f87bed
MB
212 (funcall mm-uu-binhex-decode-function (point-min) (point-max))
213 t)
214 ((eq encoding 'x-yenc)
215 (require 'mm-uu)
216 (funcall mm-uu-yenc-decode-function (point-min) (point-max))
217 )
c113de23 218 ((functionp encoding)
23f87bed
MB
219 (funcall encoding (point-min) (point-max))
220 t)
c113de23
GM
221 (t
222 (message "Unknown encoding %s; defaulting to 8bit" encoding)))
223 (error
224 (message "Error while decoding: %s" error)
225 nil))
226 (when (and
23f87bed 227 (memq encoding '(base64 x-uuencode x-uue x-binhex x-yenc))
c113de23
GM
228 (equal type "text/plain"))
229 (goto-char (point-min))
230 (while (search-forward "\r\n" nil t)
231 (replace-match "\n" t t)))))
232
233(defun mm-decode-body (charset &optional encoding type)
23f87bed
MB
234 "Decode the current article that has been encoded with ENCODING to CHARSET.
235ENCODING is a MIME content transfer encoding.
236CHARSET is the MIME charset with which to decode the data after transfer
237decoding. If it is nil, default to `mail-parse-charset'."
238 (when (stringp charset)
239 (setq charset (intern (downcase charset))))
240 (when (or (not charset)
241 (eq 'gnus-all mail-parse-ignored-charsets)
242 (memq 'gnus-all mail-parse-ignored-charsets)
243 (memq charset mail-parse-ignored-charsets))
244 (setq charset mail-parse-charset))
c113de23
GM
245 (save-excursion
246 (when encoding
247 (mm-decode-content-transfer-encoding encoding type))
23f87bed 248 (when (featurep 'mule) ; Fixme: Wrong test for unibyte session.
058635a9
DL
249 (let ((coding-system (mm-charset-to-coding-system charset)))
250 (if (and (not coding-system)
c113de23
GM
251 (listp mail-parse-ignored-charsets)
252 (memq 'gnus-unknown mail-parse-ignored-charsets))
5d54c59d 253 (setq coding-system
c113de23 254 (mm-charset-to-coding-system mail-parse-charset)))
058635a9 255 (when (and charset coding-system
c113de23
GM
256 ;; buffer-file-coding-system
257 ;;Article buffer is nil coding system
258 ;;in XEmacs
259 (mm-multibyte-p)
058635a9
DL
260 (or (not (eq coding-system 'ascii))
261 (setq coding-system mail-parse-charset))
262 (not (eq coding-system 'gnus-decoded)))
23f87bed
MB
263 (mm-decode-coding-region (point-min) (point-max)
264 coding-system))
265 (setq buffer-file-coding-system
266 (if (boundp 'last-coding-system-used)
267 (symbol-value 'last-coding-system-used)
268 coding-system))))))
c113de23
GM
269
270(defun mm-decode-string (string charset)
271 "Decode STRING with CHARSET."
272 (when (stringp charset)
273 (setq charset (intern (downcase charset))))
5d54c59d 274 (when (or (not charset)
c113de23
GM
275 (eq 'gnus-all mail-parse-ignored-charsets)
276 (memq 'gnus-all mail-parse-ignored-charsets)
277 (memq charset mail-parse-ignored-charsets))
278 (setq charset mail-parse-charset))
279 (or
280 (when (featurep 'mule)
058635a9
DL
281 (let ((coding-system (mm-charset-to-coding-system charset)))
282 (if (and (not coding-system)
c113de23
GM
283 (listp mail-parse-ignored-charsets)
284 (memq 'gnus-unknown mail-parse-ignored-charsets))
5d54c59d 285 (setq coding-system
c113de23 286 (mm-charset-to-coding-system mail-parse-charset)))
058635a9 287 (when (and charset coding-system
c113de23 288 (mm-multibyte-p)
058635a9
DL
289 (or (not (eq coding-system 'ascii))
290 (setq coding-system mail-parse-charset)))
291 (mm-decode-coding-string string coding-system))))
c113de23
GM
292 string))
293
294(provide 'mm-bodies)
295
ab5796a9 296;;; arch-tag: 41104bb6-4443-4ca9-8d5c-ff87ecf27d8d
715a2ca2 297;;; mm-bodies.el ends here