*** empty log message ***
[bpt/emacs.git] / lisp / gnus / mm-util.el
CommitLineData
c113de23 1;;; mm-util.el --- Utility functions for MIME things
bf3b936f 2;; Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
c113de23
GM
3
4;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
5;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
6;; This file is part of GNU Emacs.
7
8;; GNU Emacs is free software; you can redistribute it and/or modify
9;; it under the terms of the GNU General Public License as published by
10;; the Free Software Foundation; either version 2, or (at your option)
11;; any later version.
12
13;; GNU Emacs is distributed in the hope that it will be useful,
14;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16;; GNU General Public License for more details.
17
18;; You should have received a copy of the GNU General Public License
19;; along with GNU Emacs; see the file COPYING. If not, write to the
20;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21;; Boston, MA 02111-1307, USA.
22
23;;; Commentary:
24
25;;; Code:
26
052802c1 27(eval-when-compile (require 'cl))
c113de23
GM
28(require 'mail-prsvr)
29
f53b2875
DL
30(defun mm-coding-system-p (sym)
31 "Return non-nil if SYM is a coding system."
32 (or (and (fboundp 'coding-system-p) (coding-system-p sym))
33 (memq sym (mm-get-coding-system-list))))
c89ac698 34
c113de23 35(defvar mm-mime-mule-charset-alist
bf3b936f 36 `((us-ascii ascii)
c113de23
GM
37 (iso-8859-1 latin-iso8859-1)
38 (iso-8859-2 latin-iso8859-2)
39 (iso-8859-3 latin-iso8859-3)
40 (iso-8859-4 latin-iso8859-4)
41 (iso-8859-5 cyrillic-iso8859-5)
42 ;; Non-mule (X)Emacs uses the last mule-charset for 8bit characters.
43 ;; The fake mule-charset, gnus-koi8-r, tells Gnus that the default
44 ;; charset is koi8-r, not iso-8859-5.
45 (koi8-r cyrillic-iso8859-5 gnus-koi8-r)
46 (iso-8859-6 arabic-iso8859-6)
47 (iso-8859-7 greek-iso8859-7)
48 (iso-8859-8 hebrew-iso8859-8)
49 (iso-8859-9 latin-iso8859-9)
98e61bd1
DL
50 (iso-8859-14 latin-iso8859-14)
51 (iso-8859-15 latin-iso8859-15)
c113de23
GM
52 (viscii vietnamese-viscii-lower)
53 (iso-2022-jp latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978)
54 (euc-kr korean-ksc5601)
676a7cc9
SZ
55 (gb2312 chinese-gb2312)
56 (big5 chinese-big5-1 chinese-big5-2)
c113de23
GM
57 (tibetan tibetan)
58 (thai-tis620 thai-tis620)
59 (iso-2022-7bit ethiopic arabic-1-column arabic-2-column)
60 (iso-2022-jp-2 latin-iso8859-1 greek-iso8859-7
61 latin-jisx0201 japanese-jisx0208-1978
62 chinese-gb2312 japanese-jisx0208
63 korean-ksc5601 japanese-jisx0212
64 katakana-jisx0201)
65 (iso-2022-int-1 latin-iso8859-1 greek-iso8859-7
66 latin-jisx0201 japanese-jisx0208-1978
67 chinese-gb2312 japanese-jisx0208
68 korean-ksc5601 japanese-jisx0212
69 chinese-cns11643-1 chinese-cns11643-2)
70 (iso-2022-int-1 latin-iso8859-1 latin-iso8859-2
71 cyrillic-iso8859-5 greek-iso8859-7
72 latin-jisx0201 japanese-jisx0208-1978
73 chinese-gb2312 japanese-jisx0208
74 korean-ksc5601 japanese-jisx0212
75 chinese-cns11643-1 chinese-cns11643-2
76 chinese-cns11643-3 chinese-cns11643-4
77 chinese-cns11643-5 chinese-cns11643-6
78 chinese-cns11643-7)
f891ed29
DL
79 ;; utf-8 comes either from Mule-UCS or Mule 5+.
80 ,@(if (mm-coding-system-p 'utf-8)
81 (list (cons 'utf-8 (delete 'ascii
82 (coding-system-get
83 'mule-utf-8
84 'safe-charsets))))))
c113de23
GM
85 "Alist of MIME-charset/MULE-charsets.")
86
f53b2875
DL
87(eval-and-compile
88 (mapcar
89 (lambda (elem)
90 (let ((nfunc (intern (format "mm-%s" (car elem)))))
91 (if (fboundp (car elem))
92 (defalias nfunc (car elem))
93 (defalias nfunc (cdr elem)))))
94 '((decode-coding-string . (lambda (s a) s))
95 (encode-coding-string . (lambda (s a) s))
96 (encode-coding-region . ignore)
97 (coding-system-list . ignore)
98 (decode-coding-region . ignore)
99 (char-int . identity)
100 (device-type . ignore)
101 (coding-system-equal . equal)
102 (annotationp . ignore)
103 (set-buffer-file-coding-system . ignore)
104 (make-char
105 . (lambda (charset int)
106 (int-to-char int)))
107 (read-coding-system
108 . (lambda (prompt)
109 "Prompt the user for a coding system."
110 (completing-read
111 prompt (mapcar (lambda (s) (list (symbol-name (car s))))
112 mm-mime-mule-charset-alist))))
113 (read-charset
114 . (lambda (prompt)
115 "Return a charset."
116 (intern
117 (completing-read
118 prompt
119 (mapcar (lambda (e) (list (symbol-name (car e))))
120 mm-mime-mule-charset-alist)
121 nil t))))
122 (string-as-unibyte . identity)
123 (multibyte-string-p . ignore)
124 )))
125
c113de23
GM
126(eval-and-compile
127 (defalias 'mm-char-or-char-int-p
128 (cond
129 ((fboundp 'char-or-char-int-p) 'char-or-char-int-p)
130 ((fboundp 'char-valid-p) 'char-valid-p)
131 (t 'identity))))
132
133(defvar mm-coding-system-list nil)
134(defun mm-get-coding-system-list ()
135 "Get the coding system list."
136 (or mm-coding-system-list
137 (setq mm-coding-system-list (mm-coding-system-list))))
138
139(defvar mm-charset-synonym-alist
d1a7bc93 140 `((big5 . cn-big5)
c113de23 141 (gb2312 . cn-gb-2312)
d1a7bc93
DL
142 ;; Windows-1252 is actually a superset of Latin-1. See also
143 ;; `gnus-article-dumbquotes-map'.
3df3ee35
SZ
144 ,(unless (mm-coding-system-p 'windows-1252) ; should be defined eventually
145 '(windows-1252 . iso-8859-1))
c113de23
GM
146 (x-ctext . ctext))
147 "A mapping from invalid charset names to the real charset names.")
148
c113de23
GM
149(defvar mm-binary-coding-system
150 (cond
151 ((mm-coding-system-p 'binary) 'binary)
152 ((mm-coding-system-p 'no-conversion) 'no-conversion)
153 (t nil))
154 "100% binary coding system.")
155
156(defvar mm-text-coding-system
157 (or (if (memq system-type '(windows-nt ms-dos ms-windows))
158 (and (mm-coding-system-p 'raw-text-dos) 'raw-text-dos)
159 (and (mm-coding-system-p 'raw-text) 'raw-text))
160 mm-binary-coding-system)
161 "Text-safe coding system (For removing ^M).")
162
163(defvar mm-text-coding-system-for-write nil
164 "Text coding system for write.")
165
166(defvar mm-auto-save-coding-system
167 (cond
168 ((mm-coding-system-p 'emacs-mule)
169 (if (memq system-type '(windows-nt ms-dos ms-windows))
170 (if (mm-coding-system-p 'emacs-mule-dos)
171 'emacs-mule-dos mm-binary-coding-system)
172 'emacs-mule))
173 ((mm-coding-system-p 'escape-quoted) 'escape-quoted)
174 (t mm-binary-coding-system))
175 "Coding system of auto save file.")
176
177;;; Internal variables:
178
179;;; Functions:
180
181(defun mm-mule-charset-to-mime-charset (charset)
1c57d870 182 "Return the MIME charset corresponding to the given Mule CHARSET."
c113de23
GM
183 (let ((alist mm-mime-mule-charset-alist)
184 out)
185 (while alist
186 (when (memq charset (cdar alist))
187 (setq out (caar alist)
188 alist nil))
189 (pop alist))
190 out))
191
192(defun mm-charset-to-coding-system (charset &optional lbt)
193 "Return coding-system corresponding to CHARSET.
194CHARSET is a symbol naming a MIME charset.
195If optional argument LBT (`unix', `dos' or `mac') is specified, it is
196used as the line break code type of the coding system."
197 (when (stringp charset)
198 (setq charset (intern (downcase charset))))
199 (setq charset
200 (or (cdr (assq charset mm-charset-synonym-alist))
201 charset))
202 (when lbt
203 (setq charset (intern (format "%s-%s" charset lbt))))
204 (cond
205 ;; Running in a non-MULE environment.
206 ((null (mm-get-coding-system-list))
207 charset)
208 ;; ascii
209 ((eq charset 'us-ascii)
210 'ascii)
1c57d870
DL
211 ;; Check to see whether we can handle this charset. (This depends
212 ;; on there being some coding system matching each `mime-charset'
213 ;; coding sysytem property defined, as there should be.)
c113de23
GM
214 ((memq charset (mm-get-coding-system-list))
215 charset)
216 ;; Nope.
217 (t
218 nil)))
219
220(if (fboundp 'subst-char-in-string)
221 (defsubst mm-replace-chars-in-string (string from to)
222 (subst-char-in-string from to string))
223 (defun mm-replace-chars-in-string (string from to)
224 "Replace characters in STRING from FROM to TO."
225 (let ((string (substring string 0)) ;Copy string.
226 (len (length string))
227 (idx 0))
228 ;; Replace all occurrences of FROM with TO.
229 (while (< idx len)
230 (when (= (aref string idx) from)
231 (aset string idx to))
232 (setq idx (1+ idx)))
233 string)))
234
235(defsubst mm-enable-multibyte ()
1c57d870
DL
236 "Set the multibyte flag of the current buffer.
237Only do this if the default value of `enable-multibyte-characters' is
238non-nil. This is a no-op in XEmacs."
c113de23
GM
239 (when (and (fboundp 'set-buffer-multibyte)
240 (boundp 'enable-multibyte-characters)
241 (default-value 'enable-multibyte-characters))
242 (set-buffer-multibyte t)))
243
244(defsubst mm-disable-multibyte ()
1c57d870
DL
245 "Unset the multibyte flag of in the current buffer.
246This is a no-op in XEmacs."
c113de23
GM
247 (when (fboundp 'set-buffer-multibyte)
248 (set-buffer-multibyte nil)))
249
052802c1
DL
250(defsubst mm-enable-multibyte-mule4 ()
251 "Enable multibyte in the current buffer.
252Only used in Emacs Mule 4."
253 (when (and (fboundp 'set-buffer-multibyte)
254 (boundp 'enable-multibyte-characters)
255 (default-value 'enable-multibyte-characters)
256 (not (charsetp 'eight-bit-control)))
257 (set-buffer-multibyte t)))
258
259(defsubst mm-disable-multibyte-mule4 ()
260 "Disable multibyte in the current buffer.
261Only used in Emacs Mule 4."
262 (when (and (fboundp 'set-buffer-multibyte)
263 (not (charsetp 'eight-bit-control)))
264 (set-buffer-multibyte nil)))
265
c113de23
GM
266(defun mm-preferred-coding-system (charset)
267 ;; A typo in some Emacs versions.
268 (or (get-charset-property charset 'prefered-coding-system)
269 (get-charset-property charset 'preferred-coding-system)))
270
271(defun mm-charset-after (&optional pos)
272 "Return charset of a character in current buffer at position POS.
273If POS is nil, it defauls to the current point.
274If POS is out of range, the value is nil.
275If the charset is `composition', return the actual one."
052802c1
DL
276 (let ((char (char-after pos)) charset)
277 (if (< (mm-char-int char) 128)
278 (setq charset 'ascii)
279 ;; charset-after is fake in some Emacsen.
280 (setq charset (and (fboundp 'char-charset) (char-charset char)))
281 (if (eq charset 'composition)
282 (let ((p (or pos (point))))
283 (cadr (find-charset-region p (1+ p))))
284 (if (and charset (not (memq charset '(ascii eight-bit-control
285 eight-bit-graphic))))
286 charset
287 (or
288 mail-parse-mule-charset ;; cached mule-charset
289 (progn
290 (setq mail-parse-mule-charset
291 (and (boundp 'current-language-environment)
292 (car (last
293 (assq 'charset
294 (assoc current-language-environment
295 language-info-alist))))))
296 (if (or (not mail-parse-mule-charset)
297 (eq mail-parse-mule-charset 'ascii))
298 (setq mail-parse-mule-charset
299 (or (car (last (assq mail-parse-charset
300 mm-mime-mule-charset-alist)))
bf3b936f 301 ;; Fixme: don't fix that!
052802c1
DL
302 'latin-iso8859-1)))
303 mail-parse-mule-charset)))))))
c113de23
GM
304
305(defun mm-mime-charset (charset)
1c57d870 306 "Return the MIME charset corresponding to the given Mule CHARSET."
052802c1 307 (if (and (fboundp 'coding-system-get) (fboundp 'get-charset-property))
c113de23
GM
308 ;; This exists in Emacs 20.
309 (or
310 (and (mm-preferred-coding-system charset)
311 (coding-system-get
312 (mm-preferred-coding-system charset) 'mime-charset))
313 (and (eq charset 'ascii)
314 'us-ascii)
c113de23
GM
315 (mm-mule-charset-to-mime-charset charset))
316 ;; This is for XEmacs.
317 (mm-mule-charset-to-mime-charset charset)))
318
319(defun mm-delete-duplicates (list)
320 "Simple substitute for CL `delete-duplicates', testing with `equal'."
321 (let (result head)
322 (while list
323 (setq head (car list))
324 (setq list (delete head list))
325 (setq result (cons head result)))
326 (nreverse result)))
327
328(defun mm-find-mime-charset-region (b e)
329 "Return the MIME charsets needed to encode the region between B and E."
330 (let ((charsets (mapcar 'mm-mime-charset
331 (delq 'ascii
332 (mm-find-charset-region b e)))))
333 (when (memq 'iso-2022-jp-2 charsets)
334 (setq charsets (delq 'iso-2022-jp charsets)))
335 (setq charsets (mm-delete-duplicates charsets))
336 (if (and (> (length charsets) 1)
337 (fboundp 'find-coding-systems-region)
bf3b936f
DL
338 (let ((cs (find-coding-systems-region b e)))
339 (or (memq 'utf-8 cs) (memq 'mule-utf-8 cs))))
c113de23
GM
340 '(utf-8)
341 charsets)))
342
343(defsubst mm-multibyte-p ()
344 "Say whether multibyte is enabled."
052802c1
DL
345 (if (and (not (featurep 'xemacs))
346 (boundp 'enable-multibyte-characters))
347 enable-multibyte-characters
348 (featurep 'mule)))
c113de23
GM
349
350(defmacro mm-with-unibyte-buffer (&rest forms)
351 "Create a temporary buffer, and evaluate FORMS there like `progn'.
1c57d870
DL
352Use unibyte mode for this."
353 `(let (default-enable-multibyte-characters)
354 (with-temp-buffer ,@forms)))
c113de23
GM
355(put 'mm-with-unibyte-buffer 'lisp-indent-function 0)
356(put 'mm-with-unibyte-buffer 'edebug-form-spec '(body))
357
358(defmacro mm-with-unibyte-current-buffer (&rest forms)
1c57d870
DL
359 "Evaluate FORMS with current current buffer temporarily made unibyte.
360Also bind `default-enable-multibyte-characters' to nil.
361Equivalent to `progn' in XEmacs"
c113de23 362 (let ((multibyte (make-symbol "multibyte")))
1c57d870
DL
363 `(if (fboundp 'set-buffer-multibyte)
364 (let ((,multibyte enable-multibyte-characters))
365 (unwind-protect
366 (let (default-enable-multibyte-characters)
367 (set-buffer-multibyte nil)
368 ,@forms)
369 (set-buffer-multibyte ,multibyte)))
370 (progn
371 ,@forms))))
c113de23
GM
372(put 'mm-with-unibyte-current-buffer 'lisp-indent-function 0)
373(put 'mm-with-unibyte-current-buffer 'edebug-form-spec '(body))
374
052802c1
DL
375(defmacro mm-with-unibyte-current-buffer-mule4 (&rest forms)
376 "Evaluate FORMS there like `progn' in current buffer.
377Mule4 only."
378 (let ((multibyte (make-symbol "multibyte")))
379 `(if (or (featurep 'xemacs)
380 (not (fboundp 'set-buffer-multibyte))
381 (charsetp 'eight-bit-control)) ;; For Emacs Mule 4 only.
382 (progn
383 ,@forms)
384 (let ((,multibyte (default-value 'enable-multibyte-characters)))
385 (unwind-protect
386 (let ((buffer-file-coding-system mm-binary-coding-system)
387 (coding-system-for-read mm-binary-coding-system)
388 (coding-system-for-write mm-binary-coding-system))
389 (set-buffer-multibyte nil)
390 (setq-default enable-multibyte-characters nil)
391 ,@forms)
392 (setq-default enable-multibyte-characters ,multibyte)
393 (set-buffer-multibyte ,multibyte))))))
394(put 'mm-with-unibyte-current-buffer-mule4 'lisp-indent-function 0)
395(put 'mm-with-unibyte-current-buffer-mule4 'edebug-form-spec '(body))
396
c113de23 397(defmacro mm-with-unibyte (&rest forms)
1c57d870
DL
398 "Eval the FORMS with the default value of `enable-multibyte-characters' nil, ."
399 `(let (default-enable-multibyte-characters)
400 ,@forms))
c113de23
GM
401(put 'mm-with-unibyte 'lisp-indent-function 0)
402(put 'mm-with-unibyte 'edebug-form-spec '(body))
403
404(defun mm-find-charset-region (b e)
1c57d870 405 "Return a list of Emacs charsets in the region B to E."
c113de23
GM
406 (cond
407 ((and (mm-multibyte-p)
408 (fboundp 'find-charset-region))
409 ;; Remove composition since the base charsets have been included.
410 (delq 'composition (find-charset-region b e)))
052802c1
DL
411 (t
412 ;; We are in a unibyte buffer or XEmacs non-mule, so we futz around a bit.
c113de23
GM
413 (save-excursion
414 (save-restriction
415 (narrow-to-region b e)
416 (goto-char (point-min))
417 (skip-chars-forward "\0-\177")
418 (if (eobp)
419 '(ascii)
052802c1
DL
420 (let (charset)
421 (setq charset
422 (and (boundp 'current-language-environment)
423 (car (last (assq 'charset
424 (assoc current-language-environment
425 language-info-alist))))))
426 (if (eq charset 'ascii) (setq charset nil))
427 (or charset
428 (setq charset
429 (car (last (assq mail-parse-charset
430 mm-mime-mule-charset-alist)))))
431 (list 'ascii (or charset 'latin-iso8859-1)))))))))
c113de23
GM
432
433(if (fboundp 'shell-quote-argument)
434 (defalias 'mm-quote-arg 'shell-quote-argument)
435 (defun mm-quote-arg (arg)
436 "Return a version of ARG that is safe to evaluate in a shell."
437 (let ((pos 0) new-pos accum)
438 ;; *** bug: we don't handle newline characters properly
439 (while (setq new-pos (string-match "[]*[;!'`\"$\\& \t{} |()<>]" arg pos))
440 (push (substring arg pos new-pos) accum)
441 (push "\\" accum)
442 (push (list (aref arg new-pos)) accum)
443 (setq pos (1+ new-pos)))
444 (if (= pos 0)
445 arg
446 (apply 'concat (nconc (nreverse accum) (list (substring arg pos))))))))
447
448(defun mm-auto-mode-alist ()
449 "Return an `auto-mode-alist' with only the .gz (etc) thingies."
450 (let ((alist auto-mode-alist)
451 out)
452 (while alist
453 (when (listp (cdar alist))
454 (push (car alist) out))
455 (pop alist))
456 (nreverse out)))
457
458(defvar mm-inhibit-file-name-handlers
244d58ba 459 '(jka-compr-handler image-file-handler)
c113de23
GM
460 "A list of handlers doing (un)compression (etc) thingies.")
461
462(defun mm-insert-file-contents (filename &optional visit beg end replace
463 inhibit)
464 "Like `insert-file-contents', q.v., but only reads in the file.
465A buffer may be modified in several ways after reading into the buffer due
466to advanced Emacs features, such as file-name-handlers, format decoding,
467find-file-hooks, etc.
468If INHIBIT is non-nil, inhibit mm-inhibit-file-name-handlers.
469 This function ensures that none of these modifications will take place."
470 (let ((format-alist nil)
471 (auto-mode-alist (if inhibit nil (mm-auto-mode-alist)))
472 (default-major-mode 'fundamental-mode)
473 (enable-local-variables nil)
474 (after-insert-file-functions nil)
475 (enable-local-eval nil)
476 (find-file-hooks nil)
477 (inhibit-file-name-operation (if inhibit
478 'insert-file-contents
479 inhibit-file-name-operation))
480 (inhibit-file-name-handlers
481 (if inhibit
482 (append mm-inhibit-file-name-handlers
483 inhibit-file-name-handlers)
484 inhibit-file-name-handlers)))
485 (insert-file-contents filename visit beg end replace)))
486
487(defun mm-append-to-file (start end filename &optional codesys inhibit)
488 "Append the contents of the region to the end of file FILENAME.
489When called from a function, expects three arguments,
490START, END and FILENAME. START and END are buffer positions
491saying what text to write.
492Optional fourth argument specifies the coding system to use when
493encoding the file.
494If INHIBIT is non-nil, inhibit mm-inhibit-file-name-handlers."
495 (let ((coding-system-for-write
496 (or codesys mm-text-coding-system-for-write
497 mm-text-coding-system))
498 (inhibit-file-name-operation (if inhibit
499 'append-to-file
500 inhibit-file-name-operation))
501 (inhibit-file-name-handlers
502 (if inhibit
503 (append mm-inhibit-file-name-handlers
504 inhibit-file-name-handlers)
505 inhibit-file-name-handlers)))
506 (append-to-file start end filename)))
507
508(defun mm-write-region (start end filename &optional append visit lockname
509 coding-system inhibit)
510
511 "Like `write-region'.
512If INHIBIT is non-nil, inhibit mm-inhibit-file-name-handlers."
513 (let ((coding-system-for-write
514 (or coding-system mm-text-coding-system-for-write
515 mm-text-coding-system))
516 (inhibit-file-name-operation (if inhibit
517 'write-region
518 inhibit-file-name-operation))
519 (inhibit-file-name-handlers
520 (if inhibit
521 (append mm-inhibit-file-name-handlers
522 inhibit-file-name-handlers)
523 inhibit-file-name-handlers)))
524 (write-region start end filename append visit lockname)))
525
526(provide 'mm-util)
527
528;;; mm-util.el ends here