Revision: miles@gnu.org--gnu-2004/emacs--unicode--0--patch-70
[bpt/emacs.git] / lisp / gnus / mm-util.el
CommitLineData
95fa1ff7 1;;; mm-util.el --- Utility functions for Mule and low level things
23f87bed
MB
2;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004
3;; Free Software Foundation, Inc.
c113de23
GM
4
5;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
6;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
7;; This file is part of GNU Emacs.
8
9;; GNU Emacs is free software; you can redistribute it and/or modify
10;; it under the terms of the GNU General Public License as published by
11;; the Free Software Foundation; either version 2, or (at your option)
12;; any later version.
13
14;; GNU Emacs is distributed in the hope that it will be useful,
15;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17;; GNU General Public License for more details.
18
19;; You should have received a copy of the GNU General Public License
20;; along with GNU Emacs; see the file COPYING. If not, write to the
21;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
22;; Boston, MA 02111-1307, USA.
23
24;;; Commentary:
25
26;;; Code:
27
23f87bed 28(eval-when-compile (require 'cl))
c113de23
GM
29(require 'mail-prsvr)
30
f53b2875
DL
31(eval-and-compile
32 (mapcar
33 (lambda (elem)
34 (let ((nfunc (intern (format "mm-%s" (car elem)))))
35 (if (fboundp (car elem))
36 (defalias nfunc (car elem))
37 (defalias nfunc (cdr elem)))))
38 '((decode-coding-string . (lambda (s a) s))
39 (encode-coding-string . (lambda (s a) s))
40 (encode-coding-region . ignore)
41 (coding-system-list . ignore)
42 (decode-coding-region . ignore)
43 (char-int . identity)
f53b2875
DL
44 (coding-system-equal . equal)
45 (annotationp . ignore)
46 (set-buffer-file-coding-system . ignore)
47 (make-char
48 . (lambda (charset int)
49 (int-to-char int)))
f53b2875
DL
50 (read-charset
51 . (lambda (prompt)
52 "Return a charset."
53 (intern
54 (completing-read
55 prompt
56 (mapcar (lambda (e) (list (symbol-name (car e))))
57 mm-mime-mule-charset-alist)
58 nil t))))
95fa1ff7
SZ
59 (subst-char-in-string
60 . (lambda (from to string) ;; stolen (and renamed) from nnheader.el
61 "Replace characters in STRING from FROM to TO."
62 (let ((string (substring string 0)) ;Copy string.
63 (len (length string))
64 (idx 0))
65 ;; Replace all occurrences of FROM with TO.
66 (while (< idx len)
67 (when (= (aref string idx) from)
68 (aset string idx to))
69 (setq idx (1+ idx)))
70 string)))
f53b2875 71 (string-as-unibyte . identity)
23f87bed 72 (string-make-unibyte . identity)
95fa1ff7 73 (string-as-multibyte . identity)
56e09c09 74 (multibyte-string-p . ignore)
23f87bed
MB
75 ;; It is not a MIME function, but some MIME functions use it.
76 (make-temp-file . (lambda (prefix &optional dir-flag)
77 (let ((file (expand-file-name
78 (make-temp-name prefix)
79 (if (fboundp 'temp-directory)
80 (temp-directory)
81 temporary-file-directory))))
82 (if dir-flag
83 (make-directory file))
84 file)))
56e09c09
DL
85 (insert-byte . insert-char)
86 (multibyte-char-to-unibyte . identity))))
f53b2875 87
c113de23
GM
88(eval-and-compile
89 (defalias 'mm-char-or-char-int-p
95fa1ff7 90 (cond
c113de23 91 ((fboundp 'char-or-char-int-p) 'char-or-char-int-p)
95fa1ff7 92 ((fboundp 'char-valid-p) 'char-valid-p)
c113de23
GM
93 (t 'identity))))
94
23f87bed
MB
95;; Fixme: This seems always to be used to read a MIME charset, so it
96;; should be re-named and fixed (in Emacs) to offer completion only on
97;; proper charset names (base coding systems which have a
98;; mime-charset defined). XEmacs doesn't believe in mime-charset;
99;; test with
100;; `(or (coding-system-get 'iso-8859-1 'mime-charset)
101;; (coding-system-get 'iso-8859-1 :mime-charset))'
102;; Actually, there should be an `mm-coding-system-mime-charset'.
95fa1ff7
SZ
103(eval-and-compile
104 (defalias 'mm-read-coding-system
105 (cond
106 ((fboundp 'read-coding-system)
107 (if (and (featurep 'xemacs)
108 (<= (string-to-number emacs-version) 21.1))
109 (lambda (prompt &optional default-coding-system)
110 (read-coding-system prompt))
111 'read-coding-system))
112 (t (lambda (prompt &optional default-coding-system)
113 "Prompt the user for a coding system."
114 (completing-read
115 prompt (mapcar (lambda (s) (list (symbol-name (car s))))
116 mm-mime-mule-charset-alist)))))))
117
c113de23
GM
118(defvar mm-coding-system-list nil)
119(defun mm-get-coding-system-list ()
120 "Get the coding system list."
121 (or mm-coding-system-list
122 (setq mm-coding-system-list (mm-coding-system-list))))
123
23f87bed
MB
124(defun mm-coding-system-p (cs)
125 "Return non-nil if CS is a symbol naming a coding system.
0683d241
MB
126In XEmacs, also return non-nil if CS is a coding system object.
127If CS is available, return CS itself in Emacs, and return a coding
128system object in XEmacs."
23f87bed
MB
129 (if (fboundp 'find-coding-system)
130 (find-coding-system cs)
131 (if (fboundp 'coding-system-p)
0683d241
MB
132 (when (coding-system-p cs)
133 cs)
23f87bed 134 ;; Is this branch ever actually useful?
0683d241 135 (car (memq cs (mm-get-coding-system-list))))))
95fa1ff7 136
c113de23 137(defvar mm-charset-synonym-alist
95fa1ff7 138 `(
95fa1ff7 139 ;; Not in XEmacs, but it's not a proper MIME charset anyhow.
72eb5fc7
SZ
140 ,@(unless (mm-coding-system-p 'x-ctext)
141 '((x-ctext . ctext)))
23f87bed
MB
142 ;; ISO-8859-15 is very similar to ISO-8859-1. But it's _different_!
143 ,@(unless (mm-coding-system-p 'iso-8859-15)
72eb5fc7 144 '((iso-8859-15 . iso-8859-1)))
23f87bed
MB
145 ;; BIG-5HKSCS is similar to, but different than, BIG-5.
146 ,@(unless (mm-coding-system-p 'big5-hkscs)
147 '((big5-hkscs . big5)))
d1a7bc93
DL
148 ;; Windows-1252 is actually a superset of Latin-1. See also
149 ;; `gnus-article-dumbquotes-map'.
a1506d29 150 ,@(unless (mm-coding-system-p 'windows-1252)
72eb5fc7
SZ
151 (if (mm-coding-system-p 'cp1252)
152 '((windows-1252 . cp1252))
153 '((windows-1252 . iso-8859-1))))
20c381cf
GM
154 ;; Windows-1250 is a variant of Latin-2 heavily used by Microsoft
155 ;; Outlook users in Czech republic. Use this to allow reading of their
156 ;; e-mails. cp1250 should be defined by M-x codepage-setup.
72eb5fc7
SZ
157 ,@(if (and (not (mm-coding-system-p 'windows-1250))
158 (mm-coding-system-p 'cp1250))
159 '((windows-1250 . cp1250)))
95fa1ff7 160 )
c113de23
GM
161 "A mapping from invalid charset names to the real charset names.")
162
c113de23 163(defvar mm-binary-coding-system
95fa1ff7 164 (cond
c113de23
GM
165 ((mm-coding-system-p 'binary) 'binary)
166 ((mm-coding-system-p 'no-conversion) 'no-conversion)
167 (t nil))
168 "100% binary coding system.")
169
170(defvar mm-text-coding-system
171 (or (if (memq system-type '(windows-nt ms-dos ms-windows))
172 (and (mm-coding-system-p 'raw-text-dos) 'raw-text-dos)
173 (and (mm-coding-system-p 'raw-text) 'raw-text))
174 mm-binary-coding-system)
175 "Text-safe coding system (For removing ^M).")
176
177(defvar mm-text-coding-system-for-write nil
178 "Text coding system for write.")
179
180(defvar mm-auto-save-coding-system
95fa1ff7 181 (cond
23f87bed 182 ((mm-coding-system-p 'utf-8-emacs) ; Mule 7
56e09c09
DL
183 (if (memq system-type '(windows-nt ms-dos ms-windows))
184 (if (mm-coding-system-p 'utf-8-emacs-dos)
185 'utf-8-emacs-dos mm-binary-coding-system)
186 'utf-8-emacs))
c113de23
GM
187 ((mm-coding-system-p 'emacs-mule)
188 (if (memq system-type '(windows-nt ms-dos ms-windows))
95fa1ff7 189 (if (mm-coding-system-p 'emacs-mule-dos)
c113de23
GM
190 'emacs-mule-dos mm-binary-coding-system)
191 'emacs-mule))
192 ((mm-coding-system-p 'escape-quoted) 'escape-quoted)
193 (t mm-binary-coding-system))
194 "Coding system of auto save file.")
195
95fa1ff7 196(defvar mm-universal-coding-system mm-auto-save-coding-system
47b63dfa 197 "The universal coding system.")
95fa1ff7
SZ
198
199;; Fixme: some of the cars here aren't valid MIME charsets. That
200;; should only matter with XEmacs, though.
201(defvar mm-mime-mule-charset-alist
202 `((us-ascii ascii)
203 (iso-8859-1 latin-iso8859-1)
204 (iso-8859-2 latin-iso8859-2)
205 (iso-8859-3 latin-iso8859-3)
206 (iso-8859-4 latin-iso8859-4)
207 (iso-8859-5 cyrillic-iso8859-5)
208 ;; Non-mule (X)Emacs uses the last mule-charset for 8bit characters.
209 ;; The fake mule-charset, gnus-koi8-r, tells Gnus that the default
210 ;; charset is koi8-r, not iso-8859-5.
211 (koi8-r cyrillic-iso8859-5 gnus-koi8-r)
212 (iso-8859-6 arabic-iso8859-6)
213 (iso-8859-7 greek-iso8859-7)
214 (iso-8859-8 hebrew-iso8859-8)
215 (iso-8859-9 latin-iso8859-9)
216 (iso-8859-14 latin-iso8859-14)
217 (iso-8859-15 latin-iso8859-15)
218 (viscii vietnamese-viscii-lower)
219 (iso-2022-jp latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978)
220 (euc-kr korean-ksc5601)
221 (gb2312 chinese-gb2312)
222 (big5 chinese-big5-1 chinese-big5-2)
223 (tibetan tibetan)
224 (thai-tis620 thai-tis620)
0683d241 225 (windows-1251 cyrillic-iso8859-5)
95fa1ff7
SZ
226 (iso-2022-7bit ethiopic arabic-1-column arabic-2-column)
227 (iso-2022-jp-2 latin-iso8859-1 greek-iso8859-7
228 latin-jisx0201 japanese-jisx0208-1978
229 chinese-gb2312 japanese-jisx0208
0683d241 230 korean-ksc5601 japanese-jisx0212)
95fa1ff7
SZ
231 (iso-2022-int-1 latin-iso8859-1 greek-iso8859-7
232 latin-jisx0201 japanese-jisx0208-1978
233 chinese-gb2312 japanese-jisx0208
234 korean-ksc5601 japanese-jisx0212
235 chinese-cns11643-1 chinese-cns11643-2)
236 (iso-2022-int-1 latin-iso8859-1 latin-iso8859-2
237 cyrillic-iso8859-5 greek-iso8859-7
238 latin-jisx0201 japanese-jisx0208-1978
239 chinese-gb2312 japanese-jisx0208
240 korean-ksc5601 japanese-jisx0212
241 chinese-cns11643-1 chinese-cns11643-2
242 chinese-cns11643-3 chinese-cns11643-4
243 chinese-cns11643-5 chinese-cns11643-6
244 chinese-cns11643-7)
0683d241
MB
245 (iso-2022-jp-3 latin-jisx0201 japanese-jisx0208-1978 japanese-jisx0208
246 japanese-jisx0213-1 japanese-jisx0213-2)
247 (shift_jis latin-jisx0201 katakana-jisx0201 japanese-jisx0208)
95fa1ff7
SZ
248 ,(if (or (not (fboundp 'charsetp)) ;; non-Mule case
249 (charsetp 'unicode-a)
250 (not (mm-coding-system-p 'mule-utf-8)))
251 '(utf-8 unicode-a unicode-b unicode-c unicode-d unicode-e)
252 ;; If we have utf-8 we're in Mule 5+.
253 (append '(utf-8)
254 (delete 'ascii
255 (coding-system-get 'mule-utf-8 'safe-charsets)))))
256 "Alist of MIME-charset/MULE-charsets.")
257
0683d241
MB
258(defun mm-enrich-utf-8-by-mule-ucs ()
259 "Make the `utf-8' MIME charset usable by the Mule-UCS package.
260This function will run when the `un-define' module is loaded under
261XEmacs, and fill the `utf-8' entry in `mm-mime-mule-charset-alist'
262with Mule charsets. It is completely useless for Emacs."
263 (unless (cdr (delete '(mm-enrich-utf-8-by-mule-ucs)
264 (assoc "un-define" after-load-alist)))
265 (setq after-load-alist
266 (delete '("un-define") after-load-alist)))
267 (when (boundp 'unicode-basic-translation-charset-order-list)
268 (condition-case nil
269 (let ((val (delq
270 'ascii
271 (copy-sequence
272 (symbol-value
273 'unicode-basic-translation-charset-order-list))))
274 (elem (assq 'utf-8 mm-mime-mule-charset-alist)))
275 (if elem
276 (setcdr elem val)
277 (setq mm-mime-mule-charset-alist
278 (nconc mm-mime-mule-charset-alist
279 (list (cons 'utf-8 val))))))
280 (error))))
281
282;; Correct by construction, but should be unnecessary for Emacs:
283(if (featurep 'xemacs)
284 (eval-after-load "un-define" '(mm-enrich-utf-8-by-mule-ucs))
285 (when (and (fboundp 'coding-system-list)
286 (fboundp 'sort-coding-systems))
287 (let ((css (sort-coding-systems (coding-system-list 'base-only)))
288 cs mime mule alist)
289 (while css
290 (setq cs (pop css)
291 mime (or (coding-system-get cs :mime-charset) ; Emacs 22
292 (coding-system-get cs 'mime-charset)))
293 (when (and mime
294 (not (eq t (setq mule
295 (coding-system-get cs 'safe-charsets))))
296 (not (assq mime alist)))
297 (push (cons mime (delq 'ascii mule)) alist)))
298 (setq mm-mime-mule-charset-alist (nreverse alist)))))
95fa1ff7 299
23f87bed
MB
300(defcustom mm-coding-system-priorities
301 (if (boundp 'current-language-environment)
302 (let ((lang (symbol-value 'current-language-environment)))
303 (cond ((string= lang "Japanese")
5153a47a
MB
304 ;; Japanese users prefer iso-2022-jp to euc-japan or
305 ;; shift_jis, however iso-8859-1 should be used when
306 ;; there are only ASCII text and Latin-1 characters.
307 '(iso-8859-1 iso-2022-jp iso-2022-jp-2 shift_jis utf-8)))))
23f87bed
MB
308 "Preferred coding systems for encoding outgoing messages.
309
310More than one suitable coding system may be found for some text.
311By default, the coding system with the highest priority is used
312to encode outgoing messages (see `sort-coding-systems'). If this
313variable is set, it overrides the default priority."
a08b59c9 314 :version "21.2"
23f87bed
MB
315 :type '(repeat (symbol :tag "Coding system"))
316 :group 'mime)
317
318;; ??
1f7d2e14
SZ
319(defvar mm-use-find-coding-systems-region
320 (fboundp 'find-coding-systems-region)
23f87bed
MB
321 "Use `find-coding-systems-region' to find proper coding systems.
322
323Setting it to nil is useful on Emacsen supporting Unicode if sending
324mail with multiple parts is preferred to sending a Unicode one.")
1f7d2e14 325
c113de23
GM
326;;; Internal variables:
327
328;;; Functions:
329
330(defun mm-mule-charset-to-mime-charset (charset)
1c57d870 331 "Return the MIME charset corresponding to the given Mule CHARSET."
23f87bed
MB
332 (if (and (fboundp 'find-coding-systems-for-charsets)
333 (fboundp 'sort-coding-systems))
0683d241
MB
334 (let ((css (sort (sort-coding-systems
335 (find-coding-systems-for-charsets (list charset)))
336 'mm-sort-coding-systems-predicate))
337 cs mime)
338 (while (and (not mime)
339 css)
340 (when (setq cs (pop css))
341 (setq mime (or (coding-system-get cs :mime-charset)
342 (coding-system-get cs 'mime-charset)))))
95fa1ff7 343 mime)
0683d241
MB
344 (let ((alist (mapcar (lambda (cs)
345 (assq cs mm-mime-mule-charset-alist))
346 (sort (mapcar 'car mm-mime-mule-charset-alist)
347 'mm-sort-coding-systems-predicate)))
95fa1ff7
SZ
348 out)
349 (while alist
350 (when (memq charset (cdar alist))
351 (setq out (caar alist)
352 alist nil))
353 (pop alist))
354 out)))
c113de23
GM
355
356(defun mm-charset-to-coding-system (charset &optional lbt)
357 "Return coding-system corresponding to CHARSET.
358CHARSET is a symbol naming a MIME charset.
359If optional argument LBT (`unix', `dos' or `mac') is specified, it is
360used as the line break code type of the coding system."
361 (when (stringp charset)
362 (setq charset (intern (downcase charset))))
c113de23
GM
363 (when lbt
364 (setq charset (intern (format "%s-%s" charset lbt))))
365 (cond
47b63dfa
SZ
366 ((null charset)
367 charset)
c113de23 368 ;; Running in a non-MULE environment.
23f87bed
MB
369 ((or (null (mm-get-coding-system-list))
370 (not (fboundp 'coding-system-get)))
c113de23
GM
371 charset)
372 ;; ascii
373 ((eq charset 'us-ascii)
374 'ascii)
1c57d870
DL
375 ;; Check to see whether we can handle this charset. (This depends
376 ;; on there being some coding system matching each `mime-charset'
95fa1ff7
SZ
377 ;; property defined, as there should be.)
378 ((and (mm-coding-system-p charset)
379;;; Doing this would potentially weed out incorrect charsets.
380;;; charset
381;;; (eq charset (coding-system-get charset 'mime-charset))
382 )
c113de23 383 charset)
95fa1ff7 384 ;; Translate invalid charsets.
d62d49df 385 ((let ((cs (cdr (assq charset mm-charset-synonym-alist))))
23f87bed 386 (and cs (mm-coding-system-p cs) cs)))
95fa1ff7
SZ
387 ;; Last resort: search the coding system list for entries which
388 ;; have the right mime-charset in case the canonical name isn't
389 ;; defined (though it should be).
390 ((let (cs)
391 ;; mm-get-coding-system-list returns a list of cs without lbt.
392 ;; Do we need -lbt?
393 (dolist (c (mm-get-coding-system-list))
394 (if (and (null cs)
56e09c09
DL
395 (eq charset (or (coding-system-get c :mime-charset)
396 (coding-system-get c 'mime-charset))))
95fa1ff7
SZ
397 (setq cs c)))
398 cs))))
399
400(defsubst mm-replace-chars-in-string (string from to)
401 (mm-subst-char-in-string from to string))
402
403(eval-and-compile
404 (defvar mm-emacs-mule (and (not (featurep 'xemacs))
405 (boundp 'default-enable-multibyte-characters)
406 default-enable-multibyte-characters
407 (fboundp 'set-buffer-multibyte))
56e09c09 408 "True in Emacs with Mule.")
95fa1ff7
SZ
409
410 (if mm-emacs-mule
411 (defun mm-enable-multibyte ()
412 "Set the multibyte flag of the current buffer.
1c57d870
DL
413Only do this if the default value of `enable-multibyte-characters' is
414non-nil. This is a no-op in XEmacs."
473ad4a5 415 (set-buffer-multibyte 'to))
95fa1ff7 416 (defalias 'mm-enable-multibyte 'ignore))
c113de23 417
95fa1ff7
SZ
418 (if mm-emacs-mule
419 (defun mm-disable-multibyte ()
420 "Unset the multibyte flag of in the current buffer.
1c57d870 421This is a no-op in XEmacs."
95fa1ff7 422 (set-buffer-multibyte nil))
56e09c09 423 (defalias 'mm-disable-multibyte 'ignore)))
052802c1 424
c113de23
GM
425(defun mm-preferred-coding-system (charset)
426 ;; A typo in some Emacs versions.
47b63dfa
SZ
427 (or (get-charset-property charset 'preferred-coding-system)
428 (get-charset-property charset 'prefered-coding-system)))
c113de23 429
23f87bed
MB
430;; Mule charsets shouldn't be used.
431(defsubst mm-guess-charset ()
432 "Guess Mule charset from the language environment."
433 (or
434 mail-parse-mule-charset ;; cached mule-charset
435 (progn
436 (setq mail-parse-mule-charset
437 (and (boundp 'current-language-environment)
438 (car (last
439 (assq 'charset
440 (assoc current-language-environment
441 language-info-alist))))))
442 (if (or (not mail-parse-mule-charset)
443 (eq mail-parse-mule-charset 'ascii))
444 (setq mail-parse-mule-charset
445 (or (car (last (assq mail-parse-charset
446 mm-mime-mule-charset-alist)))
447 ;; default
448 'latin-iso8859-1)))
449 mail-parse-mule-charset)))
450
c113de23
GM
451(defun mm-charset-after (&optional pos)
452 "Return charset of a character in current buffer at position POS.
453If POS is nil, it defauls to the current point.
454If POS is out of range, the value is nil.
455If the charset is `composition', return the actual one."
052802c1
DL
456 (let ((char (char-after pos)) charset)
457 (if (< (mm-char-int char) 128)
458 (setq charset 'ascii)
459 ;; charset-after is fake in some Emacsen.
460 (setq charset (and (fboundp 'char-charset) (char-charset char)))
56e09c09 461 (if (eq charset 'composition) ; Mule 4
052802c1
DL
462 (let ((p (or pos (point))))
463 (cadr (find-charset-region p (1+ p))))
464 (if (and charset (not (memq charset '(ascii eight-bit-control
465 eight-bit-graphic))))
466 charset
23f87bed 467 (mm-guess-charset))))))
c113de23
GM
468
469(defun mm-mime-charset (charset)
1c57d870 470 "Return the MIME charset corresponding to the given Mule CHARSET."
95fa1ff7
SZ
471 (if (eq charset 'unknown)
472 (error "The message contains non-printable characters, please use attachment"))
052802c1 473 (if (and (fboundp 'coding-system-get) (fboundp 'get-charset-property))
c113de23
GM
474 ;; This exists in Emacs 20.
475 (or
476 (and (mm-preferred-coding-system charset)
56e09c09
DL
477 (or (coding-system-get
478 (mm-preferred-coding-system charset) :mime-charset)
479 (coding-system-get
480 (mm-preferred-coding-system charset) 'mime-charset)))
c113de23
GM
481 (and (eq charset 'ascii)
482 'us-ascii)
95fa1ff7 483 (mm-preferred-coding-system charset)
c113de23
GM
484 (mm-mule-charset-to-mime-charset charset))
485 ;; This is for XEmacs.
486 (mm-mule-charset-to-mime-charset charset)))
487
488(defun mm-delete-duplicates (list)
b8898393 489 "Simple substitute for CL `delete-duplicates', testing with `equal'."
c113de23
GM
490 (let (result head)
491 (while list
492 (setq head (car list))
493 (setq list (delete head list))
494 (setq result (cons head result)))
495 (nreverse result)))
496
23f87bed
MB
497;; Fixme: This is used in places when it should be testing the
498;; default multibyteness. See mm-default-multibyte-p.
499(eval-and-compile
052802c1
DL
500 (if (and (not (featurep 'xemacs))
501 (boundp 'enable-multibyte-characters))
23f87bed
MB
502 (defun mm-multibyte-p ()
503 "Non-nil if multibyte is enabled in the current buffer."
504 enable-multibyte-characters)
505 (defun mm-multibyte-p () (featurep 'mule))))
506
507(defun mm-default-multibyte-p ()
508 "Return non-nil if the session is multibyte.
509This affects whether coding conversion should be attempted generally."
510 (if (featurep 'mule)
511 (if (boundp 'default-enable-multibyte-characters)
512 default-enable-multibyte-characters
513 t)))
c113de23 514
47b63dfa 515(defun mm-sort-coding-systems-predicate (a b)
23f87bed
MB
516 (let ((priorities
517 (mapcar (lambda (cs)
518 ;; Note: invalid entries are dropped silently
0683d241 519 (and (setq cs (mm-coding-system-p cs))
23f87bed
MB
520 (coding-system-base cs)))
521 mm-coding-system-priorities)))
0683d241
MB
522 (and (setq a (mm-coding-system-p a))
523 (if (setq b (mm-coding-system-p b))
524 (> (length (memq (coding-system-base a) priorities))
525 (length (memq (coding-system-base b) priorities)))
526 t))))
47b63dfa 527
0c129bca 528(defun mm-find-mime-charset-region (b e)
95fa1ff7 529 "Return the MIME charsets needed to encode the region between B and E.
f0529b5b 530nil means ASCII, a single-element list represents an appropriate MIME
95fa1ff7 531charset, and a longer list means no appropriate charset."
47b63dfa
SZ
532 (let (charsets)
533 ;; The return possibilities of this function are a mess...
534 (or (and (mm-multibyte-p)
1f7d2e14 535 mm-use-find-coding-systems-region
47b63dfa
SZ
536 ;; Find the mime-charset of the most preferred coding
537 ;; system that has one.
538 (let ((systems (find-coding-systems-region b e)))
539 (when mm-coding-system-priorities
a1506d29 540 (setq systems
47b63dfa 541 (sort systems 'mm-sort-coding-systems-predicate)))
47b63dfa
SZ
542 (setq systems (delq 'compound-text systems))
543 (unless (equal systems '(undecided))
544 (while systems
56e09c09
DL
545 (let* ((head (pop systems))
546 (cs (or (coding-system-get head :mime-charset)
547 (coding-system-get head 'mime-charset))))
23f87bed
MB
548 ;; The mime-charset (`x-ctext') of
549 ;; `compound-text' is not in the IANA list. We
550 ;; shouldn't normally use anything here with a
551 ;; mime-charset having an `x-' prefix.
552 ;; Fixme: Allow this to be overridden, since
553 ;; there is existing use of x-ctext.
554 ;; Also people apparently need the coding system
555 ;; `iso-2022-jp-3' (which Mule-UCS defines with
556 ;; mime-charset, though it's not valid).
557 (if (and cs
558 (not (string-match "^[Xx]-" (symbol-name cs)))
559 ;; UTF-16 of any variety is invalid for
560 ;; text parts and, unfortunately, has
561 ;; mime-charset defined both in Mule-UCS
562 ;; and versions of Emacs. (The name
563 ;; might be `mule-utf-16...' or
564 ;; `utf-16...'.)
565 (not (string-match "utf-16" (symbol-name cs))))
47b63dfa
SZ
566 (setq systems nil
567 charsets (list cs))))))
568 charsets))
0c129bca
DL
569 ;; Fixme: won't work for unibyte Emacs 22:
570
23f87bed
MB
571 ;; Otherwise we're not multibyte, we're XEmacs, or a single
572 ;; coding system won't cover it.
a1506d29 573 (setq charsets
47b63dfa
SZ
574 (mm-delete-duplicates
575 (mapcar 'mm-mime-charset
576 (delq 'ascii
577 (mm-find-charset-region b e))))))
47b63dfa 578 charsets))
95fa1ff7 579
c113de23
GM
580(defmacro mm-with-unibyte-buffer (&rest forms)
581 "Create a temporary buffer, and evaluate FORMS there like `progn'.
1c57d870
DL
582Use unibyte mode for this."
583 `(let (default-enable-multibyte-characters)
584 (with-temp-buffer ,@forms)))
c113de23
GM
585(put 'mm-with-unibyte-buffer 'lisp-indent-function 0)
586(put 'mm-with-unibyte-buffer 'edebug-form-spec '(body))
587
23f87bed
MB
588(defmacro mm-with-multibyte-buffer (&rest forms)
589 "Create a temporary buffer, and evaluate FORMS there like `progn'.
590Use multibyte mode for this."
591 `(let ((default-enable-multibyte-characters t))
592 (with-temp-buffer ,@forms)))
593(put 'mm-with-multibyte-buffer 'lisp-indent-function 0)
594(put 'mm-with-multibyte-buffer 'edebug-form-spec '(body))
595
c113de23 596(defmacro mm-with-unibyte-current-buffer (&rest forms)
56e09c09 597 "Evaluate FORMS with current buffer temporarily made unibyte.
1c57d870
DL
598Also bind `default-enable-multibyte-characters' to nil.
599Equivalent to `progn' in XEmacs"
95fa1ff7
SZ
600 (let ((multibyte (make-symbol "multibyte"))
601 (buffer (make-symbol "buffer")))
a1506d29 602 `(if mm-emacs-mule
95fa1ff7
SZ
603 (let ((,multibyte enable-multibyte-characters)
604 (,buffer (current-buffer)))
1c57d870
DL
605 (unwind-protect
606 (let (default-enable-multibyte-characters)
607 (set-buffer-multibyte nil)
608 ,@forms)
95fa1ff7 609 (set-buffer ,buffer)
1c57d870 610 (set-buffer-multibyte ,multibyte)))
95fa1ff7 611 (let (default-enable-multibyte-characters)
1c57d870 612 ,@forms))))
c113de23
GM
613(put 'mm-with-unibyte-current-buffer 'lisp-indent-function 0)
614(put 'mm-with-unibyte-current-buffer 'edebug-form-spec '(body))
615
616(defmacro mm-with-unibyte (&rest forms)
23f87bed 617 "Eval the FORMS with the default value of `enable-multibyte-characters' nil."
1c57d870
DL
618 `(let (default-enable-multibyte-characters)
619 ,@forms))
c113de23
GM
620(put 'mm-with-unibyte 'lisp-indent-function 0)
621(put 'mm-with-unibyte 'edebug-form-spec '(body))
622
23f87bed
MB
623(defmacro mm-with-multibyte (&rest forms)
624 "Eval the FORMS with the default value of `enable-multibyte-characters' t."
625 `(let ((default-enable-multibyte-characters t))
626 ,@forms))
627(put 'mm-with-multibyte 'lisp-indent-function 0)
628(put 'mm-with-multibyte 'edebug-form-spec '(body))
629
c113de23 630(defun mm-find-charset-region (b e)
1c57d870 631 "Return a list of Emacs charsets in the region B to E."
c113de23
GM
632 (cond
633 ((and (mm-multibyte-p)
95fa1ff7 634 (fboundp 'find-charset-region))
c113de23 635 ;; Remove composition since the base charsets have been included.
95fa1ff7
SZ
636 ;; Remove eight-bit-*, treat them as ascii.
637 (let ((css (find-charset-region b e)))
638 (mapcar (lambda (cs) (setq css (delq cs css)))
639 '(composition eight-bit-control eight-bit-graphic
640 control-1))
641 css))
052802c1
DL
642 (t
643 ;; We are in a unibyte buffer or XEmacs non-mule, so we futz around a bit.
c113de23
GM
644 (save-excursion
645 (save-restriction
646 (narrow-to-region b e)
647 (goto-char (point-min))
648 (skip-chars-forward "\0-\177")
649 (if (eobp)
650 '(ascii)
052802c1
DL
651 (let (charset)
652 (setq charset
653 (and (boundp 'current-language-environment)
95fa1ff7
SZ
654 (car (last (assq 'charset
655 (assoc current-language-environment
052802c1
DL
656 language-info-alist))))))
657 (if (eq charset 'ascii) (setq charset nil))
658 (or charset
659 (setq charset
660 (car (last (assq mail-parse-charset
661 mm-mime-mule-charset-alist)))))
662 (list 'ascii (or charset 'latin-iso8859-1)))))))))
c113de23
GM
663
664(if (fboundp 'shell-quote-argument)
665 (defalias 'mm-quote-arg 'shell-quote-argument)
666 (defun mm-quote-arg (arg)
667 "Return a version of ARG that is safe to evaluate in a shell."
668 (let ((pos 0) new-pos accum)
669 ;; *** bug: we don't handle newline characters properly
670 (while (setq new-pos (string-match "[]*[;!'`\"$\\& \t{} |()<>]" arg pos))
671 (push (substring arg pos new-pos) accum)
672 (push "\\" accum)
673 (push (list (aref arg new-pos)) accum)
674 (setq pos (1+ new-pos)))
675 (if (= pos 0)
676 arg
677 (apply 'concat (nconc (nreverse accum) (list (substring arg pos))))))))
678
679(defun mm-auto-mode-alist ()
680 "Return an `auto-mode-alist' with only the .gz (etc) thingies."
681 (let ((alist auto-mode-alist)
682 out)
683 (while alist
684 (when (listp (cdar alist))
685 (push (car alist) out))
686 (pop alist))
687 (nreverse out)))
688
689(defvar mm-inhibit-file-name-handlers
244d58ba 690 '(jka-compr-handler image-file-handler)
c113de23
GM
691 "A list of handlers doing (un)compression (etc) thingies.")
692
693(defun mm-insert-file-contents (filename &optional visit beg end replace
694 inhibit)
23f87bed 695 "Like `insert-file-contents', but only reads in the file.
c113de23
GM
696A buffer may be modified in several ways after reading into the buffer due
697to advanced Emacs features, such as file-name-handlers, format decoding,
23f87bed 698`find-file-hooks', etc.
56e09c09 699If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'.
c113de23
GM
700 This function ensures that none of these modifications will take place."
701 (let ((format-alist nil)
702 (auto-mode-alist (if inhibit nil (mm-auto-mode-alist)))
703 (default-major-mode 'fundamental-mode)
704 (enable-local-variables nil)
95fa1ff7 705 (after-insert-file-functions nil)
c113de23
GM
706 (enable-local-eval nil)
707 (find-file-hooks nil)
95fa1ff7 708 (inhibit-file-name-operation (if inhibit
c113de23
GM
709 'insert-file-contents
710 inhibit-file-name-operation))
711 (inhibit-file-name-handlers
712 (if inhibit
95fa1ff7 713 (append mm-inhibit-file-name-handlers
c113de23
GM
714 inhibit-file-name-handlers)
715 inhibit-file-name-handlers)))
716 (insert-file-contents filename visit beg end replace)))
717
718(defun mm-append-to-file (start end filename &optional codesys inhibit)
719 "Append the contents of the region to the end of file FILENAME.
720When called from a function, expects three arguments,
721START, END and FILENAME. START and END are buffer positions
722saying what text to write.
723Optional fourth argument specifies the coding system to use when
724encoding the file.
23f87bed 725If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'."
95fa1ff7
SZ
726 (let ((coding-system-for-write
727 (or codesys mm-text-coding-system-for-write
c113de23 728 mm-text-coding-system))
95fa1ff7 729 (inhibit-file-name-operation (if inhibit
c113de23
GM
730 'append-to-file
731 inhibit-file-name-operation))
732 (inhibit-file-name-handlers
733 (if inhibit
95fa1ff7 734 (append mm-inhibit-file-name-handlers
c113de23
GM
735 inhibit-file-name-handlers)
736 inhibit-file-name-handlers)))
23f87bed
MB
737 (write-region start end filename t 'no-message)
738 (message "Appended to %s" filename)))
c113de23 739
95fa1ff7 740(defun mm-write-region (start end filename &optional append visit lockname
c113de23
GM
741 coding-system inhibit)
742
743 "Like `write-region'.
23f87bed 744If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'."
95fa1ff7
SZ
745 (let ((coding-system-for-write
746 (or coding-system mm-text-coding-system-for-write
c113de23 747 mm-text-coding-system))
95fa1ff7 748 (inhibit-file-name-operation (if inhibit
c113de23
GM
749 'write-region
750 inhibit-file-name-operation))
751 (inhibit-file-name-handlers
752 (if inhibit
95fa1ff7 753 (append mm-inhibit-file-name-handlers
c113de23
GM
754 inhibit-file-name-handlers)
755 inhibit-file-name-handlers)))
756 (write-region start end filename append visit lockname)))
757
95fa1ff7
SZ
758(defun mm-image-load-path (&optional package)
759 (let (dir result)
760 (dolist (path load-path (nreverse result))
f4dd4ae8
MB
761 (when (and path
762 (file-directory-p
763 (setq dir (concat (file-name-directory
764 (directory-file-name path))
765 "etc/" (or package "gnus/")))))
766 (push dir result))
95fa1ff7
SZ
767 (push path result))))
768
23f87bed
MB
769;; Fixme: This doesn't look useful where it's used.
770(if (fboundp 'detect-coding-region)
771 (defun mm-detect-coding-region (start end)
772 "Like `detect-coding-region' except returning the best one."
773 (let ((coding-systems
774 (detect-coding-region (point) (point-max))))
775 (or (car-safe coding-systems)
776 coding-systems)))
777 (defun mm-detect-coding-region (start end)
778 (let ((point (point)))
779 (goto-char start)
780 (skip-chars-forward "\0-\177" end)
781 (prog1
782 (if (eq (point) end) 'ascii (mm-guess-charset))
783 (goto-char point)))))
784
785(if (fboundp 'coding-system-get)
786 (defun mm-detect-mime-charset-region (start end)
787 "Detect MIME charset of the text in the region between START and END."
788 (let ((cs (mm-detect-coding-region start end)))
789 (coding-system-get cs 'mime-charset)))
790 (defun mm-detect-mime-charset-region (start end)
791 "Detect MIME charset of the text in the region between START and END."
792 (let ((cs (mm-detect-coding-region start end)))
793 cs)))
794
3efe5554 795
c113de23
GM
796(provide 'mm-util)
797
b8898393 798;;; arch-tag: 94dc5388-825d-4fd1-bfa5-2100aa351238
c113de23 799;;; mm-util.el ends here