(file-remote-p): Docstring fix.
[bpt/emacs.git] / lisp / gnus / mm-util.el
CommitLineData
95fa1ff7 1;;; mm-util.el --- Utility functions for Mule and low level things
e84b4b86
TTN
2
3;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004,
e3fe4da0 4;; 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
c113de23
GM
5
6;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
7;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
8;; This file is part of GNU Emacs.
9
10;; GNU Emacs is free software; you can redistribute it and/or modify
11;; it under the terms of the GNU General Public License as published by
5a9dffec 12;; the Free Software Foundation; either version 3, or (at your option)
c113de23
GM
13;; any later version.
14
15;; GNU Emacs is distributed in the hope that it will be useful,
16;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18;; GNU General Public License for more details.
19
20;; You should have received a copy of the GNU General Public License
21;; along with GNU Emacs; see the file COPYING. If not, write to the
3a35cf56
LK
22;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23;; Boston, MA 02110-1301, USA.
c113de23
GM
24
25;;; Commentary:
26
27;;; Code:
28
b5000590
GM
29;; For Emacs < 22.2.
30(eval-and-compile
31 (unless (fboundp 'declare-function) (defmacro declare-function (&rest r))))
32
23f87bed 33(eval-when-compile (require 'cl))
c113de23
GM
34(require 'mail-prsvr)
35
f53b2875 36(eval-and-compile
01c52d31
MB
37 (if (featurep 'xemacs)
38 (unless (ignore-errors
39 (require 'timer-funcs))
40 (require 'timer))
41 (require 'timer)))
42
9efa445f
DN
43(defvar mm-mime-mule-charset-alist )
44
01c52d31
MB
45(eval-and-compile
46 (mapc
f53b2875
DL
47 (lambda (elem)
48 (let ((nfunc (intern (format "mm-%s" (car elem)))))
49 (if (fboundp (car elem))
50 (defalias nfunc (car elem))
51 (defalias nfunc (cdr elem)))))
82fe1aed 52 '((coding-system-list . ignore)
f53b2875 53 (char-int . identity)
f53b2875
DL
54 (coding-system-equal . equal)
55 (annotationp . ignore)
56 (set-buffer-file-coding-system . ignore)
f53b2875
DL
57 (read-charset
58 . (lambda (prompt)
59 "Return a charset."
60 (intern
61 (completing-read
62 prompt
63 (mapcar (lambda (e) (list (symbol-name (car e))))
64 mm-mime-mule-charset-alist)
65 nil t))))
95fa1ff7 66 (subst-char-in-string
91472578
MB
67 . (lambda (from to string &optional inplace)
68 ;; stolen (and renamed) from nnheader.el
69 "Replace characters in STRING from FROM to TO.
70 Unless optional argument INPLACE is non-nil, return a new string."
71 (let ((string (if inplace string (copy-sequence string)))
95fa1ff7
SZ
72 (len (length string))
73 (idx 0))
74 ;; Replace all occurrences of FROM with TO.
75 (while (< idx len)
76 (when (= (aref string idx) from)
77 (aset string idx to))
78 (setq idx (1+ idx)))
79 string)))
01c52d31
MB
80 (replace-in-string
81 . (lambda (string regexp rep &optional literal)
82 "See `replace-regexp-in-string', only the order of args differs."
83 (replace-regexp-in-string regexp rep string nil literal)))
f53b2875 84 (string-as-unibyte . identity)
23f87bed 85 (string-make-unibyte . identity)
9d9b0de9
SM
86 ;; string-as-multibyte often doesn't really do what you think it does.
87 ;; Example:
88 ;; (aref (string-as-multibyte "\201") 0) -> 129 (aka ?\201)
89 ;; (aref (string-as-multibyte "\300") 0) -> 192 (aka ?\300)
90 ;; (aref (string-as-multibyte "\300\201") 0) -> 192 (aka ?\300)
91 ;; (aref (string-as-multibyte "\300\201") 1) -> 129 (aka ?\201)
92 ;; but
93 ;; (aref (string-as-multibyte "\201\300") 0) -> 2240
94 ;; (aref (string-as-multibyte "\201\300") 1) -> <error>
95 ;; Better use string-to-multibyte or encode-coding-string.
96 ;; If you really need string-as-multibyte somewhere it's usually
97 ;; because you're using the internal emacs-mule representation (maybe
98 ;; because you're using string-as-unibyte somewhere), which is
99 ;; generally a problem in itself.
100 ;; Here is an approximate equivalence table to help think about it:
101 ;; (string-as-multibyte s) ~= (decode-coding-string s 'emacs-mule)
102 ;; (string-to-multibyte s) ~= (decode-coding-string s 'binary)
103 ;; (string-make-multibyte s) ~= (decode-coding-string s locale-coding-system)
95fa1ff7 104 (string-as-multibyte . identity)
56e09c09 105 (multibyte-string-p . ignore)
56e09c09 106 (insert-byte . insert-char)
01c52d31
MB
107 (multibyte-char-to-unibyte . identity)
108 (special-display-p
109 . (lambda (buffer-name)
110 "Returns non-nil if a buffer named BUFFER-NAME gets a special frame."
111 (and special-display-function
112 (or (and (member buffer-name special-display-buffer-names) t)
113 (cdr (assoc buffer-name special-display-buffer-names))
114 (catch 'return
115 (dolist (elem special-display-regexps)
116 (and (stringp elem)
117 (string-match elem buffer-name)
118 (throw 'return t))
119 (and (consp elem)
120 (stringp (car elem))
121 (string-match (car elem) buffer-name)
122 (throw 'return (cdr elem))))))))))))
f53b2875 123
82fe1aed
MB
124(eval-and-compile
125 (if (featurep 'xemacs)
126 (if (featurep 'file-coding)
127 ;; Don't modify string if CODING-SYSTEM is nil.
128 (progn
129 (defun mm-decode-coding-string (str coding-system)
130 (if coding-system
131 (decode-coding-string str coding-system)
132 str))
133 (defun mm-encode-coding-string (str coding-system)
134 (if coding-system
135 (encode-coding-string str coding-system)
136 str))
137 (defun mm-decode-coding-region (start end coding-system)
138 (if coding-system
139 (decode-coding-region start end coding-system)))
140 (defun mm-encode-coding-region (start end coding-system)
141 (if coding-system
142 (encode-coding-region start end coding-system))))
143 (defun mm-decode-coding-string (str coding-system) str)
144 (defun mm-encode-coding-string (str coding-system) str)
145 (defalias 'mm-decode-coding-region 'ignore)
146 (defalias 'mm-encode-coding-region 'ignore))
147 (defalias 'mm-decode-coding-string 'decode-coding-string)
148 (defalias 'mm-encode-coding-string 'encode-coding-string)
149 (defalias 'mm-decode-coding-region 'decode-coding-region)
150 (defalias 'mm-encode-coding-region 'encode-coding-region)))
151
e8f0f70d
MB
152(defalias 'mm-string-to-multibyte
153 (cond
154 ((featurep 'xemacs)
155 'identity)
156 ((fboundp 'string-to-multibyte)
157 'string-to-multibyte)
158 (t
159 (lambda (string)
160 "Return a multibyte string with the same individual chars as string."
161 (mapconcat
162 (lambda (ch) (mm-string-as-multibyte (char-to-string ch)))
163 string "")))))
164
c113de23
GM
165(eval-and-compile
166 (defalias 'mm-char-or-char-int-p
95fa1ff7 167 (cond
c113de23 168 ((fboundp 'char-or-char-int-p) 'char-or-char-int-p)
95fa1ff7 169 ((fboundp 'char-valid-p) 'char-valid-p)
c113de23
GM
170 (t 'identity))))
171
23f87bed
MB
172;; Fixme: This seems always to be used to read a MIME charset, so it
173;; should be re-named and fixed (in Emacs) to offer completion only on
174;; proper charset names (base coding systems which have a
175;; mime-charset defined). XEmacs doesn't believe in mime-charset;
176;; test with
177;; `(or (coding-system-get 'iso-8859-1 'mime-charset)
178;; (coding-system-get 'iso-8859-1 :mime-charset))'
179;; Actually, there should be an `mm-coding-system-mime-charset'.
95fa1ff7
SZ
180(eval-and-compile
181 (defalias 'mm-read-coding-system
182 (cond
183 ((fboundp 'read-coding-system)
184 (if (and (featurep 'xemacs)
185 (<= (string-to-number emacs-version) 21.1))
186 (lambda (prompt &optional default-coding-system)
187 (read-coding-system prompt))
188 'read-coding-system))
189 (t (lambda (prompt &optional default-coding-system)
190 "Prompt the user for a coding system."
191 (completing-read
192 prompt (mapcar (lambda (s) (list (symbol-name (car s))))
193 mm-mime-mule-charset-alist)))))))
194
c113de23
GM
195(defvar mm-coding-system-list nil)
196(defun mm-get-coding-system-list ()
197 "Get the coding system list."
198 (or mm-coding-system-list
199 (setq mm-coding-system-list (mm-coding-system-list))))
200
23f87bed
MB
201(defun mm-coding-system-p (cs)
202 "Return non-nil if CS is a symbol naming a coding system.
0683d241
MB
203In XEmacs, also return non-nil if CS is a coding system object.
204If CS is available, return CS itself in Emacs, and return a coding
205system object in XEmacs."
23f87bed 206 (if (fboundp 'find-coding-system)
91472578 207 (and cs (find-coding-system cs))
23f87bed 208 (if (fboundp 'coding-system-p)
0683d241
MB
209 (when (coding-system-p cs)
210 cs)
5f4264e5 211 ;; no-MULE XEmacs:
0683d241 212 (car (memq cs (mm-get-coding-system-list))))))
95fa1ff7 213
bd29ba20
RS
214(defun mm-codepage-setup (number &optional alias)
215 "Create a coding system cpNUMBER.
216The coding system is created using `codepage-setup'. If ALIAS is
217non-nil, an alias is created and added to
218`mm-charset-synonym-alist'. If ALIAS is a string, it's used as
219the alias. Else windows-NUMBER is used."
220 (interactive
221 (let ((completion-ignore-case t)
222 (candidates (cp-supported-codepages)))
223 (list (completing-read "Setup DOS Codepage: (default 437) " candidates
224 nil t nil nil "437"))))
225 (when alias
226 (setq alias (if (stringp alias)
227 (intern alias)
228 (intern (format "windows-%s" number)))))
229 (let* ((cp (intern (format "cp%s" number))))
230 (unless (mm-coding-system-p cp)
231 (codepage-setup number))
232 (when (and alias
233 ;; Don't add alias if setup of cp failed.
234 (mm-coding-system-p cp))
235 (add-to-list 'mm-charset-synonym-alist (cons alias cp)))))
236
c113de23 237(defvar mm-charset-synonym-alist
95fa1ff7 238 `(
95fa1ff7 239 ;; Not in XEmacs, but it's not a proper MIME charset anyhow.
72eb5fc7 240 ,@(unless (mm-coding-system-p 'x-ctext)
b44409c9 241 '((x-ctext . ctext)))
ab785936
MB
242 ;; ISO-8859-15 is very similar to ISO-8859-1. But it's _different_ in 8
243 ;; positions!
23f87bed 244 ,@(unless (mm-coding-system-p 'iso-8859-15)
b44409c9 245 '((iso-8859-15 . iso-8859-1)))
23f87bed
MB
246 ;; BIG-5HKSCS is similar to, but different than, BIG-5.
247 ,@(unless (mm-coding-system-p 'big5-hkscs)
248 '((big5-hkscs . big5)))
bd29ba20 249 ;; A Microsoft misunderstanding.
ab785936
MB
250 ,@(when (and (not (mm-coding-system-p 'unicode))
251 (mm-coding-system-p 'utf-16-le))
252 '((unicode . utf-16-le)))
bd29ba20
RS
253 ;; A Microsoft misunderstanding.
254 ,@(unless (mm-coding-system-p 'ks_c_5601-1987)
255 (if (mm-coding-system-p 'cp949)
256 '((ks_c_5601-1987 . cp949))
257 '((ks_c_5601-1987 . euc-kr))))
b44409c9 258 ;; Windows-31J is Windows Codepage 932.
ab785936
MB
259 ,@(when (and (not (mm-coding-system-p 'windows-31j))
260 (mm-coding-system-p 'cp932))
261 '((windows-31j . cp932)))
4b70e299
MB
262 ;; Charset name: GBK, Charset aliases: CP936, MS936, windows-936
263 ;; http://www.iana.org/assignments/charset-reg/GBK
264 ;; Emacs 22.1 has cp936, but not gbk, so we alias it:
265 ,@(when (and (not (mm-coding-system-p 'gbk))
266 (mm-coding-system-p 'cp936))
267 '((gbk . cp936)))
01c52d31
MB
268 ;; ISO8859-1 is a bogus name for ISO-8859-1
269 ,@(when (and (not (mm-coding-system-p 'iso8859-1))
270 (mm-coding-system-p 'iso-8859-1))
271 '((iso8859-1 . iso-8859-1)))
95fa1ff7 272 )
ab785936
MB
273 "A mapping from unknown or invalid charset names to the real charset names.
274
275See `mm-codepage-iso-8859-list' and `mm-codepage-ibm-list'.")
276
277(defcustom mm-codepage-iso-8859-list
278 (list 1250 ;; Windows-1250 is a variant of Latin-2 heavily used by Microsoft
279 ;; Outlook users in Czech republic. Use this to allow reading of
280 ;; their e-mails. cp1250 should be defined by M-x codepage-setup
281 ;; (Emacs 21).
282 '(1252 . 1) ;; Windows-1252 is a superset of iso-8859-1 (West
283 ;; Europe). See also `gnus-article-dumbquotes-map'.
284 '(1254 . 9) ;; Windows-1254 is a superset of iso-8859-9 (Turkish).
285 '(1255 . 8));; Windows-1255 is a superset of iso-8859-8 (Hebrew).
286 "A list of Windows codepage numbers and iso-8859 charset numbers.
287
288If an element is a number corresponding to a supported windows
289codepage, appropriate entries to `mm-charset-synonym-alist' are
290added by `mm-setup-codepage-iso-8859'. An element may also be a
291cons cell where the car is a codepage number and the cdr is the
292corresponding number of an iso-8859 charset."
293 :type '(list (set :inline t
294 (const 1250 :tag "Central and East European")
295 (const (1252 . 1) :tag "West European")
296 (const (1254 . 9) :tag "Turkish")
297 (const (1255 . 8) :tag "Hebrew"))
298 (repeat :inline t
299 :tag "Other options"
300 (choice
301 (integer :tag "Windows codepage number")
302 (cons (integer :tag "Windows codepage number")
303 (integer :tag "iso-8859 charset number")))))
304 :version "22.1" ;; Gnus 5.10.9
305 :group 'mime)
306
307(defcustom mm-codepage-ibm-list
308 (list 437 ;; (US etc.)
309 860 ;; (Portugal)
310 861 ;; (Iceland)
311 862 ;; (Israel)
312 863 ;; (Canadian French)
313 865 ;; (Nordic)
314 852 ;;
315 850 ;; (Latin 1)
316 855 ;; (Cyrillic)
317 866 ;; (Cyrillic - Russian)
318 857 ;; (Turkish)
319 864 ;; (Arabic)
320 869 ;; (Greek)
321 874);; (Thai)
322 ;; In Emacs 23 (unicode), cp... and ibm... are aliases.
323 ;; Cf. http://thread.gmane.org/v9lkng5nwy.fsf@marauder.physik.uni-ulm.de
324 "List of IBM codepage numbers.
325
326The codepage mappings slighly differ between IBM and other vendors.
327See \"ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/IBM/README.TXT\".
328
329If an element is a number corresponding to a supported windows
330codepage, appropriate entries to `mm-charset-synonym-alist' are
331added by `mm-setup-codepage-ibm'."
332 :type '(list (set :inline t
333 (const 437 :tag "US etc.")
334 (const 860 :tag "Portugal")
335 (const 861 :tag "Iceland")
336 (const 862 :tag "Israel")
337 (const 863 :tag "Canadian French")
338 (const 865 :tag "Nordic")
339 (const 852)
340 (const 850 :tag "Latin 1")
341 (const 855 :tag "Cyrillic")
342 (const 866 :tag "Cyrillic - Russian")
343 (const 857 :tag "Turkish")
344 (const 864 :tag "Arabic")
345 (const 869 :tag "Greek")
346 (const 874 :tag "Thai"))
347 (repeat :inline t
348 :tag "Other options"
349 (integer :tag "Codepage number")))
350 :version "22.1" ;; Gnus 5.10.9
351 :group 'mime)
352
353(defun mm-setup-codepage-iso-8859 (&optional list)
354 "Add appropriate entries to `mm-charset-synonym-alist'.
355Unless LIST is given, `mm-codepage-iso-8859-list' is used."
356 (unless list
357 (setq list mm-codepage-iso-8859-list))
358 (dolist (i list)
359 (let (cp windows iso)
360 (if (consp i)
361 (setq cp (intern (format "cp%d" (car i)))
362 windows (intern (format "windows-%d" (car i)))
363 iso (intern (format "iso-8859-%d" (cdr i))))
364 (setq cp (intern (format "cp%d" i))
365 windows (intern (format "windows-%d" i))))
366 (unless (mm-coding-system-p windows)
367 (if (mm-coding-system-p cp)
368 (add-to-list 'mm-charset-synonym-alist (cons windows cp))
369 (add-to-list 'mm-charset-synonym-alist (cons windows iso)))))))
370
371(defun mm-setup-codepage-ibm (&optional list)
372 "Add appropriate entries to `mm-charset-synonym-alist'.
373Unless LIST is given, `mm-codepage-ibm-list' is used."
374 (unless list
375 (setq list mm-codepage-ibm-list))
376 (dolist (number list)
377 (let ((ibm (intern (format "ibm%d" number)))
378 (cp (intern (format "cp%d" number))))
379 (when (and (not (mm-coding-system-p ibm))
380 (mm-coding-system-p cp))
381 (add-to-list 'mm-charset-synonym-alist (cons ibm cp))))))
382
383;; Initialize:
384(mm-setup-codepage-iso-8859)
385(mm-setup-codepage-ibm)
bd29ba20
RS
386
387(defcustom mm-charset-override-alist
01c52d31
MB
388 '((iso-8859-1 . windows-1252)
389 (iso-8859-8 . windows-1255)
390 (iso-8859-9 . windows-1254))
bd29ba20
RS
391 "A mapping from undesired charset names to their replacement.
392
393You may add pairs like (iso-8859-1 . windows-1252) here,
394i.e. treat iso-8859-1 as windows-1252. windows-1252 is a
395superset of iso-8859-1."
396 :type '(list (set :inline t
397 (const (iso-8859-1 . windows-1252))
01c52d31
MB
398 (const (iso-8859-8 . windows-1255))
399 (const (iso-8859-9 . windows-1254))
bd29ba20
RS
400 (const (undecided . windows-1252)))
401 (repeat :inline t
402 :tag "Other options"
403 (cons (symbol :tag "From charset")
404 (symbol :tag "To charset"))))
67099291 405 :version "22.1" ;; Gnus 5.10.9
bd29ba20
RS
406 :group 'mime)
407
408(defcustom mm-charset-eval-alist
409 (if (featurep 'xemacs)
410 nil ;; I don't know what would be useful for XEmacs.
411 '(;; Emacs 21 offers 1250 1251 1253 1257. Emacs 22 provides autoloads for
412 ;; 1250-1258 (i.e. `mm-codepage-setup' does nothing).
413 (windows-1250 . (mm-codepage-setup 1250 t))
414 (windows-1251 . (mm-codepage-setup 1251 t))
415 (windows-1253 . (mm-codepage-setup 1253 t))
416 (windows-1257 . (mm-codepage-setup 1257 t))))
417 "An alist of (CHARSET . FORM) pairs.
418If an article is encoded in an unknown CHARSET, FORM is
419evaluated. This allows to load additional libraries providing
420charsets on demand. If supported by your Emacs version, you
421could use `autoload-coding-system' here."
67099291 422 :version "22.1" ;; Gnus 5.10.9
bd29ba20
RS
423 :type '(list (set :inline t
424 (const (windows-1250 . (mm-codepage-setup 1250 t)))
425 (const (windows-1251 . (mm-codepage-setup 1251 t)))
426 (const (windows-1253 . (mm-codepage-setup 1253 t)))
427 (const (windows-1257 . (mm-codepage-setup 1257 t)))
428 (const (cp850 . (mm-codepage-setup 850 nil))))
429 (repeat :inline t
430 :tag "Other options"
431 (cons (symbol :tag "charset")
432 (symbol :tag "form"))))
433 :group 'mime)
60ece9b0 434(put 'mm-charset-eval-alist 'risky-local-variable t)
c113de23 435
c113de23 436(defvar mm-binary-coding-system
95fa1ff7 437 (cond
c113de23
GM
438 ((mm-coding-system-p 'binary) 'binary)
439 ((mm-coding-system-p 'no-conversion) 'no-conversion)
440 (t nil))
441 "100% binary coding system.")
442
443(defvar mm-text-coding-system
444 (or (if (memq system-type '(windows-nt ms-dos ms-windows))
445 (and (mm-coding-system-p 'raw-text-dos) 'raw-text-dos)
446 (and (mm-coding-system-p 'raw-text) 'raw-text))
447 mm-binary-coding-system)
448 "Text-safe coding system (For removing ^M).")
449
450(defvar mm-text-coding-system-for-write nil
451 "Text coding system for write.")
452
453(defvar mm-auto-save-coding-system
95fa1ff7 454 (cond
23f87bed 455 ((mm-coding-system-p 'utf-8-emacs) ; Mule 7
56e09c09
DL
456 (if (memq system-type '(windows-nt ms-dos ms-windows))
457 (if (mm-coding-system-p 'utf-8-emacs-dos)
458 'utf-8-emacs-dos mm-binary-coding-system)
459 'utf-8-emacs))
c113de23
GM
460 ((mm-coding-system-p 'emacs-mule)
461 (if (memq system-type '(windows-nt ms-dos ms-windows))
95fa1ff7 462 (if (mm-coding-system-p 'emacs-mule-dos)
c113de23
GM
463 'emacs-mule-dos mm-binary-coding-system)
464 'emacs-mule))
465 ((mm-coding-system-p 'escape-quoted) 'escape-quoted)
466 (t mm-binary-coding-system))
467 "Coding system of auto save file.")
468
95fa1ff7 469(defvar mm-universal-coding-system mm-auto-save-coding-system
47b63dfa 470 "The universal coding system.")
95fa1ff7
SZ
471
472;; Fixme: some of the cars here aren't valid MIME charsets. That
473;; should only matter with XEmacs, though.
474(defvar mm-mime-mule-charset-alist
475 `((us-ascii ascii)
476 (iso-8859-1 latin-iso8859-1)
477 (iso-8859-2 latin-iso8859-2)
478 (iso-8859-3 latin-iso8859-3)
479 (iso-8859-4 latin-iso8859-4)
480 (iso-8859-5 cyrillic-iso8859-5)
481 ;; Non-mule (X)Emacs uses the last mule-charset for 8bit characters.
482 ;; The fake mule-charset, gnus-koi8-r, tells Gnus that the default
483 ;; charset is koi8-r, not iso-8859-5.
484 (koi8-r cyrillic-iso8859-5 gnus-koi8-r)
485 (iso-8859-6 arabic-iso8859-6)
486 (iso-8859-7 greek-iso8859-7)
487 (iso-8859-8 hebrew-iso8859-8)
488 (iso-8859-9 latin-iso8859-9)
489 (iso-8859-14 latin-iso8859-14)
490 (iso-8859-15 latin-iso8859-15)
491 (viscii vietnamese-viscii-lower)
492 (iso-2022-jp latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978)
493 (euc-kr korean-ksc5601)
494 (gb2312 chinese-gb2312)
495 (big5 chinese-big5-1 chinese-big5-2)
496 (tibetan tibetan)
497 (thai-tis620 thai-tis620)
0683d241 498 (windows-1251 cyrillic-iso8859-5)
95fa1ff7
SZ
499 (iso-2022-7bit ethiopic arabic-1-column arabic-2-column)
500 (iso-2022-jp-2 latin-iso8859-1 greek-iso8859-7
501 latin-jisx0201 japanese-jisx0208-1978
502 chinese-gb2312 japanese-jisx0208
0683d241 503 korean-ksc5601 japanese-jisx0212)
95fa1ff7
SZ
504 (iso-2022-int-1 latin-iso8859-1 greek-iso8859-7
505 latin-jisx0201 japanese-jisx0208-1978
506 chinese-gb2312 japanese-jisx0208
507 korean-ksc5601 japanese-jisx0212
508 chinese-cns11643-1 chinese-cns11643-2)
509 (iso-2022-int-1 latin-iso8859-1 latin-iso8859-2
510 cyrillic-iso8859-5 greek-iso8859-7
511 latin-jisx0201 japanese-jisx0208-1978
512 chinese-gb2312 japanese-jisx0208
513 korean-ksc5601 japanese-jisx0212
514 chinese-cns11643-1 chinese-cns11643-2
515 chinese-cns11643-3 chinese-cns11643-4
516 chinese-cns11643-5 chinese-cns11643-6
517 chinese-cns11643-7)
0683d241
MB
518 (iso-2022-jp-3 latin-jisx0201 japanese-jisx0208-1978 japanese-jisx0208
519 japanese-jisx0213-1 japanese-jisx0213-2)
520 (shift_jis latin-jisx0201 katakana-jisx0201 japanese-jisx0208)
26c9afc3
MB
521 ,(cond ((fboundp 'unicode-precedence-list)
522 (cons 'utf-8 (delq 'ascii (mapcar 'charset-name
523 (unicode-precedence-list)))))
524 ((or (not (fboundp 'charsetp)) ;; non-Mule case
525 (charsetp 'unicode-a)
526 (not (mm-coding-system-p 'mule-utf-8)))
527 '(utf-8 unicode-a unicode-b unicode-c unicode-d unicode-e))
528 (t ;; If we have utf-8 we're in Mule 5+.
529 (append '(utf-8)
530 (delete 'ascii
531 (coding-system-get 'mule-utf-8 'safe-charsets))))))
95fa1ff7
SZ
532 "Alist of MIME-charset/MULE-charsets.")
533
0683d241
MB
534(defun mm-enrich-utf-8-by-mule-ucs ()
535 "Make the `utf-8' MIME charset usable by the Mule-UCS package.
536This function will run when the `un-define' module is loaded under
537XEmacs, and fill the `utf-8' entry in `mm-mime-mule-charset-alist'
538with Mule charsets. It is completely useless for Emacs."
0683d241
MB
539 (when (boundp 'unicode-basic-translation-charset-order-list)
540 (condition-case nil
541 (let ((val (delq
542 'ascii
543 (copy-sequence
544 (symbol-value
545 'unicode-basic-translation-charset-order-list))))
546 (elem (assq 'utf-8 mm-mime-mule-charset-alist)))
547 (if elem
548 (setcdr elem val)
549 (setq mm-mime-mule-charset-alist
550 (nconc mm-mime-mule-charset-alist
551 (list (cons 'utf-8 val))))))
552 (error))))
553
554;; Correct by construction, but should be unnecessary for Emacs:
555(if (featurep 'xemacs)
556 (eval-after-load "un-define" '(mm-enrich-utf-8-by-mule-ucs))
557 (when (and (fboundp 'coding-system-list)
558 (fboundp 'sort-coding-systems))
559 (let ((css (sort-coding-systems (coding-system-list 'base-only)))
560 cs mime mule alist)
561 (while css
562 (setq cs (pop css)
5f4264e5 563 mime (or (coding-system-get cs :mime-charset) ; Emacs 23 (unicode)
0683d241
MB
564 (coding-system-get cs 'mime-charset)))
565 (when (and mime
566 (not (eq t (setq mule
567 (coding-system-get cs 'safe-charsets))))
568 (not (assq mime alist)))
569 (push (cons mime (delq 'ascii mule)) alist)))
570 (setq mm-mime-mule-charset-alist (nreverse alist)))))
95fa1ff7 571
47b63dfa
SZ
572(defvar mm-hack-charsets '(iso-8859-15 iso-2022-jp-2)
573 "A list of special charsets.
574Valid elements include:
575`iso-8859-15' convert ISO-8859-1, -9 to ISO-8859-15 if ISO-8859-15 exists.
576`iso-2022-jp-2' convert ISO-2022-jp to ISO-2022-jp-2 if ISO-2022-jp-2 exists."
577)
578
a1506d29 579(defvar mm-iso-8859-15-compatible
47b63dfa
SZ
580 '((iso-8859-1 "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE")
581 (iso-8859-9 "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE\xD0\xDD\xDE\xF0\xFD\xFE"))
582 "ISO-8859-15 exchangeable coding systems and inconvertible characters.")
583
584(defvar mm-iso-8859-x-to-15-table
585 (and (fboundp 'coding-system-p)
586 (mm-coding-system-p 'iso-8859-15)
a1506d29 587 (mapcar
47b63dfa
SZ
588 (lambda (cs)
589 (if (mm-coding-system-p (car cs))
a1506d29 590 (let ((c (string-to-char
47b63dfa
SZ
591 (decode-coding-string "\341" (car cs)))))
592 (cons (char-charset c)
593 (cons
a1506d29 594 (- (string-to-char
47b63dfa 595 (decode-coding-string "\341" 'iso-8859-15)) c)
a1506d29 596 (string-to-list (decode-coding-string (car (cdr cs))
47b63dfa
SZ
597 (car cs))))))
598 '(gnus-charset 0)))
599 mm-iso-8859-15-compatible))
600 "A table of the difference character between ISO-8859-X and ISO-8859-15.")
601
23f87bed
MB
602(defcustom mm-coding-system-priorities
603 (if (boundp 'current-language-environment)
604 (let ((lang (symbol-value 'current-language-environment)))
605 (cond ((string= lang "Japanese")
5153a47a
MB
606 ;; Japanese users prefer iso-2022-jp to euc-japan or
607 ;; shift_jis, however iso-8859-1 should be used when
608 ;; there are only ASCII text and Latin-1 characters.
609 '(iso-8859-1 iso-2022-jp iso-2022-jp-2 shift_jis utf-8)))))
23f87bed
MB
610 "Preferred coding systems for encoding outgoing messages.
611
612More than one suitable coding system may be found for some text.
613By default, the coding system with the highest priority is used
614to encode outgoing messages (see `sort-coding-systems'). If this
615variable is set, it overrides the default priority."
a08b59c9 616 :version "21.2"
23f87bed
MB
617 :type '(repeat (symbol :tag "Coding system"))
618 :group 'mime)
619
620;; ??
1f7d2e14
SZ
621(defvar mm-use-find-coding-systems-region
622 (fboundp 'find-coding-systems-region)
23f87bed
MB
623 "Use `find-coding-systems-region' to find proper coding systems.
624
625Setting it to nil is useful on Emacsen supporting Unicode if sending
626mail with multiple parts is preferred to sending a Unicode one.")
1f7d2e14 627
c113de23
GM
628;;; Internal variables:
629
630;;; Functions:
631
632(defun mm-mule-charset-to-mime-charset (charset)
1c57d870 633 "Return the MIME charset corresponding to the given Mule CHARSET."
23f87bed
MB
634 (if (and (fboundp 'find-coding-systems-for-charsets)
635 (fboundp 'sort-coding-systems))
0683d241
MB
636 (let ((css (sort (sort-coding-systems
637 (find-coding-systems-for-charsets (list charset)))
638 'mm-sort-coding-systems-predicate))
639 cs mime)
640 (while (and (not mime)
641 css)
642 (when (setq cs (pop css))
643 (setq mime (or (coding-system-get cs :mime-charset)
644 (coding-system-get cs 'mime-charset)))))
95fa1ff7 645 mime)
0683d241
MB
646 (let ((alist (mapcar (lambda (cs)
647 (assq cs mm-mime-mule-charset-alist))
648 (sort (mapcar 'car mm-mime-mule-charset-alist)
649 'mm-sort-coding-systems-predicate)))
95fa1ff7
SZ
650 out)
651 (while alist
652 (when (memq charset (cdar alist))
653 (setq out (caar alist)
654 alist nil))
655 (pop alist))
656 out)))
c113de23 657
bd29ba20
RS
658(defun mm-charset-to-coding-system (charset &optional lbt
659 allow-override)
c113de23
GM
660 "Return coding-system corresponding to CHARSET.
661CHARSET is a symbol naming a MIME charset.
662If optional argument LBT (`unix', `dos' or `mac') is specified, it is
bd29ba20
RS
663used as the line break code type of the coding system.
664
665If ALLOW-OVERRIDE is given, use `mm-charset-override-alist' to
666map undesired charset names to their replacement. This should
667only be used for decoding, not for encoding."
668 ;; OVERRIDE is used (only) in `mm-decode-body' and `mm-decode-string'.
c113de23
GM
669 (when (stringp charset)
670 (setq charset (intern (downcase charset))))
c113de23
GM
671 (when lbt
672 (setq charset (intern (format "%s-%s" charset lbt))))
673 (cond
47b63dfa
SZ
674 ((null charset)
675 charset)
c113de23 676 ;; Running in a non-MULE environment.
23f87bed
MB
677 ((or (null (mm-get-coding-system-list))
678 (not (fboundp 'coding-system-get)))
c113de23 679 charset)
bd29ba20
RS
680 ;; Check override list quite early. Should only used for decoding, not for
681 ;; encoding!
682 ((and allow-override
683 (let ((cs (cdr (assq charset mm-charset-override-alist))))
684 (and cs (mm-coding-system-p cs) cs))))
c113de23
GM
685 ;; ascii
686 ((eq charset 'us-ascii)
687 'ascii)
1c57d870
DL
688 ;; Check to see whether we can handle this charset. (This depends
689 ;; on there being some coding system matching each `mime-charset'
95fa1ff7
SZ
690 ;; property defined, as there should be.)
691 ((and (mm-coding-system-p charset)
692;;; Doing this would potentially weed out incorrect charsets.
693;;; charset
694;;; (eq charset (coding-system-get charset 'mime-charset))
695 )
c113de23 696 charset)
bd29ba20
RS
697 ;; Eval expressions from `mm-charset-eval-alist'
698 ((let* ((el (assq charset mm-charset-eval-alist))
699 (cs (car el))
700 (form (cdr el)))
701 (and cs
702 form
703 (prog2
704 ;; Avoid errors...
705 (condition-case nil (eval form) (error nil))
706 ;; (message "Failed to eval `%s'" form))
707 (mm-coding-system-p cs)
708 (message "Added charset `%s' via `mm-charset-eval-alist'" cs))
709 cs)))
95fa1ff7 710 ;; Translate invalid charsets.
d62d49df 711 ((let ((cs (cdr (assq charset mm-charset-synonym-alist))))
bd29ba20
RS
712 (and cs
713 (mm-coding-system-p cs)
714 ;; (message
715 ;; "Using synonym `%s' from `mm-charset-synonym-alist' for `%s'"
716 ;; cs charset)
717 cs)))
95fa1ff7
SZ
718 ;; Last resort: search the coding system list for entries which
719 ;; have the right mime-charset in case the canonical name isn't
720 ;; defined (though it should be).
721 ((let (cs)
722 ;; mm-get-coding-system-list returns a list of cs without lbt.
723 ;; Do we need -lbt?
724 (dolist (c (mm-get-coding-system-list))
725 (if (and (null cs)
56e09c09
DL
726 (eq charset (or (coding-system-get c :mime-charset)
727 (coding-system-get c 'mime-charset))))
95fa1ff7 728 (setq cs c)))
bd29ba20
RS
729 (unless cs
730 ;; Warn the user about unknown charset:
731 (if (fboundp 'gnus-message)
732 (gnus-message 7 "Unknown charset: %s" charset)
733 (message "Unknown charset: %s" charset)))
95fa1ff7
SZ
734 cs))))
735
95fa1ff7
SZ
736(eval-and-compile
737 (defvar mm-emacs-mule (and (not (featurep 'xemacs))
738 (boundp 'default-enable-multibyte-characters)
739 default-enable-multibyte-characters
740 (fboundp 'set-buffer-multibyte))
56e09c09 741 "True in Emacs with Mule.")
95fa1ff7
SZ
742
743 (if mm-emacs-mule
744 (defun mm-enable-multibyte ()
745 "Set the multibyte flag of the current buffer.
1c57d870
DL
746Only do this if the default value of `enable-multibyte-characters' is
747non-nil. This is a no-op in XEmacs."
23f87bed 748 (set-buffer-multibyte 'to))
95fa1ff7 749 (defalias 'mm-enable-multibyte 'ignore))
c113de23 750
95fa1ff7
SZ
751 (if mm-emacs-mule
752 (defun mm-disable-multibyte ()
753 "Unset the multibyte flag of in the current buffer.
1c57d870 754This is a no-op in XEmacs."
95fa1ff7 755 (set-buffer-multibyte nil))
56e09c09 756 (defalias 'mm-disable-multibyte 'ignore)))
052802c1 757
c113de23
GM
758(defun mm-preferred-coding-system (charset)
759 ;; A typo in some Emacs versions.
47b63dfa
SZ
760 (or (get-charset-property charset 'preferred-coding-system)
761 (get-charset-property charset 'prefered-coding-system)))
c113de23 762
23f87bed
MB
763;; Mule charsets shouldn't be used.
764(defsubst mm-guess-charset ()
765 "Guess Mule charset from the language environment."
766 (or
767 mail-parse-mule-charset ;; cached mule-charset
768 (progn
769 (setq mail-parse-mule-charset
770 (and (boundp 'current-language-environment)
771 (car (last
772 (assq 'charset
773 (assoc current-language-environment
774 language-info-alist))))))
775 (if (or (not mail-parse-mule-charset)
776 (eq mail-parse-mule-charset 'ascii))
777 (setq mail-parse-mule-charset
778 (or (car (last (assq mail-parse-charset
779 mm-mime-mule-charset-alist)))
780 ;; default
781 'latin-iso8859-1)))
782 mail-parse-mule-charset)))
783
c113de23
GM
784(defun mm-charset-after (&optional pos)
785 "Return charset of a character in current buffer at position POS.
786If POS is nil, it defauls to the current point.
787If POS is out of range, the value is nil.
788If the charset is `composition', return the actual one."
052802c1
DL
789 (let ((char (char-after pos)) charset)
790 (if (< (mm-char-int char) 128)
791 (setq charset 'ascii)
792 ;; charset-after is fake in some Emacsen.
793 (setq charset (and (fboundp 'char-charset) (char-charset char)))
56e09c09 794 (if (eq charset 'composition) ; Mule 4
052802c1
DL
795 (let ((p (or pos (point))))
796 (cadr (find-charset-region p (1+ p))))
797 (if (and charset (not (memq charset '(ascii eight-bit-control
798 eight-bit-graphic))))
799 charset
23f87bed 800 (mm-guess-charset))))))
c113de23
GM
801
802(defun mm-mime-charset (charset)
1c57d870 803 "Return the MIME charset corresponding to the given Mule CHARSET."
95fa1ff7
SZ
804 (if (eq charset 'unknown)
805 (error "The message contains non-printable characters, please use attachment"))
052802c1 806 (if (and (fboundp 'coding-system-get) (fboundp 'get-charset-property))
c113de23
GM
807 ;; This exists in Emacs 20.
808 (or
809 (and (mm-preferred-coding-system charset)
56e09c09
DL
810 (or (coding-system-get
811 (mm-preferred-coding-system charset) :mime-charset)
812 (coding-system-get
813 (mm-preferred-coding-system charset) 'mime-charset)))
c113de23
GM
814 (and (eq charset 'ascii)
815 'us-ascii)
95fa1ff7 816 (mm-preferred-coding-system charset)
c113de23
GM
817 (mm-mule-charset-to-mime-charset charset))
818 ;; This is for XEmacs.
819 (mm-mule-charset-to-mime-charset charset)))
820
8753ddee
MB
821(if (fboundp 'delete-dups)
822 (defalias 'mm-delete-duplicates 'delete-dups)
823 (defun mm-delete-duplicates (list)
824 "Destructively remove `equal' duplicates from LIST.
825Store the result in LIST and return it. LIST must be a proper list.
826Of several `equal' occurrences of an element in LIST, the first
827one is kept.
828
829This is a compatibility function for Emacsen without `delete-dups'."
830 ;; Code from `subr.el' in Emacs 22:
831 (let ((tail list))
832 (while tail
833 (setcdr tail (delete (car tail) (cdr tail)))
834 (setq tail (cdr tail))))
835 list))
c113de23 836
23f87bed
MB
837;; Fixme: This is used in places when it should be testing the
838;; default multibyteness. See mm-default-multibyte-p.
839(eval-and-compile
052802c1
DL
840 (if (and (not (featurep 'xemacs))
841 (boundp 'enable-multibyte-characters))
23f87bed
MB
842 (defun mm-multibyte-p ()
843 "Non-nil if multibyte is enabled in the current buffer."
844 enable-multibyte-characters)
845 (defun mm-multibyte-p () (featurep 'mule))))
846
847(defun mm-default-multibyte-p ()
848 "Return non-nil if the session is multibyte.
849This affects whether coding conversion should be attempted generally."
850 (if (featurep 'mule)
851 (if (boundp 'default-enable-multibyte-characters)
852 default-enable-multibyte-characters
853 t)))
c113de23 854
47b63dfa
SZ
855(defun mm-iso-8859-x-to-15-region (&optional b e)
856 (if (fboundp 'char-charset)
857 (let (charset item c inconvertible)
858 (save-restriction
859 (if e (narrow-to-region b e))
860 (goto-char (point-min))
861 (skip-chars-forward "\0-\177")
862 (while (not (eobp))
a1506d29
JB
863 (cond
864 ((not (setq item (assq (char-charset (setq c (char-after)))
47b63dfa
SZ
865 mm-iso-8859-x-to-15-table)))
866 (forward-char))
867 ((memq c (cdr (cdr item)))
868 (setq inconvertible t)
869 (forward-char))
870 (t
23f87bed
MB
871 (insert-before-markers (prog1 (+ c (car (cdr item)))
872 (delete-char 1)))))
873 (skip-chars-forward "\0-\177")))
47b63dfa
SZ
874 (not inconvertible))))
875
876(defun mm-sort-coding-systems-predicate (a b)
23f87bed
MB
877 (let ((priorities
878 (mapcar (lambda (cs)
879 ;; Note: invalid entries are dropped silently
0683d241 880 (and (setq cs (mm-coding-system-p cs))
23f87bed
MB
881 (coding-system-base cs)))
882 mm-coding-system-priorities)))
0683d241
MB
883 (and (setq a (mm-coding-system-p a))
884 (if (setq b (mm-coding-system-p b))
885 (> (length (memq (coding-system-base a) priorities))
886 (length (memq (coding-system-base b) priorities)))
887 t))))
47b63dfa 888
aa0a8561
MB
889(eval-when-compile
890 (autoload 'latin-unity-massage-name "latin-unity")
891 (autoload 'latin-unity-maybe-remap "latin-unity")
892 (autoload 'latin-unity-representations-feasible-region "latin-unity")
9efa445f
DN
893 (autoload 'latin-unity-representations-present-region "latin-unity"))
894
895(defvar latin-unity-coding-systems)
896(defvar latin-unity-ucs-list)
aa0a8561
MB
897
898(defun mm-xemacs-find-mime-charset-1 (begin end)
899 "Determine which MIME charset to use to send region as message.
900This uses the XEmacs-specific latin-unity package to better handle the
901case where identical characters from diverse ISO-8859-? character sets
902can be encoded using a single one of the corresponding coding systems.
903
904It treats `mm-coding-system-priorities' as the list of preferred
905coding systems; a useful example setting for this list in Western
906Europe would be '(iso-8859-1 iso-8859-15 utf-8), which would default
907to the very standard Latin 1 coding system, and only move to coding
908systems that are less supported as is necessary to encode the
909characters that exist in the buffer.
910
911Latin Unity doesn't know about those non-ASCII Roman characters that
912are available in various East Asian character sets. As such, its
913behavior if you have a JIS 0212 LATIN SMALL LETTER A WITH ACUTE in a
914buffer and it can otherwise be encoded as Latin 1, won't be ideal.
915But this is very much a corner case, so don't worry about it."
916 (let ((systems mm-coding-system-priorities) csets psets curset)
917
918 ;; Load the Latin Unity library, if available.
919 (when (and (not (featurep 'latin-unity)) (locate-library "latin-unity"))
01c52d31 920 (require 'latin-unity))
aa0a8561
MB
921
922 ;; Now, can we use it?
923 (if (featurep 'latin-unity)
924 (progn
925 (setq csets (latin-unity-representations-feasible-region begin end)
926 psets (latin-unity-representations-present-region begin end))
927
928 (catch 'done
929
930 ;; Pass back the first coding system in the preferred list
931 ;; that can encode the whole region.
932 (dolist (curset systems)
933 (setq curset (latin-unity-massage-name 'buffer-default curset))
934
935 ;; If the coding system is a universal coding system, then
936 ;; it can certainly encode all the characters in the region.
937 (if (memq curset latin-unity-ucs-list)
938 (throw 'done (list curset)))
939
940 ;; If a coding system isn't universal, and isn't in
941 ;; the list that latin unity knows about, we can't
942 ;; decide whether to use it here. Leave that until later
943 ;; in `mm-find-mime-charset-region' function, whence we
944 ;; have been called.
945 (unless (memq curset latin-unity-coding-systems)
946 (throw 'done nil))
947
948 ;; Right, we know about this coding system, and it may
949 ;; conceivably be able to encode all the characters in
950 ;; the region.
951 (if (latin-unity-maybe-remap begin end curset csets psets t)
952 (throw 'done (list curset))))
953
954 ;; Can't encode using anything from the
955 ;; `mm-coding-system-priorities' list.
956 ;; Leave `mm-find-mime-charset' to do most of the work.
957 nil))
958
959 ;; Right, latin unity isn't available; let `mm-find-charset-region'
960 ;; take its default action, which equally applies to GNU Emacs.
961 nil)))
962
963(defmacro mm-xemacs-find-mime-charset (begin end)
964 (when (featurep 'xemacs)
10ace8ea 965 `(and (featurep 'mule) (mm-xemacs-find-mime-charset-1 ,begin ,end))))
aa0a8561 966
b5000590
GM
967(declare-function mm-delete-duplicates "mm-util" (list))
968
47b63dfa 969(defun mm-find-mime-charset-region (b e &optional hack-charsets)
95fa1ff7 970 "Return the MIME charsets needed to encode the region between B and E.
f0529b5b 971nil means ASCII, a single-element list represents an appropriate MIME
95fa1ff7 972charset, and a longer list means no appropriate charset."
47b63dfa
SZ
973 (let (charsets)
974 ;; The return possibilities of this function are a mess...
975 (or (and (mm-multibyte-p)
1f7d2e14 976 mm-use-find-coding-systems-region
47b63dfa
SZ
977 ;; Find the mime-charset of the most preferred coding
978 ;; system that has one.
979 (let ((systems (find-coding-systems-region b e)))
980 (when mm-coding-system-priorities
a1506d29 981 (setq systems
47b63dfa 982 (sort systems 'mm-sort-coding-systems-predicate)))
47b63dfa
SZ
983 (setq systems (delq 'compound-text systems))
984 (unless (equal systems '(undecided))
985 (while systems
56e09c09
DL
986 (let* ((head (pop systems))
987 (cs (or (coding-system-get head :mime-charset)
988 (coding-system-get head 'mime-charset))))
23f87bed
MB
989 ;; The mime-charset (`x-ctext') of
990 ;; `compound-text' is not in the IANA list. We
991 ;; shouldn't normally use anything here with a
992 ;; mime-charset having an `x-' prefix.
993 ;; Fixme: Allow this to be overridden, since
994 ;; there is existing use of x-ctext.
995 ;; Also people apparently need the coding system
996 ;; `iso-2022-jp-3' (which Mule-UCS defines with
997 ;; mime-charset, though it's not valid).
998 (if (and cs
999 (not (string-match "^[Xx]-" (symbol-name cs)))
1000 ;; UTF-16 of any variety is invalid for
1001 ;; text parts and, unfortunately, has
1002 ;; mime-charset defined both in Mule-UCS
1003 ;; and versions of Emacs. (The name
1004 ;; might be `mule-utf-16...' or
1005 ;; `utf-16...'.)
1006 (not (string-match "utf-16" (symbol-name cs))))
47b63dfa
SZ
1007 (setq systems nil
1008 charsets (list cs))))))
1009 charsets))
aa0a8561
MB
1010 ;; If we're XEmacs, and some coding system is appropriate,
1011 ;; mm-xemacs-find-mime-charset will return an appropriate list.
1012 ;; Otherwise, we'll get nil, and the next setq will get invoked.
1013 (setq charsets (mm-xemacs-find-mime-charset b e))
1014
1015 ;; We're not multibyte, or a single coding system won't cover it.
a1506d29 1016 (setq charsets
47b63dfa
SZ
1017 (mm-delete-duplicates
1018 (mapcar 'mm-mime-charset
1019 (delq 'ascii
1020 (mm-find-charset-region b e))))))
23f87bed
MB
1021 (if (and (> (length charsets) 1)
1022 (memq 'iso-8859-15 charsets)
47b63dfa
SZ
1023 (memq 'iso-8859-15 hack-charsets)
1024 (save-excursion (mm-iso-8859-x-to-15-region b e)))
01c52d31
MB
1025 (dolist (x mm-iso-8859-15-compatible)
1026 (setq charsets (delq (car x) charsets))))
47b63dfa
SZ
1027 (if (and (memq 'iso-2022-jp-2 charsets)
1028 (memq 'iso-2022-jp-2 hack-charsets))
1029 (setq charsets (delq 'iso-2022-jp charsets)))
11e22c4a
MB
1030 ;; Attempt to reduce the number of charsets if utf-8 is available.
1031 (if (and (featurep 'xemacs)
1032 (> (length charsets) 1)
1033 (mm-coding-system-p 'utf-8))
1034 (let ((mm-coding-system-priorities
1035 (cons 'utf-8 mm-coding-system-priorities)))
1036 (setq charsets
1037 (mm-delete-duplicates
1038 (mapcar 'mm-mime-charset
1039 (delq 'ascii
1040 (mm-find-charset-region b e)))))))
47b63dfa 1041 charsets))
95fa1ff7 1042
c113de23
GM
1043(defmacro mm-with-unibyte-buffer (&rest forms)
1044 "Create a temporary buffer, and evaluate FORMS there like `progn'.
1c57d870
DL
1045Use unibyte mode for this."
1046 `(let (default-enable-multibyte-characters)
1047 (with-temp-buffer ,@forms)))
c113de23
GM
1048(put 'mm-with-unibyte-buffer 'lisp-indent-function 0)
1049(put 'mm-with-unibyte-buffer 'edebug-form-spec '(body))
1050
23f87bed
MB
1051(defmacro mm-with-multibyte-buffer (&rest forms)
1052 "Create a temporary buffer, and evaluate FORMS there like `progn'.
1053Use multibyte mode for this."
1054 `(let ((default-enable-multibyte-characters t))
1055 (with-temp-buffer ,@forms)))
1056(put 'mm-with-multibyte-buffer 'lisp-indent-function 0)
1057(put 'mm-with-multibyte-buffer 'edebug-form-spec '(body))
1058
c113de23 1059(defmacro mm-with-unibyte-current-buffer (&rest forms)
56e09c09 1060 "Evaluate FORMS with current buffer temporarily made unibyte.
1c57d870 1061Also bind `default-enable-multibyte-characters' to nil.
719120ef
MB
1062Equivalent to `progn' in XEmacs
1063
1064NOTE: Use this macro with caution in multibyte buffers (it is not
1065worth using this macro in unibyte buffers of course). Use of
1066`(set-buffer-multibyte t)', which is run finally, is generally
1067harmful since it is likely to modify existing data in the buffer.
fe62aacc
MB
1068For instance, it converts \"\\300\\255\" into \"\\255\" in
1069Emacs 23 (unicode)."
95fa1ff7
SZ
1070 (let ((multibyte (make-symbol "multibyte"))
1071 (buffer (make-symbol "buffer")))
a1506d29 1072 `(if mm-emacs-mule
719120ef 1073 (let ((,multibyte enable-multibyte-characters)
95fa1ff7 1074 (,buffer (current-buffer)))
1c57d870
DL
1075 (unwind-protect
1076 (let (default-enable-multibyte-characters)
1077 (set-buffer-multibyte nil)
1078 ,@forms)
95fa1ff7 1079 (set-buffer ,buffer)
1c57d870 1080 (set-buffer-multibyte ,multibyte)))
95fa1ff7 1081 (let (default-enable-multibyte-characters)
1c57d870 1082 ,@forms))))
c113de23
GM
1083(put 'mm-with-unibyte-current-buffer 'lisp-indent-function 0)
1084(put 'mm-with-unibyte-current-buffer 'edebug-form-spec '(body))
1085
1086(defmacro mm-with-unibyte (&rest forms)
23f87bed 1087 "Eval the FORMS with the default value of `enable-multibyte-characters' nil."
1c57d870
DL
1088 `(let (default-enable-multibyte-characters)
1089 ,@forms))
c113de23
GM
1090(put 'mm-with-unibyte 'lisp-indent-function 0)
1091(put 'mm-with-unibyte 'edebug-form-spec '(body))
1092
23f87bed
MB
1093(defmacro mm-with-multibyte (&rest forms)
1094 "Eval the FORMS with the default value of `enable-multibyte-characters' t."
1095 `(let ((default-enable-multibyte-characters t))
1096 ,@forms))
1097(put 'mm-with-multibyte 'lisp-indent-function 0)
1098(put 'mm-with-multibyte 'edebug-form-spec '(body))
1099
c113de23 1100(defun mm-find-charset-region (b e)
1c57d870 1101 "Return a list of Emacs charsets in the region B to E."
c113de23
GM
1102 (cond
1103 ((and (mm-multibyte-p)
95fa1ff7 1104 (fboundp 'find-charset-region))
c113de23 1105 ;; Remove composition since the base charsets have been included.
95fa1ff7
SZ
1106 ;; Remove eight-bit-*, treat them as ascii.
1107 (let ((css (find-charset-region b e)))
01c52d31
MB
1108 (dolist (cs
1109 '(composition eight-bit-control eight-bit-graphic control-1)
1110 css)
1111 (setq css (delq cs css)))))
052802c1
DL
1112 (t
1113 ;; We are in a unibyte buffer or XEmacs non-mule, so we futz around a bit.
c113de23
GM
1114 (save-excursion
1115 (save-restriction
1116 (narrow-to-region b e)
1117 (goto-char (point-min))
1118 (skip-chars-forward "\0-\177")
1119 (if (eobp)
1120 '(ascii)
052802c1
DL
1121 (let (charset)
1122 (setq charset
1123 (and (boundp 'current-language-environment)
95fa1ff7
SZ
1124 (car (last (assq 'charset
1125 (assoc current-language-environment
052802c1
DL
1126 language-info-alist))))))
1127 (if (eq charset 'ascii) (setq charset nil))
1128 (or charset
1129 (setq charset
1130 (car (last (assq mail-parse-charset
1131 mm-mime-mule-charset-alist)))))
1132 (list 'ascii (or charset 'latin-iso8859-1)))))))))
c113de23 1133
c113de23
GM
1134(defun mm-auto-mode-alist ()
1135 "Return an `auto-mode-alist' with only the .gz (etc) thingies."
1136 (let ((alist auto-mode-alist)
1137 out)
1138 (while alist
1139 (when (listp (cdar alist))
1140 (push (car alist) out))
1141 (pop alist))
1142 (nreverse out)))
1143
1144(defvar mm-inhibit-file-name-handlers
01c52d31 1145 '(jka-compr-handler image-file-handler epa-file-handler)
c113de23
GM
1146 "A list of handlers doing (un)compression (etc) thingies.")
1147
1148(defun mm-insert-file-contents (filename &optional visit beg end replace
1149 inhibit)
23f87bed 1150 "Like `insert-file-contents', but only reads in the file.
c113de23
GM
1151A buffer may be modified in several ways after reading into the buffer due
1152to advanced Emacs features, such as file-name-handlers, format decoding,
23f87bed 1153`find-file-hooks', etc.
56e09c09 1154If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'.
c113de23 1155 This function ensures that none of these modifications will take place."
4a43ee9b
MB
1156 (let* ((format-alist nil)
1157 (auto-mode-alist (if inhibit nil (mm-auto-mode-alist)))
1158 (default-major-mode 'fundamental-mode)
1159 (enable-local-variables nil)
1160 (after-insert-file-functions nil)
1161 (enable-local-eval nil)
1162 (inhibit-file-name-operation (if inhibit
1163 'insert-file-contents
1164 inhibit-file-name-operation))
1165 (inhibit-file-name-handlers
1166 (if inhibit
1167 (append mm-inhibit-file-name-handlers
1168 inhibit-file-name-handlers)
1169 inhibit-file-name-handlers))
1170 (ffh (if (boundp 'find-file-hook)
1171 'find-file-hook
1172 'find-file-hooks))
1173 (val (symbol-value ffh)))
1174 (set ffh nil)
1175 (unwind-protect
1176 (insert-file-contents filename visit beg end replace)
1177 (set ffh val))))
c113de23
GM
1178
1179(defun mm-append-to-file (start end filename &optional codesys inhibit)
1180 "Append the contents of the region to the end of file FILENAME.
1181When called from a function, expects three arguments,
1182START, END and FILENAME. START and END are buffer positions
1183saying what text to write.
1184Optional fourth argument specifies the coding system to use when
1185encoding the file.
23f87bed 1186If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'."
95fa1ff7
SZ
1187 (let ((coding-system-for-write
1188 (or codesys mm-text-coding-system-for-write
c113de23 1189 mm-text-coding-system))
95fa1ff7 1190 (inhibit-file-name-operation (if inhibit
c113de23
GM
1191 'append-to-file
1192 inhibit-file-name-operation))
1193 (inhibit-file-name-handlers
1194 (if inhibit
95fa1ff7 1195 (append mm-inhibit-file-name-handlers
c113de23
GM
1196 inhibit-file-name-handlers)
1197 inhibit-file-name-handlers)))
23f87bed
MB
1198 (write-region start end filename t 'no-message)
1199 (message "Appended to %s" filename)))
c113de23 1200
95fa1ff7 1201(defun mm-write-region (start end filename &optional append visit lockname
c113de23
GM
1202 coding-system inhibit)
1203
1204 "Like `write-region'.
23f87bed 1205If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'."
95fa1ff7
SZ
1206 (let ((coding-system-for-write
1207 (or coding-system mm-text-coding-system-for-write
c113de23 1208 mm-text-coding-system))
95fa1ff7 1209 (inhibit-file-name-operation (if inhibit
c113de23
GM
1210 'write-region
1211 inhibit-file-name-operation))
1212 (inhibit-file-name-handlers
1213 (if inhibit
95fa1ff7 1214 (append mm-inhibit-file-name-handlers
c113de23
GM
1215 inhibit-file-name-handlers)
1216 inhibit-file-name-handlers)))
1217 (write-region start end filename append visit lockname)))
1218
b5000590
GM
1219(autoload 'gmm-write-region "gmm-utils")
1220
cf5a5c38
MB
1221;; It is not a MIME function, but some MIME functions use it.
1222(if (and (fboundp 'make-temp-file)
1223 (ignore-errors
1224 (let ((def (symbol-function 'make-temp-file)))
1225 (and (byte-code-function-p def)
1226 (setq def (if (fboundp 'compiled-function-arglist)
1227 ;; XEmacs
1228 (eval (list 'compiled-function-arglist def))
1229 (aref def 0)))
1230 (>= (length def) 4)
1231 (eq (nth 3 def) 'suffix)))))
1232 (defalias 'mm-make-temp-file 'make-temp-file)
01c52d31 1233 ;; Stolen (and modified for XEmacs) from Emacs 22.
cf5a5c38
MB
1234 (defun mm-make-temp-file (prefix &optional dir-flag suffix)
1235 "Create a temporary file.
1236The returned file name (created by appending some random characters at the end
1237of PREFIX, and expanding against `temporary-file-directory' if necessary),
1238is guaranteed to point to a newly created empty file.
1239You can then use `write-region' to write new data into the file.
1240
1241If DIR-FLAG is non-nil, create a new empty directory instead of a file.
1242
1243If SUFFIX is non-nil, add that at the end of the file name."
1244 (let ((umask (default-file-modes))
1245 file)
1246 (unwind-protect
1247 (progn
1248 ;; Create temp files with strict access rights. It's easy to
1249 ;; loosen them later, whereas it's impossible to close the
1250 ;; time-window of loose permissions otherwise.
1251 (set-default-file-modes 448)
1252 (while (condition-case err
1253 (progn
1254 (setq file
1255 (make-temp-name
1256 (expand-file-name
1257 prefix
1258 (if (fboundp 'temp-directory)
1259 ;; XEmacs
1260 (temp-directory)
1261 temporary-file-directory))))
1262 (if suffix
1263 (setq file (concat file suffix)))
1264 (if dir-flag
1265 (make-directory file)
92edaeed
MB
1266 ;; NOTE: This is unsafe if Emacs 20
1267 ;; users and XEmacs users don't use
1268 ;; a secure temp directory.
1269 (gmm-write-region "" nil file nil 'silent
1270 nil 'excl))
cf5a5c38
MB
1271 nil)
1272 (file-already-exists t)
01c52d31
MB
1273 ;; The XEmacs version of `make-directory' issues
1274 ;; `file-error'.
1275 (file-error (or (and (featurep 'xemacs)
cf5a5c38
MB
1276 (file-exists-p file))
1277 (signal (car err) (cdr err)))))
1278 ;; the file was somehow created by someone else between
1279 ;; `make-temp-name' and `write-region', let's try again.
1280 nil)
1281 file)
1282 ;; Reset the umask.
1283 (set-default-file-modes umask)))))
1284
95fa1ff7
SZ
1285(defun mm-image-load-path (&optional package)
1286 (let (dir result)
1287 (dolist (path load-path (nreverse result))
f4dd4ae8
MB
1288 (when (and path
1289 (file-directory-p
1290 (setq dir (concat (file-name-directory
1291 (directory-file-name path))
d31fa104 1292 "etc/images/" (or package "gnus/")))))
f4dd4ae8 1293 (push dir result))
95fa1ff7
SZ
1294 (push path result))))
1295
23f87bed
MB
1296;; Fixme: This doesn't look useful where it's used.
1297(if (fboundp 'detect-coding-region)
1298 (defun mm-detect-coding-region (start end)
1299 "Like `detect-coding-region' except returning the best one."
1300 (let ((coding-systems
9d9b0de9 1301 (detect-coding-region start end)))
23f87bed
MB
1302 (or (car-safe coding-systems)
1303 coding-systems)))
1304 (defun mm-detect-coding-region (start end)
1305 (let ((point (point)))
1306 (goto-char start)
1307 (skip-chars-forward "\0-\177" end)
1308 (prog1
1309 (if (eq (point) end) 'ascii (mm-guess-charset))
1310 (goto-char point)))))
1311
b5000590
GM
1312(declare-function mm-detect-coding-region "mm-util" (start end))
1313
23f87bed
MB
1314(if (fboundp 'coding-system-get)
1315 (defun mm-detect-mime-charset-region (start end)
1316 "Detect MIME charset of the text in the region between START and END."
1317 (let ((cs (mm-detect-coding-region start end)))
bd29ba20
RS
1318 (or (coding-system-get cs :mime-charset)
1319 (coding-system-get cs 'mime-charset))))
23f87bed
MB
1320 (defun mm-detect-mime-charset-region (start end)
1321 "Detect MIME charset of the text in the region between START and END."
1322 (let ((cs (mm-detect-coding-region start end)))
1323 cs)))
1324
01c52d31
MB
1325(eval-when-compile
1326 (unless (fboundp 'coding-system-to-mime-charset)
1327 (defalias 'coding-system-to-mime-charset 'ignore)))
1328
1329(defun mm-coding-system-to-mime-charset (coding-system)
1330 "Return the MIME charset corresponding to CODING-SYSTEM.
1331To make this function work with XEmacs, the APEL package is required."
1332 (when coding-system
1333 (or (and (fboundp 'coding-system-get)
1334 (or (coding-system-get coding-system :mime-charset)
1335 (coding-system-get coding-system 'mime-charset)))
1336 (and (featurep 'xemacs)
1337 (or (and (fboundp 'coding-system-to-mime-charset)
1338 (not (eq (symbol-function 'coding-system-to-mime-charset)
1339 'ignore)))
1340 (and (condition-case nil
1341 (require 'mcharset)
1342 (error nil))
1343 (fboundp 'coding-system-to-mime-charset)))
1344 (coding-system-to-mime-charset coding-system)))))
1345
1346(eval-when-compile
1347 (require 'jka-compr))
1348
1349(defun mm-decompress-buffer (filename &optional inplace force)
1350 "Decompress buffer's contents, depending on jka-compr.
1351Only when FORCE is t or `auto-compression-mode' is enabled and FILENAME
1352agrees with `jka-compr-compression-info-list', decompression is done.
1353Signal an error if FORCE is neither nil nor t and compressed data are
1354not decompressed because `auto-compression-mode' is disabled.
1355If INPLACE is nil, return decompressed data or nil without modifying
1356the buffer. Otherwise, replace the buffer's contents with the
1357decompressed data. The buffer's multibyteness must be turned off."
1358 (when (and filename
1359 (if force
1360 (prog1 t (require 'jka-compr))
1361 (and (fboundp 'jka-compr-installed-p)
1362 (jka-compr-installed-p))))
1363 (let ((info (jka-compr-get-compression-info filename)))
1364 (when info
1365 (unless (or (memq force (list nil t))
1366 (jka-compr-installed-p))
1367 (error ""))
1368 (let ((prog (jka-compr-info-uncompress-program info))
1369 (args (jka-compr-info-uncompress-args info))
1370 (msg (format "%s %s..."
1371 (jka-compr-info-uncompress-message info)
1372 filename))
1373 (err-file (jka-compr-make-temp-name))
1374 (cur (current-buffer))
1375 (coding-system-for-read mm-binary-coding-system)
1376 (coding-system-for-write mm-binary-coding-system)
1377 retval err-msg)
1378 (message "%s" msg)
1379 (mm-with-unibyte-buffer
1380 (insert-buffer-substring cur)
1381 (condition-case err
1382 (progn
1383 (unless (memq (apply 'call-process-region
1384 (point-min) (point-max)
1385 prog t (list t err-file) nil args)
1386 jka-compr-acceptable-retval-list)
1387 (erase-buffer)
1388 (insert (mapconcat
1389 'identity
1390 (delete "" (split-string
1391 (prog2
1392 (insert-file-contents err-file)
1393 (buffer-string)
1394 (erase-buffer))))
1395 " ")
1396 "\n")
1397 (setq err-msg
1398 (format "Error while executing \"%s %s < %s\""
1399 prog (mapconcat 'identity args " ")
1400 filename)))
1401 (setq retval (buffer-string)))
1402 (error
1403 (setq err-msg (error-message-string err)))))
1404 (when (file-exists-p err-file)
1405 (ignore-errors (jka-compr-delete-temp-file err-file)))
1406 (when inplace
1407 (unless err-msg
1408 (delete-region (point-min) (point-max))
1409 (insert retval))
1410 (setq retval nil))
1411 (message "%s" (or err-msg (concat msg "done")))
1412 retval)))))
1413
1414(eval-when-compile
1415 (unless (fboundp 'coding-system-name)
1416 (defalias 'coding-system-name 'ignore))
1417 (unless (fboundp 'find-file-coding-system-for-read-from-filename)
1418 (defalias 'find-file-coding-system-for-read-from-filename 'ignore))
1419 (unless (fboundp 'find-operation-coding-system)
1420 (defalias 'find-operation-coding-system 'ignore)))
1421
1422(defun mm-find-buffer-file-coding-system (&optional filename)
1423 "Find coding system used to decode the contents of the current buffer.
1424This function looks for the coding system magic cookie or examines the
1425coding system specified by `file-coding-system-alist' being associated
1426with FILENAME which defaults to `buffer-file-name'. Data compressed by
1427gzip, bzip2, etc. are allowed."
1428 (unless filename
1429 (setq filename buffer-file-name))
1430 (save-excursion
1431 (let ((decomp (unless ;; No worth to examine charset of tar files.
1432 (and filename
1433 (string-match
1434 "\\.\\(?:tar\\.[^.]+\\|tbz\\|tgz\\)\\'"
1435 filename))
1436 (mm-decompress-buffer filename nil t))))
1437 (when decomp
1438 (set-buffer (let (default-enable-multibyte-characters)
1439 (generate-new-buffer " *temp*")))
1440 (insert decomp)
1441 (setq filename (file-name-sans-extension filename)))
1442 (goto-char (point-min))
1443 (prog1
1444 (cond
1445 ((boundp 'set-auto-coding-function) ;; Emacs
1446 (if filename
1447 (or (funcall (symbol-value 'set-auto-coding-function)
1448 filename (- (point-max) (point-min)))
1449 (car (find-operation-coding-system 'insert-file-contents
1450 filename)))
1451 (let (auto-coding-alist)
1452 (condition-case nil
1453 (funcall (symbol-value 'set-auto-coding-function)
1454 nil (- (point-max) (point-min)))
1455 (error nil)))))
9efa445f 1456 ((and (featurep 'xemacs) (featurep 'file-coding)) ;; XEmacs
01c52d31
MB
1457 (let ((case-fold-search t)
1458 (end (point-at-eol))
1459 codesys start)
1460 (or
1461 (and (re-search-forward "-\\*-+[\t ]*" end t)
1462 (progn
1463 (setq start (match-end 0))
1464 (re-search-forward "[\t ]*-+\\*-" end t))
1465 (progn
1466 (setq end (match-beginning 0))
1467 (goto-char start)
1468 (or (looking-at "coding:[\t ]*\\([^\t ;]+\\)")
1469 (re-search-forward
1470 "[\t ;]+coding:[\t ]*\\([^\t ;]+\\)"
1471 end t)))
1472 (find-coding-system (setq codesys
1473 (intern (match-string 1))))
1474 codesys)
1475 (and (re-search-forward "^[\t ]*;+[\t ]*Local[\t ]+Variables:"
1476 nil t)
1477 (progn
1478 (setq start (match-end 0))
1479 (re-search-forward "^[\t ]*;+[\t ]*End:" nil t))
1480 (progn
1481 (setq end (match-beginning 0))
1482 (goto-char start)
1483 (re-search-forward
1484 "^[\t ]*;+[\t ]*coding:[\t ]*\\([^\t\n\r ]+\\)"
1485 end t))
1486 (find-coding-system (setq codesys
1487 (intern (match-string 1))))
1488 codesys)
1489 (and (progn
1490 (goto-char (point-min))
1491 (setq case-fold-search nil)
1492 (re-search-forward "^;;;coding system: "
1493 ;;(+ (point-min) 3000) t))
1494 nil t))
1495 (looking-at "[^\t\n\r ]+")
1496 (find-coding-system
1497 (setq codesys (intern (match-string 0))))
1498 codesys)
1499 (and filename
1500 (setq codesys
1501 (find-file-coding-system-for-read-from-filename
1502 filename))
1503 (coding-system-name (coding-system-base codesys)))))))
1504 (when decomp
1505 (kill-buffer (current-buffer)))))))
3efe5554 1506
c113de23
GM
1507(provide 'mm-util)
1508
9d9b0de9 1509;; arch-tag: 94dc5388-825d-4fd1-bfa5-2100aa351238
c113de23 1510;;; mm-util.el ends here