Commit | Line | Data |
---|---|---|
95fa1ff7 | 1 | ;;; mm-util.el --- Utility functions for Mule and low level things |
e84b4b86 TTN |
2 | |
3 | ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, | |
88e6695f | 4 | ;; 2005, 2006 Free Software Foundation, Inc. |
c113de23 GM |
5 | |
6 | ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | |
7 | ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> | |
8 | ;; This file is part of GNU Emacs. | |
9 | ||
10 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
11 | ;; it under the terms of the GNU General Public License as published by | |
12 | ;; the Free Software Foundation; either version 2, or (at your option) | |
13 | ;; any later version. | |
14 | ||
15 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | ;; GNU General Public License for more details. | |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
21 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
3a35cf56 LK |
22 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
23 | ;; Boston, MA 02110-1301, USA. | |
c113de23 GM |
24 | |
25 | ;;; Commentary: | |
26 | ||
27 | ;;; Code: | |
28 | ||
23f87bed | 29 | (eval-when-compile (require 'cl)) |
c113de23 GM |
30 | (require 'mail-prsvr) |
31 | ||
f53b2875 DL |
32 | (eval-and-compile |
33 | (mapcar | |
34 | (lambda (elem) | |
35 | (let ((nfunc (intern (format "mm-%s" (car elem))))) | |
36 | (if (fboundp (car elem)) | |
37 | (defalias nfunc (car elem)) | |
38 | (defalias nfunc (cdr elem))))) | |
39 | '((decode-coding-string . (lambda (s a) s)) | |
40 | (encode-coding-string . (lambda (s a) s)) | |
41 | (encode-coding-region . ignore) | |
42 | (coding-system-list . ignore) | |
43 | (decode-coding-region . ignore) | |
44 | (char-int . identity) | |
f53b2875 DL |
45 | (coding-system-equal . equal) |
46 | (annotationp . ignore) | |
47 | (set-buffer-file-coding-system . ignore) | |
48 | (make-char | |
49 | . (lambda (charset int) | |
50 | (int-to-char int))) | |
f53b2875 DL |
51 | (read-charset |
52 | . (lambda (prompt) | |
53 | "Return a charset." | |
54 | (intern | |
55 | (completing-read | |
56 | prompt | |
57 | (mapcar (lambda (e) (list (symbol-name (car e)))) | |
58 | mm-mime-mule-charset-alist) | |
59 | nil t)))) | |
95fa1ff7 | 60 | (subst-char-in-string |
91472578 MB |
61 | . (lambda (from to string &optional inplace) |
62 | ;; stolen (and renamed) from nnheader.el | |
63 | "Replace characters in STRING from FROM to TO. | |
64 | Unless optional argument INPLACE is non-nil, return a new string." | |
65 | (let ((string (if inplace string (copy-sequence string))) | |
95fa1ff7 SZ |
66 | (len (length string)) |
67 | (idx 0)) | |
68 | ;; Replace all occurrences of FROM with TO. | |
69 | (while (< idx len) | |
70 | (when (= (aref string idx) from) | |
71 | (aset string idx to)) | |
72 | (setq idx (1+ idx))) | |
73 | string))) | |
f53b2875 | 74 | (string-as-unibyte . identity) |
23f87bed | 75 | (string-make-unibyte . identity) |
9d9b0de9 SM |
76 | ;; string-as-multibyte often doesn't really do what you think it does. |
77 | ;; Example: | |
78 | ;; (aref (string-as-multibyte "\201") 0) -> 129 (aka ?\201) | |
79 | ;; (aref (string-as-multibyte "\300") 0) -> 192 (aka ?\300) | |
80 | ;; (aref (string-as-multibyte "\300\201") 0) -> 192 (aka ?\300) | |
81 | ;; (aref (string-as-multibyte "\300\201") 1) -> 129 (aka ?\201) | |
82 | ;; but | |
83 | ;; (aref (string-as-multibyte "\201\300") 0) -> 2240 | |
84 | ;; (aref (string-as-multibyte "\201\300") 1) -> <error> | |
85 | ;; Better use string-to-multibyte or encode-coding-string. | |
86 | ;; If you really need string-as-multibyte somewhere it's usually | |
87 | ;; because you're using the internal emacs-mule representation (maybe | |
88 | ;; because you're using string-as-unibyte somewhere), which is | |
89 | ;; generally a problem in itself. | |
90 | ;; Here is an approximate equivalence table to help think about it: | |
91 | ;; (string-as-multibyte s) ~= (decode-coding-string s 'emacs-mule) | |
92 | ;; (string-to-multibyte s) ~= (decode-coding-string s 'binary) | |
93 | ;; (string-make-multibyte s) ~= (decode-coding-string s locale-coding-system) | |
95fa1ff7 | 94 | (string-as-multibyte . identity) |
ff5e68bf MB |
95 | (string-to-multibyte |
96 | . (lambda (string) | |
97 | "Return a multibyte string with the same individual chars as string." | |
98 | (mapconcat | |
99 | (lambda (ch) (mm-string-as-multibyte (char-to-string ch))) | |
100 | string ""))) | |
56e09c09 | 101 | (multibyte-string-p . ignore) |
56e09c09 DL |
102 | (insert-byte . insert-char) |
103 | (multibyte-char-to-unibyte . identity)))) | |
f53b2875 | 104 | |
10ace8ea MB |
105 | (eval-and-compile |
106 | (cond | |
107 | ((fboundp 'replace-in-string) | |
108 | (defalias 'mm-replace-in-string 'replace-in-string)) | |
109 | ((fboundp 'replace-regexp-in-string) | |
110 | (defun mm-replace-in-string (string regexp newtext &optional literal) | |
111 | "Replace all matches for REGEXP with NEWTEXT in STRING. | |
112 | If LITERAL is non-nil, insert NEWTEXT literally. Return a new | |
113 | string containing the replacements. | |
114 | ||
115 | This is a compatibility function for different Emacsen." | |
116 | (replace-regexp-in-string regexp newtext string nil literal))) | |
117 | (t | |
118 | (defun mm-replace-in-string (string regexp newtext &optional literal) | |
119 | "Replace all matches for REGEXP with NEWTEXT in STRING. | |
120 | If LITERAL is non-nil, insert NEWTEXT literally. Return a new | |
121 | string containing the replacements. | |
122 | ||
123 | This is a compatibility function for different Emacsen." | |
124 | (let ((start 0) tail) | |
125 | (while (string-match regexp string start) | |
126 | (setq tail (- (length string) (match-end 0))) | |
127 | (setq string (replace-match newtext nil literal string)) | |
128 | (setq start (- (length string) tail)))) | |
129 | string)))) | |
130 | ||
c113de23 GM |
131 | (eval-and-compile |
132 | (defalias 'mm-char-or-char-int-p | |
95fa1ff7 | 133 | (cond |
c113de23 | 134 | ((fboundp 'char-or-char-int-p) 'char-or-char-int-p) |
95fa1ff7 | 135 | ((fboundp 'char-valid-p) 'char-valid-p) |
c113de23 GM |
136 | (t 'identity)))) |
137 | ||
23f87bed MB |
138 | ;; Fixme: This seems always to be used to read a MIME charset, so it |
139 | ;; should be re-named and fixed (in Emacs) to offer completion only on | |
140 | ;; proper charset names (base coding systems which have a | |
141 | ;; mime-charset defined). XEmacs doesn't believe in mime-charset; | |
142 | ;; test with | |
143 | ;; `(or (coding-system-get 'iso-8859-1 'mime-charset) | |
144 | ;; (coding-system-get 'iso-8859-1 :mime-charset))' | |
145 | ;; Actually, there should be an `mm-coding-system-mime-charset'. | |
95fa1ff7 SZ |
146 | (eval-and-compile |
147 | (defalias 'mm-read-coding-system | |
148 | (cond | |
149 | ((fboundp 'read-coding-system) | |
150 | (if (and (featurep 'xemacs) | |
151 | (<= (string-to-number emacs-version) 21.1)) | |
152 | (lambda (prompt &optional default-coding-system) | |
153 | (read-coding-system prompt)) | |
154 | 'read-coding-system)) | |
155 | (t (lambda (prompt &optional default-coding-system) | |
156 | "Prompt the user for a coding system." | |
157 | (completing-read | |
158 | prompt (mapcar (lambda (s) (list (symbol-name (car s)))) | |
159 | mm-mime-mule-charset-alist))))))) | |
160 | ||
c113de23 GM |
161 | (defvar mm-coding-system-list nil) |
162 | (defun mm-get-coding-system-list () | |
163 | "Get the coding system list." | |
164 | (or mm-coding-system-list | |
165 | (setq mm-coding-system-list (mm-coding-system-list)))) | |
166 | ||
23f87bed MB |
167 | (defun mm-coding-system-p (cs) |
168 | "Return non-nil if CS is a symbol naming a coding system. | |
0683d241 MB |
169 | In XEmacs, also return non-nil if CS is a coding system object. |
170 | If CS is available, return CS itself in Emacs, and return a coding | |
171 | system object in XEmacs." | |
23f87bed | 172 | (if (fboundp 'find-coding-system) |
91472578 | 173 | (and cs (find-coding-system cs)) |
23f87bed | 174 | (if (fboundp 'coding-system-p) |
0683d241 MB |
175 | (when (coding-system-p cs) |
176 | cs) | |
5f4264e5 | 177 | ;; no-MULE XEmacs: |
0683d241 | 178 | (car (memq cs (mm-get-coding-system-list)))))) |
95fa1ff7 | 179 | |
bd29ba20 RS |
180 | (defun mm-codepage-setup (number &optional alias) |
181 | "Create a coding system cpNUMBER. | |
182 | The coding system is created using `codepage-setup'. If ALIAS is | |
183 | non-nil, an alias is created and added to | |
184 | `mm-charset-synonym-alist'. If ALIAS is a string, it's used as | |
185 | the alias. Else windows-NUMBER is used." | |
186 | (interactive | |
187 | (let ((completion-ignore-case t) | |
188 | (candidates (cp-supported-codepages))) | |
189 | (list (completing-read "Setup DOS Codepage: (default 437) " candidates | |
190 | nil t nil nil "437")))) | |
191 | (when alias | |
192 | (setq alias (if (stringp alias) | |
193 | (intern alias) | |
194 | (intern (format "windows-%s" number))))) | |
195 | (let* ((cp (intern (format "cp%s" number)))) | |
196 | (unless (mm-coding-system-p cp) | |
197 | (codepage-setup number)) | |
198 | (when (and alias | |
199 | ;; Don't add alias if setup of cp failed. | |
200 | (mm-coding-system-p cp)) | |
201 | (add-to-list 'mm-charset-synonym-alist (cons alias cp))))) | |
202 | ||
c113de23 | 203 | (defvar mm-charset-synonym-alist |
95fa1ff7 | 204 | `( |
95fa1ff7 | 205 | ;; Not in XEmacs, but it's not a proper MIME charset anyhow. |
72eb5fc7 | 206 | ,@(unless (mm-coding-system-p 'x-ctext) |
b44409c9 | 207 | '((x-ctext . ctext))) |
23f87bed MB |
208 | ;; ISO-8859-15 is very similar to ISO-8859-1. But it's _different_! |
209 | ,@(unless (mm-coding-system-p 'iso-8859-15) | |
b44409c9 | 210 | '((iso-8859-15 . iso-8859-1))) |
23f87bed MB |
211 | ;; BIG-5HKSCS is similar to, but different than, BIG-5. |
212 | ,@(unless (mm-coding-system-p 'big5-hkscs) | |
213 | '((big5-hkscs . big5))) | |
d1a7bc93 DL |
214 | ;; Windows-1252 is actually a superset of Latin-1. See also |
215 | ;; `gnus-article-dumbquotes-map'. | |
a1506d29 | 216 | ,@(unless (mm-coding-system-p 'windows-1252) |
b44409c9 MB |
217 | (if (mm-coding-system-p 'cp1252) |
218 | '((windows-1252 . cp1252)) | |
219 | '((windows-1252 . iso-8859-1)))) | |
20c381cf GM |
220 | ;; Windows-1250 is a variant of Latin-2 heavily used by Microsoft |
221 | ;; Outlook users in Czech republic. Use this to allow reading of their | |
222 | ;; e-mails. cp1250 should be defined by M-x codepage-setup. | |
72eb5fc7 SZ |
223 | ,@(if (and (not (mm-coding-system-p 'windows-1250)) |
224 | (mm-coding-system-p 'cp1250)) | |
225 | '((windows-1250 . cp1250))) | |
bd29ba20 RS |
226 | ;; A Microsoft misunderstanding. |
227 | ,@(if (and (not (mm-coding-system-p 'unicode)) | |
228 | (mm-coding-system-p 'utf-16-le)) | |
229 | '((unicode . utf-16-le))) | |
230 | ;; A Microsoft misunderstanding. | |
231 | ,@(unless (mm-coding-system-p 'ks_c_5601-1987) | |
232 | (if (mm-coding-system-p 'cp949) | |
233 | '((ks_c_5601-1987 . cp949)) | |
234 | '((ks_c_5601-1987 . euc-kr)))) | |
b44409c9 MB |
235 | ;; Windows-31J is Windows Codepage 932. |
236 | ,@(if (and (not (mm-coding-system-p 'windows-31j)) | |
237 | (mm-coding-system-p 'cp932)) | |
238 | '((windows-31j . cp932))) | |
95fa1ff7 | 239 | ) |
bd29ba20 RS |
240 | "A mapping from unknown or invalid charset names to the real charset names.") |
241 | ||
242 | (defcustom mm-charset-override-alist | |
243 | `((iso-8859-1 . windows-1252)) | |
244 | "A mapping from undesired charset names to their replacement. | |
245 | ||
246 | You may add pairs like (iso-8859-1 . windows-1252) here, | |
247 | i.e. treat iso-8859-1 as windows-1252. windows-1252 is a | |
248 | superset of iso-8859-1." | |
249 | :type '(list (set :inline t | |
250 | (const (iso-8859-1 . windows-1252)) | |
251 | (const (undecided . windows-1252))) | |
252 | (repeat :inline t | |
253 | :tag "Other options" | |
254 | (cons (symbol :tag "From charset") | |
255 | (symbol :tag "To charset")))) | |
67099291 | 256 | :version "22.1" ;; Gnus 5.10.9 |
bd29ba20 RS |
257 | :group 'mime) |
258 | ||
259 | (defcustom mm-charset-eval-alist | |
260 | (if (featurep 'xemacs) | |
261 | nil ;; I don't know what would be useful for XEmacs. | |
262 | '(;; Emacs 21 offers 1250 1251 1253 1257. Emacs 22 provides autoloads for | |
263 | ;; 1250-1258 (i.e. `mm-codepage-setup' does nothing). | |
264 | (windows-1250 . (mm-codepage-setup 1250 t)) | |
265 | (windows-1251 . (mm-codepage-setup 1251 t)) | |
266 | (windows-1253 . (mm-codepage-setup 1253 t)) | |
267 | (windows-1257 . (mm-codepage-setup 1257 t)))) | |
268 | "An alist of (CHARSET . FORM) pairs. | |
269 | If an article is encoded in an unknown CHARSET, FORM is | |
270 | evaluated. This allows to load additional libraries providing | |
271 | charsets on demand. If supported by your Emacs version, you | |
272 | could use `autoload-coding-system' here." | |
67099291 | 273 | :version "22.1" ;; Gnus 5.10.9 |
bd29ba20 RS |
274 | :type '(list (set :inline t |
275 | (const (windows-1250 . (mm-codepage-setup 1250 t))) | |
276 | (const (windows-1251 . (mm-codepage-setup 1251 t))) | |
277 | (const (windows-1253 . (mm-codepage-setup 1253 t))) | |
278 | (const (windows-1257 . (mm-codepage-setup 1257 t))) | |
279 | (const (cp850 . (mm-codepage-setup 850 nil)))) | |
280 | (repeat :inline t | |
281 | :tag "Other options" | |
282 | (cons (symbol :tag "charset") | |
283 | (symbol :tag "form")))) | |
284 | :group 'mime) | |
c113de23 | 285 | |
c113de23 | 286 | (defvar mm-binary-coding-system |
95fa1ff7 | 287 | (cond |
c113de23 GM |
288 | ((mm-coding-system-p 'binary) 'binary) |
289 | ((mm-coding-system-p 'no-conversion) 'no-conversion) | |
290 | (t nil)) | |
291 | "100% binary coding system.") | |
292 | ||
293 | (defvar mm-text-coding-system | |
294 | (or (if (memq system-type '(windows-nt ms-dos ms-windows)) | |
295 | (and (mm-coding-system-p 'raw-text-dos) 'raw-text-dos) | |
296 | (and (mm-coding-system-p 'raw-text) 'raw-text)) | |
297 | mm-binary-coding-system) | |
298 | "Text-safe coding system (For removing ^M).") | |
299 | ||
300 | (defvar mm-text-coding-system-for-write nil | |
301 | "Text coding system for write.") | |
302 | ||
303 | (defvar mm-auto-save-coding-system | |
95fa1ff7 | 304 | (cond |
23f87bed | 305 | ((mm-coding-system-p 'utf-8-emacs) ; Mule 7 |
56e09c09 DL |
306 | (if (memq system-type '(windows-nt ms-dos ms-windows)) |
307 | (if (mm-coding-system-p 'utf-8-emacs-dos) | |
308 | 'utf-8-emacs-dos mm-binary-coding-system) | |
309 | 'utf-8-emacs)) | |
c113de23 GM |
310 | ((mm-coding-system-p 'emacs-mule) |
311 | (if (memq system-type '(windows-nt ms-dos ms-windows)) | |
95fa1ff7 | 312 | (if (mm-coding-system-p 'emacs-mule-dos) |
c113de23 GM |
313 | 'emacs-mule-dos mm-binary-coding-system) |
314 | 'emacs-mule)) | |
315 | ((mm-coding-system-p 'escape-quoted) 'escape-quoted) | |
316 | (t mm-binary-coding-system)) | |
317 | "Coding system of auto save file.") | |
318 | ||
95fa1ff7 | 319 | (defvar mm-universal-coding-system mm-auto-save-coding-system |
47b63dfa | 320 | "The universal coding system.") |
95fa1ff7 SZ |
321 | |
322 | ;; Fixme: some of the cars here aren't valid MIME charsets. That | |
323 | ;; should only matter with XEmacs, though. | |
324 | (defvar mm-mime-mule-charset-alist | |
325 | `((us-ascii ascii) | |
326 | (iso-8859-1 latin-iso8859-1) | |
327 | (iso-8859-2 latin-iso8859-2) | |
328 | (iso-8859-3 latin-iso8859-3) | |
329 | (iso-8859-4 latin-iso8859-4) | |
330 | (iso-8859-5 cyrillic-iso8859-5) | |
331 | ;; Non-mule (X)Emacs uses the last mule-charset for 8bit characters. | |
332 | ;; The fake mule-charset, gnus-koi8-r, tells Gnus that the default | |
333 | ;; charset is koi8-r, not iso-8859-5. | |
334 | (koi8-r cyrillic-iso8859-5 gnus-koi8-r) | |
335 | (iso-8859-6 arabic-iso8859-6) | |
336 | (iso-8859-7 greek-iso8859-7) | |
337 | (iso-8859-8 hebrew-iso8859-8) | |
338 | (iso-8859-9 latin-iso8859-9) | |
339 | (iso-8859-14 latin-iso8859-14) | |
340 | (iso-8859-15 latin-iso8859-15) | |
341 | (viscii vietnamese-viscii-lower) | |
342 | (iso-2022-jp latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978) | |
343 | (euc-kr korean-ksc5601) | |
344 | (gb2312 chinese-gb2312) | |
345 | (big5 chinese-big5-1 chinese-big5-2) | |
346 | (tibetan tibetan) | |
347 | (thai-tis620 thai-tis620) | |
0683d241 | 348 | (windows-1251 cyrillic-iso8859-5) |
95fa1ff7 SZ |
349 | (iso-2022-7bit ethiopic arabic-1-column arabic-2-column) |
350 | (iso-2022-jp-2 latin-iso8859-1 greek-iso8859-7 | |
351 | latin-jisx0201 japanese-jisx0208-1978 | |
352 | chinese-gb2312 japanese-jisx0208 | |
0683d241 | 353 | korean-ksc5601 japanese-jisx0212) |
95fa1ff7 SZ |
354 | (iso-2022-int-1 latin-iso8859-1 greek-iso8859-7 |
355 | latin-jisx0201 japanese-jisx0208-1978 | |
356 | chinese-gb2312 japanese-jisx0208 | |
357 | korean-ksc5601 japanese-jisx0212 | |
358 | chinese-cns11643-1 chinese-cns11643-2) | |
359 | (iso-2022-int-1 latin-iso8859-1 latin-iso8859-2 | |
360 | cyrillic-iso8859-5 greek-iso8859-7 | |
361 | latin-jisx0201 japanese-jisx0208-1978 | |
362 | chinese-gb2312 japanese-jisx0208 | |
363 | korean-ksc5601 japanese-jisx0212 | |
364 | chinese-cns11643-1 chinese-cns11643-2 | |
365 | chinese-cns11643-3 chinese-cns11643-4 | |
366 | chinese-cns11643-5 chinese-cns11643-6 | |
367 | chinese-cns11643-7) | |
0683d241 MB |
368 | (iso-2022-jp-3 latin-jisx0201 japanese-jisx0208-1978 japanese-jisx0208 |
369 | japanese-jisx0213-1 japanese-jisx0213-2) | |
370 | (shift_jis latin-jisx0201 katakana-jisx0201 japanese-jisx0208) | |
26c9afc3 MB |
371 | ,(cond ((fboundp 'unicode-precedence-list) |
372 | (cons 'utf-8 (delq 'ascii (mapcar 'charset-name | |
373 | (unicode-precedence-list))))) | |
374 | ((or (not (fboundp 'charsetp)) ;; non-Mule case | |
375 | (charsetp 'unicode-a) | |
376 | (not (mm-coding-system-p 'mule-utf-8))) | |
377 | '(utf-8 unicode-a unicode-b unicode-c unicode-d unicode-e)) | |
378 | (t ;; If we have utf-8 we're in Mule 5+. | |
379 | (append '(utf-8) | |
380 | (delete 'ascii | |
381 | (coding-system-get 'mule-utf-8 'safe-charsets)))))) | |
95fa1ff7 SZ |
382 | "Alist of MIME-charset/MULE-charsets.") |
383 | ||
0683d241 MB |
384 | (defun mm-enrich-utf-8-by-mule-ucs () |
385 | "Make the `utf-8' MIME charset usable by the Mule-UCS package. | |
386 | This function will run when the `un-define' module is loaded under | |
387 | XEmacs, and fill the `utf-8' entry in `mm-mime-mule-charset-alist' | |
388 | with Mule charsets. It is completely useless for Emacs." | |
0683d241 MB |
389 | (when (boundp 'unicode-basic-translation-charset-order-list) |
390 | (condition-case nil | |
391 | (let ((val (delq | |
392 | 'ascii | |
393 | (copy-sequence | |
394 | (symbol-value | |
395 | 'unicode-basic-translation-charset-order-list)))) | |
396 | (elem (assq 'utf-8 mm-mime-mule-charset-alist))) | |
397 | (if elem | |
398 | (setcdr elem val) | |
399 | (setq mm-mime-mule-charset-alist | |
400 | (nconc mm-mime-mule-charset-alist | |
401 | (list (cons 'utf-8 val)))))) | |
402 | (error)))) | |
403 | ||
404 | ;; Correct by construction, but should be unnecessary for Emacs: | |
405 | (if (featurep 'xemacs) | |
406 | (eval-after-load "un-define" '(mm-enrich-utf-8-by-mule-ucs)) | |
407 | (when (and (fboundp 'coding-system-list) | |
408 | (fboundp 'sort-coding-systems)) | |
409 | (let ((css (sort-coding-systems (coding-system-list 'base-only))) | |
410 | cs mime mule alist) | |
411 | (while css | |
412 | (setq cs (pop css) | |
5f4264e5 | 413 | mime (or (coding-system-get cs :mime-charset) ; Emacs 23 (unicode) |
0683d241 MB |
414 | (coding-system-get cs 'mime-charset))) |
415 | (when (and mime | |
416 | (not (eq t (setq mule | |
417 | (coding-system-get cs 'safe-charsets)))) | |
418 | (not (assq mime alist))) | |
419 | (push (cons mime (delq 'ascii mule)) alist))) | |
420 | (setq mm-mime-mule-charset-alist (nreverse alist))))) | |
95fa1ff7 | 421 | |
47b63dfa SZ |
422 | (defvar mm-hack-charsets '(iso-8859-15 iso-2022-jp-2) |
423 | "A list of special charsets. | |
424 | Valid elements include: | |
425 | `iso-8859-15' convert ISO-8859-1, -9 to ISO-8859-15 if ISO-8859-15 exists. | |
426 | `iso-2022-jp-2' convert ISO-2022-jp to ISO-2022-jp-2 if ISO-2022-jp-2 exists." | |
427 | ) | |
428 | ||
a1506d29 | 429 | (defvar mm-iso-8859-15-compatible |
47b63dfa SZ |
430 | '((iso-8859-1 "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE") |
431 | (iso-8859-9 "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE\xD0\xDD\xDE\xF0\xFD\xFE")) | |
432 | "ISO-8859-15 exchangeable coding systems and inconvertible characters.") | |
433 | ||
434 | (defvar mm-iso-8859-x-to-15-table | |
435 | (and (fboundp 'coding-system-p) | |
436 | (mm-coding-system-p 'iso-8859-15) | |
a1506d29 | 437 | (mapcar |
47b63dfa SZ |
438 | (lambda (cs) |
439 | (if (mm-coding-system-p (car cs)) | |
a1506d29 | 440 | (let ((c (string-to-char |
47b63dfa SZ |
441 | (decode-coding-string "\341" (car cs))))) |
442 | (cons (char-charset c) | |
443 | (cons | |
a1506d29 | 444 | (- (string-to-char |
47b63dfa | 445 | (decode-coding-string "\341" 'iso-8859-15)) c) |
a1506d29 | 446 | (string-to-list (decode-coding-string (car (cdr cs)) |
47b63dfa SZ |
447 | (car cs)))))) |
448 | '(gnus-charset 0))) | |
449 | mm-iso-8859-15-compatible)) | |
450 | "A table of the difference character between ISO-8859-X and ISO-8859-15.") | |
451 | ||
23f87bed MB |
452 | (defcustom mm-coding-system-priorities |
453 | (if (boundp 'current-language-environment) | |
454 | (let ((lang (symbol-value 'current-language-environment))) | |
455 | (cond ((string= lang "Japanese") | |
5153a47a MB |
456 | ;; Japanese users prefer iso-2022-jp to euc-japan or |
457 | ;; shift_jis, however iso-8859-1 should be used when | |
458 | ;; there are only ASCII text and Latin-1 characters. | |
459 | '(iso-8859-1 iso-2022-jp iso-2022-jp-2 shift_jis utf-8))))) | |
23f87bed MB |
460 | "Preferred coding systems for encoding outgoing messages. |
461 | ||
462 | More than one suitable coding system may be found for some text. | |
463 | By default, the coding system with the highest priority is used | |
464 | to encode outgoing messages (see `sort-coding-systems'). If this | |
465 | variable is set, it overrides the default priority." | |
a08b59c9 | 466 | :version "21.2" |
23f87bed MB |
467 | :type '(repeat (symbol :tag "Coding system")) |
468 | :group 'mime) | |
469 | ||
470 | ;; ?? | |
1f7d2e14 SZ |
471 | (defvar mm-use-find-coding-systems-region |
472 | (fboundp 'find-coding-systems-region) | |
23f87bed MB |
473 | "Use `find-coding-systems-region' to find proper coding systems. |
474 | ||
475 | Setting it to nil is useful on Emacsen supporting Unicode if sending | |
476 | mail with multiple parts is preferred to sending a Unicode one.") | |
1f7d2e14 | 477 | |
c113de23 GM |
478 | ;;; Internal variables: |
479 | ||
480 | ;;; Functions: | |
481 | ||
482 | (defun mm-mule-charset-to-mime-charset (charset) | |
1c57d870 | 483 | "Return the MIME charset corresponding to the given Mule CHARSET." |
23f87bed MB |
484 | (if (and (fboundp 'find-coding-systems-for-charsets) |
485 | (fboundp 'sort-coding-systems)) | |
0683d241 MB |
486 | (let ((css (sort (sort-coding-systems |
487 | (find-coding-systems-for-charsets (list charset))) | |
488 | 'mm-sort-coding-systems-predicate)) | |
489 | cs mime) | |
490 | (while (and (not mime) | |
491 | css) | |
492 | (when (setq cs (pop css)) | |
493 | (setq mime (or (coding-system-get cs :mime-charset) | |
494 | (coding-system-get cs 'mime-charset))))) | |
95fa1ff7 | 495 | mime) |
0683d241 MB |
496 | (let ((alist (mapcar (lambda (cs) |
497 | (assq cs mm-mime-mule-charset-alist)) | |
498 | (sort (mapcar 'car mm-mime-mule-charset-alist) | |
499 | 'mm-sort-coding-systems-predicate))) | |
95fa1ff7 SZ |
500 | out) |
501 | (while alist | |
502 | (when (memq charset (cdar alist)) | |
503 | (setq out (caar alist) | |
504 | alist nil)) | |
505 | (pop alist)) | |
506 | out))) | |
c113de23 | 507 | |
bd29ba20 RS |
508 | (defun mm-charset-to-coding-system (charset &optional lbt |
509 | allow-override) | |
c113de23 GM |
510 | "Return coding-system corresponding to CHARSET. |
511 | CHARSET is a symbol naming a MIME charset. | |
512 | If optional argument LBT (`unix', `dos' or `mac') is specified, it is | |
bd29ba20 RS |
513 | used as the line break code type of the coding system. |
514 | ||
515 | If ALLOW-OVERRIDE is given, use `mm-charset-override-alist' to | |
516 | map undesired charset names to their replacement. This should | |
517 | only be used for decoding, not for encoding." | |
518 | ;; OVERRIDE is used (only) in `mm-decode-body' and `mm-decode-string'. | |
c113de23 GM |
519 | (when (stringp charset) |
520 | (setq charset (intern (downcase charset)))) | |
c113de23 GM |
521 | (when lbt |
522 | (setq charset (intern (format "%s-%s" charset lbt)))) | |
523 | (cond | |
47b63dfa SZ |
524 | ((null charset) |
525 | charset) | |
c113de23 | 526 | ;; Running in a non-MULE environment. |
23f87bed MB |
527 | ((or (null (mm-get-coding-system-list)) |
528 | (not (fboundp 'coding-system-get))) | |
c113de23 | 529 | charset) |
bd29ba20 RS |
530 | ;; Check override list quite early. Should only used for decoding, not for |
531 | ;; encoding! | |
532 | ((and allow-override | |
533 | (let ((cs (cdr (assq charset mm-charset-override-alist)))) | |
534 | (and cs (mm-coding-system-p cs) cs)))) | |
c113de23 GM |
535 | ;; ascii |
536 | ((eq charset 'us-ascii) | |
537 | 'ascii) | |
1c57d870 DL |
538 | ;; Check to see whether we can handle this charset. (This depends |
539 | ;; on there being some coding system matching each `mime-charset' | |
95fa1ff7 SZ |
540 | ;; property defined, as there should be.) |
541 | ((and (mm-coding-system-p charset) | |
542 | ;;; Doing this would potentially weed out incorrect charsets. | |
543 | ;;; charset | |
544 | ;;; (eq charset (coding-system-get charset 'mime-charset)) | |
545 | ) | |
c113de23 | 546 | charset) |
bd29ba20 RS |
547 | ;; Eval expressions from `mm-charset-eval-alist' |
548 | ((let* ((el (assq charset mm-charset-eval-alist)) | |
549 | (cs (car el)) | |
550 | (form (cdr el))) | |
551 | (and cs | |
552 | form | |
553 | (prog2 | |
554 | ;; Avoid errors... | |
555 | (condition-case nil (eval form) (error nil)) | |
556 | ;; (message "Failed to eval `%s'" form)) | |
557 | (mm-coding-system-p cs) | |
558 | (message "Added charset `%s' via `mm-charset-eval-alist'" cs)) | |
559 | cs))) | |
95fa1ff7 | 560 | ;; Translate invalid charsets. |
d62d49df | 561 | ((let ((cs (cdr (assq charset mm-charset-synonym-alist)))) |
bd29ba20 RS |
562 | (and cs |
563 | (mm-coding-system-p cs) | |
564 | ;; (message | |
565 | ;; "Using synonym `%s' from `mm-charset-synonym-alist' for `%s'" | |
566 | ;; cs charset) | |
567 | cs))) | |
95fa1ff7 SZ |
568 | ;; Last resort: search the coding system list for entries which |
569 | ;; have the right mime-charset in case the canonical name isn't | |
570 | ;; defined (though it should be). | |
571 | ((let (cs) | |
572 | ;; mm-get-coding-system-list returns a list of cs without lbt. | |
573 | ;; Do we need -lbt? | |
574 | (dolist (c (mm-get-coding-system-list)) | |
575 | (if (and (null cs) | |
56e09c09 DL |
576 | (eq charset (or (coding-system-get c :mime-charset) |
577 | (coding-system-get c 'mime-charset)))) | |
95fa1ff7 | 578 | (setq cs c))) |
bd29ba20 RS |
579 | (unless cs |
580 | ;; Warn the user about unknown charset: | |
581 | (if (fboundp 'gnus-message) | |
582 | (gnus-message 7 "Unknown charset: %s" charset) | |
583 | (message "Unknown charset: %s" charset))) | |
95fa1ff7 SZ |
584 | cs)))) |
585 | ||
586 | (defsubst mm-replace-chars-in-string (string from to) | |
587 | (mm-subst-char-in-string from to string)) | |
588 | ||
589 | (eval-and-compile | |
590 | (defvar mm-emacs-mule (and (not (featurep 'xemacs)) | |
591 | (boundp 'default-enable-multibyte-characters) | |
592 | default-enable-multibyte-characters | |
593 | (fboundp 'set-buffer-multibyte)) | |
56e09c09 | 594 | "True in Emacs with Mule.") |
95fa1ff7 SZ |
595 | |
596 | (if mm-emacs-mule | |
597 | (defun mm-enable-multibyte () | |
598 | "Set the multibyte flag of the current buffer. | |
1c57d870 DL |
599 | Only do this if the default value of `enable-multibyte-characters' is |
600 | non-nil. This is a no-op in XEmacs." | |
23f87bed | 601 | (set-buffer-multibyte 'to)) |
95fa1ff7 | 602 | (defalias 'mm-enable-multibyte 'ignore)) |
c113de23 | 603 | |
95fa1ff7 SZ |
604 | (if mm-emacs-mule |
605 | (defun mm-disable-multibyte () | |
606 | "Unset the multibyte flag of in the current buffer. | |
1c57d870 | 607 | This is a no-op in XEmacs." |
95fa1ff7 | 608 | (set-buffer-multibyte nil)) |
56e09c09 | 609 | (defalias 'mm-disable-multibyte 'ignore))) |
052802c1 | 610 | |
c113de23 GM |
611 | (defun mm-preferred-coding-system (charset) |
612 | ;; A typo in some Emacs versions. | |
47b63dfa SZ |
613 | (or (get-charset-property charset 'preferred-coding-system) |
614 | (get-charset-property charset 'prefered-coding-system))) | |
c113de23 | 615 | |
23f87bed MB |
616 | ;; Mule charsets shouldn't be used. |
617 | (defsubst mm-guess-charset () | |
618 | "Guess Mule charset from the language environment." | |
619 | (or | |
620 | mail-parse-mule-charset ;; cached mule-charset | |
621 | (progn | |
622 | (setq mail-parse-mule-charset | |
623 | (and (boundp 'current-language-environment) | |
624 | (car (last | |
625 | (assq 'charset | |
626 | (assoc current-language-environment | |
627 | language-info-alist)))))) | |
628 | (if (or (not mail-parse-mule-charset) | |
629 | (eq mail-parse-mule-charset 'ascii)) | |
630 | (setq mail-parse-mule-charset | |
631 | (or (car (last (assq mail-parse-charset | |
632 | mm-mime-mule-charset-alist))) | |
633 | ;; default | |
634 | 'latin-iso8859-1))) | |
635 | mail-parse-mule-charset))) | |
636 | ||
c113de23 GM |
637 | (defun mm-charset-after (&optional pos) |
638 | "Return charset of a character in current buffer at position POS. | |
639 | If POS is nil, it defauls to the current point. | |
640 | If POS is out of range, the value is nil. | |
641 | If the charset is `composition', return the actual one." | |
052802c1 DL |
642 | (let ((char (char-after pos)) charset) |
643 | (if (< (mm-char-int char) 128) | |
644 | (setq charset 'ascii) | |
645 | ;; charset-after is fake in some Emacsen. | |
646 | (setq charset (and (fboundp 'char-charset) (char-charset char))) | |
56e09c09 | 647 | (if (eq charset 'composition) ; Mule 4 |
052802c1 DL |
648 | (let ((p (or pos (point)))) |
649 | (cadr (find-charset-region p (1+ p)))) | |
650 | (if (and charset (not (memq charset '(ascii eight-bit-control | |
651 | eight-bit-graphic)))) | |
652 | charset | |
23f87bed | 653 | (mm-guess-charset)))))) |
c113de23 GM |
654 | |
655 | (defun mm-mime-charset (charset) | |
1c57d870 | 656 | "Return the MIME charset corresponding to the given Mule CHARSET." |
95fa1ff7 SZ |
657 | (if (eq charset 'unknown) |
658 | (error "The message contains non-printable characters, please use attachment")) | |
052802c1 | 659 | (if (and (fboundp 'coding-system-get) (fboundp 'get-charset-property)) |
c113de23 GM |
660 | ;; This exists in Emacs 20. |
661 | (or | |
662 | (and (mm-preferred-coding-system charset) | |
56e09c09 DL |
663 | (or (coding-system-get |
664 | (mm-preferred-coding-system charset) :mime-charset) | |
665 | (coding-system-get | |
666 | (mm-preferred-coding-system charset) 'mime-charset))) | |
c113de23 GM |
667 | (and (eq charset 'ascii) |
668 | 'us-ascii) | |
95fa1ff7 | 669 | (mm-preferred-coding-system charset) |
c113de23 GM |
670 | (mm-mule-charset-to-mime-charset charset)) |
671 | ;; This is for XEmacs. | |
672 | (mm-mule-charset-to-mime-charset charset))) | |
673 | ||
8753ddee MB |
674 | (if (fboundp 'delete-dups) |
675 | (defalias 'mm-delete-duplicates 'delete-dups) | |
676 | (defun mm-delete-duplicates (list) | |
677 | "Destructively remove `equal' duplicates from LIST. | |
678 | Store the result in LIST and return it. LIST must be a proper list. | |
679 | Of several `equal' occurrences of an element in LIST, the first | |
680 | one is kept. | |
681 | ||
682 | This is a compatibility function for Emacsen without `delete-dups'." | |
683 | ;; Code from `subr.el' in Emacs 22: | |
684 | (let ((tail list)) | |
685 | (while tail | |
686 | (setcdr tail (delete (car tail) (cdr tail))) | |
687 | (setq tail (cdr tail)))) | |
688 | list)) | |
c113de23 | 689 | |
23f87bed MB |
690 | ;; Fixme: This is used in places when it should be testing the |
691 | ;; default multibyteness. See mm-default-multibyte-p. | |
692 | (eval-and-compile | |
052802c1 DL |
693 | (if (and (not (featurep 'xemacs)) |
694 | (boundp 'enable-multibyte-characters)) | |
23f87bed MB |
695 | (defun mm-multibyte-p () |
696 | "Non-nil if multibyte is enabled in the current buffer." | |
697 | enable-multibyte-characters) | |
698 | (defun mm-multibyte-p () (featurep 'mule)))) | |
699 | ||
700 | (defun mm-default-multibyte-p () | |
701 | "Return non-nil if the session is multibyte. | |
702 | This affects whether coding conversion should be attempted generally." | |
703 | (if (featurep 'mule) | |
704 | (if (boundp 'default-enable-multibyte-characters) | |
705 | default-enable-multibyte-characters | |
706 | t))) | |
c113de23 | 707 | |
47b63dfa SZ |
708 | (defun mm-iso-8859-x-to-15-region (&optional b e) |
709 | (if (fboundp 'char-charset) | |
710 | (let (charset item c inconvertible) | |
711 | (save-restriction | |
712 | (if e (narrow-to-region b e)) | |
713 | (goto-char (point-min)) | |
714 | (skip-chars-forward "\0-\177") | |
715 | (while (not (eobp)) | |
a1506d29 JB |
716 | (cond |
717 | ((not (setq item (assq (char-charset (setq c (char-after))) | |
47b63dfa SZ |
718 | mm-iso-8859-x-to-15-table))) |
719 | (forward-char)) | |
720 | ((memq c (cdr (cdr item))) | |
721 | (setq inconvertible t) | |
722 | (forward-char)) | |
723 | (t | |
23f87bed MB |
724 | (insert-before-markers (prog1 (+ c (car (cdr item))) |
725 | (delete-char 1))))) | |
726 | (skip-chars-forward "\0-\177"))) | |
47b63dfa SZ |
727 | (not inconvertible)))) |
728 | ||
729 | (defun mm-sort-coding-systems-predicate (a b) | |
23f87bed MB |
730 | (let ((priorities |
731 | (mapcar (lambda (cs) | |
732 | ;; Note: invalid entries are dropped silently | |
0683d241 | 733 | (and (setq cs (mm-coding-system-p cs)) |
23f87bed MB |
734 | (coding-system-base cs))) |
735 | mm-coding-system-priorities))) | |
0683d241 MB |
736 | (and (setq a (mm-coding-system-p a)) |
737 | (if (setq b (mm-coding-system-p b)) | |
738 | (> (length (memq (coding-system-base a) priorities)) | |
739 | (length (memq (coding-system-base b) priorities))) | |
740 | t)))) | |
47b63dfa | 741 | |
aa0a8561 MB |
742 | (eval-when-compile |
743 | (autoload 'latin-unity-massage-name "latin-unity") | |
744 | (autoload 'latin-unity-maybe-remap "latin-unity") | |
745 | (autoload 'latin-unity-representations-feasible-region "latin-unity") | |
746 | (autoload 'latin-unity-representations-present-region "latin-unity") | |
747 | (defvar latin-unity-coding-systems) | |
748 | (defvar latin-unity-ucs-list)) | |
749 | ||
750 | (defun mm-xemacs-find-mime-charset-1 (begin end) | |
751 | "Determine which MIME charset to use to send region as message. | |
752 | This uses the XEmacs-specific latin-unity package to better handle the | |
753 | case where identical characters from diverse ISO-8859-? character sets | |
754 | can be encoded using a single one of the corresponding coding systems. | |
755 | ||
756 | It treats `mm-coding-system-priorities' as the list of preferred | |
757 | coding systems; a useful example setting for this list in Western | |
758 | Europe would be '(iso-8859-1 iso-8859-15 utf-8), which would default | |
759 | to the very standard Latin 1 coding system, and only move to coding | |
760 | systems that are less supported as is necessary to encode the | |
761 | characters that exist in the buffer. | |
762 | ||
763 | Latin Unity doesn't know about those non-ASCII Roman characters that | |
764 | are available in various East Asian character sets. As such, its | |
765 | behavior if you have a JIS 0212 LATIN SMALL LETTER A WITH ACUTE in a | |
766 | buffer and it can otherwise be encoded as Latin 1, won't be ideal. | |
767 | But this is very much a corner case, so don't worry about it." | |
768 | (let ((systems mm-coding-system-priorities) csets psets curset) | |
769 | ||
770 | ;; Load the Latin Unity library, if available. | |
771 | (when (and (not (featurep 'latin-unity)) (locate-library "latin-unity")) | |
10ace8ea | 772 | (ignore-errors (require 'latin-unity))) |
aa0a8561 MB |
773 | |
774 | ;; Now, can we use it? | |
775 | (if (featurep 'latin-unity) | |
776 | (progn | |
777 | (setq csets (latin-unity-representations-feasible-region begin end) | |
778 | psets (latin-unity-representations-present-region begin end)) | |
779 | ||
780 | (catch 'done | |
781 | ||
782 | ;; Pass back the first coding system in the preferred list | |
783 | ;; that can encode the whole region. | |
784 | (dolist (curset systems) | |
785 | (setq curset (latin-unity-massage-name 'buffer-default curset)) | |
786 | ||
787 | ;; If the coding system is a universal coding system, then | |
788 | ;; it can certainly encode all the characters in the region. | |
789 | (if (memq curset latin-unity-ucs-list) | |
790 | (throw 'done (list curset))) | |
791 | ||
792 | ;; If a coding system isn't universal, and isn't in | |
793 | ;; the list that latin unity knows about, we can't | |
794 | ;; decide whether to use it here. Leave that until later | |
795 | ;; in `mm-find-mime-charset-region' function, whence we | |
796 | ;; have been called. | |
797 | (unless (memq curset latin-unity-coding-systems) | |
798 | (throw 'done nil)) | |
799 | ||
800 | ;; Right, we know about this coding system, and it may | |
801 | ;; conceivably be able to encode all the characters in | |
802 | ;; the region. | |
803 | (if (latin-unity-maybe-remap begin end curset csets psets t) | |
804 | (throw 'done (list curset)))) | |
805 | ||
806 | ;; Can't encode using anything from the | |
807 | ;; `mm-coding-system-priorities' list. | |
808 | ;; Leave `mm-find-mime-charset' to do most of the work. | |
809 | nil)) | |
810 | ||
811 | ;; Right, latin unity isn't available; let `mm-find-charset-region' | |
812 | ;; take its default action, which equally applies to GNU Emacs. | |
813 | nil))) | |
814 | ||
815 | (defmacro mm-xemacs-find-mime-charset (begin end) | |
816 | (when (featurep 'xemacs) | |
10ace8ea | 817 | `(and (featurep 'mule) (mm-xemacs-find-mime-charset-1 ,begin ,end)))) |
aa0a8561 | 818 | |
47b63dfa | 819 | (defun mm-find-mime-charset-region (b e &optional hack-charsets) |
95fa1ff7 | 820 | "Return the MIME charsets needed to encode the region between B and E. |
f0529b5b | 821 | nil means ASCII, a single-element list represents an appropriate MIME |
95fa1ff7 | 822 | charset, and a longer list means no appropriate charset." |
47b63dfa SZ |
823 | (let (charsets) |
824 | ;; The return possibilities of this function are a mess... | |
825 | (or (and (mm-multibyte-p) | |
1f7d2e14 | 826 | mm-use-find-coding-systems-region |
47b63dfa SZ |
827 | ;; Find the mime-charset of the most preferred coding |
828 | ;; system that has one. | |
829 | (let ((systems (find-coding-systems-region b e))) | |
830 | (when mm-coding-system-priorities | |
a1506d29 | 831 | (setq systems |
47b63dfa | 832 | (sort systems 'mm-sort-coding-systems-predicate))) |
47b63dfa SZ |
833 | (setq systems (delq 'compound-text systems)) |
834 | (unless (equal systems '(undecided)) | |
835 | (while systems | |
56e09c09 DL |
836 | (let* ((head (pop systems)) |
837 | (cs (or (coding-system-get head :mime-charset) | |
838 | (coding-system-get head 'mime-charset)))) | |
23f87bed MB |
839 | ;; The mime-charset (`x-ctext') of |
840 | ;; `compound-text' is not in the IANA list. We | |
841 | ;; shouldn't normally use anything here with a | |
842 | ;; mime-charset having an `x-' prefix. | |
843 | ;; Fixme: Allow this to be overridden, since | |
844 | ;; there is existing use of x-ctext. | |
845 | ;; Also people apparently need the coding system | |
846 | ;; `iso-2022-jp-3' (which Mule-UCS defines with | |
847 | ;; mime-charset, though it's not valid). | |
848 | (if (and cs | |
849 | (not (string-match "^[Xx]-" (symbol-name cs))) | |
850 | ;; UTF-16 of any variety is invalid for | |
851 | ;; text parts and, unfortunately, has | |
852 | ;; mime-charset defined both in Mule-UCS | |
853 | ;; and versions of Emacs. (The name | |
854 | ;; might be `mule-utf-16...' or | |
855 | ;; `utf-16...'.) | |
856 | (not (string-match "utf-16" (symbol-name cs)))) | |
47b63dfa SZ |
857 | (setq systems nil |
858 | charsets (list cs)))))) | |
859 | charsets)) | |
aa0a8561 MB |
860 | ;; If we're XEmacs, and some coding system is appropriate, |
861 | ;; mm-xemacs-find-mime-charset will return an appropriate list. | |
862 | ;; Otherwise, we'll get nil, and the next setq will get invoked. | |
863 | (setq charsets (mm-xemacs-find-mime-charset b e)) | |
864 | ||
865 | ;; We're not multibyte, or a single coding system won't cover it. | |
a1506d29 | 866 | (setq charsets |
47b63dfa SZ |
867 | (mm-delete-duplicates |
868 | (mapcar 'mm-mime-charset | |
869 | (delq 'ascii | |
870 | (mm-find-charset-region b e)))))) | |
23f87bed MB |
871 | (if (and (> (length charsets) 1) |
872 | (memq 'iso-8859-15 charsets) | |
47b63dfa SZ |
873 | (memq 'iso-8859-15 hack-charsets) |
874 | (save-excursion (mm-iso-8859-x-to-15-region b e))) | |
875 | (mapcar (lambda (x) (setq charsets (delq (car x) charsets))) | |
876 | mm-iso-8859-15-compatible)) | |
877 | (if (and (memq 'iso-2022-jp-2 charsets) | |
878 | (memq 'iso-2022-jp-2 hack-charsets)) | |
879 | (setq charsets (delq 'iso-2022-jp charsets))) | |
11e22c4a MB |
880 | ;; Attempt to reduce the number of charsets if utf-8 is available. |
881 | (if (and (featurep 'xemacs) | |
882 | (> (length charsets) 1) | |
883 | (mm-coding-system-p 'utf-8)) | |
884 | (let ((mm-coding-system-priorities | |
885 | (cons 'utf-8 mm-coding-system-priorities))) | |
886 | (setq charsets | |
887 | (mm-delete-duplicates | |
888 | (mapcar 'mm-mime-charset | |
889 | (delq 'ascii | |
890 | (mm-find-charset-region b e))))))) | |
47b63dfa | 891 | charsets)) |
95fa1ff7 | 892 | |
c113de23 GM |
893 | (defmacro mm-with-unibyte-buffer (&rest forms) |
894 | "Create a temporary buffer, and evaluate FORMS there like `progn'. | |
1c57d870 DL |
895 | Use unibyte mode for this." |
896 | `(let (default-enable-multibyte-characters) | |
897 | (with-temp-buffer ,@forms))) | |
c113de23 GM |
898 | (put 'mm-with-unibyte-buffer 'lisp-indent-function 0) |
899 | (put 'mm-with-unibyte-buffer 'edebug-form-spec '(body)) | |
900 | ||
23f87bed MB |
901 | (defmacro mm-with-multibyte-buffer (&rest forms) |
902 | "Create a temporary buffer, and evaluate FORMS there like `progn'. | |
903 | Use multibyte mode for this." | |
904 | `(let ((default-enable-multibyte-characters t)) | |
905 | (with-temp-buffer ,@forms))) | |
906 | (put 'mm-with-multibyte-buffer 'lisp-indent-function 0) | |
907 | (put 'mm-with-multibyte-buffer 'edebug-form-spec '(body)) | |
908 | ||
c113de23 | 909 | (defmacro mm-with-unibyte-current-buffer (&rest forms) |
56e09c09 | 910 | "Evaluate FORMS with current buffer temporarily made unibyte. |
1c57d870 | 911 | Also bind `default-enable-multibyte-characters' to nil. |
719120ef MB |
912 | Equivalent to `progn' in XEmacs |
913 | ||
914 | NOTE: Use this macro with caution in multibyte buffers (it is not | |
915 | worth using this macro in unibyte buffers of course). Use of | |
916 | `(set-buffer-multibyte t)', which is run finally, is generally | |
917 | harmful since it is likely to modify existing data in the buffer. | |
fe62aacc MB |
918 | For instance, it converts \"\\300\\255\" into \"\\255\" in |
919 | Emacs 23 (unicode)." | |
95fa1ff7 SZ |
920 | (let ((multibyte (make-symbol "multibyte")) |
921 | (buffer (make-symbol "buffer"))) | |
a1506d29 | 922 | `(if mm-emacs-mule |
719120ef | 923 | (let ((,multibyte enable-multibyte-characters) |
95fa1ff7 | 924 | (,buffer (current-buffer))) |
1c57d870 DL |
925 | (unwind-protect |
926 | (let (default-enable-multibyte-characters) | |
927 | (set-buffer-multibyte nil) | |
928 | ,@forms) | |
95fa1ff7 | 929 | (set-buffer ,buffer) |
1c57d870 | 930 | (set-buffer-multibyte ,multibyte))) |
95fa1ff7 | 931 | (let (default-enable-multibyte-characters) |
1c57d870 | 932 | ,@forms)))) |
c113de23 GM |
933 | (put 'mm-with-unibyte-current-buffer 'lisp-indent-function 0) |
934 | (put 'mm-with-unibyte-current-buffer 'edebug-form-spec '(body)) | |
935 | ||
936 | (defmacro mm-with-unibyte (&rest forms) | |
23f87bed | 937 | "Eval the FORMS with the default value of `enable-multibyte-characters' nil." |
1c57d870 DL |
938 | `(let (default-enable-multibyte-characters) |
939 | ,@forms)) | |
c113de23 GM |
940 | (put 'mm-with-unibyte 'lisp-indent-function 0) |
941 | (put 'mm-with-unibyte 'edebug-form-spec '(body)) | |
942 | ||
23f87bed MB |
943 | (defmacro mm-with-multibyte (&rest forms) |
944 | "Eval the FORMS with the default value of `enable-multibyte-characters' t." | |
945 | `(let ((default-enable-multibyte-characters t)) | |
946 | ,@forms)) | |
947 | (put 'mm-with-multibyte 'lisp-indent-function 0) | |
948 | (put 'mm-with-multibyte 'edebug-form-spec '(body)) | |
949 | ||
c113de23 | 950 | (defun mm-find-charset-region (b e) |
1c57d870 | 951 | "Return a list of Emacs charsets in the region B to E." |
c113de23 GM |
952 | (cond |
953 | ((and (mm-multibyte-p) | |
95fa1ff7 | 954 | (fboundp 'find-charset-region)) |
c113de23 | 955 | ;; Remove composition since the base charsets have been included. |
95fa1ff7 SZ |
956 | ;; Remove eight-bit-*, treat them as ascii. |
957 | (let ((css (find-charset-region b e))) | |
958 | (mapcar (lambda (cs) (setq css (delq cs css))) | |
959 | '(composition eight-bit-control eight-bit-graphic | |
960 | control-1)) | |
961 | css)) | |
052802c1 DL |
962 | (t |
963 | ;; We are in a unibyte buffer or XEmacs non-mule, so we futz around a bit. | |
c113de23 GM |
964 | (save-excursion |
965 | (save-restriction | |
966 | (narrow-to-region b e) | |
967 | (goto-char (point-min)) | |
968 | (skip-chars-forward "\0-\177") | |
969 | (if (eobp) | |
970 | '(ascii) | |
052802c1 DL |
971 | (let (charset) |
972 | (setq charset | |
973 | (and (boundp 'current-language-environment) | |
95fa1ff7 SZ |
974 | (car (last (assq 'charset |
975 | (assoc current-language-environment | |
052802c1 DL |
976 | language-info-alist)))))) |
977 | (if (eq charset 'ascii) (setq charset nil)) | |
978 | (or charset | |
979 | (setq charset | |
980 | (car (last (assq mail-parse-charset | |
981 | mm-mime-mule-charset-alist))))) | |
982 | (list 'ascii (or charset 'latin-iso8859-1))))))))) | |
c113de23 GM |
983 | |
984 | (if (fboundp 'shell-quote-argument) | |
985 | (defalias 'mm-quote-arg 'shell-quote-argument) | |
986 | (defun mm-quote-arg (arg) | |
987 | "Return a version of ARG that is safe to evaluate in a shell." | |
988 | (let ((pos 0) new-pos accum) | |
989 | ;; *** bug: we don't handle newline characters properly | |
990 | (while (setq new-pos (string-match "[]*[;!'`\"$\\& \t{} |()<>]" arg pos)) | |
991 | (push (substring arg pos new-pos) accum) | |
992 | (push "\\" accum) | |
993 | (push (list (aref arg new-pos)) accum) | |
994 | (setq pos (1+ new-pos))) | |
995 | (if (= pos 0) | |
996 | arg | |
997 | (apply 'concat (nconc (nreverse accum) (list (substring arg pos)))))))) | |
998 | ||
999 | (defun mm-auto-mode-alist () | |
1000 | "Return an `auto-mode-alist' with only the .gz (etc) thingies." | |
1001 | (let ((alist auto-mode-alist) | |
1002 | out) | |
1003 | (while alist | |
1004 | (when (listp (cdar alist)) | |
1005 | (push (car alist) out)) | |
1006 | (pop alist)) | |
1007 | (nreverse out))) | |
1008 | ||
1009 | (defvar mm-inhibit-file-name-handlers | |
244d58ba | 1010 | '(jka-compr-handler image-file-handler) |
c113de23 GM |
1011 | "A list of handlers doing (un)compression (etc) thingies.") |
1012 | ||
1013 | (defun mm-insert-file-contents (filename &optional visit beg end replace | |
1014 | inhibit) | |
23f87bed | 1015 | "Like `insert-file-contents', but only reads in the file. |
c113de23 GM |
1016 | A buffer may be modified in several ways after reading into the buffer due |
1017 | to advanced Emacs features, such as file-name-handlers, format decoding, | |
23f87bed | 1018 | `find-file-hooks', etc. |
56e09c09 | 1019 | If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'. |
c113de23 | 1020 | This function ensures that none of these modifications will take place." |
4a43ee9b MB |
1021 | (let* ((format-alist nil) |
1022 | (auto-mode-alist (if inhibit nil (mm-auto-mode-alist))) | |
1023 | (default-major-mode 'fundamental-mode) | |
1024 | (enable-local-variables nil) | |
1025 | (after-insert-file-functions nil) | |
1026 | (enable-local-eval nil) | |
1027 | (inhibit-file-name-operation (if inhibit | |
1028 | 'insert-file-contents | |
1029 | inhibit-file-name-operation)) | |
1030 | (inhibit-file-name-handlers | |
1031 | (if inhibit | |
1032 | (append mm-inhibit-file-name-handlers | |
1033 | inhibit-file-name-handlers) | |
1034 | inhibit-file-name-handlers)) | |
1035 | (ffh (if (boundp 'find-file-hook) | |
1036 | 'find-file-hook | |
1037 | 'find-file-hooks)) | |
1038 | (val (symbol-value ffh))) | |
1039 | (set ffh nil) | |
1040 | (unwind-protect | |
1041 | (insert-file-contents filename visit beg end replace) | |
1042 | (set ffh val)))) | |
c113de23 GM |
1043 | |
1044 | (defun mm-append-to-file (start end filename &optional codesys inhibit) | |
1045 | "Append the contents of the region to the end of file FILENAME. | |
1046 | When called from a function, expects three arguments, | |
1047 | START, END and FILENAME. START and END are buffer positions | |
1048 | saying what text to write. | |
1049 | Optional fourth argument specifies the coding system to use when | |
1050 | encoding the file. | |
23f87bed | 1051 | If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'." |
95fa1ff7 SZ |
1052 | (let ((coding-system-for-write |
1053 | (or codesys mm-text-coding-system-for-write | |
c113de23 | 1054 | mm-text-coding-system)) |
95fa1ff7 | 1055 | (inhibit-file-name-operation (if inhibit |
c113de23 GM |
1056 | 'append-to-file |
1057 | inhibit-file-name-operation)) | |
1058 | (inhibit-file-name-handlers | |
1059 | (if inhibit | |
95fa1ff7 | 1060 | (append mm-inhibit-file-name-handlers |
c113de23 GM |
1061 | inhibit-file-name-handlers) |
1062 | inhibit-file-name-handlers))) | |
23f87bed MB |
1063 | (write-region start end filename t 'no-message) |
1064 | (message "Appended to %s" filename))) | |
c113de23 | 1065 | |
95fa1ff7 | 1066 | (defun mm-write-region (start end filename &optional append visit lockname |
c113de23 GM |
1067 | coding-system inhibit) |
1068 | ||
1069 | "Like `write-region'. | |
23f87bed | 1070 | If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'." |
95fa1ff7 SZ |
1071 | (let ((coding-system-for-write |
1072 | (or coding-system mm-text-coding-system-for-write | |
c113de23 | 1073 | mm-text-coding-system)) |
95fa1ff7 | 1074 | (inhibit-file-name-operation (if inhibit |
c113de23 GM |
1075 | 'write-region |
1076 | inhibit-file-name-operation)) | |
1077 | (inhibit-file-name-handlers | |
1078 | (if inhibit | |
95fa1ff7 | 1079 | (append mm-inhibit-file-name-handlers |
c113de23 GM |
1080 | inhibit-file-name-handlers) |
1081 | inhibit-file-name-handlers))) | |
1082 | (write-region start end filename append visit lockname))) | |
1083 | ||
cf5a5c38 MB |
1084 | ;; It is not a MIME function, but some MIME functions use it. |
1085 | (if (and (fboundp 'make-temp-file) | |
1086 | (ignore-errors | |
1087 | (let ((def (symbol-function 'make-temp-file))) | |
1088 | (and (byte-code-function-p def) | |
1089 | (setq def (if (fboundp 'compiled-function-arglist) | |
1090 | ;; XEmacs | |
1091 | (eval (list 'compiled-function-arglist def)) | |
1092 | (aref def 0))) | |
1093 | (>= (length def) 4) | |
1094 | (eq (nth 3 def) 'suffix))))) | |
1095 | (defalias 'mm-make-temp-file 'make-temp-file) | |
1096 | ;; Stolen (and modified for Emacs 20 and XEmacs) from Emacs 22. | |
1097 | (defun mm-make-temp-file (prefix &optional dir-flag suffix) | |
1098 | "Create a temporary file. | |
1099 | The returned file name (created by appending some random characters at the end | |
1100 | of PREFIX, and expanding against `temporary-file-directory' if necessary), | |
1101 | is guaranteed to point to a newly created empty file. | |
1102 | You can then use `write-region' to write new data into the file. | |
1103 | ||
1104 | If DIR-FLAG is non-nil, create a new empty directory instead of a file. | |
1105 | ||
1106 | If SUFFIX is non-nil, add that at the end of the file name." | |
1107 | (let ((umask (default-file-modes)) | |
1108 | file) | |
1109 | (unwind-protect | |
1110 | (progn | |
1111 | ;; Create temp files with strict access rights. It's easy to | |
1112 | ;; loosen them later, whereas it's impossible to close the | |
1113 | ;; time-window of loose permissions otherwise. | |
1114 | (set-default-file-modes 448) | |
1115 | (while (condition-case err | |
1116 | (progn | |
1117 | (setq file | |
1118 | (make-temp-name | |
1119 | (expand-file-name | |
1120 | prefix | |
1121 | (if (fboundp 'temp-directory) | |
1122 | ;; XEmacs | |
1123 | (temp-directory) | |
1124 | temporary-file-directory)))) | |
1125 | (if suffix | |
1126 | (setq file (concat file suffix))) | |
1127 | (if dir-flag | |
1128 | (make-directory file) | |
1129 | (if (or (featurep 'xemacs) | |
1130 | (= emacs-major-version 20)) | |
1131 | ;; NOTE: This is unsafe if Emacs 20 | |
1132 | ;; users and XEmacs users don't use | |
1133 | ;; a secure temp directory. | |
1134 | (if (file-exists-p file) | |
1135 | (signal 'file-already-exists | |
1136 | (list "File exists" file)) | |
1137 | (write-region "" nil file nil 'silent)) | |
1138 | (write-region "" nil file nil 'silent | |
1139 | nil 'excl))) | |
1140 | nil) | |
1141 | (file-already-exists t) | |
1142 | ;; The Emacs 20 and XEmacs versions of | |
1143 | ;; `make-directory' issue `file-error'. | |
1144 | (file-error (or (and (or (featurep 'xemacs) | |
1145 | (= emacs-major-version 20)) | |
1146 | (file-exists-p file)) | |
1147 | (signal (car err) (cdr err))))) | |
1148 | ;; the file was somehow created by someone else between | |
1149 | ;; `make-temp-name' and `write-region', let's try again. | |
1150 | nil) | |
1151 | file) | |
1152 | ;; Reset the umask. | |
1153 | (set-default-file-modes umask))))) | |
1154 | ||
95fa1ff7 SZ |
1155 | (defun mm-image-load-path (&optional package) |
1156 | (let (dir result) | |
1157 | (dolist (path load-path (nreverse result)) | |
f4dd4ae8 MB |
1158 | (when (and path |
1159 | (file-directory-p | |
1160 | (setq dir (concat (file-name-directory | |
1161 | (directory-file-name path)) | |
d31fa104 | 1162 | "etc/images/" (or package "gnus/"))))) |
f4dd4ae8 | 1163 | (push dir result)) |
95fa1ff7 SZ |
1164 | (push path result)))) |
1165 | ||
23f87bed MB |
1166 | ;; Fixme: This doesn't look useful where it's used. |
1167 | (if (fboundp 'detect-coding-region) | |
1168 | (defun mm-detect-coding-region (start end) | |
1169 | "Like `detect-coding-region' except returning the best one." | |
1170 | (let ((coding-systems | |
9d9b0de9 | 1171 | (detect-coding-region start end))) |
23f87bed MB |
1172 | (or (car-safe coding-systems) |
1173 | coding-systems))) | |
1174 | (defun mm-detect-coding-region (start end) | |
1175 | (let ((point (point))) | |
1176 | (goto-char start) | |
1177 | (skip-chars-forward "\0-\177" end) | |
1178 | (prog1 | |
1179 | (if (eq (point) end) 'ascii (mm-guess-charset)) | |
1180 | (goto-char point))))) | |
1181 | ||
1182 | (if (fboundp 'coding-system-get) | |
1183 | (defun mm-detect-mime-charset-region (start end) | |
1184 | "Detect MIME charset of the text in the region between START and END." | |
1185 | (let ((cs (mm-detect-coding-region start end))) | |
bd29ba20 RS |
1186 | (or (coding-system-get cs :mime-charset) |
1187 | (coding-system-get cs 'mime-charset)))) | |
23f87bed MB |
1188 | (defun mm-detect-mime-charset-region (start end) |
1189 | "Detect MIME charset of the text in the region between START and END." | |
1190 | (let ((cs (mm-detect-coding-region start end))) | |
1191 | cs))) | |
1192 | ||
3efe5554 | 1193 | |
c113de23 GM |
1194 | (provide 'mm-util) |
1195 | ||
9d9b0de9 | 1196 | ;; arch-tag: 94dc5388-825d-4fd1-bfa5-2100aa351238 |
c113de23 | 1197 | ;;; mm-util.el ends here |