Commit | Line | Data |
---|---|---|
95fa1ff7 | 1 | ;;; mm-util.el --- Utility functions for Mule and low level things |
23f87bed MB |
2 | ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004 |
3 | ;; Free Software Foundation, Inc. | |
c113de23 GM |
4 | |
5 | ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | |
6 | ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> | |
7 | ;; This file is part of GNU Emacs. | |
8 | ||
9 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
10 | ;; it under the terms of the GNU General Public License as published by | |
11 | ;; the Free Software Foundation; either version 2, or (at your option) | |
12 | ;; any later version. | |
13 | ||
14 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
15 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 | ;; GNU General Public License for more details. | |
18 | ||
19 | ;; You should have received a copy of the GNU General Public License | |
20 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
21 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
22 | ;; Boston, MA 02111-1307, USA. | |
23 | ||
24 | ;;; Commentary: | |
25 | ||
26 | ;;; Code: | |
27 | ||
23f87bed | 28 | (eval-when-compile (require 'cl)) |
c113de23 GM |
29 | (require 'mail-prsvr) |
30 | ||
f53b2875 DL |
31 | (eval-and-compile |
32 | (mapcar | |
33 | (lambda (elem) | |
34 | (let ((nfunc (intern (format "mm-%s" (car elem))))) | |
35 | (if (fboundp (car elem)) | |
36 | (defalias nfunc (car elem)) | |
37 | (defalias nfunc (cdr elem))))) | |
38 | '((decode-coding-string . (lambda (s a) s)) | |
39 | (encode-coding-string . (lambda (s a) s)) | |
40 | (encode-coding-region . ignore) | |
41 | (coding-system-list . ignore) | |
42 | (decode-coding-region . ignore) | |
43 | (char-int . identity) | |
f53b2875 DL |
44 | (coding-system-equal . equal) |
45 | (annotationp . ignore) | |
46 | (set-buffer-file-coding-system . ignore) | |
47 | (make-char | |
48 | . (lambda (charset int) | |
49 | (int-to-char int))) | |
f53b2875 DL |
50 | (read-charset |
51 | . (lambda (prompt) | |
52 | "Return a charset." | |
53 | (intern | |
54 | (completing-read | |
55 | prompt | |
56 | (mapcar (lambda (e) (list (symbol-name (car e)))) | |
57 | mm-mime-mule-charset-alist) | |
58 | nil t)))) | |
95fa1ff7 SZ |
59 | (subst-char-in-string |
60 | . (lambda (from to string) ;; stolen (and renamed) from nnheader.el | |
61 | "Replace characters in STRING from FROM to TO." | |
62 | (let ((string (substring string 0)) ;Copy string. | |
63 | (len (length string)) | |
64 | (idx 0)) | |
65 | ;; Replace all occurrences of FROM with TO. | |
66 | (while (< idx len) | |
67 | (when (= (aref string idx) from) | |
68 | (aset string idx to)) | |
69 | (setq idx (1+ idx))) | |
70 | string))) | |
f53b2875 | 71 | (string-as-unibyte . identity) |
23f87bed | 72 | (string-make-unibyte . identity) |
95fa1ff7 | 73 | (string-as-multibyte . identity) |
56e09c09 | 74 | (multibyte-string-p . ignore) |
23f87bed MB |
75 | ;; It is not a MIME function, but some MIME functions use it. |
76 | (make-temp-file . (lambda (prefix &optional dir-flag) | |
77 | (let ((file (expand-file-name | |
78 | (make-temp-name prefix) | |
79 | (if (fboundp 'temp-directory) | |
80 | (temp-directory) | |
81 | temporary-file-directory)))) | |
82 | (if dir-flag | |
83 | (make-directory file)) | |
84 | file))) | |
56e09c09 DL |
85 | (insert-byte . insert-char) |
86 | (multibyte-char-to-unibyte . identity)))) | |
f53b2875 | 87 | |
c113de23 GM |
88 | (eval-and-compile |
89 | (defalias 'mm-char-or-char-int-p | |
95fa1ff7 | 90 | (cond |
c113de23 | 91 | ((fboundp 'char-or-char-int-p) 'char-or-char-int-p) |
95fa1ff7 | 92 | ((fboundp 'char-valid-p) 'char-valid-p) |
c113de23 GM |
93 | (t 'identity)))) |
94 | ||
23f87bed MB |
95 | ;; Fixme: This seems always to be used to read a MIME charset, so it |
96 | ;; should be re-named and fixed (in Emacs) to offer completion only on | |
97 | ;; proper charset names (base coding systems which have a | |
98 | ;; mime-charset defined). XEmacs doesn't believe in mime-charset; | |
99 | ;; test with | |
100 | ;; `(or (coding-system-get 'iso-8859-1 'mime-charset) | |
101 | ;; (coding-system-get 'iso-8859-1 :mime-charset))' | |
102 | ;; Actually, there should be an `mm-coding-system-mime-charset'. | |
95fa1ff7 SZ |
103 | (eval-and-compile |
104 | (defalias 'mm-read-coding-system | |
105 | (cond | |
106 | ((fboundp 'read-coding-system) | |
107 | (if (and (featurep 'xemacs) | |
108 | (<= (string-to-number emacs-version) 21.1)) | |
109 | (lambda (prompt &optional default-coding-system) | |
110 | (read-coding-system prompt)) | |
111 | 'read-coding-system)) | |
112 | (t (lambda (prompt &optional default-coding-system) | |
113 | "Prompt the user for a coding system." | |
114 | (completing-read | |
115 | prompt (mapcar (lambda (s) (list (symbol-name (car s)))) | |
116 | mm-mime-mule-charset-alist))))))) | |
117 | ||
c113de23 GM |
118 | (defvar mm-coding-system-list nil) |
119 | (defun mm-get-coding-system-list () | |
120 | "Get the coding system list." | |
121 | (or mm-coding-system-list | |
122 | (setq mm-coding-system-list (mm-coding-system-list)))) | |
123 | ||
23f87bed MB |
124 | (defun mm-coding-system-p (cs) |
125 | "Return non-nil if CS is a symbol naming a coding system. | |
0683d241 MB |
126 | In XEmacs, also return non-nil if CS is a coding system object. |
127 | If CS is available, return CS itself in Emacs, and return a coding | |
128 | system object in XEmacs." | |
23f87bed MB |
129 | (if (fboundp 'find-coding-system) |
130 | (find-coding-system cs) | |
131 | (if (fboundp 'coding-system-p) | |
0683d241 MB |
132 | (when (coding-system-p cs) |
133 | cs) | |
23f87bed | 134 | ;; Is this branch ever actually useful? |
0683d241 | 135 | (car (memq cs (mm-get-coding-system-list)))))) |
95fa1ff7 | 136 | |
c113de23 | 137 | (defvar mm-charset-synonym-alist |
95fa1ff7 | 138 | `( |
95fa1ff7 | 139 | ;; Not in XEmacs, but it's not a proper MIME charset anyhow. |
72eb5fc7 SZ |
140 | ,@(unless (mm-coding-system-p 'x-ctext) |
141 | '((x-ctext . ctext))) | |
23f87bed MB |
142 | ;; ISO-8859-15 is very similar to ISO-8859-1. But it's _different_! |
143 | ,@(unless (mm-coding-system-p 'iso-8859-15) | |
72eb5fc7 | 144 | '((iso-8859-15 . iso-8859-1))) |
23f87bed MB |
145 | ;; BIG-5HKSCS is similar to, but different than, BIG-5. |
146 | ,@(unless (mm-coding-system-p 'big5-hkscs) | |
147 | '((big5-hkscs . big5))) | |
d1a7bc93 DL |
148 | ;; Windows-1252 is actually a superset of Latin-1. See also |
149 | ;; `gnus-article-dumbquotes-map'. | |
a1506d29 | 150 | ,@(unless (mm-coding-system-p 'windows-1252) |
72eb5fc7 SZ |
151 | (if (mm-coding-system-p 'cp1252) |
152 | '((windows-1252 . cp1252)) | |
153 | '((windows-1252 . iso-8859-1)))) | |
20c381cf GM |
154 | ;; Windows-1250 is a variant of Latin-2 heavily used by Microsoft |
155 | ;; Outlook users in Czech republic. Use this to allow reading of their | |
156 | ;; e-mails. cp1250 should be defined by M-x codepage-setup. | |
72eb5fc7 SZ |
157 | ,@(if (and (not (mm-coding-system-p 'windows-1250)) |
158 | (mm-coding-system-p 'cp1250)) | |
159 | '((windows-1250 . cp1250))) | |
95fa1ff7 | 160 | ) |
c113de23 GM |
161 | "A mapping from invalid charset names to the real charset names.") |
162 | ||
c113de23 | 163 | (defvar mm-binary-coding-system |
95fa1ff7 | 164 | (cond |
c113de23 GM |
165 | ((mm-coding-system-p 'binary) 'binary) |
166 | ((mm-coding-system-p 'no-conversion) 'no-conversion) | |
167 | (t nil)) | |
168 | "100% binary coding system.") | |
169 | ||
170 | (defvar mm-text-coding-system | |
171 | (or (if (memq system-type '(windows-nt ms-dos ms-windows)) | |
172 | (and (mm-coding-system-p 'raw-text-dos) 'raw-text-dos) | |
173 | (and (mm-coding-system-p 'raw-text) 'raw-text)) | |
174 | mm-binary-coding-system) | |
175 | "Text-safe coding system (For removing ^M).") | |
176 | ||
177 | (defvar mm-text-coding-system-for-write nil | |
178 | "Text coding system for write.") | |
179 | ||
180 | (defvar mm-auto-save-coding-system | |
95fa1ff7 | 181 | (cond |
23f87bed | 182 | ((mm-coding-system-p 'utf-8-emacs) ; Mule 7 |
56e09c09 DL |
183 | (if (memq system-type '(windows-nt ms-dos ms-windows)) |
184 | (if (mm-coding-system-p 'utf-8-emacs-dos) | |
185 | 'utf-8-emacs-dos mm-binary-coding-system) | |
186 | 'utf-8-emacs)) | |
c113de23 GM |
187 | ((mm-coding-system-p 'emacs-mule) |
188 | (if (memq system-type '(windows-nt ms-dos ms-windows)) | |
95fa1ff7 | 189 | (if (mm-coding-system-p 'emacs-mule-dos) |
c113de23 GM |
190 | 'emacs-mule-dos mm-binary-coding-system) |
191 | 'emacs-mule)) | |
192 | ((mm-coding-system-p 'escape-quoted) 'escape-quoted) | |
193 | (t mm-binary-coding-system)) | |
194 | "Coding system of auto save file.") | |
195 | ||
95fa1ff7 | 196 | (defvar mm-universal-coding-system mm-auto-save-coding-system |
47b63dfa | 197 | "The universal coding system.") |
95fa1ff7 SZ |
198 | |
199 | ;; Fixme: some of the cars here aren't valid MIME charsets. That | |
200 | ;; should only matter with XEmacs, though. | |
201 | (defvar mm-mime-mule-charset-alist | |
202 | `((us-ascii ascii) | |
203 | (iso-8859-1 latin-iso8859-1) | |
204 | (iso-8859-2 latin-iso8859-2) | |
205 | (iso-8859-3 latin-iso8859-3) | |
206 | (iso-8859-4 latin-iso8859-4) | |
207 | (iso-8859-5 cyrillic-iso8859-5) | |
208 | ;; Non-mule (X)Emacs uses the last mule-charset for 8bit characters. | |
209 | ;; The fake mule-charset, gnus-koi8-r, tells Gnus that the default | |
210 | ;; charset is koi8-r, not iso-8859-5. | |
211 | (koi8-r cyrillic-iso8859-5 gnus-koi8-r) | |
212 | (iso-8859-6 arabic-iso8859-6) | |
213 | (iso-8859-7 greek-iso8859-7) | |
214 | (iso-8859-8 hebrew-iso8859-8) | |
215 | (iso-8859-9 latin-iso8859-9) | |
216 | (iso-8859-14 latin-iso8859-14) | |
217 | (iso-8859-15 latin-iso8859-15) | |
218 | (viscii vietnamese-viscii-lower) | |
219 | (iso-2022-jp latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978) | |
220 | (euc-kr korean-ksc5601) | |
221 | (gb2312 chinese-gb2312) | |
222 | (big5 chinese-big5-1 chinese-big5-2) | |
223 | (tibetan tibetan) | |
224 | (thai-tis620 thai-tis620) | |
0683d241 | 225 | (windows-1251 cyrillic-iso8859-5) |
95fa1ff7 SZ |
226 | (iso-2022-7bit ethiopic arabic-1-column arabic-2-column) |
227 | (iso-2022-jp-2 latin-iso8859-1 greek-iso8859-7 | |
228 | latin-jisx0201 japanese-jisx0208-1978 | |
229 | chinese-gb2312 japanese-jisx0208 | |
0683d241 | 230 | korean-ksc5601 japanese-jisx0212) |
95fa1ff7 SZ |
231 | (iso-2022-int-1 latin-iso8859-1 greek-iso8859-7 |
232 | latin-jisx0201 japanese-jisx0208-1978 | |
233 | chinese-gb2312 japanese-jisx0208 | |
234 | korean-ksc5601 japanese-jisx0212 | |
235 | chinese-cns11643-1 chinese-cns11643-2) | |
236 | (iso-2022-int-1 latin-iso8859-1 latin-iso8859-2 | |
237 | cyrillic-iso8859-5 greek-iso8859-7 | |
238 | latin-jisx0201 japanese-jisx0208-1978 | |
239 | chinese-gb2312 japanese-jisx0208 | |
240 | korean-ksc5601 japanese-jisx0212 | |
241 | chinese-cns11643-1 chinese-cns11643-2 | |
242 | chinese-cns11643-3 chinese-cns11643-4 | |
243 | chinese-cns11643-5 chinese-cns11643-6 | |
244 | chinese-cns11643-7) | |
0683d241 MB |
245 | (iso-2022-jp-3 latin-jisx0201 japanese-jisx0208-1978 japanese-jisx0208 |
246 | japanese-jisx0213-1 japanese-jisx0213-2) | |
247 | (shift_jis latin-jisx0201 katakana-jisx0201 japanese-jisx0208) | |
95fa1ff7 SZ |
248 | ,(if (or (not (fboundp 'charsetp)) ;; non-Mule case |
249 | (charsetp 'unicode-a) | |
250 | (not (mm-coding-system-p 'mule-utf-8))) | |
251 | '(utf-8 unicode-a unicode-b unicode-c unicode-d unicode-e) | |
252 | ;; If we have utf-8 we're in Mule 5+. | |
253 | (append '(utf-8) | |
254 | (delete 'ascii | |
255 | (coding-system-get 'mule-utf-8 'safe-charsets))))) | |
256 | "Alist of MIME-charset/MULE-charsets.") | |
257 | ||
0683d241 MB |
258 | (defun mm-enrich-utf-8-by-mule-ucs () |
259 | "Make the `utf-8' MIME charset usable by the Mule-UCS package. | |
260 | This function will run when the `un-define' module is loaded under | |
261 | XEmacs, and fill the `utf-8' entry in `mm-mime-mule-charset-alist' | |
262 | with Mule charsets. It is completely useless for Emacs." | |
263 | (unless (cdr (delete '(mm-enrich-utf-8-by-mule-ucs) | |
264 | (assoc "un-define" after-load-alist))) | |
265 | (setq after-load-alist | |
266 | (delete '("un-define") after-load-alist))) | |
267 | (when (boundp 'unicode-basic-translation-charset-order-list) | |
268 | (condition-case nil | |
269 | (let ((val (delq | |
270 | 'ascii | |
271 | (copy-sequence | |
272 | (symbol-value | |
273 | 'unicode-basic-translation-charset-order-list)))) | |
274 | (elem (assq 'utf-8 mm-mime-mule-charset-alist))) | |
275 | (if elem | |
276 | (setcdr elem val) | |
277 | (setq mm-mime-mule-charset-alist | |
278 | (nconc mm-mime-mule-charset-alist | |
279 | (list (cons 'utf-8 val)))))) | |
280 | (error)))) | |
281 | ||
282 | ;; Correct by construction, but should be unnecessary for Emacs: | |
283 | (if (featurep 'xemacs) | |
284 | (eval-after-load "un-define" '(mm-enrich-utf-8-by-mule-ucs)) | |
285 | (when (and (fboundp 'coding-system-list) | |
286 | (fboundp 'sort-coding-systems)) | |
287 | (let ((css (sort-coding-systems (coding-system-list 'base-only))) | |
288 | cs mime mule alist) | |
289 | (while css | |
290 | (setq cs (pop css) | |
291 | mime (or (coding-system-get cs :mime-charset) ; Emacs 22 | |
292 | (coding-system-get cs 'mime-charset))) | |
293 | (when (and mime | |
294 | (not (eq t (setq mule | |
295 | (coding-system-get cs 'safe-charsets)))) | |
296 | (not (assq mime alist))) | |
297 | (push (cons mime (delq 'ascii mule)) alist))) | |
298 | (setq mm-mime-mule-charset-alist (nreverse alist))))) | |
95fa1ff7 | 299 | |
23f87bed MB |
300 | (defcustom mm-coding-system-priorities |
301 | (if (boundp 'current-language-environment) | |
302 | (let ((lang (symbol-value 'current-language-environment))) | |
303 | (cond ((string= lang "Japanese") | |
5153a47a MB |
304 | ;; Japanese users prefer iso-2022-jp to euc-japan or |
305 | ;; shift_jis, however iso-8859-1 should be used when | |
306 | ;; there are only ASCII text and Latin-1 characters. | |
307 | '(iso-8859-1 iso-2022-jp iso-2022-jp-2 shift_jis utf-8))))) | |
23f87bed MB |
308 | "Preferred coding systems for encoding outgoing messages. |
309 | ||
310 | More than one suitable coding system may be found for some text. | |
311 | By default, the coding system with the highest priority is used | |
312 | to encode outgoing messages (see `sort-coding-systems'). If this | |
313 | variable is set, it overrides the default priority." | |
a08b59c9 | 314 | :version "21.2" |
23f87bed MB |
315 | :type '(repeat (symbol :tag "Coding system")) |
316 | :group 'mime) | |
317 | ||
318 | ;; ?? | |
1f7d2e14 SZ |
319 | (defvar mm-use-find-coding-systems-region |
320 | (fboundp 'find-coding-systems-region) | |
23f87bed MB |
321 | "Use `find-coding-systems-region' to find proper coding systems. |
322 | ||
323 | Setting it to nil is useful on Emacsen supporting Unicode if sending | |
324 | mail with multiple parts is preferred to sending a Unicode one.") | |
1f7d2e14 | 325 | |
c113de23 GM |
326 | ;;; Internal variables: |
327 | ||
328 | ;;; Functions: | |
329 | ||
330 | (defun mm-mule-charset-to-mime-charset (charset) | |
1c57d870 | 331 | "Return the MIME charset corresponding to the given Mule CHARSET." |
23f87bed MB |
332 | (if (and (fboundp 'find-coding-systems-for-charsets) |
333 | (fboundp 'sort-coding-systems)) | |
0683d241 MB |
334 | (let ((css (sort (sort-coding-systems |
335 | (find-coding-systems-for-charsets (list charset))) | |
336 | 'mm-sort-coding-systems-predicate)) | |
337 | cs mime) | |
338 | (while (and (not mime) | |
339 | css) | |
340 | (when (setq cs (pop css)) | |
341 | (setq mime (or (coding-system-get cs :mime-charset) | |
342 | (coding-system-get cs 'mime-charset))))) | |
95fa1ff7 | 343 | mime) |
0683d241 MB |
344 | (let ((alist (mapcar (lambda (cs) |
345 | (assq cs mm-mime-mule-charset-alist)) | |
346 | (sort (mapcar 'car mm-mime-mule-charset-alist) | |
347 | 'mm-sort-coding-systems-predicate))) | |
95fa1ff7 SZ |
348 | out) |
349 | (while alist | |
350 | (when (memq charset (cdar alist)) | |
351 | (setq out (caar alist) | |
352 | alist nil)) | |
353 | (pop alist)) | |
354 | out))) | |
c113de23 GM |
355 | |
356 | (defun mm-charset-to-coding-system (charset &optional lbt) | |
357 | "Return coding-system corresponding to CHARSET. | |
358 | CHARSET is a symbol naming a MIME charset. | |
359 | If optional argument LBT (`unix', `dos' or `mac') is specified, it is | |
360 | used as the line break code type of the coding system." | |
361 | (when (stringp charset) | |
362 | (setq charset (intern (downcase charset)))) | |
c113de23 GM |
363 | (when lbt |
364 | (setq charset (intern (format "%s-%s" charset lbt)))) | |
365 | (cond | |
47b63dfa SZ |
366 | ((null charset) |
367 | charset) | |
c113de23 | 368 | ;; Running in a non-MULE environment. |
23f87bed MB |
369 | ((or (null (mm-get-coding-system-list)) |
370 | (not (fboundp 'coding-system-get))) | |
c113de23 GM |
371 | charset) |
372 | ;; ascii | |
373 | ((eq charset 'us-ascii) | |
374 | 'ascii) | |
1c57d870 DL |
375 | ;; Check to see whether we can handle this charset. (This depends |
376 | ;; on there being some coding system matching each `mime-charset' | |
95fa1ff7 SZ |
377 | ;; property defined, as there should be.) |
378 | ((and (mm-coding-system-p charset) | |
379 | ;;; Doing this would potentially weed out incorrect charsets. | |
380 | ;;; charset | |
381 | ;;; (eq charset (coding-system-get charset 'mime-charset)) | |
382 | ) | |
c113de23 | 383 | charset) |
95fa1ff7 | 384 | ;; Translate invalid charsets. |
d62d49df | 385 | ((let ((cs (cdr (assq charset mm-charset-synonym-alist)))) |
23f87bed | 386 | (and cs (mm-coding-system-p cs) cs))) |
95fa1ff7 SZ |
387 | ;; Last resort: search the coding system list for entries which |
388 | ;; have the right mime-charset in case the canonical name isn't | |
389 | ;; defined (though it should be). | |
390 | ((let (cs) | |
391 | ;; mm-get-coding-system-list returns a list of cs without lbt. | |
392 | ;; Do we need -lbt? | |
393 | (dolist (c (mm-get-coding-system-list)) | |
394 | (if (and (null cs) | |
56e09c09 DL |
395 | (eq charset (or (coding-system-get c :mime-charset) |
396 | (coding-system-get c 'mime-charset)))) | |
95fa1ff7 SZ |
397 | (setq cs c))) |
398 | cs)))) | |
399 | ||
400 | (defsubst mm-replace-chars-in-string (string from to) | |
401 | (mm-subst-char-in-string from to string)) | |
402 | ||
403 | (eval-and-compile | |
404 | (defvar mm-emacs-mule (and (not (featurep 'xemacs)) | |
405 | (boundp 'default-enable-multibyte-characters) | |
406 | default-enable-multibyte-characters | |
407 | (fboundp 'set-buffer-multibyte)) | |
56e09c09 | 408 | "True in Emacs with Mule.") |
95fa1ff7 SZ |
409 | |
410 | (if mm-emacs-mule | |
411 | (defun mm-enable-multibyte () | |
412 | "Set the multibyte flag of the current buffer. | |
1c57d870 DL |
413 | Only do this if the default value of `enable-multibyte-characters' is |
414 | non-nil. This is a no-op in XEmacs." | |
473ad4a5 | 415 | (set-buffer-multibyte 'to)) |
95fa1ff7 | 416 | (defalias 'mm-enable-multibyte 'ignore)) |
c113de23 | 417 | |
95fa1ff7 SZ |
418 | (if mm-emacs-mule |
419 | (defun mm-disable-multibyte () | |
420 | "Unset the multibyte flag of in the current buffer. | |
1c57d870 | 421 | This is a no-op in XEmacs." |
95fa1ff7 | 422 | (set-buffer-multibyte nil)) |
56e09c09 | 423 | (defalias 'mm-disable-multibyte 'ignore))) |
052802c1 | 424 | |
c113de23 GM |
425 | (defun mm-preferred-coding-system (charset) |
426 | ;; A typo in some Emacs versions. | |
47b63dfa SZ |
427 | (or (get-charset-property charset 'preferred-coding-system) |
428 | (get-charset-property charset 'prefered-coding-system))) | |
c113de23 | 429 | |
23f87bed MB |
430 | ;; Mule charsets shouldn't be used. |
431 | (defsubst mm-guess-charset () | |
432 | "Guess Mule charset from the language environment." | |
433 | (or | |
434 | mail-parse-mule-charset ;; cached mule-charset | |
435 | (progn | |
436 | (setq mail-parse-mule-charset | |
437 | (and (boundp 'current-language-environment) | |
438 | (car (last | |
439 | (assq 'charset | |
440 | (assoc current-language-environment | |
441 | language-info-alist)))))) | |
442 | (if (or (not mail-parse-mule-charset) | |
443 | (eq mail-parse-mule-charset 'ascii)) | |
444 | (setq mail-parse-mule-charset | |
445 | (or (car (last (assq mail-parse-charset | |
446 | mm-mime-mule-charset-alist))) | |
447 | ;; default | |
448 | 'latin-iso8859-1))) | |
449 | mail-parse-mule-charset))) | |
450 | ||
c113de23 GM |
451 | (defun mm-charset-after (&optional pos) |
452 | "Return charset of a character in current buffer at position POS. | |
453 | If POS is nil, it defauls to the current point. | |
454 | If POS is out of range, the value is nil. | |
455 | If the charset is `composition', return the actual one." | |
052802c1 DL |
456 | (let ((char (char-after pos)) charset) |
457 | (if (< (mm-char-int char) 128) | |
458 | (setq charset 'ascii) | |
459 | ;; charset-after is fake in some Emacsen. | |
460 | (setq charset (and (fboundp 'char-charset) (char-charset char))) | |
56e09c09 | 461 | (if (eq charset 'composition) ; Mule 4 |
052802c1 DL |
462 | (let ((p (or pos (point)))) |
463 | (cadr (find-charset-region p (1+ p)))) | |
464 | (if (and charset (not (memq charset '(ascii eight-bit-control | |
465 | eight-bit-graphic)))) | |
466 | charset | |
23f87bed | 467 | (mm-guess-charset)))))) |
c113de23 GM |
468 | |
469 | (defun mm-mime-charset (charset) | |
1c57d870 | 470 | "Return the MIME charset corresponding to the given Mule CHARSET." |
95fa1ff7 SZ |
471 | (if (eq charset 'unknown) |
472 | (error "The message contains non-printable characters, please use attachment")) | |
052802c1 | 473 | (if (and (fboundp 'coding-system-get) (fboundp 'get-charset-property)) |
c113de23 GM |
474 | ;; This exists in Emacs 20. |
475 | (or | |
476 | (and (mm-preferred-coding-system charset) | |
56e09c09 DL |
477 | (or (coding-system-get |
478 | (mm-preferred-coding-system charset) :mime-charset) | |
479 | (coding-system-get | |
480 | (mm-preferred-coding-system charset) 'mime-charset))) | |
c113de23 GM |
481 | (and (eq charset 'ascii) |
482 | 'us-ascii) | |
95fa1ff7 | 483 | (mm-preferred-coding-system charset) |
c113de23 GM |
484 | (mm-mule-charset-to-mime-charset charset)) |
485 | ;; This is for XEmacs. | |
486 | (mm-mule-charset-to-mime-charset charset))) | |
487 | ||
488 | (defun mm-delete-duplicates (list) | |
b8898393 | 489 | "Simple substitute for CL `delete-duplicates', testing with `equal'." |
c113de23 GM |
490 | (let (result head) |
491 | (while list | |
492 | (setq head (car list)) | |
493 | (setq list (delete head list)) | |
494 | (setq result (cons head result))) | |
495 | (nreverse result))) | |
496 | ||
23f87bed MB |
497 | ;; Fixme: This is used in places when it should be testing the |
498 | ;; default multibyteness. See mm-default-multibyte-p. | |
499 | (eval-and-compile | |
052802c1 DL |
500 | (if (and (not (featurep 'xemacs)) |
501 | (boundp 'enable-multibyte-characters)) | |
23f87bed MB |
502 | (defun mm-multibyte-p () |
503 | "Non-nil if multibyte is enabled in the current buffer." | |
504 | enable-multibyte-characters) | |
505 | (defun mm-multibyte-p () (featurep 'mule)))) | |
506 | ||
507 | (defun mm-default-multibyte-p () | |
508 | "Return non-nil if the session is multibyte. | |
509 | This affects whether coding conversion should be attempted generally." | |
510 | (if (featurep 'mule) | |
511 | (if (boundp 'default-enable-multibyte-characters) | |
512 | default-enable-multibyte-characters | |
513 | t))) | |
c113de23 | 514 | |
47b63dfa | 515 | (defun mm-sort-coding-systems-predicate (a b) |
23f87bed MB |
516 | (let ((priorities |
517 | (mapcar (lambda (cs) | |
518 | ;; Note: invalid entries are dropped silently | |
0683d241 | 519 | (and (setq cs (mm-coding-system-p cs)) |
23f87bed MB |
520 | (coding-system-base cs))) |
521 | mm-coding-system-priorities))) | |
0683d241 MB |
522 | (and (setq a (mm-coding-system-p a)) |
523 | (if (setq b (mm-coding-system-p b)) | |
524 | (> (length (memq (coding-system-base a) priorities)) | |
525 | (length (memq (coding-system-base b) priorities))) | |
526 | t)))) | |
47b63dfa | 527 | |
0c129bca | 528 | (defun mm-find-mime-charset-region (b e) |
95fa1ff7 | 529 | "Return the MIME charsets needed to encode the region between B and E. |
f0529b5b | 530 | nil means ASCII, a single-element list represents an appropriate MIME |
95fa1ff7 | 531 | charset, and a longer list means no appropriate charset." |
47b63dfa SZ |
532 | (let (charsets) |
533 | ;; The return possibilities of this function are a mess... | |
534 | (or (and (mm-multibyte-p) | |
1f7d2e14 | 535 | mm-use-find-coding-systems-region |
47b63dfa SZ |
536 | ;; Find the mime-charset of the most preferred coding |
537 | ;; system that has one. | |
538 | (let ((systems (find-coding-systems-region b e))) | |
539 | (when mm-coding-system-priorities | |
a1506d29 | 540 | (setq systems |
47b63dfa | 541 | (sort systems 'mm-sort-coding-systems-predicate))) |
47b63dfa SZ |
542 | (setq systems (delq 'compound-text systems)) |
543 | (unless (equal systems '(undecided)) | |
544 | (while systems | |
56e09c09 DL |
545 | (let* ((head (pop systems)) |
546 | (cs (or (coding-system-get head :mime-charset) | |
547 | (coding-system-get head 'mime-charset)))) | |
23f87bed MB |
548 | ;; The mime-charset (`x-ctext') of |
549 | ;; `compound-text' is not in the IANA list. We | |
550 | ;; shouldn't normally use anything here with a | |
551 | ;; mime-charset having an `x-' prefix. | |
552 | ;; Fixme: Allow this to be overridden, since | |
553 | ;; there is existing use of x-ctext. | |
554 | ;; Also people apparently need the coding system | |
555 | ;; `iso-2022-jp-3' (which Mule-UCS defines with | |
556 | ;; mime-charset, though it's not valid). | |
557 | (if (and cs | |
558 | (not (string-match "^[Xx]-" (symbol-name cs))) | |
559 | ;; UTF-16 of any variety is invalid for | |
560 | ;; text parts and, unfortunately, has | |
561 | ;; mime-charset defined both in Mule-UCS | |
562 | ;; and versions of Emacs. (The name | |
563 | ;; might be `mule-utf-16...' or | |
564 | ;; `utf-16...'.) | |
565 | (not (string-match "utf-16" (symbol-name cs)))) | |
47b63dfa SZ |
566 | (setq systems nil |
567 | charsets (list cs)))))) | |
568 | charsets)) | |
0c129bca DL |
569 | ;; Fixme: won't work for unibyte Emacs 22: |
570 | ||
23f87bed MB |
571 | ;; Otherwise we're not multibyte, we're XEmacs, or a single |
572 | ;; coding system won't cover it. | |
a1506d29 | 573 | (setq charsets |
47b63dfa SZ |
574 | (mm-delete-duplicates |
575 | (mapcar 'mm-mime-charset | |
576 | (delq 'ascii | |
577 | (mm-find-charset-region b e)))))) | |
47b63dfa | 578 | charsets)) |
95fa1ff7 | 579 | |
c113de23 GM |
580 | (defmacro mm-with-unibyte-buffer (&rest forms) |
581 | "Create a temporary buffer, and evaluate FORMS there like `progn'. | |
1c57d870 DL |
582 | Use unibyte mode for this." |
583 | `(let (default-enable-multibyte-characters) | |
584 | (with-temp-buffer ,@forms))) | |
c113de23 GM |
585 | (put 'mm-with-unibyte-buffer 'lisp-indent-function 0) |
586 | (put 'mm-with-unibyte-buffer 'edebug-form-spec '(body)) | |
587 | ||
23f87bed MB |
588 | (defmacro mm-with-multibyte-buffer (&rest forms) |
589 | "Create a temporary buffer, and evaluate FORMS there like `progn'. | |
590 | Use multibyte mode for this." | |
591 | `(let ((default-enable-multibyte-characters t)) | |
592 | (with-temp-buffer ,@forms))) | |
593 | (put 'mm-with-multibyte-buffer 'lisp-indent-function 0) | |
594 | (put 'mm-with-multibyte-buffer 'edebug-form-spec '(body)) | |
595 | ||
c113de23 | 596 | (defmacro mm-with-unibyte-current-buffer (&rest forms) |
56e09c09 | 597 | "Evaluate FORMS with current buffer temporarily made unibyte. |
1c57d870 DL |
598 | Also bind `default-enable-multibyte-characters' to nil. |
599 | Equivalent to `progn' in XEmacs" | |
95fa1ff7 SZ |
600 | (let ((multibyte (make-symbol "multibyte")) |
601 | (buffer (make-symbol "buffer"))) | |
a1506d29 | 602 | `(if mm-emacs-mule |
95fa1ff7 SZ |
603 | (let ((,multibyte enable-multibyte-characters) |
604 | (,buffer (current-buffer))) | |
1c57d870 DL |
605 | (unwind-protect |
606 | (let (default-enable-multibyte-characters) | |
607 | (set-buffer-multibyte nil) | |
608 | ,@forms) | |
95fa1ff7 | 609 | (set-buffer ,buffer) |
1c57d870 | 610 | (set-buffer-multibyte ,multibyte))) |
95fa1ff7 | 611 | (let (default-enable-multibyte-characters) |
1c57d870 | 612 | ,@forms)))) |
c113de23 GM |
613 | (put 'mm-with-unibyte-current-buffer 'lisp-indent-function 0) |
614 | (put 'mm-with-unibyte-current-buffer 'edebug-form-spec '(body)) | |
615 | ||
616 | (defmacro mm-with-unibyte (&rest forms) | |
23f87bed | 617 | "Eval the FORMS with the default value of `enable-multibyte-characters' nil." |
1c57d870 DL |
618 | `(let (default-enable-multibyte-characters) |
619 | ,@forms)) | |
c113de23 GM |
620 | (put 'mm-with-unibyte 'lisp-indent-function 0) |
621 | (put 'mm-with-unibyte 'edebug-form-spec '(body)) | |
622 | ||
23f87bed MB |
623 | (defmacro mm-with-multibyte (&rest forms) |
624 | "Eval the FORMS with the default value of `enable-multibyte-characters' t." | |
625 | `(let ((default-enable-multibyte-characters t)) | |
626 | ,@forms)) | |
627 | (put 'mm-with-multibyte 'lisp-indent-function 0) | |
628 | (put 'mm-with-multibyte 'edebug-form-spec '(body)) | |
629 | ||
c113de23 | 630 | (defun mm-find-charset-region (b e) |
1c57d870 | 631 | "Return a list of Emacs charsets in the region B to E." |
c113de23 GM |
632 | (cond |
633 | ((and (mm-multibyte-p) | |
95fa1ff7 | 634 | (fboundp 'find-charset-region)) |
c113de23 | 635 | ;; Remove composition since the base charsets have been included. |
95fa1ff7 SZ |
636 | ;; Remove eight-bit-*, treat them as ascii. |
637 | (let ((css (find-charset-region b e))) | |
638 | (mapcar (lambda (cs) (setq css (delq cs css))) | |
639 | '(composition eight-bit-control eight-bit-graphic | |
640 | control-1)) | |
641 | css)) | |
052802c1 DL |
642 | (t |
643 | ;; We are in a unibyte buffer or XEmacs non-mule, so we futz around a bit. | |
c113de23 GM |
644 | (save-excursion |
645 | (save-restriction | |
646 | (narrow-to-region b e) | |
647 | (goto-char (point-min)) | |
648 | (skip-chars-forward "\0-\177") | |
649 | (if (eobp) | |
650 | '(ascii) | |
052802c1 DL |
651 | (let (charset) |
652 | (setq charset | |
653 | (and (boundp 'current-language-environment) | |
95fa1ff7 SZ |
654 | (car (last (assq 'charset |
655 | (assoc current-language-environment | |
052802c1 DL |
656 | language-info-alist)))))) |
657 | (if (eq charset 'ascii) (setq charset nil)) | |
658 | (or charset | |
659 | (setq charset | |
660 | (car (last (assq mail-parse-charset | |
661 | mm-mime-mule-charset-alist))))) | |
662 | (list 'ascii (or charset 'latin-iso8859-1))))))))) | |
c113de23 GM |
663 | |
664 | (if (fboundp 'shell-quote-argument) | |
665 | (defalias 'mm-quote-arg 'shell-quote-argument) | |
666 | (defun mm-quote-arg (arg) | |
667 | "Return a version of ARG that is safe to evaluate in a shell." | |
668 | (let ((pos 0) new-pos accum) | |
669 | ;; *** bug: we don't handle newline characters properly | |
670 | (while (setq new-pos (string-match "[]*[;!'`\"$\\& \t{} |()<>]" arg pos)) | |
671 | (push (substring arg pos new-pos) accum) | |
672 | (push "\\" accum) | |
673 | (push (list (aref arg new-pos)) accum) | |
674 | (setq pos (1+ new-pos))) | |
675 | (if (= pos 0) | |
676 | arg | |
677 | (apply 'concat (nconc (nreverse accum) (list (substring arg pos)))))))) | |
678 | ||
679 | (defun mm-auto-mode-alist () | |
680 | "Return an `auto-mode-alist' with only the .gz (etc) thingies." | |
681 | (let ((alist auto-mode-alist) | |
682 | out) | |
683 | (while alist | |
684 | (when (listp (cdar alist)) | |
685 | (push (car alist) out)) | |
686 | (pop alist)) | |
687 | (nreverse out))) | |
688 | ||
689 | (defvar mm-inhibit-file-name-handlers | |
244d58ba | 690 | '(jka-compr-handler image-file-handler) |
c113de23 GM |
691 | "A list of handlers doing (un)compression (etc) thingies.") |
692 | ||
693 | (defun mm-insert-file-contents (filename &optional visit beg end replace | |
694 | inhibit) | |
23f87bed | 695 | "Like `insert-file-contents', but only reads in the file. |
c113de23 GM |
696 | A buffer may be modified in several ways after reading into the buffer due |
697 | to advanced Emacs features, such as file-name-handlers, format decoding, | |
23f87bed | 698 | `find-file-hooks', etc. |
56e09c09 | 699 | If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'. |
c113de23 GM |
700 | This function ensures that none of these modifications will take place." |
701 | (let ((format-alist nil) | |
702 | (auto-mode-alist (if inhibit nil (mm-auto-mode-alist))) | |
703 | (default-major-mode 'fundamental-mode) | |
704 | (enable-local-variables nil) | |
95fa1ff7 | 705 | (after-insert-file-functions nil) |
c113de23 GM |
706 | (enable-local-eval nil) |
707 | (find-file-hooks nil) | |
95fa1ff7 | 708 | (inhibit-file-name-operation (if inhibit |
c113de23 GM |
709 | 'insert-file-contents |
710 | inhibit-file-name-operation)) | |
711 | (inhibit-file-name-handlers | |
712 | (if inhibit | |
95fa1ff7 | 713 | (append mm-inhibit-file-name-handlers |
c113de23 GM |
714 | inhibit-file-name-handlers) |
715 | inhibit-file-name-handlers))) | |
716 | (insert-file-contents filename visit beg end replace))) | |
717 | ||
718 | (defun mm-append-to-file (start end filename &optional codesys inhibit) | |
719 | "Append the contents of the region to the end of file FILENAME. | |
720 | When called from a function, expects three arguments, | |
721 | START, END and FILENAME. START and END are buffer positions | |
722 | saying what text to write. | |
723 | Optional fourth argument specifies the coding system to use when | |
724 | encoding the file. | |
23f87bed | 725 | If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'." |
95fa1ff7 SZ |
726 | (let ((coding-system-for-write |
727 | (or codesys mm-text-coding-system-for-write | |
c113de23 | 728 | mm-text-coding-system)) |
95fa1ff7 | 729 | (inhibit-file-name-operation (if inhibit |
c113de23 GM |
730 | 'append-to-file |
731 | inhibit-file-name-operation)) | |
732 | (inhibit-file-name-handlers | |
733 | (if inhibit | |
95fa1ff7 | 734 | (append mm-inhibit-file-name-handlers |
c113de23 GM |
735 | inhibit-file-name-handlers) |
736 | inhibit-file-name-handlers))) | |
23f87bed MB |
737 | (write-region start end filename t 'no-message) |
738 | (message "Appended to %s" filename))) | |
c113de23 | 739 | |
95fa1ff7 | 740 | (defun mm-write-region (start end filename &optional append visit lockname |
c113de23 GM |
741 | coding-system inhibit) |
742 | ||
743 | "Like `write-region'. | |
23f87bed | 744 | If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'." |
95fa1ff7 SZ |
745 | (let ((coding-system-for-write |
746 | (or coding-system mm-text-coding-system-for-write | |
c113de23 | 747 | mm-text-coding-system)) |
95fa1ff7 | 748 | (inhibit-file-name-operation (if inhibit |
c113de23 GM |
749 | 'write-region |
750 | inhibit-file-name-operation)) | |
751 | (inhibit-file-name-handlers | |
752 | (if inhibit | |
95fa1ff7 | 753 | (append mm-inhibit-file-name-handlers |
c113de23 GM |
754 | inhibit-file-name-handlers) |
755 | inhibit-file-name-handlers))) | |
756 | (write-region start end filename append visit lockname))) | |
757 | ||
95fa1ff7 SZ |
758 | (defun mm-image-load-path (&optional package) |
759 | (let (dir result) | |
760 | (dolist (path load-path (nreverse result)) | |
f4dd4ae8 MB |
761 | (when (and path |
762 | (file-directory-p | |
763 | (setq dir (concat (file-name-directory | |
764 | (directory-file-name path)) | |
765 | "etc/" (or package "gnus/"))))) | |
766 | (push dir result)) | |
95fa1ff7 SZ |
767 | (push path result)))) |
768 | ||
23f87bed MB |
769 | ;; Fixme: This doesn't look useful where it's used. |
770 | (if (fboundp 'detect-coding-region) | |
771 | (defun mm-detect-coding-region (start end) | |
772 | "Like `detect-coding-region' except returning the best one." | |
773 | (let ((coding-systems | |
774 | (detect-coding-region (point) (point-max)))) | |
775 | (or (car-safe coding-systems) | |
776 | coding-systems))) | |
777 | (defun mm-detect-coding-region (start end) | |
778 | (let ((point (point))) | |
779 | (goto-char start) | |
780 | (skip-chars-forward "\0-\177" end) | |
781 | (prog1 | |
782 | (if (eq (point) end) 'ascii (mm-guess-charset)) | |
783 | (goto-char point))))) | |
784 | ||
785 | (if (fboundp 'coding-system-get) | |
786 | (defun mm-detect-mime-charset-region (start end) | |
787 | "Detect MIME charset of the text in the region between START and END." | |
788 | (let ((cs (mm-detect-coding-region start end))) | |
789 | (coding-system-get cs 'mime-charset))) | |
790 | (defun mm-detect-mime-charset-region (start end) | |
791 | "Detect MIME charset of the text in the region between START and END." | |
792 | (let ((cs (mm-detect-coding-region start end))) | |
793 | cs))) | |
794 | ||
3efe5554 | 795 | |
c113de23 GM |
796 | (provide 'mm-util) |
797 | ||
b8898393 | 798 | ;;; arch-tag: 94dc5388-825d-4fd1-bfa5-2100aa351238 |
c113de23 | 799 | ;;; mm-util.el ends here |