Commit | Line | Data |
---|---|---|
95fa1ff7 | 1 | ;;; mm-util.el --- Utility functions for Mule and low level things |
e84b4b86 | 2 | |
2e62b574 | 3 | ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, |
114f9c96 | 4 | ;; 2007, 2008, 2009, 2010 Free Software Foundation, Inc. |
c113de23 GM |
5 | |
6 | ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | |
7 | ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> | |
8 | ;; This file is part of GNU Emacs. | |
9 | ||
5e809f55 | 10 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
c113de23 | 11 | ;; it under the terms of the GNU General Public License as published by |
5e809f55 GM |
12 | ;; the Free Software Foundation, either version 3 of the License, or |
13 | ;; (at your option) any later version. | |
c113de23 GM |
14 | |
15 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
5e809f55 | 17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
c113de23 GM |
18 | ;; GNU General Public License for more details. |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
5e809f55 | 21 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
c113de23 GM |
22 | |
23 | ;;; Commentary: | |
24 | ||
25 | ;;; Code: | |
26 | ||
;; For Emacs < 22.2: when `declare-function' is not defined, define it
;; as a macro with an empty body so that calls to it expand to nil.
(eval-and-compile
  (or (fboundp 'declare-function)
      (defmacro declare-function (&rest r))))
30 | ||
23f87bed | 31 | (eval-when-compile (require 'cl)) |
c113de23 GM |
32 | (require 'mail-prsvr) |
33 | ||
;; Load timer support.  On XEmacs, `timer-funcs' is tried first; `timer'
;; is required when that attempt fails, and always on Emacs.
(eval-and-compile
  (unless (and (featurep 'xemacs)
               (ignore-errors
                 (require 'timer-funcs)))
    (require 'timer)))
40 | ||
;; Forward declaration: the variable is only referenced inside fallback
;; functions below and receives its value later in this file.
(defvar mm-mime-mule-charset-alist)

;; Emulate functions that are not available in every (X)Emacs version.
;; The name of a function is prefixed with mm-, like `mm-char-int' for
;; `char-int' that is a native XEmacs function, not available in Emacs.
;; Gnus programs all should use mm- functions, not the original ones.
;;
;; Every entry of the table is (FUNCTION . FALLBACK).  `mm-FUNCTION' is
;; made an alias of FUNCTION when FUNCTION is defined, and of FALLBACK
;; otherwise.
(eval-and-compile
  (dolist (entry
           `(;; `coding-system-list' is not available in XEmacs 21.4 built
             ;; without the `file-coding' feature.
             (coding-system-list . ignore)
             ;; `char-int' is an XEmacs function, not available in Emacs.
             (char-int . identity)
             ;; `coding-system-equal' is an Emacs function, not available in
             ;; XEmacs.
             (coding-system-equal . equal)
             ;; `annotationp' is an XEmacs function, not available in Emacs.
             (annotationp . ignore)
             ;; `set-buffer-file-coding-system' is not available in XEmacs 21.4
             ;; built without the `file-coding' feature.
             (set-buffer-file-coding-system . ignore)
             ;; `read-charset' is an Emacs function, not available in XEmacs.
             (read-charset
              . ,(lambda (prompt)
                   "Return a charset."
                   (intern
                    (completing-read
                     prompt
                     (mapcar (lambda (e) (list (symbol-name (car e))))
                             mm-mime-mule-charset-alist)
                     nil t))))
             ;; `subst-char-in-string' is not available in XEmacs 21.4.
             (subst-char-in-string
              . ,(lambda (from to string &optional inplace)
                   ;; stolen (and renamed) from nnheader.el
                   "Replace characters in STRING from FROM to TO.
Unless optional argument INPLACE is non-nil, return a new string."
                   (let ((result (if inplace string (copy-sequence string))))
                     ;; Replace all occurrences of FROM with TO.
                     (dotimes (i (length result))
                       (when (= (aref result i) from)
                         (aset result i to)))
                     result)))
             ;; `replace-in-string' is an XEmacs function, not available in
             ;; Emacs.
             (replace-in-string
              . ,(lambda (string regexp rep &optional literal)
                   "See `replace-regexp-in-string', only the order of args differs."
                   (replace-regexp-in-string regexp rep string nil literal)))
             ;; `string-as-unibyte' is an Emacs function, not available in
             ;; XEmacs.
             (string-as-unibyte . identity)
             ;; `string-make-unibyte' is an Emacs function, not available in
             ;; XEmacs.
             (string-make-unibyte . identity)
             ;; string-as-multibyte often doesn't really do what you think it
             ;; does.  Example:
             ;;    (aref (string-as-multibyte "\201") 0) -> 129 (aka ?\201)
             ;;    (aref (string-as-multibyte "\300") 0) -> 192 (aka ?\300)
             ;;    (aref (string-as-multibyte "\300\201") 0) -> 192 (aka ?\300)
             ;;    (aref (string-as-multibyte "\300\201") 1) -> 129 (aka ?\201)
             ;; but
             ;;    (aref (string-as-multibyte "\201\300") 0) -> 2240
             ;;    (aref (string-as-multibyte "\201\300") 1) -> <error>
             ;; Better use string-to-multibyte or encode-coding-string.
             ;; If you really need string-as-multibyte somewhere it's usually
             ;; because you're using the internal emacs-mule representation
             ;; (maybe because you're using string-as-unibyte somewhere), which
             ;; is generally a problem in itself.
             ;; Here is an approximate equivalence table to help think about it:
             ;; (string-as-multibyte s)   ~= (decode-coding-string s 'emacs-mule)
             ;; (string-to-multibyte s)   ~= (decode-coding-string s 'binary)
             ;; (string-make-multibyte s) ~= (decode-coding-string s locale-coding-system)
             ;; `string-as-multibyte' is an Emacs function, not available in
             ;; XEmacs.
             (string-as-multibyte . identity)
             ;; `multibyte-string-p' is an Emacs function, not available in
             ;; XEmacs.
             (multibyte-string-p . ignore)
             ;; `insert-byte' is available only in Emacs 23.1 or greater.
             (insert-byte . insert-char)
             ;; `multibyte-char-to-unibyte' is an Emacs function, not available
             ;; in XEmacs.
             (multibyte-char-to-unibyte . identity)
             ;; `set-buffer-multibyte' is an Emacs function, not available in
             ;; XEmacs.
             (set-buffer-multibyte . ignore)
             ;; `special-display-p' is an Emacs function, not available in
             ;; XEmacs.
             (special-display-p
              . ,(lambda (buffer-name)
                   "Return non-nil if a buffer named BUFFER-NAME gets a special frame."
                   (and special-display-function
                        (or (and (member buffer-name
                                         special-display-buffer-names)
                                 t)
                            (cdr (assoc buffer-name
                                        special-display-buffer-names))
                            (catch 'return
                              (dolist (elem special-display-regexps)
                                (cond
                                 ;; A bare regexp: a match means "special".
                                 ((stringp elem)
                                  (when (string-match elem buffer-name)
                                    (throw 'return t)))
                                 ;; (REGEXP . ARGS): a match yields ARGS.
                                 ((and (consp elem)
                                       (stringp (car elem))
                                       (string-match (car elem) buffer-name))
                                  (throw 'return (cdr elem))))))))))
             ;; `substring-no-properties' is available only in Emacs 22.1 or
             ;; greater.
             (substring-no-properties
              . ,(lambda (string &optional from to)
                   "Return a substring of STRING, without text properties.
It starts at index FROM and ending before TO.
TO may be nil or omitted; then the substring runs to the end of STRING.
If FROM is nil or omitted, the substring starts at the beginning of STRING.
If FROM or TO is negative, it counts from the end.

With one argument, just copy STRING without its properties."
                   (let ((copy (substring string (or from 0) to)))
                     (set-text-properties 0 (length copy) nil copy)
                     copy)))
             ;; `line-number-at-pos' is available only in Emacs 22.1 or greater
             ;; and XEmacs 21.5.
             (line-number-at-pos
              . ,(lambda (&optional pos)
                   "Return (narrowed) buffer line number at position POS.
If POS is nil, use current buffer location.
Counting starts at (point-min), so the value refers
to the contents of the accessible portion of the buffer."
                   (save-excursion
                     (goto-char (or pos (point)))
                     (forward-line 0)
                     (1+ (count-lines (point-min) (point))))))))
    (defalias (intern (format "mm-%s" (car entry)))
      (if (fboundp (car entry))
          (car entry)
        (cdr entry)))))
f53b2875 | 173 | |
;; `decode-coding-string', `encode-coding-string', `decode-coding-region'
;; and `encode-coding-region' are available in Emacs and XEmacs built with
;; the `file-coding' feature, but the XEmacs versions treat nil, that is
;; given as the `coding-system' argument, as the `binary' coding system.
;;
;; Three cases are distinguished below: Emacs (plain aliases), XEmacs with
;; `file-coding' (wrappers that do nothing for a nil coding system), and
;; XEmacs without it (no conversion at all).
(eval-and-compile
  (cond
   ((not (featurep 'xemacs))
    (defalias 'mm-decode-coding-string 'decode-coding-string)
    (defalias 'mm-encode-coding-string 'encode-coding-string)
    (defalias 'mm-decode-coding-region 'decode-coding-region)
    (defalias 'mm-encode-coding-region 'encode-coding-region))
   ((featurep 'file-coding)
    (defun mm-decode-coding-string (str coding-system)
      (if coding-system
          (decode-coding-string str coding-system)
        str))
    (defun mm-encode-coding-string (str coding-system)
      (if coding-system
          (encode-coding-string str coding-system)
        str))
    (defun mm-decode-coding-region (start end coding-system)
      (when coding-system
        (decode-coding-region start end coding-system)))
    (defun mm-encode-coding-region (start end coding-system)
      (when coding-system
        (encode-coding-region start end coding-system))))
   (t
    (defun mm-decode-coding-string (str coding-system) str)
    (defun mm-encode-coding-string (str coding-system) str)
    (defalias 'mm-decode-coding-region 'ignore)
    (defalias 'mm-encode-coding-region 'ignore))))
204 | ||
;; `string-to-multibyte' is available only in Emacs 22.1 or greater.
;; XEmacs gets `identity'; an Emacs without the native function gets a
;; fallback that converts the string one character at a time.
(defalias 'mm-string-to-multibyte
  (if (featurep 'xemacs)
      'identity
    (if (fboundp 'string-to-multibyte)
        'string-to-multibyte
      (lambda (string)
        "Return a multibyte string with the same individual chars as STRING."
        (mapconcat
         (lambda (char) (mm-string-as-multibyte (char-to-string char)))
         string "")))))
218 | ||
;; `char-or-char-int-p' is an XEmacs function, not available in Emacs.
;; Use it when defined, then `char-valid-p', and `identity' as a last
;; resort.
(eval-and-compile
  (defalias 'mm-char-or-char-int-p
    (or (and (fboundp 'char-or-char-int-p) 'char-or-char-int-p)
        (and (fboundp 'char-valid-p) 'char-valid-p)
        'identity)))
226 | ||
;; `ucs-to-char' is a function that Mule-UCS provides.
;; `mm-ucs-to-char' converts a Unicode codepoint to a character; every
;; function defined below falls back to ?# when the conversion yields nil,
;; and the XEmacs fallbacks also do so when it signals an error.
(cond
 ;; Emacs.
 ((not (featurep 'xemacs))
  (if (let ((char (make-char 'japanese-jisx0208 36 34)))
        (eq char (decode-char 'ucs char)))
      ;; Emacs 23.
      (defalias 'mm-ucs-to-char 'identity)
    (defun mm-ucs-to-char (codepoint)
      "Convert Unicode codepoint to character."
      (or (decode-char 'ucs codepoint) ?#))))
 ;; XEmacs with a built-in `unicode-to-char'.
 ((and (fboundp 'unicode-to-char) ;; XEmacs 21.5.
       (subrp (symbol-function 'unicode-to-char)))
  (if (featurep 'mule)
      (defalias 'mm-ucs-to-char 'unicode-to-char)
    (defun mm-ucs-to-char (codepoint)
      "Convert Unicode codepoint to character."
      (or (unicode-to-char codepoint) ?#))))
 ;; XEmacs with Mule, where Mule-UCS may be loaded later.
 ((featurep 'mule)
  (defun mm-ucs-to-char (codepoint)
    "Convert Unicode codepoint to character."
    (cond
     ((fboundp 'ucs-to-char) ;; Mule-UCS is loaded.
      ;; Replace this function with one that uses `ucs-to-char' directly,
      ;; then hand the current call over to the replacement.
      (defalias 'mm-ucs-to-char
        (lambda (codepoint)
          "Convert Unicode codepoint to character."
          (or (ignore-errors (ucs-to-char codepoint)) ?#)))
      (mm-ucs-to-char codepoint))
     (t
      (or (ignore-errors (int-to-char codepoint)) ?#)))))
 ;; XEmacs without Mule.
 (t
  (defun mm-ucs-to-char (codepoint)
    "Convert Unicode codepoint to character."
    (or (ignore-errors (int-to-char codepoint)) ?#))))
264 | ||
;; Fixme: This seems always to be used to read a MIME charset, so it
;; should be re-named and fixed (in Emacs) to offer completion only on
;; proper charset names (base coding systems which have a
;; mime-charset defined).  XEmacs doesn't believe in mime-charset;
;; test with
;;   `(or (coding-system-get 'iso-8859-1 'mime-charset)
;;        (coding-system-get 'iso-8859-1 :mime-charset))'
;; Actually, there should be an `mm-coding-system-mime-charset'.
(eval-and-compile
  (defalias 'mm-read-coding-system
    (if (fboundp 'read-coding-system)
        (cond
         ;; On XEmacs up to 21.1 only PROMPT is passed on to
         ;; `read-coding-system'.
         ((and (featurep 'xemacs)
               (<= (string-to-number emacs-version) 21.1))
          (lambda (prompt &optional default-coding-system)
            (read-coding-system prompt)))
         (t 'read-coding-system))
      ;; No native reader: complete over the MIME charset names.
      (lambda (prompt &optional default-coding-system)
        "Prompt the user for a coding system."
        (completing-read
         prompt (mapcar (lambda (entry) (list (symbol-name (car entry))))
                        mm-mime-mule-charset-alist))))))
287 | ||
;; Cache for `mm-get-coding-system-list'.
(defvar mm-coding-system-list nil)
(defun mm-get-coding-system-list ()
  "Get the coding system list.
The result of `mm-coding-system-list' is cached in the variable
`mm-coding-system-list' once it is non-nil."
  (unless mm-coding-system-list
    (setq mm-coding-system-list (mm-coding-system-list)))
  mm-coding-system-list)
293 | ||
(defun mm-coding-system-p (cs)
  "Return non-nil if CS is a symbol naming a coding system.
In XEmacs, also return non-nil if CS is a coding system object.
If CS is available, return CS itself in Emacs, and return a coding
system object in XEmacs."
  (cond
   ((fboundp 'find-coding-system)
    (and cs (find-coding-system cs)))
   ((fboundp 'coding-system-p)
    (and (coding-system-p cs) cs))
   ;; no-MULE XEmacs:
   (t
    (car (memq cs (mm-get-coding-system-list))))))
95fa1ff7 | 306 | |
(defun mm-codepage-setup (number &optional alias)
  "Create a coding system cpNUMBER.
The coding system is created using `codepage-setup'.  If ALIAS is
non-nil, an alias is created and added to
`mm-charset-synonym-alist'.  If ALIAS is a string, it's used as
the alias.  Else windows-NUMBER is used."
  (interactive
   (let ((completion-ignore-case t)
         (candidates
          (cond
           ((fboundp 'cp-supported-codepages)
            (cp-supported-codepages))
           ;; Removed in Emacs 23 (unicode), so signal an error:
           (t
            (error "`codepage-setup' not present in this Emacs version")))))
     (list (completing-read "Setup DOS Codepage: (default 437) " candidates
                            nil t nil nil "437"))))
  (let ((cp (intern (format "cp%s" number)))
        ;; Symbol to register as a synonym, or nil when no alias is wanted.
        (synonym (and alias
                      (intern (if (stringp alias)
                                  alias
                                (format "windows-%s" number))))))
    (unless (mm-coding-system-p cp)
      (if (fboundp 'codepage-setup)     ; silence compiler
          (codepage-setup number)
        (error "`codepage-setup' not present in this Emacs version")))
    ;; Don't add alias if setup of cp failed.
    (when (and synonym (mm-coding-system-p cp))
      (add-to-list 'mm-charset-synonym-alist (cons synonym cp)))))
334 | ||
(defvar mm-charset-synonym-alist
  ;; Each group contributes its pair only when the bogus name on the left
  ;; is not itself a known coding system in the running Emacs.
  (append
   ;; Not in XEmacs, but it's not a proper MIME charset anyhow.
   (unless (mm-coding-system-p 'x-ctext)
     '((x-ctext . ctext)))
   ;; ISO-8859-15 is very similar to ISO-8859-1.  But it's _different_ in 8
   ;; positions!
   (unless (mm-coding-system-p 'iso-8859-15)
     '((iso-8859-15 . iso-8859-1)))
   ;; BIG-5HKSCS is similar to, but different than, BIG-5.
   (unless (mm-coding-system-p 'big5-hkscs)
     '((big5-hkscs . big5)))
   ;; A Microsoft misunderstanding.
   (and (not (mm-coding-system-p 'unicode))
        (mm-coding-system-p 'utf-16-le)
        '((unicode . utf-16-le)))
   ;; A Microsoft misunderstanding.
   (unless (mm-coding-system-p 'ks_c_5601-1987)
     (list (cons 'ks_c_5601-1987
                 (if (mm-coding-system-p 'cp949) 'cp949 'euc-kr))))
   ;; Windows-31J is Windows Codepage 932.
   (and (not (mm-coding-system-p 'windows-31j))
        (mm-coding-system-p 'cp932)
        '((windows-31j . cp932)))
   ;; Charset name: GBK, Charset aliases: CP936, MS936, windows-936
   ;; http://www.iana.org/assignments/charset-reg/GBK
   ;; Emacs 22.1 has cp936, but not gbk, so we alias it:
   (and (not (mm-coding-system-p 'gbk))
        (mm-coding-system-p 'cp936)
        '((gbk . cp936)))
   ;; UTF8 is a bogus name for UTF-8
   (and (not (mm-coding-system-p 'utf8))
        (mm-coding-system-p 'utf-8)
        '((utf8 . utf-8)))
   ;; ISO8859-1 is a bogus name for ISO-8859-1
   (and (not (mm-coding-system-p 'iso8859-1))
        (mm-coding-system-p 'iso-8859-1)
        '((iso8859-1 . iso-8859-1)))
   ;; ISO_8859-1 is a bogus name for ISO-8859-1
   (and (not (mm-coding-system-p 'iso_8859-1))
        (mm-coding-system-p 'iso-8859-1)
        '((iso_8859-1 . iso-8859-1))))
  "A mapping from unknown or invalid charset names to the real charset names.

See `mm-codepage-iso-8859-list' and `mm-codepage-ibm-list'.")
382 | ||
(defcustom mm-codepage-iso-8859-list
  '(;; Windows-1250 is a variant of Latin-2 heavily used by Microsoft
    ;; Outlook users in Czech republic.  Use this to allow reading of
    ;; their e-mails.  cp1250 should be defined by M-x codepage-setup
    ;; (Emacs 21).
    1250
    ;; Windows-1252 is a superset of iso-8859-1 (West Europe).  See also
    ;; `gnus-article-dumbquotes-map'.
    (1252 . 1)
    ;; Windows-1254 is a superset of iso-8859-9 (Turkish).
    (1254 . 9)
    ;; Windows-1255 is a superset of iso-8859-8 (Hebrew).
    (1255 . 8))
  "A list of Windows codepage numbers and iso-8859 charset numbers.

If an element is a number corresponding to a supported windows
codepage, appropriate entries to `mm-charset-synonym-alist' are
added by `mm-setup-codepage-iso-8859'.  An element may also be a
cons cell where the car is a codepage number and the cdr is the
corresponding number of an iso-8859 charset."
  :type '(list (set :inline t
                    (const 1250 :tag "Central and East European")
                    (const (1252 . 1) :tag "West European")
                    (const (1254 . 9) :tag "Turkish")
                    (const (1255 . 8) :tag "Hebrew"))
               (repeat :inline t
                       :tag "Other options"
                       (choice
                        (integer :tag "Windows codepage number")
                        (cons (integer :tag "Windows codepage number")
                              (integer :tag "iso-8859 charset number")))))
  :version "22.1" ;; Gnus 5.10.9
  :group 'mime)
412 | ||
(defcustom mm-codepage-ibm-list
  (list 437 ;; (US etc.)
        860 ;; (Portugal)
        861 ;; (Iceland)
        862 ;; (Israel)
        863 ;; (Canadian French)
        865 ;; (Nordic)
        852 ;;
        850 ;; (Latin 1)
        855 ;; (Cyrillic)
        866 ;; (Cyrillic - Russian)
        857 ;; (Turkish)
        864 ;; (Arabic)
        869 ;; (Greek)
        874);; (Thai)
  ;; In Emacs 23 (unicode), cp... and ibm... are aliases.
  ;; Cf. http://thread.gmane.org/v9lkng5nwy.fsf@marauder.physik.uni-ulm.de
  "List of IBM codepage numbers.

The codepage mappings slightly differ between IBM and other vendors.
See \"ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/IBM/README.TXT\".

If an element is a number corresponding to a supported windows
codepage, appropriate entries to `mm-charset-synonym-alist' are
added by `mm-setup-codepage-ibm'."
  :type '(list (set :inline t
                    (const 437 :tag "US etc.")
                    (const 860 :tag "Portugal")
                    (const 861 :tag "Iceland")
                    (const 862 :tag "Israel")
                    (const 863 :tag "Canadian French")
                    (const 865 :tag "Nordic")
                    (const 852)
                    (const 850 :tag "Latin 1")
                    (const 855 :tag "Cyrillic")
                    (const 866 :tag "Cyrillic - Russian")
                    (const 857 :tag "Turkish")
                    (const 864 :tag "Arabic")
                    (const 869 :tag "Greek")
                    (const 874 :tag "Thai"))
               (repeat :inline t
                       :tag "Other options"
                       (integer :tag "Codepage number")))
  :version "22.1" ;; Gnus 5.10.9
  :group 'mime)
458 | ||
(defun mm-setup-codepage-iso-8859 (&optional list)
  "Add appropriate entries to `mm-charset-synonym-alist'.
Unless LIST is given, `mm-codepage-iso-8859-list' is used.

Each element of LIST is either a codepage NUMBER or a cons
\(NUMBER . ISO).  Unless windows-NUMBER already names a coding
system, it is mapped to cpNUMBER when that coding system exists,
and otherwise to iso-8859-ISO.  An element without an ISO part adds
nothing when cpNUMBER does not exist."
  (unless list
    (setq list mm-codepage-iso-8859-list))
  (dolist (i list)
    (let* ((number (if (consp i) (car i) i))
           (cp (intern (format "cp%d" number)))
           (windows (intern (format "windows-%d" number)))
           ;; Only cons elements carry an iso-8859 fallback.
           (iso (and (consp i)
                     (intern (format "iso-8859-%d" (cdr i))))))
      (unless (mm-coding-system-p windows)
        (cond
         ((mm-coding-system-p cp)
          (add-to-list 'mm-charset-synonym-alist (cons windows cp)))
         ;; Without a fallback there is nothing sensible to map to; do not
         ;; register a synonym whose target is nil.
         (iso
          (add-to-list 'mm-charset-synonym-alist (cons windows iso))))))))
476 | ||
(defun mm-setup-codepage-ibm (&optional list)
  "Add appropriate entries to `mm-charset-synonym-alist'.
Unless LIST is given, `mm-codepage-ibm-list' is used.

For every number N in LIST, ibmN is registered as a synonym for cpN
when ibmN is not a coding system but cpN is."
  (or list
      (setq list mm-codepage-ibm-list))
  (dolist (codepage list)
    (let ((cp-name (intern (format "cp%d" codepage)))
          (ibm-name (intern (format "ibm%d" codepage))))
      (unless (or (mm-coding-system-p ibm-name)
                  (not (mm-coding-system-p cp-name)))
        (add-to-list 'mm-charset-synonym-alist (cons ibm-name cp-name))))))
488 | ||
;; Initialize: extend `mm-charset-synonym-alist' at load time from the two
;; customizable codepage lists.
(mm-setup-codepage-iso-8859 mm-codepage-iso-8859-list)
(mm-setup-codepage-ibm mm-codepage-ibm-list)
bd29ba20 | 492 | |
;; Note: this has to be defined before `mm-charset-to-coding-system'.
(defcustom mm-charset-eval-alist
  (if (featurep 'xemacs)
      nil ;; I don't know what would be useful for XEmacs.
    '(;; Emacs 21 offers 1250 1251 1253 1257.  Emacs 22 provides autoloads for
      ;; 1250-1258 (i.e. `mm-codepage-setup' does nothing).
      (windows-1250 . (mm-codepage-setup 1250 t))
      (windows-1251 . (mm-codepage-setup 1251 t))
      (windows-1253 . (mm-codepage-setup 1253 t))
      (windows-1257 . (mm-codepage-setup 1257 t))))
  "An alist of (CHARSET . FORM) pairs.
If an article is encoded in an unknown CHARSET, FORM is
evaluated.  This allows loading additional libraries providing
charsets on demand.  If supported by your Emacs version, you
could use `autoload-coding-system' here."
  :version "22.1" ;; Gnus 5.10.9
  :type '(list (set :inline t
                    (const (windows-1250 . (mm-codepage-setup 1250 t)))
                    (const (windows-1251 . (mm-codepage-setup 1251 t)))
                    (const (windows-1253 . (mm-codepage-setup 1253 t)))
                    (const (windows-1257 . (mm-codepage-setup 1257 t)))
                    (const (cp850 . (mm-codepage-setup 850 nil))))
               (repeat :inline t
                       :tag "Other options"
                       (cons (symbol :tag "charset")
                             ;; FORM is evaluated, so it can be any Lisp
                             ;; expression, not only a symbol.
                             (sexp :tag "form"))))
  :group 'mime)
;; FORM is evaluated, so the variable is marked risky as a local variable.
(put 'mm-charset-eval-alist 'risky-local-variable t)
521 | ||
;; Forward declaration; the user option itself is defined after the
;; function below.
(defvar mm-charset-override-alist)

;; Note: this function has to be defined before `mm-charset-override-alist'
;; since it will use this function in order to determine its default value
;; when loading mm-util.elc.
(defun mm-charset-to-coding-system (charset &optional lbt
                                            allow-override silent)
  "Return coding-system corresponding to CHARSET.
CHARSET is a symbol naming a MIME charset.
If optional argument LBT (`unix', `dos' or `mac') is specified, it is
used as the line break code type of the coding system.

If ALLOW-OVERRIDE is given, use `mm-charset-override-alist' to
map undesired charset names to their replacement.  This should
only be used for decoding, not for encoding.

A non-nil value of SILENT means don't issue a warning even if CHARSET
is not available.

CHARSET may also be a string, which is downcased and interned.
Return nil if CHARSET is nil or no matching coding system is found."
  ;; OVERRIDE is used (only) in `mm-decode-body' and `mm-decode-string'.
  (when (stringp charset)
    (setq charset (intern (downcase charset))))
  ;; Append LBT to a real charset only; a nil CHARSET must stay nil rather
  ;; than turn into a bogus symbol such as `nil-unix'.
  (when (and charset lbt)
    (setq charset (intern (format "%s-%s" charset lbt))))
  (cond
   ((null charset)
    charset)
   ;; Running in a non-MULE environment.
   ((or (null (mm-get-coding-system-list))
        (not (fboundp 'coding-system-get)))
    charset)
   ;; Check override list quite early.  Should only used for decoding, not for
   ;; encoding!
   ((and allow-override
         (let ((cs (cdr (assq charset mm-charset-override-alist))))
           (and cs (mm-coding-system-p cs) cs))))
   ;; ascii
   ((eq charset 'us-ascii)
    'ascii)
   ;; Check to see whether we can handle this charset.  (This depends
   ;; on there being some coding system matching each `mime-charset'
   ;; property defined, as there should be.)
   ((and (mm-coding-system-p charset)
;;; Doing this would potentially weed out incorrect charsets.
;;;      charset
;;;      (eq charset (coding-system-get charset 'mime-charset))
         )
    charset)
   ;; Eval expressions from `mm-charset-eval-alist'
   ((let* ((el (assq charset mm-charset-eval-alist))
           (cs (car el))
           (form (cdr el)))
      (when (and cs form)
        ;; Avoid errors...
        (condition-case nil (eval form) (error nil))
        ;; Announce the charset only when evaluating FORM really made it
        ;; available; otherwise fall through to the next clause.
        (when (mm-coding-system-p cs)
          (message "Added charset `%s' via `mm-charset-eval-alist'" cs)
          cs))))
   ;; Translate invalid charsets.
   ((let ((cs (cdr (assq charset mm-charset-synonym-alist))))
      (and cs
           (mm-coding-system-p cs)
           ;; (message
           ;;  "Using synonym `%s' from `mm-charset-synonym-alist' for `%s'"
           ;;  cs charset)
           cs)))
   ;; Last resort: search the coding system list for entries which
   ;; have the right mime-charset in case the canonical name isn't
   ;; defined (though it should be).
   ((let (cs)
      ;; mm-get-coding-system-list returns a list of cs without lbt.
      ;; Do we need -lbt?
      (dolist (c (mm-get-coding-system-list))
        ;; Keep the first match only.
        (if (and (null cs)
                 (eq charset (or (coding-system-get c :mime-charset)
                                 (coding-system-get c 'mime-charset))))
            (setq cs c)))
      (unless (or silent cs)
        ;; Warn the user about unknown charset:
        (if (fboundp 'gnus-message)
            (gnus-message 7 "Unknown charset: %s" charset)
          (message "Unknown charset: %s" charset)))
      cs))))
607 | ||
;; Note: `mm-charset-to-coding-system' has to be defined before this.
(defcustom mm-charset-override-alist
  ;; Note: pairs that cannot be used in the Emacs version currently running
  ;; will be removed.
  '((gb2312 . gbk)
    (iso-8859-1 . windows-1252)
    (iso-8859-8 . windows-1255)
    (iso-8859-9 . windows-1254))
  "A mapping from undesired charset names to their replacement.

You may add pairs like (iso-8859-1 . windows-1252) here,
i.e. treat iso-8859-1 as windows-1252. windows-1252 is a
superset of iso-8859-1."
  :type
  '(list
    :convert-widget
    (lambda (widget)
      ;; Build the widget type dynamically: offer the known pairs whose
      ;; replacement charset is usable here, plus whatever the default
      ;; value of the option currently holds.
      (let ((defaults
              (delq nil
                    (mapcar (lambda (pair)
                              (and (mm-charset-to-coding-system (cdr pair)
                                                                nil nil t)
                                   pair))
                            '((gb2312 . gbk)
                              (iso-8859-1 . windows-1252)
                              (iso-8859-8 . windows-1255)
                              (iso-8859-9 . windows-1254)
                              (undecided . windows-1252)))))
            (val (copy-sequence (default-value 'mm-charset-override-alist)))
            pair rest)
        (dolist (item val)
          ;; Look up the known pair sharing ITEM's car and drop it from
          ;; DEFAULTS so that it is not offered a second time below.
          (setq pair (assq (car item) defaults)
                defaults (delq pair defaults))
          (push (if (and pair (equal item pair))
                    `(const ,pair)
                  `(cons :format "%v"
                         (const :format "(%v" ,(car item))
                         (symbol :size 3 :format " . %v)\n" ,(cdr item))))
                rest))
        ;; Known pairs not present in the current value.
        (dolist (leftover defaults)
          (push `(const ,leftover) rest))
        (widget-convert
         'list
         `(set :inline t :format "%v" ,@(nreverse rest))
         `(repeat :inline t :tag "Other options"
                  (cons :format "%v"
                        (symbol :size 3 :format "(%v")
                        (symbol :size 3 :format " . %v)\n")))))))
  ;; Remove pairs that cannot be used in the Emacs version currently
  ;; running.  Note that this section will be evaluated when loading
  ;; mm-util.elc.
  :set (lambda (symbol value)
         (custom-set-default
          symbol
          (delq nil
                (mapcar (lambda (pair)
                          (and (mm-charset-to-coding-system (cdr pair)
                                                            nil nil t)
                               pair))
                        value))))
  :version "22.1" ;; Gnus 5.10.9
  :group 'mime)
671 | ||
(defvar mm-binary-coding-system
  ;; Prefer `binary', then `no-conversion'; nil when neither exists.
  (or (and (mm-coding-system-p 'binary) 'binary)
      (and (mm-coding-system-p 'no-conversion) 'no-conversion))
  "100% binary coding system.")
678 | ||
(defvar mm-text-coding-system
  ;; Pick the raw-text variant matching the host platform, and fall back
  ;; to the binary coding system when that variant is unavailable.
  (let ((candidate (if (memq system-type '(windows-nt ms-dos ms-windows))
                       'raw-text-dos
                     'raw-text)))
    (if (mm-coding-system-p candidate)
        candidate
      mm-binary-coding-system))
  "Text-safe coding system (For removing ^M).
On `windows-nt', `ms-dos' and `ms-windows' systems the candidate is
`raw-text-dos', elsewhere `raw-text'; when the candidate is not a
known coding system the value of `mm-binary-coding-system' is used.")
685 | ||
(defvar mm-text-coding-system-for-write nil
  "Text coding system for write.
When non-nil, `mm-append-to-file' and `mm-write-region' use it in
preference to `mm-text-coding-system', unless their caller passes an
explicit coding system.")
688 | ||
(defvar mm-auto-save-coding-system
  (let ((dos-like (memq system-type '(windows-nt ms-dos ms-windows))))
    (cond
     ;; Mule 7
     ((mm-coding-system-p 'utf-8-emacs)
      (cond ((not dos-like) 'utf-8-emacs)
            ((mm-coding-system-p 'utf-8-emacs-dos) 'utf-8-emacs-dos)
            (t mm-binary-coding-system)))
     ((mm-coding-system-p 'emacs-mule)
      (cond ((not dos-like) 'emacs-mule)
            ((mm-coding-system-p 'emacs-mule-dos) 'emacs-mule-dos)
            (t mm-binary-coding-system)))
     ((mm-coding-system-p 'escape-quoted) 'escape-quoted)
     (t mm-binary-coding-system)))
  "Coding system of auto save file.
The first available of `utf-8-emacs', `emacs-mule' and `escape-quoted'
is chosen.  On `windows-nt', `ms-dos' and `ms-windows' systems the
`-dos' variant of the first two is used, or the value of
`mm-binary-coding-system' when that variant does not exist.  If none
of the three is available the value of `mm-binary-coding-system' is
used.")
704 | ||
(defvar mm-universal-coding-system mm-auto-save-coding-system
  "The universal coding system.
Its initial value is the value of `mm-auto-save-coding-system'.")
95fa1ff7 SZ |
707 | |
;; Fixme: some of the cars here aren't valid MIME charsets.  That
;; should only matter with XEmacs, though.
(defvar mm-mime-mule-charset-alist
  `((us-ascii ascii)
    (iso-8859-1 latin-iso8859-1)
    (iso-8859-2 latin-iso8859-2)
    (iso-8859-3 latin-iso8859-3)
    (iso-8859-4 latin-iso8859-4)
    (iso-8859-5 cyrillic-iso8859-5)
    ;; Non-mule (X)Emacs uses the last mule-charset for 8bit characters.
    ;; The fake mule-charset, gnus-koi8-r, tells Gnus that the default
    ;; charset is koi8-r, not iso-8859-5.
    (koi8-r cyrillic-iso8859-5 gnus-koi8-r)
    (iso-8859-6 arabic-iso8859-6)
    (iso-8859-7 greek-iso8859-7)
    (iso-8859-8 hebrew-iso8859-8)
    (iso-8859-9 latin-iso8859-9)
    (iso-8859-14 latin-iso8859-14)
    (iso-8859-15 latin-iso8859-15)
    (viscii vietnamese-viscii-lower)
    (iso-2022-jp latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978)
    (euc-kr korean-ksc5601)
    (gb2312 chinese-gb2312)
    (gbk chinese-gbk)
    (gb18030 gb18030-2-byte
             gb18030-4-byte-bmp gb18030-4-byte-smp
             gb18030-4-byte-ext-1 gb18030-4-byte-ext-2)
    (big5 chinese-big5-1 chinese-big5-2)
    (tibetan tibetan)
    (thai-tis620 thai-tis620)
    (windows-1251 cyrillic-iso8859-5)
    (iso-2022-7bit ethiopic arabic-1-column arabic-2-column)
    (iso-2022-jp-2 latin-iso8859-1 greek-iso8859-7
                   latin-jisx0201 japanese-jisx0208-1978
                   chinese-gb2312 japanese-jisx0208
                   korean-ksc5601 japanese-jisx0212)
    (iso-2022-int-1 latin-iso8859-1 greek-iso8859-7
                    latin-jisx0201 japanese-jisx0208-1978
                    chinese-gb2312 japanese-jisx0208
                    korean-ksc5601 japanese-jisx0212
                    chinese-cns11643-1 chinese-cns11643-2)
    ;; NOTE(review): this key repeats the previous entry's key, so an
    ;; `assq' lookup by MIME charset can never reach this element --
    ;; confirm whether a different charset name was intended.
    (iso-2022-int-1 latin-iso8859-1 latin-iso8859-2
                    cyrillic-iso8859-5 greek-iso8859-7
                    latin-jisx0201 japanese-jisx0208-1978
                    chinese-gb2312 japanese-jisx0208
                    korean-ksc5601 japanese-jisx0212
                    chinese-cns11643-1 chinese-cns11643-2
                    chinese-cns11643-3 chinese-cns11643-4
                    chinese-cns11643-5 chinese-cns11643-6
                    chinese-cns11643-7)
    (iso-2022-jp-3 latin-jisx0201 japanese-jisx0208-1978 japanese-jisx0208
                   japanese-jisx0213-1 japanese-jisx0213-2)
    (shift_jis latin-jisx0201 katakana-jisx0201 japanese-jisx0208)
    ;; The utf-8 entry is computed when this form is evaluated; which
    ;; branch is taken depends on the functions and coding systems the
    ;; running (X)Emacs provides.
    ,(cond ((fboundp 'unicode-precedence-list)
            (cons 'utf-8 (delq 'ascii (mapcar 'charset-name
                                              (unicode-precedence-list)))))
           ((or (not (fboundp 'charsetp)) ;; non-Mule case
                (charsetp 'unicode-a)
                (not (mm-coding-system-p 'mule-utf-8)))
            '(utf-8 unicode-a unicode-b unicode-c unicode-d unicode-e))
           (t ;; If we have utf-8 we're in Mule 5+.
            (append '(utf-8)
                    (delete 'ascii
                            (coding-system-get 'mule-utf-8 'safe-charsets))))))
  "Alist of MIME-charset/MULE-charsets.
Each element has the form (MIME-CHARSET MULE-CHARSET...).")
773 | ||
0683d241 MB |
(defun mm-enrich-utf-8-by-mule-ucs ()
  "Make the `utf-8' MIME charset usable by the Mule-UCS package.
This function will run when the `un-define' module is loaded under
XEmacs, and fill the `utf-8' entry in `mm-mime-mule-charset-alist'
with Mule charsets.  It is completely useless for Emacs.

Nothing happens unless `unicode-basic-translation-charset-order-list'
is bound.  Any error raised while updating the alist is swallowed and
nil is returned."
  (when (boundp 'unicode-basic-translation-charset-order-list)
    (ignore-errors
      ;; Work on a copy so the Mule-UCS list itself is never modified.
      (let ((charsets (delq 'ascii
                            (copy-sequence
                             (symbol-value
                              'unicode-basic-translation-charset-order-list))))
            (entry (assq 'utf-8 mm-mime-mule-charset-alist)))
        (cond
         ;; An entry already exists: replace its charset list in place.
         (entry
          (setcdr entry charsets))
         ;; Otherwise append a brand new utf-8 entry.
         (t
          (setq mm-mime-mule-charset-alist
                (nconc mm-mime-mule-charset-alist
                       (list (cons 'utf-8 charsets))))))))))
793 | ||
;; Correct by construction, but should be unnecessary for Emacs:
;; under XEmacs the utf-8 entry is filled in once "un-define" loads;
;; elsewhere the whole alist is rebuilt from the coding systems that
;; advertise a MIME charset.
(cond
 ((featurep 'xemacs)
  (eval-after-load "un-define" '(mm-enrich-utf-8-by-mule-ucs)))
 ((and (fboundp 'coding-system-list)
       (fboundp 'sort-coding-systems))
  (let (alist)
    (dolist (cs (sort-coding-systems (coding-system-list 'base-only)))
      (let ((mime (or (coding-system-get cs :mime-charset) ; Emacs 23 (unicode)
                      (coding-system-get cs 'mime-charset))))
        (when mime
          (let ((mule (coding-system-get cs 'safe-charsets)))
            ;; Skip coding systems whose safe-charsets is t, and keep
            ;; only the first coding system seen for each MIME charset.
            (unless (or (eq mule t)
                        (assq mime alist))
              (push (cons mime (delq 'ascii mule)) alist))))))
    (setq mm-mime-mule-charset-alist (nreverse alist)))))
95fa1ff7 | 811 | |
f5490ddc MB |
;; NOTE(review): presumably this is the value callers pass as the
;; HACK-CHARSETS argument of `mm-find-mime-charset-region' -- confirm
;; against the callers.
(defvar mm-hack-charsets '(iso-8859-15 iso-2022-jp-2)
  "A list of special charsets.
Valid elements include:
`iso-8859-15'    convert ISO-8859-1, -9 to ISO-8859-15 if ISO-8859-15 exists.
`iso-2022-jp-2'  convert ISO-2022-jp to ISO-2022-jp-2 if ISO-2022-jp-2 exists."
  )
818 | ||
;; Each element is (CODING-SYSTEM STRING).  `mm-iso-8859-x-to-15-table'
;; decodes STRING with CODING-SYSTEM to obtain the characters that
;; `mm-iso-8859-x-to-15-region' must not convert.
(defvar mm-iso-8859-15-compatible
  '((iso-8859-1 "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE")
    (iso-8859-9 "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE\xD0\xDD\xDE\xF0\xFD\xFE"))
  "ISO-8859-15 exchangeable coding systems and inconvertible characters.")
823 | ||
(defvar mm-iso-8859-x-to-15-table
  (when (and (fboundp 'coding-system-p)
             (mm-coding-system-p 'iso-8859-15))
    (mapcar
     (lambda (entry)
       (let ((coding (car entry))
             (bad-bytes (car (cdr entry))))
         (if (not (mm-coding-system-p coding))
             ;; Placeholder for a coding system this Emacs lacks.
             '(gnus-charset 0)
           ;; Decode the same byte with both coding systems; the
           ;; difference between the two characters is the offset
           ;; stored in the table.
           (let* ((sample (string-to-char
                           (decode-coding-string "\341" coding)))
                  (offset (- (string-to-char
                              (decode-coding-string "\341" 'iso-8859-15))
                             sample)))
             (cons (char-charset sample)
                   (cons offset
                         (string-to-list
                          (decode-coding-string bad-bytes coding))))))))
     mm-iso-8859-15-compatible))
  "A table of the difference character between ISO-8859-X and ISO-8859-15.
Each element has the form (CHARSET OFFSET . INCONVERTIBLE-CHARS) and is
built from the corresponding element of `mm-iso-8859-15-compatible'.
The value is nil when `coding-system-p' is not defined or the
`iso-8859-15' coding system is not available.")
841 | ||
(defcustom mm-coding-system-priorities
  (let ((lang (and (boundp 'current-language-environment)
                   (symbol-value 'current-language-environment))))
    ;; LANG is nil in XEmacs without Mule but with `file-coding'.
    ;; In XEmacs 21.5 it may be the one like "Japanese (UTF-8)".
    (and lang
         (string-match "\\`Japanese" lang)
         ;; Japanese users prefer iso-2022-jp to euc-japan or
         ;; shift_jis, however iso-8859-1 should be used when
         ;; there are only ASCII text and Latin-1 characters.
         '(iso-8859-1 iso-2022-jp iso-2022-jp-2 shift_jis utf-8)))
  "Preferred coding systems for encoding outgoing messages.

More than one suitable coding system may be found for some text.
By default, the coding system with the highest priority is used
to encode outgoing messages (see `sort-coding-systems').  If this
variable is set, it overrides the default priority."
  :version "21.2"
  :type '(repeat (symbol :tag "Coding system"))
  :group 'mime)
862 | ||
;; The default is non-nil exactly when `find-coding-systems-region' is
;; defined in the running Emacs.
(defvar mm-use-find-coding-systems-region
  (fboundp 'find-coding-systems-region)
  "Use `find-coding-systems-region' to find proper coding systems.

Setting it to nil is useful on Emacsen supporting Unicode if sending
mail with multiple parts is preferred to sending a Unicode one.")
1f7d2e14 | 870 | |
c113de23 GM |
871 | ;;; Internal variables: |
872 | ||
873 | ;;; Functions: | |
874 | ||
(defun mm-mule-charset-to-mime-charset (charset)
  "Return the MIME charset corresponding to the given Mule CHARSET.
Candidates are tried in the order given by
`mm-sort-coding-systems-predicate'; the value is nil when no
candidate matches."
  (if (and (fboundp 'find-coding-systems-for-charsets)
           (fboundp 'sort-coding-systems))
      ;; Ask Emacs which coding systems can encode CHARSET and return
      ;; the MIME name of the first one that has such a name.
      (catch 'found
        (dolist (cs (sort (sort-coding-systems
                           (find-coding-systems-for-charsets (list charset)))
                          'mm-sort-coding-systems-predicate))
          (let ((mime (and cs
                           (or (coding-system-get cs :mime-charset)
                               (coding-system-get cs 'mime-charset)))))
            (when mime
              (throw 'found mime))))
        nil)
    ;; Otherwise search `mm-mime-mule-charset-alist' for the first
    ;; entry (in priority order) that lists CHARSET.
    (catch 'found
      (dolist (name (sort (mapcar 'car mm-mime-mule-charset-alist)
                          'mm-sort-coding-systems-predicate))
        (let ((entry (assq name mm-mime-mule-charset-alist)))
          (when (memq charset (cdr entry))
            (throw 'found (car entry)))))
      nil)))
c113de23 | 900 | |
95fa1ff7 SZ |
(eval-and-compile
  (defvar mm-emacs-mule (and (not (featurep 'xemacs))
                             (boundp 'enable-multibyte-characters)
                             (default-value 'enable-multibyte-characters)
                             (fboundp 'set-buffer-multibyte))
    "True in Emacs with Mule.")

  ;; Define both helpers together: real implementations when
  ;; `mm-emacs-mule' is non-nil, aliases of `ignore' otherwise.
  (cond
   (mm-emacs-mule
    (defun mm-enable-multibyte ()
      "Set the multibyte flag of the current buffer.
Only do this if the default value of `enable-multibyte-characters' is
non-nil.  This is a no-op in XEmacs."
      (set-buffer-multibyte 'to))
    (defun mm-disable-multibyte ()
      "Unset the multibyte flag of the current buffer.
This is a no-op in XEmacs."
      (set-buffer-multibyte nil)))
   (t
    (defalias 'mm-enable-multibyte 'ignore)
    (defalias 'mm-disable-multibyte 'ignore))))
052802c1 | 922 | |
c113de23 GM |
(defun mm-preferred-coding-system (charset)
  "Return the preferred coding system recorded for Mule CHARSET, or nil.
The charset property `preferred-coding-system' is consulted first and
then the misspelled `prefered-coding-system'."
  (let ((cs (get-charset-property charset 'preferred-coding-system)))
    (if cs
        cs
      ;; A typo in some Emacs versions.
      (get-charset-property charset 'prefered-coding-system))))
c113de23 | 927 | |
23f87bed MB |
;; Mule charsets shouldn't be used.
(defsubst mm-guess-charset ()
  "Guess Mule charset from the language environment.
The result is cached in `mail-parse-mule-charset'.  When the language
environment yields nothing, or only `ascii', the last Mule charset
listed for `mail-parse-charset' in `mm-mime-mule-charset-alist' is
used, and `latin-iso8859-1' as the last resort."
  (or
   mail-parse-mule-charset ;; cached mule-charset
   (let ((guess (and (boundp 'current-language-environment)
                     (car (last
                           (assq 'charset
                                 (assoc current-language-environment
                                        language-info-alist)))))))
     (when (memq guess '(nil ascii))
       (setq guess (or (car (last (assq mail-parse-charset
                                        mm-mime-mule-charset-alist)))
                       ;; default
                       'latin-iso8859-1)))
     (setq mail-parse-mule-charset guess))))
948 | ||
c113de23 GM |
(defun mm-charset-after (&optional pos)
  "Return charset of a character in current buffer at position POS.
If POS is nil, it defaults to the current point.
If POS is out of range, the value is nil.
If the charset is `composition', return the actual one."
  (let ((char (char-after pos)) charset)
    ;; `char-after' returns nil when POS is outside the accessible
    ;; portion of the buffer (or when point is at the end).  Return nil
    ;; in that case, as documented, instead of handing nil to the
    ;; numeric comparison below, which would signal an error.
    (when char
      (if (< (mm-char-int char) 128)
          (setq charset 'ascii)
        ;; charset-after is fake in some Emacsen.
        (setq charset (and (fboundp 'char-charset) (char-charset char)))
        (if (eq charset 'composition)   ; Mule 4
            (let ((p (or pos (point))))
              (cadr (find-charset-region p (1+ p))))
          ;; Charsets that say nothing about the real repertoire are
          ;; replaced by a guess from the language environment.
          (if (and charset (not (memq charset '(ascii eight-bit-control
                                                      eight-bit-graphic))))
              charset
            (mm-guess-charset)))))))
c113de23 GM |
966 | |
(defun mm-mime-charset (charset)
  "Return the MIME charset corresponding to the given Mule CHARSET.
Signal an error when CHARSET is the symbol `unknown'."
  (when (eq charset 'unknown)
    (error "The message contains non-printable characters, please use attachment"))
  (if (not (and (fboundp 'coding-system-get)
                (fboundp 'get-charset-property)))
      ;; This is for XEmacs.
      (mm-mule-charset-to-mime-charset charset)
    ;; This exists in Emacs 20.
    (let ((preferred (mm-preferred-coding-system charset)))
      (or
       ;; MIME name of the charset's preferred coding system, if any.
       (and preferred
            (or (coding-system-get preferred :mime-charset)
                (coding-system-get preferred 'mime-charset)))
       (and (eq charset 'ascii)
            'us-ascii)
       ;; The preferred coding system itself, when it has no MIME name.
       preferred
       (mm-mule-charset-to-mime-charset charset)))))
985 | ||
8753ddee MB |
(cond
 ;; Use the built-in when the running Emacs has it.
 ((fboundp 'delete-dups)
  (defalias 'mm-delete-duplicates 'delete-dups))
 (t
  (defun mm-delete-duplicates (list)
    "Destructively remove `equal' duplicates from LIST.
Store the result in LIST and return it.  LIST must be a proper list.
Of several `equal' occurrences of an element in LIST, the first
one is kept.

This is a compatibility function for Emacsen without `delete-dups'."
    ;; Code from `subr.el' in Emacs 22:
    ;; for every cell, drop later elements `equal' to its car.
    (let ((cell list))
      (while cell
        (setcdr cell (delete (car cell) (cdr cell)))
        (setq cell (cdr cell))))
    list)))
c113de23 | 1001 | |
23f87bed MB |
;; Fixme: This is used in places when it should be testing the
;; default multibyteness.  See mm-default-multibyte-p.
(eval-and-compile
  (if (or (featurep 'xemacs)
          (not (boundp 'enable-multibyte-characters)))
      ;; XEmacs, or an Emacs without the per-buffer flag: having the
      ;; `mule' feature is the only thing that can be tested.
      (defun mm-multibyte-p () (featurep 'mule))
    (defun mm-multibyte-p ()
      "Non-nil if multibyte is enabled in the current buffer."
      enable-multibyte-characters)))
1011 | ||
(defun mm-default-multibyte-p ()
  "Return non-nil if the session is multibyte.
This affects whether coding conversion should be attempted generally.
The value is nil without the `mule' feature; otherwise it is the
default value of `enable-multibyte-characters', or t when that
variable does not exist."
  (and (featurep 'mule)
       (or (not (boundp 'enable-multibyte-characters))
           (default-value 'enable-multibyte-characters))))
c113de23 | 1019 | |
f5490ddc MB |
(defun mm-iso-8859-x-to-15-region (&optional b e)
  "Shift characters in the region using `mm-iso-8859-x-to-15-table'.
When E is non-nil the work is restricted to the region from B to E,
otherwise the accessible portion of the buffer is processed.  Each
non-ASCII character whose charset has an entry in the table is
replaced by itself plus the entry's offset, except for characters the
entry lists as inconvertible, which are left alone.
Return non-nil if no inconvertible character was met.  Return nil
when `char-charset' is not defined.  Point is moved."
  (when (fboundp 'char-charset)
    (let (inconvertible)
      (save-restriction
        (when e
          (narrow-to-region b e))
        (goto-char (point-min))
        (skip-chars-forward "\0-\177")
        (while (not (eobp))
          (let* ((c (char-after))
                 (item (assq (char-charset c) mm-iso-8859-x-to-15-table)))
            (cond
             ;; Charset not covered by the table: leave the character.
             ((null item)
              (forward-char))
             ;; Character has no ISO-8859-15 equivalent.
             ((memq c (cdr (cdr item)))
              (setq inconvertible t)
              (forward-char))
             (t
              ;; Compute the replacement before deleting the original.
              (let ((replacement (+ c (car (cdr item)))))
                (delete-char 1)
                (insert-before-markers replacement)))))
          (skip-chars-forward "\0-\177")))
      (not inconvertible))))
1040 | ||
(defun mm-sort-coding-systems-predicate (a b)
  "Return non-nil if coding system A should sort before B.
The order follows `mm-coding-system-priorities': the earlier a coding
system's base appears in that list, the higher its priority.  A is
never preferred when `mm-coding-system-p' rejects it, and it is always
preferred when only B is rejected."
  (let* ((priorities
          (mapcar (lambda (cs)
                    ;; Note: invalid entries are dropped silently
                    (let ((valid (mm-coding-system-p cs)))
                      (and valid
                           (coding-system-base valid))))
                  mm-coding-system-priorities))
         (cs-a (mm-coding-system-p a))
         (cs-b (and cs-a (mm-coding-system-p b))))
    (cond
     ((null cs-a) nil)
     ((null cs-b) t)
     ;; A longer `memq' tail means an earlier position in the list.
     (t (> (length (memq (coding-system-base cs-a) priorities))
           (length (memq (coding-system-base cs-b) priorities)))))))
47b63dfa | 1053 | |
aa0a8561 MB |
1054 | (eval-when-compile |
1055 | (autoload 'latin-unity-massage-name "latin-unity") | |
1056 | (autoload 'latin-unity-maybe-remap "latin-unity") | |
1057 | (autoload 'latin-unity-representations-feasible-region "latin-unity") | |
9efa445f DN |
1058 | (autoload 'latin-unity-representations-present-region "latin-unity")) |
1059 | ||
1060 | (defvar latin-unity-coding-systems) | |
1061 | (defvar latin-unity-ucs-list) | |
aa0a8561 MB |
1062 | |
(defun mm-xemacs-find-mime-charset-1 (begin end)
  "Determine which MIME charset to use to send region as message.
This uses the XEmacs-specific latin-unity package to better handle the
case where identical characters from diverse ISO-8859-? character sets
can be encoded using a single one of the corresponding coding systems.

It treats `mm-coding-system-priorities' as the list of preferred
coding systems; a useful example setting for this list in Western
Europe would be '(iso-8859-1 iso-8859-15 utf-8), which would default
to the very standard Latin 1 coding system, and only move to coding
systems that are less supported as is necessary to encode the
characters that exist in the buffer.

Latin Unity doesn't know about those non-ASCII Roman characters that
are available in various East Asian character sets.  As such, its
behavior if you have a JIS 0212 LATIN SMALL LETTER A WITH ACUTE in a
buffer and it can otherwise be encoded as Latin 1, won't be ideal.
But this is very much a corner case, so don't worry about it.

The value is a one-element list holding the chosen coding system, or
nil when latin-unity is unavailable or cannot decide."
  (let ((systems mm-coding-system-priorities))

    ;; Load the Latin Unity library, if available.
    (unless (featurep 'latin-unity)
      (when (locate-library "latin-unity")
        (require 'latin-unity)))

    ;; Now, can we use it?  If latin unity isn't available the result
    ;; is nil and `mm-find-mime-charset-region' takes its default
    ;; action, which equally applies to GNU Emacs.
    (when (featurep 'latin-unity)
      (let ((feasible (latin-unity-representations-feasible-region begin end))
            (present (latin-unity-representations-present-region begin end)))
        (catch 'done
          ;; Pass back the first coding system in the preferred list
          ;; that can encode the whole region.
          (dolist (preferred systems)
            (let ((curset (latin-unity-massage-name 'buffer-default
                                                    preferred)))
              (cond
               ;; A universal coding system can certainly encode all
               ;; the characters in the region.
               ((memq curset latin-unity-ucs-list)
                (throw 'done (list curset)))
               ;; Not universal and unknown to latin unity: we can't
               ;; decide here.  Leave that to
               ;; `mm-find-mime-charset-region', whence we have been
               ;; called.
               ((not (memq curset latin-unity-coding-systems))
                (throw 'done nil))
               ;; A known coding system that may conceivably be able
               ;; to encode all the characters in the region.
               ((latin-unity-maybe-remap begin end curset
                                         feasible present t)
                (throw 'done (list curset))))))
          ;; Can't encode using anything from the
          ;; `mm-coding-system-priorities' list.
          nil)))))
1127 | ||
(defmacro mm-xemacs-find-mime-charset (begin end)
  "Expand to a call of `mm-xemacs-find-mime-charset-1' on BEGIN and END.
The call is guarded by a check for the `mule' feature.  When the macro
is expanded outside XEmacs the expansion is nil."
  (if (featurep 'xemacs)
      (list 'and
            '(featurep 'mule)
            (list 'mm-xemacs-find-mime-charset-1 begin end))))
aa0a8561 | 1131 | |
b5000590 GM |
1132 | (declare-function mm-delete-duplicates "mm-util" (list)) |
1133 | ||
(defun mm-find-mime-charset-region (b e &optional hack-charsets)
  "Return the MIME charsets needed to encode the region between B and E.
nil means ASCII, a single-element list represents an appropriate MIME
charset, and a longer list means no appropriate charset.

HACK-CHARSETS is a list of symbols.  When it contains `iso-8859-15'
and more than one charset was found, one of them being iso-8859-15,
the region is run through `mm-iso-8859-x-to-15-region' and, on
success, the charsets named in `mm-iso-8859-15-compatible' are removed
from the result.  When it contains `iso-2022-jp-2' and that charset
was found, `iso-2022-jp' is removed from the result."
  (let (charsets)
    ;; The return possibilities of this function are a mess...
    ;; The three arms of this `or' are tried in turn; each one stores
    ;; its answer in CHARSETS and the first non-nil answer wins.
    (or (and (mm-multibyte-p)
             mm-use-find-coding-systems-region
             ;; Find the mime-charset of the most preferred coding
             ;; system that has one.
             (let ((systems (find-coding-systems-region b e)))
               (when mm-coding-system-priorities
                 (setq systems
                       (sort systems 'mm-sort-coding-systems-predicate)))
               (setq systems (delq 'compound-text systems))
               (unless (equal systems '(undecided))
                 (while systems
                   (let* ((head (pop systems))
                          (cs (or (coding-system-get head :mime-charset)
                                  (coding-system-get head 'mime-charset))))
                     ;; The mime-charset (`x-ctext') of
                     ;; `compound-text' is not in the IANA list.  We
                     ;; shouldn't normally use anything here with a
                     ;; mime-charset having an `x-' prefix.
                     ;; Fixme:  Allow this to be overridden, since
                     ;; there is existing use of x-ctext.
                     ;; Also people apparently need the coding system
                     ;; `iso-2022-jp-3' (which Mule-UCS defines with
                     ;; mime-charset, though it's not valid).
                     (if (and cs
                              (not (string-match "^[Xx]-" (symbol-name cs)))
                              ;; UTF-16 of any variety is invalid for
                              ;; text parts and, unfortunately, has
                              ;; mime-charset defined both in Mule-UCS
                              ;; and versions of Emacs.  (The name
                              ;; might be `mule-utf-16...'  or
                              ;; `utf-16...'.)
                              (not (string-match "utf-16" (symbol-name cs))))
                         ;; Accept CS; emptying SYSTEMS ends the loop.
                         (setq systems nil
                               charsets (list cs))))))
               charsets))
        ;; If we're XEmacs, and some coding system is appropriate,
        ;; mm-xemacs-find-mime-charset will return an appropriate list.
        ;; Otherwise, we'll get nil, and the next setq will get invoked.
        (setq charsets (mm-xemacs-find-mime-charset b e))

        ;; Fixme: won't work for unibyte Emacs 23:

        ;; We're not multibyte, or a single coding system won't cover it.
        (setq charsets
              (mm-delete-duplicates
               (mapcar 'mm-mime-charset
                       (delq 'ascii
                             (mm-find-charset-region b e))))))
    ;; Note that `mm-iso-8859-x-to-15-region' may modify the buffer
    ;; text between B and E.
    (if (and (> (length charsets) 1)
             (memq 'iso-8859-15 charsets)
             (memq 'iso-8859-15 hack-charsets)
             (save-excursion (mm-iso-8859-x-to-15-region b e)))
        (dolist (x mm-iso-8859-15-compatible)
          (setq charsets (delq (car x) charsets))))
    (if (and (memq 'iso-2022-jp-2 charsets)
             (memq 'iso-2022-jp-2 hack-charsets))
        (setq charsets (delq 'iso-2022-jp charsets)))
    ;; Attempt to reduce the number of charsets if utf-8 is available.
    ;; This recomputes the charset list with utf-8 temporarily placed
    ;; at the head of `mm-coding-system-priorities' (XEmacs only).
    (if (and (featurep 'xemacs)
             (> (length charsets) 1)
             (mm-coding-system-p 'utf-8))
        (let ((mm-coding-system-priorities
               (cons 'utf-8 mm-coding-system-priorities)))
          (setq charsets
                (mm-delete-duplicates
                 (mapcar 'mm-mime-charset
                         (delq 'ascii
                               (mm-find-charset-region b e)))))))
    charsets))
95fa1ff7 | 1209 | |
c113de23 GM |
(defmacro mm-with-unibyte-buffer (&rest forms)
  "Create a temporary buffer, and evaluate FORMS there like `progn'.
Use unibyte mode for this."
  ;; Expands to (with-temp-buffer (mm-disable-multibyte) FORMS...).
  (cons 'with-temp-buffer
        (cons '(mm-disable-multibyte)
              forms)))
(put 'mm-with-unibyte-buffer 'lisp-indent-function 0)
(put 'mm-with-unibyte-buffer 'edebug-form-spec '(body))
1218 | ||
23f87bed MB |
(defmacro mm-with-multibyte-buffer (&rest forms)
  "Create a temporary buffer, and evaluate FORMS there like `progn'.
Use multibyte mode for this."
  ;; Expands to (with-temp-buffer (mm-enable-multibyte) FORMS...).
  (cons 'with-temp-buffer
        (cons '(mm-enable-multibyte)
              forms)))
(put 'mm-with-multibyte-buffer 'lisp-indent-function 0)
(put 'mm-with-multibyte-buffer 'edebug-form-spec '(body))
1227 | ||
(defmacro mm-with-unibyte-current-buffer (&rest forms)
  "Evaluate FORMS with current buffer temporarily made unibyte.
Also bind the default-value of `enable-multibyte-characters' to nil.
Equivalent to `progn' in XEmacs.

NOTE: Use this macro with caution in multibyte buffers (it is not
worth using this macro in unibyte buffers of course).  Use of
`(set-buffer-multibyte t)', which is run finally, is generally
harmful since it is likely to modify existing data in the buffer.
For instance, it converts \"\\300\\255\" into \"\\255\" in
Emacs 23 (unicode)."
  ;; Uninterned symbols keep the expansion's own variables from
  ;; capturing variables used inside FORMS.
  (let ((multibyte (make-symbol "multibyte"))
        (buffer (make-symbol "buffer")))
    `(if mm-emacs-mule
         ;; Remember the buffer and its multibyte flag so that both can
         ;; be restored even when FORMS exit non-locally.
         (let ((,multibyte enable-multibyte-characters)
               (,buffer (current-buffer)))
           (unwind-protect
               (letf (((default-value 'enable-multibyte-characters) nil))
                 (set-buffer-multibyte nil)
                 ,@forms)
             ;; FORMS may have switched buffers; return to the original
             ;; one before restoring its flag.
             (set-buffer ,buffer)
             (set-buffer-multibyte ,multibyte)))
       ;; When `mm-emacs-mule' is nil only the default value is rebound.
       (letf (((default-value 'enable-multibyte-characters) nil))
         ,@forms))))
(put 'mm-with-unibyte-current-buffer 'lisp-indent-function 0)
(put 'mm-with-unibyte-current-buffer 'edebug-form-spec '(body))
1254 | ||
(defun mm-find-charset-region (b e)
  "Return a list of Emacs charsets in the region B to E.
In a multibyte buffer this is the result of `find-charset-region'
without `composition', `eight-bit-control', `eight-bit-graphic' and
`control-1'.  Otherwise the value is (ascii) for pure ASCII text, and
`ascii' plus one guessed charset when non-ASCII text is present."
  (if (and (mm-multibyte-p)
           (fboundp 'find-charset-region))
      ;; Remove composition since the base charsets have been included.
      ;; Remove eight-bit-*, treat them as ascii.
      (delq 'control-1
            (delq 'eight-bit-graphic
                  (delq 'eight-bit-control
                        (delq 'composition
                              (find-charset-region b e)))))
    ;; We are in a unibyte buffer or XEmacs non-mule, so we futz around a bit.
    (save-excursion
      (save-restriction
        (narrow-to-region b e)
        (goto-char (point-min))
        (skip-chars-forward "\0-\177")
        (if (eobp)
            '(ascii)
          ;; Guess from the language environment first, then from
          ;; `mail-parse-charset', and finally assume Latin-1.
          (let ((guess (and (boundp 'current-language-environment)
                            (car (last (assq 'charset
                                             (assoc current-language-environment
                                                    language-info-alist)))))))
            (when (eq guess 'ascii)
              (setq guess nil))
            (list 'ascii
                  (or guess
                      (car (last (assq mail-parse-charset
                                       mm-mime-mule-charset-alist)))
                      'latin-iso8859-1))))))))
c113de23 | 1288 | |
c113de23 GM |
(defun mm-auto-mode-alist ()
  "Return an `auto-mode-alist' with only the .gz (etc) thingies.
These are the elements of `auto-mode-alist' whose cdr is a list rather
than a symbol; their relative order is preserved."
  (let (kept)
    (dolist (entry auto-mode-alist)
      (when (listp (cdr entry))
        (push entry kept)))
    (nreverse kept)))
1298 | ||
;; `mm-insert-file-contents', `mm-append-to-file' and `mm-write-region'
;; prepend this list to `inhibit-file-name-handlers' when their INHIBIT
;; argument is non-nil.
(defvar mm-inhibit-file-name-handlers
  '(jka-compr-handler image-file-handler epa-file-handler)
  "A list of handlers doing (un)compression (etc) thingies.")
1302 | ||
(defun mm-insert-file-contents (filename &optional visit beg end replace
                                         inhibit)
  "Like `insert-file-contents', but only reads in the file.
A buffer may be modified in several ways after reading into the buffer due
to advanced Emacs features, such as file-name-handlers, format decoding,
`find-file-hooks', etc.
If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'.
  This function ensures that none of these modifications will take place.
FILENAME, VISIT, BEG, END and REPLACE are passed on to
`insert-file-contents' unchanged."
  (letf* ((format-alist nil)
          ;; With INHIBIT no auto-mode entry is kept; otherwise only the
          ;; entries selected by `mm-auto-mode-alist'.
          (auto-mode-alist (if inhibit nil (mm-auto-mode-alist)))
          ((default-value 'major-mode) 'fundamental-mode)
          (enable-local-variables nil)
          (after-insert-file-functions nil)
          (enable-local-eval nil)
          (inhibit-file-name-operation (if inhibit
                                           'insert-file-contents
                                         inhibit-file-name-operation))
          (inhibit-file-name-handlers
           (if inhibit
               (append mm-inhibit-file-name-handlers
                       inhibit-file-name-handlers)
             inhibit-file-name-handlers))
          ;; The hook variable is called `find-file-hook' or
          ;; `find-file-hooks' depending on the Emacs version; FFH holds
          ;; whichever name exists and VAL its current value.
          (ffh (if (boundp 'find-file-hook)
                   'find-file-hook
                 'find-file-hooks))
          (val (symbol-value ffh)))
    ;; Empty the hook for the duration of the read and put the saved
    ;; value back afterwards, even if reading signals an error.
    (set ffh nil)
    (unwind-protect
        (insert-file-contents filename visit beg end replace)
      (set ffh val))))
c113de23 GM |
1333 | |
(defun mm-append-to-file (start end filename &optional codesys inhibit)
  "Append the contents of the region to the end of file FILENAME.
When called from a function, expects three arguments,
START, END and FILENAME.  START and END are buffer positions
saying what text to write.
Optional fourth argument specifies the coding system to use when
encoding the file.
If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'."
  (let ((coding-system-for-write
         ;; First non-nil of CODESYS, the write-specific default and
         ;; the general text coding system.
         (cond (codesys)
               (mm-text-coding-system-for-write)
               (t mm-text-coding-system)))
        (inhibit-file-name-operation
         (or (and inhibit 'append-to-file)
             inhibit-file-name-operation))
        (inhibit-file-name-handlers
         ;; With INHIBIT nil this is just the current handler list.
         (append (and inhibit mm-inhibit-file-name-handlers)
                 inhibit-file-name-handlers)))
    (write-region start end filename t 'no-message)
    (message "Appended to %s" filename)))
c113de23 | 1355 | |
(defun mm-write-region (start end filename &optional append visit lockname
			      coding-system inhibit)
  "Like `write-region'.
START, END, FILENAME, APPEND, VISIT and LOCKNAME are passed on to
`write-region' unchanged.  CODING-SYSTEM, if non-nil, is the coding
system used for writing; otherwise `mm-text-coding-system-for-write'
is tried first and then `mm-text-coding-system'.
If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'."
  (let ((coding-system-for-write
	 (cond (coding-system)
	       (mm-text-coding-system-for-write)
	       (t mm-text-coding-system)))
	(inhibit-file-name-operation
	 (if (null inhibit)
	     inhibit-file-name-operation
	   'write-region))
	;; `append' returns its last argument unchanged when everything
	;; before it is nil, so nothing is prepended unless INHIBIT is set.
	(inhibit-file-name-handlers
	 (append (and inhibit mm-inhibit-file-name-handlers)
		 inhibit-file-name-handlers)))
    (write-region start end filename append visit lockname)))
1373 | ||
b5000590 GM |
(autoload 'gmm-write-region "gmm-utils")

;; It is not a MIME function, but some MIME functions use it.
;; Use the native `make-temp-file' only when it is byte-compiled and its
;; fourth argument-list element is `suffix'; otherwise define our own.
(if (and (fboundp 'make-temp-file)
	 (ignore-errors
	   (let* ((fn (symbol-function 'make-temp-file))
		  (arglist (and (byte-code-function-p fn)
				(if (fboundp 'compiled-function-arglist)
				    ;; XEmacs
				    (eval (list 'compiled-function-arglist
						fn))
				  (aref fn 0)))))
	     (and arglist
		  (>= (length arglist) 4)
		  (eq (nth 3 arglist) 'suffix)))))
    (defalias 'mm-make-temp-file 'make-temp-file)
  ;; Stolen (and modified for XEmacs) from Emacs 22.
  (defun mm-make-temp-file (prefix &optional dir-flag suffix)
    "Create a temporary file and return its name.
The name is built by appending some random characters to PREFIX and
expanding the result against `temporary-file-directory' (or the value
of `temp-directory' where that function exists).  It is guaranteed to
point to a newly created empty file, which you can then fill using
`write-region'.

If DIR-FLAG is non-nil, create a new empty directory instead of a file.

If SUFFIX is non-nil, add that at the end of the file name."
    (let ((saved-modes (default-file-modes))
	  file)
      (unwind-protect
	  (progn
	    ;; Create temp files with strict access rights.  It's easy to
	    ;; loosen them later, whereas it's impossible to close the
	    ;; time-window of loose permissions otherwise.
	    (set-default-file-modes 448)
	    ;; The handler value drives the loop: nil means the name was
	    ;; claimed, non-nil means it was taken and we must retry.
	    (while (condition-case err
		       (let ((tmpdir (if (fboundp 'temp-directory)
					 ;; XEmacs
					 (temp-directory)
				       temporary-file-directory)))
			 (setq file (make-temp-name
				     (expand-file-name prefix tmpdir)))
			 (when suffix
			   (setq file (concat file suffix)))
			 (cond
			  (dir-flag
			   (make-directory file))
			  (t
			   ;; NOTE: This is unsafe if Emacs 20
			   ;; users and XEmacs users don't use
			   ;; a secure temp directory.
			   (gmm-write-region "" nil file nil 'silent
					     nil 'excl)))
			 nil)
		     (file-already-exists t)
		     ;; The XEmacs version of `make-directory' issues
		     ;; `file-error'.
		     (file-error (or (and (featurep 'xemacs)
					  (file-exists-p file))
				     (signal (car err) (cdr err)))))
	      ;; the file was somehow created by someone else between
	      ;; `make-temp-name' and `write-region', let's try again.
	      nil)
	    file)
	;; Reset the umask.
	(set-default-file-modes saved-modes)))))
1439 | ||
95fa1ff7 SZ |
(defun mm-image-load-path (&optional package)
  "Return a copy of `load-path' interleaved with image directories.
For every non-nil element of `load-path', the directory
\"etc/images/PACKAGE\" next to it (that is, relative to the element's
parent directory) is inserted just before that element when it exists
as a directory.  PACKAGE defaults to \"gnus/\"."
  (let (acc)
    (dolist (entry load-path)
      (let ((images (and entry
			 (concat (file-name-directory
				  (directory-file-name entry))
				 "etc/images/" (or package "gnus/")))))
	(when (and images (file-directory-p images))
	  (push images acc))
	(push entry acc)))
    ;; ACC was built back to front.
    (nreverse acc)))
1450 | ||
23f87bed MB |
;; Fixme: This doesn't look useful where it's used.
(if (fboundp 'detect-coding-region)
    (defun mm-detect-coding-region (start end)
      "Like `detect-coding-region' except returning the best one.
START and END delimit the region to examine."
      (let* ((detected (detect-coding-region start end))
	     (best (car-safe detected)))
	;; A list result yields its first element; anything else
	;; is handed back unchanged.
	(if best best detected)))
  (defun mm-detect-coding-region (start end)
    "Return `ascii' if the text between START and END is all ASCII.
Otherwise return the value of `mm-guess-charset'.  Point is put back
where it was before returning."
    (let ((origin (point))
	  charset)
      (goto-char start)
      (skip-chars-forward "\0-\177" end)
      (setq charset (if (eq (point) end)
			'ascii
		      (mm-guess-charset)))
      (goto-char origin)
      charset)))

(declare-function mm-detect-coding-region "mm-util" (start end))
1468 | ||
23f87bed MB |
(if (fboundp 'coding-system-get)
    (defun mm-detect-mime-charset-region (start end)
      "Detect MIME charset of the text in the region between START and END.
The coding system found by `mm-detect-coding-region' is looked up for
its `:mime-charset' property and, failing that, `mime-charset'."
      (let* ((coding (mm-detect-coding-region start end))
	     (charset (coding-system-get coding :mime-charset)))
	(if charset
	    charset
	  (coding-system-get coding 'mime-charset))))
  (defun mm-detect-mime-charset-region (start end)
    "Detect MIME charset of the text in the region between START and END.
This is simply the value of `mm-detect-coding-region'."
    (mm-detect-coding-region start end)))
1479 | ||
01c52d31 MB |
;; Give the compiler a definition to chew on when the real function
;; is not available at compile time.
(eval-when-compile
  (unless (fboundp 'coding-system-to-mime-charset)
    (defalias 'coding-system-to-mime-charset 'ignore)))

(defun mm-coding-system-to-mime-charset (coding-system)
  "Return the MIME charset corresponding to CODING-SYSTEM.
Return nil when CODING-SYSTEM is nil or no charset can be found.
To make this function work with XEmacs, the APEL package is required."
  (cond
   ((null coding-system) nil)
   ;; Preferred source: the coding system's own property list.
   ((and (fboundp 'coding-system-get)
	 (or (coding-system-get coding-system :mime-charset)
	     (coding-system-get coding-system 'mime-charset))))
   ;; The remaining fallback exists for XEmacs only.
   ((not (featurep 'xemacs)) nil)
   ((or (and (fboundp 'coding-system-to-mime-charset)
	     ;; Ignore the compile-time placeholder alias.
	     (not (eq (symbol-function 'coding-system-to-mime-charset)
		      'ignore)))
	(and (condition-case nil
		 (require 'mcharset)
	       (error nil))
	     (fboundp 'coding-system-to-mime-charset)))
    (coding-system-to-mime-charset coding-system))))
1500 | ||
(eval-when-compile
  (require 'jka-compr))

(defun mm-decompress-buffer (filename &optional inplace force)
  "Decompress buffer's contents, depending on jka-compr.
Only when FORCE is t or `auto-compression-mode' is enabled and FILENAME
agrees with `jka-compr-compression-info-list', decompression is done.
Signal an error if FORCE is neither nil nor t and compressed data are
not decompressed because `auto-compression-mode' is disabled.
If INPLACE is nil, return decompressed data or nil without modifying
the buffer.  Otherwise, replace the buffer's contents with the
decompressed data and return nil.  The buffer's multibyteness must be
turned off.

If the uncompress program exits with a status that is not a member of
`jka-compr-acceptable-retval-list', an error message is displayed; in
that case the buffer is left untouched when INPLACE is non-nil, and
the program's error output is what gets returned when INPLACE is nil."
  (when (and filename
	     (if force
		 (prog1 t (require 'jka-compr))
	       (and (fboundp 'jka-compr-installed-p)
		    (jka-compr-installed-p))))
    (let ((info (jka-compr-get-compression-info filename)))
      (when info
	(unless (or (memq force (list nil t))
		    (jka-compr-installed-p))
	  ;; Say why we refuse instead of signalling an empty message.
	  (error "%s is compressed, but `auto-compression-mode' is disabled"
		 filename))
	(let ((prog (jka-compr-info-uncompress-program info))
	      (args (jka-compr-info-uncompress-args info))
	      (msg (format "%s %s..."
			   (jka-compr-info-uncompress-message info)
			   filename))
	      ;; The program's stderr is collected in this file.
	      (err-file (jka-compr-make-temp-name))
	      (cur (current-buffer))
	      (coding-system-for-read mm-binary-coding-system)
	      (coding-system-for-write mm-binary-coding-system)
	      retval err-msg)
	  (message "%s" msg)
	  ;; `condition-case' below only traps `error', so use
	  ;; `unwind-protect' to make sure the temporary file is also
	  ;; removed on a quit or any other non-local exit.
	  (unwind-protect
	      (mm-with-unibyte-buffer
		(insert-buffer-substring cur)
		(condition-case err
		    (progn
		      (unless (memq (apply 'call-process-region
					   (point-min) (point-max)
					   prog t (list t err-file) nil args)
				    jka-compr-acceptable-retval-list)
			;; Bad exit status: replace the output with the
			;; whitespace-normalized stderr text.
			(erase-buffer)
			(insert (mapconcat
				 'identity
				 (delete "" (split-string
					     (prog2
						 (insert-file-contents err-file)
						 (buffer-string)
					       (erase-buffer))))
				 " ")
				"\n")
			(setq err-msg
			      (format "Error while executing \"%s %s < %s\""
				      prog (mapconcat 'identity args " ")
				      filename)))
		      (setq retval (buffer-string)))
		  (error
		   (setq err-msg (error-message-string err)))))
	    (when (file-exists-p err-file)
	      (ignore-errors (jka-compr-delete-temp-file err-file))))
	  (when inplace
	    ;; Only touch the caller's buffer when decompression worked.
	    (unless err-msg
	      (delete-region (point-min) (point-max))
	      (insert retval))
	    (setq retval nil))
	  (message "%s" (or err-msg (concat msg "done")))
	  retval)))))
1568 | ||
;; Placeholders so that the compiler does not complain about functions
;; that are not defined at compile time.
(eval-when-compile
  (unless (fboundp 'coding-system-name)
    (defalias 'coding-system-name 'ignore))
  (unless (fboundp 'find-file-coding-system-for-read-from-filename)
    (defalias 'find-file-coding-system-for-read-from-filename 'ignore))
  (unless (fboundp 'find-operation-coding-system)
    (defalias 'find-operation-coding-system 'ignore)))

(defun mm-find-buffer-file-coding-system (&optional filename)
  "Find coding system used to decode the contents of the current buffer.
This function looks for the coding system magic cookie or examines the
coding system specified by `file-coding-system-alist' being associated
with FILENAME which defaults to `buffer-file-name'.  Data compressed by
gzip, bzip2, etc. are allowed.

Compressed data are decompressed into a temporary unibyte buffer which
is examined instead of the current buffer; that buffer is always
killed before returning, even if the examination signals an error.
The current buffer and point are left as they were."
  (unless filename
    (setq filename buffer-file-name))
  (save-excursion
    (let ((decomp (unless ;; No worth to examine charset of tar files.
		      (and filename
			   (string-match
			    "\\.\\(?:tar\\.[^.]+\\|tbz\\|tgz\\)\\'"
			    filename))
		    (mm-decompress-buffer filename nil t)))
	  temp-buffer)
      ;; Use `unwind-protect' so the scratch buffer cannot leak when
      ;; one of the detection functions below signals.
      (unwind-protect
	  (progn
	    (when decomp
	      (setq temp-buffer
		    (letf (((default-value 'enable-multibyte-characters) nil))
		      (generate-new-buffer " *temp*")))
	      (set-buffer temp-buffer)
	      (insert decomp)
	      ;; Drop the compression suffix so that file name based
	      ;; rules see the name of the uncompressed file.
	      (setq filename (file-name-sans-extension filename)))
	    (goto-char (point-min))
	    (cond
	     ((boundp 'set-auto-coding-function) ;; Emacs
	      (if filename
		  (or (funcall (symbol-value 'set-auto-coding-function)
			       filename (- (point-max) (point-min)))
		      (car (find-operation-coding-system 'insert-file-contents
							 filename)))
		(let (auto-coding-alist)
		  (condition-case nil
		      (funcall (symbol-value 'set-auto-coding-function)
			       nil (- (point-max) (point-min)))
		    (error nil)))))
	     ((and (featurep 'xemacs) (featurep 'file-coding)) ;; XEmacs
	      (let ((case-fold-search t)
		    (end (point-at-eol))
		    codesys start)
		(or
		 ;; A "-*- coding: ... -*-" cookie on the first line.
		 (and (re-search-forward "-\\*-+[\t ]*" end t)
		      (progn
			(setq start (match-end 0))
			(re-search-forward "[\t ]*-+\\*-" end t))
		      (progn
			(setq end (match-beginning 0))
			(goto-char start)
			(or (looking-at "coding:[\t ]*\\([^\t ;]+\\)")
			    (re-search-forward
			     "[\t ;]+coding:[\t ]*\\([^\t ;]+\\)"
			     end t)))
		      (find-coding-system (setq codesys
						(intern (match-string 1))))
		      codesys)
		 ;; A "coding:" entry inside a Local Variables section.
		 (and (re-search-forward "^[\t ]*;+[\t ]*Local[\t ]+Variables:"
					 nil t)
		      (progn
			(setq start (match-end 0))
			(re-search-forward "^[\t ]*;+[\t ]*End:" nil t))
		      (progn
			(setq end (match-beginning 0))
			(goto-char start)
			(re-search-forward
			 "^[\t ]*;+[\t ]*coding:[\t ]*\\([^\t\n\r ]+\\)"
			 end t))
		      (find-coding-system (setq codesys
						(intern (match-string 1))))
		      codesys)
		 ;; A ";;;coding system: " line anywhere in the buffer.
		 (and (progn
			(goto-char (point-min))
			(setq case-fold-search nil)
			(re-search-forward "^;;;coding system: "
					   ;;(+ (point-min) 3000) t))
					   nil t))
		      (looking-at "[^\t\n\r ]+")
		      (find-coding-system
		       (setq codesys (intern (match-string 0))))
		      codesys)
		 ;; Finally fall back on the file name.
		 (and filename
		      (setq codesys
			    (find-file-coding-system-for-read-from-filename
			     filename))
		      (coding-system-name (coding-system-base codesys))))))))
	;; Kill exactly the buffer created above, whatever is current now.
	(when (buffer-live-p temp-buffer)
	  (kill-buffer temp-buffer))))))
3efe5554 | 1661 | |
c113de23 GM |
1662 | (provide 'mm-util) |
1663 | ||
9d9b0de9 | 1664 | ;; arch-tag: 94dc5388-825d-4fd1-bfa5-2100aa351238 |
c113de23 | 1665 | ;;; mm-util.el ends here |