Commit | Line | Data |
---|---|---|
95fa1ff7 | 1 | ;;; mm-util.el --- Utility functions for Mule and low level things |
e84b4b86 | 2 | |
73b0cd50 | 3 | ;; Copyright (C) 1998-2011 Free Software Foundation, Inc. |
c113de23 GM |
4 | |
5 | ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | |
6 | ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> | |
7 | ;; This file is part of GNU Emacs. | |
8 | ||
5e809f55 | 9 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
c113de23 | 10 | ;; it under the terms of the GNU General Public License as published by |
5e809f55 GM |
11 | ;; the Free Software Foundation, either version 3 of the License, or |
12 | ;; (at your option) any later version. | |
c113de23 GM |
13 | |
14 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
15 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
5e809f55 | 16 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
c113de23 GM |
17 | ;; GNU General Public License for more details. |
18 | ||
19 | ;; You should have received a copy of the GNU General Public License | |
5e809f55 | 20 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
c113de23 GM |
21 | |
22 | ;;; Commentary: | |
23 | ||
24 | ;;; Code: | |
25 | ||
f0b7f5a8 | 26 | ;; For Emacs <22.2 and XEmacs. |
b5000590 GM |
27 | (eval-and-compile |
28 | (unless (fboundp 'declare-function) (defmacro declare-function (&rest r)))) | |
29 | ||
23f87bed | 30 | (eval-when-compile (require 'cl)) |
c113de23 GM |
31 | (require 'mail-prsvr) |
32 | ||
f53b2875 | 33 | (eval-and-compile |
01c52d31 MB |
34 | (if (featurep 'xemacs) |
35 | (unless (ignore-errors | |
36 | (require 'timer-funcs)) | |
37 | (require 'timer)) | |
38 | (require 'timer))) | |
39 | ||
9efa445f | 40 | (defvar mm-mime-mule-charset-alist ) |
4c188c5a GM |
41 | ;; Note this is not presently used on Emacs >= 23, which is good, |
42 | ;; since it means standalone message-mode (which requires mml and | |
43 | ;; hence mm-util) does not load gnus-util. | |
44 | (autoload 'gnus-completing-read "gnus-util") | |
9efa445f | 45 | |
e3e955fe MB |
46 | ;; Emulate functions that are not available in every (X)Emacs version. |
47 | ;; The name of a function is prefixed with mm-, like `mm-char-int' for | |
48 | ;; `char-int' that is a native XEmacs function, not available in Emacs. | |
49 | ;; Gnus programs all should use mm- functions, not the original ones. | |
(eval-and-compile
  ;; Each entry below is (NAME . FALLBACK).  `mm-NAME' is defined as an
  ;; alias of NAME when NAME is fbound in the running (X)Emacs, and as an
  ;; alias of FALLBACK otherwise.
  (dolist (entry
           `(;; `coding-system-list' is not available in XEmacs 21.4 built
             ;; without the `file-coding' feature.
             (coding-system-list . ignore)
             ;; `char-int' is an XEmacs function, not available in Emacs.
             (char-int . identity)
             ;; `coding-system-equal' is an Emacs function, not available
             ;; in XEmacs.
             (coding-system-equal . equal)
             ;; `annotationp' is an XEmacs function, not available in Emacs.
             (annotationp . ignore)
             ;; `set-buffer-file-coding-system' is not available in
             ;; XEmacs 21.4 built without the `file-coding' feature.
             (set-buffer-file-coding-system . ignore)
             ;; `read-charset' is an Emacs function, not available in XEmacs.
             (read-charset
              . ,(lambda (prompt)
                   "Return a charset."
                   (intern
                    (gnus-completing-read
                     prompt
                     (mapcar (lambda (e) (symbol-name (car e)))
                             mm-mime-mule-charset-alist)
                     t))))
             ;; `subst-char-in-string' is not available in XEmacs 21.4.
             (subst-char-in-string
              . ,(lambda (from to string &optional inplace)
                   ;; stolen (and renamed) from nnheader.el
                   "Replace characters in STRING from FROM to TO.
Unless optional argument INPLACE is non-nil, return a new string."
                   (let ((string (if inplace string (copy-sequence string)))
                         (len (length string))
                         (idx 0))
                     ;; Replace all occurrences of FROM with TO.
                     (while (< idx len)
                       (when (= (aref string idx) from)
                         (aset string idx to))
                       (setq idx (1+ idx)))
                     string)))
             ;; `replace-in-string' is an XEmacs function, not available
             ;; in Emacs.
             (replace-in-string
              . ,(lambda (string regexp rep &optional literal)
                   "See `replace-regexp-in-string', only the order of args differs."
                   (replace-regexp-in-string regexp rep string nil literal)))
             ;; `string-as-unibyte' is an Emacs function, not available
             ;; in XEmacs.
             (string-as-unibyte . identity)
             ;; `string-make-unibyte' is an Emacs function, not available
             ;; in XEmacs.
             (string-make-unibyte . identity)
             ;; string-as-multibyte often doesn't really do what you think
             ;; it does.
             ;; Example:
             ;;    (aref (string-as-multibyte "\201") 0) -> 129 (aka ?\201)
             ;;    (aref (string-as-multibyte "\300") 0) -> 192 (aka ?\300)
             ;;    (aref (string-as-multibyte "\300\201") 0) -> 192 (aka ?\300)
             ;;    (aref (string-as-multibyte "\300\201") 1) -> 129 (aka ?\201)
             ;; but
             ;;    (aref (string-as-multibyte "\201\300") 0) -> 2240
             ;;    (aref (string-as-multibyte "\201\300") 1) -> <error>
             ;; Better use string-to-multibyte or encode-coding-string.
             ;; If you really need string-as-multibyte somewhere it's usually
             ;; because you're using the internal emacs-mule representation
             ;; (maybe because you're using string-as-unibyte somewhere),
             ;; which is generally a problem in itself.
             ;; Here is an approximate equivalence table to help think
             ;; about it:
             ;; (string-as-multibyte s)   ~= (decode-coding-string s 'emacs-mule)
             ;; (string-to-multibyte s)   ~= (decode-coding-string s 'binary)
             ;; (string-make-multibyte s) ~= (decode-coding-string s locale-coding-system)
             ;; `string-as-multibyte' is an Emacs function, not available
             ;; in XEmacs.
             (string-as-multibyte . identity)
             ;; `multibyte-string-p' is an Emacs function, not available
             ;; in XEmacs.
             (multibyte-string-p . ignore)
             ;; `insert-byte' is available only in Emacs 23.1 or greater.
             (insert-byte . insert-char)
             ;; `multibyte-char-to-unibyte' is an Emacs function, not
             ;; available in XEmacs.
             (multibyte-char-to-unibyte . identity)
             ;; `set-buffer-multibyte' is an Emacs function, not available
             ;; in XEmacs.
             (set-buffer-multibyte . ignore)
             ;; `special-display-p' is an Emacs function, not available
             ;; in XEmacs.
             (special-display-p
              . ,(lambda (buffer-name)
                   "Returns non-nil if a buffer named BUFFER-NAME gets a special frame."
                   (and special-display-function
                        (or (and (member buffer-name
                                         special-display-buffer-names)
                                 t)
                            (cdr (assoc buffer-name
                                        special-display-buffer-names))
                            (catch 'return
                              (dolist (elem special-display-regexps)
                                (and (stringp elem)
                                     (string-match elem buffer-name)
                                     (throw 'return t))
                                (and (consp elem)
                                     (stringp (car elem))
                                     (string-match (car elem) buffer-name)
                                     (throw 'return (cdr elem)))))))))
             ;; `substring-no-properties' is available only in Emacs 22.1
             ;; or greater.
             (substring-no-properties
              . ,(lambda (string &optional from to)
                   "Return a substring of STRING, without text properties.
It starts at index FROM and ending before TO.
TO may be nil or omitted; then the substring runs to the end of STRING.
If FROM is nil or omitted, the substring starts at the beginning of STRING.
If FROM or TO is negative, it counts from the end.

With one argument, just copy STRING without its properties."
                   (setq string (substring string (or from 0) to))
                   (set-text-properties 0 (length string) nil string)
                   string))
             ;; `line-number-at-pos' is available only in Emacs 22.1 or
             ;; greater and XEmacs 21.5.
             (line-number-at-pos
              . ,(lambda (&optional pos)
                   "Return (narrowed) buffer line number at position POS.
If POS is nil, use current buffer location.
Counting starts at (point-min), so the value refers
to the contents of the accessible portion of the buffer."
                   (let ((opoint (or pos (point))) start)
                     (save-excursion
                       (goto-char (point-min))
                       (setq start (point))
                       (goto-char opoint)
                       (forward-line 0)
                       (1+ (count-lines start (point)))))))))
    (let ((alias (intern (format "mm-%s" (car entry))))
          (native (car entry)))
      (defalias alias (if (fboundp native) native (cdr entry))))))
f53b2875 | 176 | |
e3e955fe MB |
177 | ;; `decode-coding-string', `encode-coding-string', `decode-coding-region' |
178 | ;; and `encode-coding-region' are available in Emacs and XEmacs built with | |
179 | ;; the `file-coding' feature, but the XEmacs versions treat nil, that is | |
180 | ;; given as the `coding-system' argument, as the `binary' coding system. | |
(eval-and-compile
  (cond
   ;; Emacs: the native functions can be used directly.
   ((not (featurep 'xemacs))
    (defalias 'mm-decode-coding-string 'decode-coding-string)
    (defalias 'mm-encode-coding-string 'encode-coding-string)
    (defalias 'mm-decode-coding-region 'decode-coding-region)
    (defalias 'mm-encode-coding-region 'encode-coding-region))
   ;; XEmacs with `file-coding': wrap the native functions so that a nil
   ;; CODING-SYSTEM means "do nothing" rather than `binary'.
   ((featurep 'file-coding)
    (defun mm-decode-coding-string (str coding-system)
      "Decode STR with CODING-SYSTEM; return STR unchanged if it is nil."
      (if (null coding-system)
          str
        (decode-coding-string str coding-system)))
    (defun mm-encode-coding-string (str coding-system)
      "Encode STR with CODING-SYSTEM; return STR unchanged if it is nil."
      (if (null coding-system)
          str
        (encode-coding-string str coding-system)))
    (defun mm-decode-coding-region (start end coding-system)
      "Decode START..END with CODING-SYSTEM; do nothing if it is nil."
      (when coding-system
        (decode-coding-region start end coding-system)))
    (defun mm-encode-coding-region (start end coding-system)
      "Encode START..END with CODING-SYSTEM; do nothing if it is nil."
      (when coding-system
        (encode-coding-region start end coding-system))))
   ;; XEmacs without `file-coding': no conversion is possible at all.
   (t
    (defun mm-decode-coding-string (str coding-system)
      "Return STR unchanged; CODING-SYSTEM is ignored."
      str)
    (defun mm-encode-coding-string (str coding-system)
      "Return STR unchanged; CODING-SYSTEM is ignored."
      str)
    (defalias 'mm-decode-coding-region 'ignore)
    (defalias 'mm-encode-coding-region 'ignore))))
207 | ||
;; `string-to-multibyte' exists only in Emacs; XEmacs gets `identity'.
(defalias 'mm-string-to-multibyte
  (cond ((featurep 'xemacs) 'identity)
        (t 'string-to-multibyte)))
e8f0f70d | 212 | |
e3e955fe | 213 | ;; `char-or-char-int-p' is an XEmacs function, not available in Emacs. |
(eval-and-compile
  ;; Use the first of `char-or-char-int-p' and `char-valid-p' that is
  ;; fbound; fall back to `identity' when neither exists.
  (defalias 'mm-char-or-char-int-p
    (if (fboundp 'char-or-char-int-p)
        'char-or-char-int-p
      (if (fboundp 'char-valid-p)
          'char-valid-p
        'identity))))
220 | ||
e3e955fe | 221 | ;; `ucs-to-char' is a function that Mule-UCS provides. |
(eval-and-compile
  (cond
   ;; Emacs.
   ((not (featurep 'xemacs))
    (if (let ((probe (make-char 'japanese-jisx0208 36 34)))
          (eq probe (decode-char 'ucs probe)))
        ;; Emacs 23.
        (defalias 'mm-ucs-to-char 'identity)
      (defun mm-ucs-to-char (codepoint)
        "Convert Unicode codepoint to character."
        (or (decode-char 'ucs codepoint) ?#))))
   ;; XEmacs 21.5 with a built-in `unicode-to-char'.
   ((and (fboundp 'unicode-to-char)
         (subrp (symbol-function 'unicode-to-char)))
    (if (featurep 'mule)
        (defalias 'mm-ucs-to-char 'unicode-to-char)
      (defun mm-ucs-to-char (codepoint)
        "Convert Unicode codepoint to character."
        (or (unicode-to-char codepoint) ?#))))
   ;; Other XEmacs with Mule.
   ((featurep 'mule)
    (defun mm-ucs-to-char (codepoint)
      "Convert Unicode codepoint to character."
      (if (fboundp 'ucs-to-char) ;; Mule-UCS is loaded.
          (progn
            ;; Replace this definition with one that calls `ucs-to-char'
            ;; directly, then answer the current call through it.
            (defalias 'mm-ucs-to-char
              (lambda (codepoint)
                "Convert Unicode codepoint to character."
                (condition-case nil
                    (or (ucs-to-char codepoint) ?#)
                  (error ?#))))
            (mm-ucs-to-char codepoint))
        (condition-case nil
            (or (int-to-char codepoint) ?#)
          (error ?#)))))
   ;; XEmacs without Mule.
   (t
    (defun mm-ucs-to-char (codepoint)
      "Convert Unicode codepoint to character."
      (condition-case nil
          (or (int-to-char codepoint) ?#)
        (error ?#))))))
e3e955fe | 259 | |
23f87bed MB |
260 | ;; Fixme: This seems always to be used to read a MIME charset, so it |
261 | ;; should be re-named and fixed (in Emacs) to offer completion only on | |
262 | ;; proper charset names (base coding systems which have a | |
263 | ;; mime-charset defined). XEmacs doesn't believe in mime-charset; | |
264 | ;; test with | |
265 | ;; `(or (coding-system-get 'iso-8859-1 'mime-charset) | |
266 | ;; (coding-system-get 'iso-8859-1 :mime-charset))' | |
267 | ;; Actually, there should be an `mm-coding-system-mime-charset'. | |
(eval-and-compile
  (defalias 'mm-read-coding-system
    (cond
     ;; Emacs: use the native function as is.
     ((featurep 'emacs) 'read-coding-system)
     ;; No native function at all: complete over the charset names in
     ;; `mm-mime-mule-charset-alist'.
     ((not (fboundp 'read-coding-system))
      (lambda (prompt &optional default-coding-system)
        "Prompt the user for a coding system."
        (gnus-completing-read
         prompt (mapcar (lambda (s) (symbol-name (car s)))
                        mm-mime-mule-charset-alist))))
     ;; XEmacs up to 21.1: call the native function with PROMPT only.
     ((and (featurep 'xemacs)
           (<= (string-to-number emacs-version) 21.1))
      (lambda (prompt &optional default-coding-system)
        (read-coding-system prompt)))
     (t 'read-coding-system))))
95fa1ff7 | 283 | |
(defvar mm-coding-system-list nil
  "Cache for `mm-get-coding-system-list'; nil until first filled.")
(defun mm-get-coding-system-list ()
  "Return the list of coding systems.
The result of `mm-coding-system-list' is stored in the variable
`mm-coding-system-list' and reused while that variable is non-nil."
  (unless mm-coding-system-list
    (setq mm-coding-system-list (mm-coding-system-list)))
  mm-coding-system-list)
289 | ||
(defun mm-coding-system-p (cs)
  "Return non-nil if CS is a symbol naming a coding system.
In XEmacs, also return non-nil if CS is a coding system object.
If CS is available, return CS itself in Emacs, and return a coding
system object in XEmacs."
  (cond
   ;; XEmacs: `find-coding-system' returns the coding system object.
   ((fboundp 'find-coding-system)
    (and cs (find-coding-system cs)))
   ;; Emacs: hand back CS itself when it names a coding system.
   ((fboundp 'coding-system-p)
    (if (coding-system-p cs) cs))
   ;; no-MULE XEmacs:
   (t
    (car (memq cs (mm-get-coding-system-list))))))
95fa1ff7 | 302 | |
(defvar mm-charset-synonym-alist
  ;; Every form below yields either a (SYNONYM . REAL) pair or nil; the
  ;; nils are dropped, which keeps the pairs in the order written here.
  (delq
   nil
   (list
    ;; Not in XEmacs, but it's not a proper MIME charset anyhow.
    (and (not (mm-coding-system-p 'x-ctext))
         '(x-ctext . ctext))
    ;; ISO-8859-15 is very similar to ISO-8859-1.  But it's _different_
    ;; in 8 positions!
    (and (not (mm-coding-system-p 'iso-8859-15))
         '(iso-8859-15 . iso-8859-1))
    ;; BIG-5HKSCS is similar to, but different than, BIG-5.
    (and (not (mm-coding-system-p 'big5-hkscs))
         '(big5-hkscs . big5))
    ;; A Microsoft misunderstanding.
    (and (not (mm-coding-system-p 'unicode))
         (mm-coding-system-p 'utf-16-le)
         '(unicode . utf-16-le))
    ;; A Microsoft misunderstanding.
    (and (not (mm-coding-system-p 'ks_c_5601-1987))
         (if (mm-coding-system-p 'cp949)
             '(ks_c_5601-1987 . cp949)
           '(ks_c_5601-1987 . euc-kr)))
    ;; Windows-31J is Windows Codepage 932.
    (and (not (mm-coding-system-p 'windows-31j))
         (mm-coding-system-p 'cp932)
         '(windows-31j . cp932))
    ;; Charset name: GBK, Charset aliases: CP936, MS936, windows-936
    ;; http://www.iana.org/assignments/charset-reg/GBK
    ;; Emacs 22.1 has cp936, but not gbk, so we alias it:
    (and (not (mm-coding-system-p 'gbk))
         (mm-coding-system-p 'cp936)
         '(gbk . cp936))
    ;; UTF8 is a bogus name for UTF-8
    (and (not (mm-coding-system-p 'utf8))
         (mm-coding-system-p 'utf-8)
         '(utf8 . utf-8))
    ;; ISO8859-1 is a bogus name for ISO-8859-1
    (and (not (mm-coding-system-p 'iso8859-1))
         (mm-coding-system-p 'iso-8859-1)
         '(iso8859-1 . iso-8859-1))
    ;; ISO_8859-1 is a bogus name for ISO-8859-1
    (and (not (mm-coding-system-p 'iso_8859-1))
         (mm-coding-system-p 'iso-8859-1)
         '(iso_8859-1 . iso-8859-1))))
  "A mapping from unknown or invalid charset names to the real charset names.

See `mm-codepage-iso-8859-list' and `mm-codepage-ibm-list'.")
350 | ||
(defun mm-codepage-setup (number &optional alias)
  "Create a coding system cpNUMBER.
The coding system is created using `codepage-setup'.  If ALIAS is
non-nil, an alias is created and added to
`mm-charset-synonym-alist'.  If ALIAS is a string, it's used as
the alias.  Else windows-NUMBER is used."
  (interactive
   (let ((completion-ignore-case t)
         (candidates
          (if (fboundp 'cp-supported-codepages)
              (cp-supported-codepages)
            ;; Removed in Emacs 23 (unicode), so signal an error:
            (error "`codepage-setup' not present in this Emacs version"))))
     (list (gnus-completing-read "Setup DOS Codepage" candidates
                                 t nil nil "437"))))
  (let ((cp (intern (format "cp%s" number))))
    ;; Turn ALIAS into the symbol to register, or leave it nil.
    (setq alias (cond ((null alias) nil)
                      ((stringp alias) (intern alias))
                      (t (intern (format "windows-%s" number)))))
    (unless (mm-coding-system-p cp)
      (if (fboundp 'codepage-setup)     ; silence compiler
          (codepage-setup number)
        (error "`codepage-setup' not present in this Emacs version")))
    ;; Don't add alias if setup of cp failed.
    (when (and alias (mm-coding-system-p cp))
      (add-to-list 'mm-charset-synonym-alist (cons alias cp)))))
378 | ||
(defcustom mm-codepage-iso-8859-list
  ;; 1250: Windows-1250 is a variant of Latin-2 heavily used by Microsoft
  ;;       Outlook users in Czech republic.  Use this to allow reading of
  ;;       their e-mails.
  ;; (1252 . 1): Windows-1252 is a superset of iso-8859-1 (West Europe).
  ;;       See also `gnus-article-dumbquotes-map'.
  ;; (1254 . 9): Windows-1254 is a superset of iso-8859-9 (Turkish).
  ;; (1255 . 8): Windows-1255 is a superset of iso-8859-8 (Hebrew).
  '(1250
    (1252 . 1)
    (1254 . 9)
    (1255 . 8))
  "A list of Windows codepage numbers and iso-8859 charset numbers.

If an element is a number corresponding to a supported windows
codepage, appropriate entries to `mm-charset-synonym-alist' are
added by `mm-setup-codepage-iso-8859'.  An element may also be a
cons cell where the car is a codepage number and the cdr is the
corresponding number of an iso-8859 charset."
  :type '(list (set :inline t
                    (const 1250 :tag "Central and East European")
                    (const (1252 . 1) :tag "West European")
                    (const (1254 . 9) :tag "Turkish")
                    (const (1255 . 8) :tag "Hebrew"))
               (repeat :inline t
                       :tag "Other options"
                       (choice
                        (integer :tag "Windows codepage number")
                        (cons (integer :tag "Windows codepage number")
                              (integer :tag "iso-8859 charset number")))))
  :version "22.1" ;; Gnus 5.10.9
  :group 'mime)
407 | ||
(defcustom mm-codepage-ibm-list
  (list 437 ;; (US etc.)
        860 ;; (Portugal)
        861 ;; (Iceland)
        862 ;; (Israel)
        863 ;; (Canadian French)
        865 ;; (Nordic)
        852 ;;
        850 ;; (Latin 1)
        855 ;; (Cyrillic)
        866 ;; (Cyrillic - Russian)
        857 ;; (Turkish)
        864 ;; (Arabic)
        869 ;; (Greek)
        874);; (Thai)
  ;; In Emacs 23 (unicode), cp... and ibm... are aliases.
  ;; Cf. http://thread.gmane.org/v9lkng5nwy.fsf@marauder.physik.uni-ulm.de
  "List of IBM codepage numbers.

The codepage mappings slightly differ between IBM and other vendors.
See \"ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/IBM/README.TXT\".

If an element is a number corresponding to a supported
codepage, appropriate entries to `mm-charset-synonym-alist' are
added by `mm-setup-codepage-ibm'."
  :type '(list (set :inline t
                    (const 437 :tag "US etc.")
                    (const 860 :tag "Portugal")
                    (const 861 :tag "Iceland")
                    (const 862 :tag "Israel")
                    (const 863 :tag "Canadian French")
                    (const 865 :tag "Nordic")
                    (const 852)
                    (const 850 :tag "Latin 1")
                    (const 855 :tag "Cyrillic")
                    (const 866 :tag "Cyrillic - Russian")
                    (const 857 :tag "Turkish")
                    (const 864 :tag "Arabic")
                    (const 869 :tag "Greek")
                    (const 874 :tag "Thai"))
               (repeat :inline t
                       :tag "Other options"
                       (integer :tag "Codepage number")))
  :version "22.1" ;; Gnus 5.10.9
  :group 'mime)
453 | ||
(defun mm-setup-codepage-iso-8859 (&optional list)
  "Add appropriate entries to `mm-charset-synonym-alist'.
Unless LIST is given, `mm-codepage-iso-8859-list' is used.

Each element of LIST is either a codepage NUMBER or a cons
\(NUMBER . ISO).  When `windows-NUMBER' is not already a coding
system, it is mapped to `cpNUMBER' if that coding system exists,
and otherwise to `iso-8859-ISO' if ISO was given.  An element
with neither a usable `cpNUMBER' nor an ISO number adds nothing."
  (dolist (i (or list mm-codepage-iso-8859-list))
    (let* ((codepage (if (consp i) (car i) i))
           (cp (intern (format "cp%d" codepage)))
           (windows (intern (format "windows-%d" codepage)))
           ;; Only cons elements carry an iso-8859 fallback.
           (iso (and (consp i)
                     (intern (format "iso-8859-%d" (cdr i))))))
      (unless (mm-coding-system-p windows)
        (cond
         ((mm-coding-system-p cp)
          (add-to-list 'mm-charset-synonym-alist (cons windows cp)))
         ;; With no fallback there is nothing to map to, so never
         ;; register a (windows-N . nil) pair.
         (iso
          (add-to-list 'mm-charset-synonym-alist (cons windows iso))))))))
471 | ||
(defun mm-setup-codepage-ibm (&optional list)
  "Add appropriate entries to `mm-charset-synonym-alist'.
Unless LIST is given, `mm-codepage-ibm-list' is used."
  (dolist (number (or list mm-codepage-ibm-list))
    (let ((ibm (intern (format "ibm%d" number)))
          (cp (intern (format "cp%d" number))))
      ;; Map ibmN to cpN only when ibmN is unknown and cpN exists.
      (unless (or (mm-coding-system-p ibm)
                  (not (mm-coding-system-p cp)))
        (add-to-list 'mm-charset-synonym-alist (cons ibm cp))))))
483 | ||
484 | ;; Initialize: | |
485 | (mm-setup-codepage-iso-8859) | |
486 | (mm-setup-codepage-ibm) | |
bd29ba20 | 487 | |
;; Note: this has to be defined before `mm-charset-to-coding-system'.
(defcustom mm-charset-eval-alist
  (if (featurep 'xemacs)
      nil ;; I don't know what would be useful for XEmacs.
    '(;; Emacs 22 provides autoloads for 1250-1258
      ;; (i.e. `mm-codepage-setup' does nothing).
      (windows-1250 . (mm-codepage-setup 1250 t))
      (windows-1251 . (mm-codepage-setup 1251 t))
      (windows-1253 . (mm-codepage-setup 1253 t))
      (windows-1257 . (mm-codepage-setup 1257 t))))
  "An alist of (CHARSET . FORM) pairs.
If an article is encoded in an unknown CHARSET, FORM is
evaluated.  This allows loading additional libraries providing
charsets on demand.  If supported by your Emacs version, you
could use `autoload-coding-system' here."
  :version "22.1" ;; Gnus 5.10.9
  :type '(list (set :inline t
                    (const (windows-1250 . (mm-codepage-setup 1250 t)))
                    (const (windows-1251 . (mm-codepage-setup 1251 t)))
                    (const (windows-1253 . (mm-codepage-setup 1253 t)))
                    (const (windows-1257 . (mm-codepage-setup 1257 t)))
                    (const (cp850 . (mm-codepage-setup 850 nil))))
               (repeat :inline t
                       :tag "Other options"
                       ;; FORM is an arbitrary Lisp form (normally a
                       ;; list), so it must be edited as a sexp.
                       (cons (symbol :tag "charset")
                             (sexp :tag "form"))))
  :group 'mime)
;; FORM is passed to `eval', so the variable must not be settable from
;; file-local variables without confirmation.
(put 'mm-charset-eval-alist 'risky-local-variable t)
516 | ||
58d8c5cd GM |
517 | (defvar mm-charset-override-alist) |
518 | ||
;; Note: this function has to be defined before `mm-charset-override-alist'
;; since it will use this function in order to determine its default value
;; when loading mm-util.elc.
(defun mm-charset-to-coding-system (charset &optional lbt
                                            allow-override silent)
  "Return coding-system corresponding to CHARSET.
CHARSET is a symbol naming a MIME charset; a string is downcased
and interned first.  If CHARSET is nil, return nil.
If optional argument LBT (`unix', `dos' or `mac') is specified, it is
used as the line break code type of the coding system.

If ALLOW-OVERRIDE is given, use `mm-charset-override-alist' to
map undesired charset names to their replacement.  This should
only be used for decoding, not for encoding.

A non-nil value of SILENT means don't issue a warning even if CHARSET
is not available."
  ;; OVERRIDE is used (only) in `mm-decode-body' and `mm-decode-string'.
  (when (stringp charset)
    (setq charset (intern (downcase charset))))
  ;; Append LBT only to a real charset.  A nil CHARSET must stay nil so
  ;; that the first `cond' clause returns nil instead of a lookup of the
  ;; bogus symbol `nil-LBT' being attempted.
  (when (and lbt charset)
    (setq charset (intern (format "%s-%s" charset lbt))))
  (cond
   ((null charset)
    charset)
   ;; Running in a non-MULE environment.
   ((or (null (mm-get-coding-system-list))
        (not (fboundp 'coding-system-get)))
    charset)
   ;; Check override list quite early.  Should only be used for decoding,
   ;; not for encoding!
   ((and allow-override
         (let ((cs (cdr (assq charset mm-charset-override-alist))))
           (and cs (mm-coding-system-p cs) cs))))
   ;; ascii
   ((or (eq charset 'us-ascii)
        (string-match "ansi.x3.4" (symbol-name charset)))
    'ascii)
   ;; Check to see whether we can handle this charset.  (This depends
   ;; on there being some coding system matching each `mime-charset'
   ;; property defined, as there should be.)
   ((and (mm-coding-system-p charset)
;;; Doing this would potentially weed out incorrect charsets.
;;; 	 charset
;;; 	 (eq charset (coding-system-get charset 'mime-charset))
         )
    charset)
   ;; Use coding system Emacs knows.
   ((and (fboundp 'coding-system-from-name)
         (coding-system-from-name charset)))
   ;; Eval expressions from `mm-charset-eval-alist'
   ((let* ((el (assq charset mm-charset-eval-alist))
           (cs (car el))
           (form (cdr el)))
      (and cs
           form
           (prog2
               ;; Avoid errors...
               (condition-case nil (eval form) (error nil))
               ;; (message "Failed to eval `%s'" form))
               (mm-coding-system-p cs)
             (message "Added charset `%s' via `mm-charset-eval-alist'" cs))
           cs)))
   ;; Translate invalid charsets.
   ((let ((cs (cdr (assq charset mm-charset-synonym-alist))))
      (and cs
           (mm-coding-system-p cs)
           ;; (message
           ;;  "Using synonym `%s' from `mm-charset-synonym-alist' for `%s'"
           ;;  cs charset)
           cs)))
   ;; Last resort: search the coding system list for entries which
   ;; have the right mime-charset in case the canonical name isn't
   ;; defined (though it should be).
   ((let (cs)
      ;; mm-get-coding-system-list returns a list of cs without lbt.
      ;; Do we need -lbt?
      (dolist (c (mm-get-coding-system-list))
        (if (and (null cs)
                 (eq charset (or (coding-system-get c :mime-charset)
                                 (coding-system-get c 'mime-charset))))
            (setq cs c)))
      (unless (or silent cs)
        ;; Warn the user about unknown charset:
        (if (fboundp 'gnus-message)
            (gnus-message 7 "Unknown charset: %s" charset)
          (message "Unknown charset: %s" charset)))
      cs))))
606 | ||
607 | ;; Note: `mm-charset-to-coding-system' has to be defined before this. | |
(defcustom mm-charset-override-alist
  ;; Note: pairs that cannot be used in the Emacs version currently running
  ;; will be removed.
  '((gb2312 . gbk)
    (iso-8859-1 . windows-1252)
    (iso-8859-8 . windows-1255)
    (iso-8859-9 . windows-1254))
  "A mapping from undesired charset names to their replacement.

You may add pairs like (iso-8859-1 . windows-1252) here,
i.e. treat iso-8859-1 as windows-1252.  windows-1252 is a
superset of iso-8859-1."
  :type
  '(list
    :convert-widget
    (lambda (widget)
      ;; DEFAULTS holds the well-known pairs whose replacement charset is
      ;; usable here.  ITEMS collects one widget spec per pair, first for
      ;; the pairs in the variable's default value, then for the
      ;; remaining DEFAULTS.
      (let ((defaults
              (delq nil
                    (mapcar (lambda (pair)
                              (and (mm-charset-to-coding-system (cdr pair)
                                                                nil nil t)
                                   pair))
                            '((gb2312 . gbk)
                              (iso-8859-1 . windows-1252)
                              (iso-8859-8 . windows-1255)
                              (iso-8859-9 . windows-1254)
                              (undecided . windows-1252)))))
            (cur-alist (copy-sequence
                        (default-value 'mm-charset-override-alist)))
            match items)
        (dolist (entry cur-alist)
          (setq match (assq (car entry) defaults)
                defaults (delq match defaults))
          (push (if (and match (equal entry match))
                    `(const ,match)
                  `(cons :format "%v"
                         (const :format "(%v" ,(car entry))
                         (symbol :size 3 :format " . %v)\n" ,(cdr entry))))
                items))
        (while defaults
          (push `(const ,(pop defaults)) items))
        (widget-convert
         'list
         `(set :inline t :format "%v" ,@(nreverse items))
         `(repeat :inline t :tag "Other options"
                  (cons :format "%v"
                        (symbol :size 3 :format "(%v")
                        (symbol :size 3 :format " . %v)\n")))))))
  ;; Remove pairs that cannot be used in the Emacs version currently
  ;; running.  Note that this section will be evaluated when loading
  ;; mm-util.elc.
  :set (lambda (symbol value)
         (custom-set-default
          symbol
          (delq nil
                (mapcar (lambda (pair)
                          (and (mm-charset-to-coding-system (cdr pair)
                                                            nil nil t)
                               pair))
                        value))))
  :version "22.1" ;; Gnus 5.10.9
  :group 'mime)
670 | ||
(defvar mm-binary-coding-system
  ;; Prefer `binary', then `no-conversion'; nil when neither exists.
  (or (and (mm-coding-system-p 'binary) 'binary)
      (and (mm-coding-system-p 'no-conversion) 'no-conversion))
  "100% binary coding system.")
677 | ||
(defvar mm-text-coding-system
  ;; Pick the raw-text variant matching the platform's line endings,
  ;; and use it only if this Emacs actually provides it.
  (let ((candidate (if (memq system-type '(windows-nt ms-dos))
                       'raw-text-dos
                     'raw-text)))
    (if (mm-coding-system-p candidate)
        candidate
      mm-binary-coding-system))
  "Text-safe coding system (for removing ^M).
This is `raw-text-dos' on `windows-nt' and `ms-dos' systems and
`raw-text' elsewhere; when the chosen one is unavailable the value of
`mm-binary-coding-system' is used instead.")
684 | ||
(defvar mm-text-coding-system-for-write nil
  "Coding system to use when writing text, or nil for no preference.
When nil, writers fall back to `mm-text-coding-system'.")
687 | ||
(defvar mm-auto-save-coding-system
  (let* ((dos-p (memq system-type '(windows-nt ms-dos)))
         ;; (UNIX-VARIANT . DOS-VARIANT) of the first available internal
         ;; representation: utf-8-emacs (Mule 7) before emacs-mule.
         (internal
          (cond ((mm-coding-system-p 'utf-8-emacs)
                 '(utf-8-emacs . utf-8-emacs-dos))
                ((mm-coding-system-p 'emacs-mule)
                 '(emacs-mule . emacs-mule-dos)))))
    (cond
     ((null internal)
      (if (mm-coding-system-p 'escape-quoted)
          'escape-quoted
        mm-binary-coding-system))
     ((not dos-p)
      (car internal))
     ;; On DOS-like systems use the -dos variant, or binary if it is missing.
     ((mm-coding-system-p (cdr internal))
      (cdr internal))
     (t
      mm-binary-coding-system)))
  "Coding system used for auto-save files.
Chosen from `utf-8-emacs', `emacs-mule' and `escape-quoted' (in that
order of preference), using the `-dos' variant on `windows-nt' and
`ms-dos'; `mm-binary-coding-system' is the last resort.")
703 | ||
(defvar mm-universal-coding-system mm-auto-save-coding-system
  "The universal coding system.
Initialized from the value of `mm-auto-save-coding-system'.")
95fa1ff7 SZ |
706 | |
707 | ;; Fixme: some of the cars here aren't valid MIME charsets. That | |
708 | ;; should only matter with XEmacs, though. | |
(defvar mm-mime-mule-charset-alist
  `((us-ascii ascii)
    (iso-8859-1 latin-iso8859-1)
    (iso-8859-2 latin-iso8859-2)
    (iso-8859-3 latin-iso8859-3)
    (iso-8859-4 latin-iso8859-4)
    (iso-8859-5 cyrillic-iso8859-5)
    ;; Non-mule (X)Emacs uses the last mule-charset for 8bit characters.
    ;; The fake mule-charset, gnus-koi8-r, tells Gnus that the default
    ;; charset is koi8-r, not iso-8859-5.
    (koi8-r cyrillic-iso8859-5 gnus-koi8-r)
    (iso-8859-6 arabic-iso8859-6)
    (iso-8859-7 greek-iso8859-7)
    (iso-8859-8 hebrew-iso8859-8)
    (iso-8859-9 latin-iso8859-9)
    (iso-8859-14 latin-iso8859-14)
    (iso-8859-15 latin-iso8859-15)
    (viscii vietnamese-viscii-lower)
    (iso-2022-jp latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978)
    (euc-kr korean-ksc5601)
    (gb2312 chinese-gb2312)
    (gbk chinese-gbk)
    (gb18030 gb18030-2-byte
             gb18030-4-byte-bmp gb18030-4-byte-smp
             gb18030-4-byte-ext-1 gb18030-4-byte-ext-2)
    (big5 chinese-big5-1 chinese-big5-2)
    (tibetan tibetan)
    (thai-tis620 thai-tis620)
    (windows-1251 cyrillic-iso8859-5)
    (iso-2022-7bit ethiopic arabic-1-column arabic-2-column)
    (iso-2022-jp-2 latin-iso8859-1 greek-iso8859-7
                   latin-jisx0201 japanese-jisx0208-1978
                   chinese-gb2312 japanese-jisx0208
                   korean-ksc5601 japanese-jisx0212)
    (iso-2022-int-1 latin-iso8859-1 greek-iso8859-7
                    latin-jisx0201 japanese-jisx0208-1978
                    chinese-gb2312 japanese-jisx0208
                    korean-ksc5601 japanese-jisx0212
                    chinese-cns11643-1 chinese-cns11643-2)
    ;; NOTE(review): this entry reuses the key `iso-2022-int-1', so an
    ;; `assq' lookup by name only ever finds the entry above.  Kept as is.
    (iso-2022-int-1 latin-iso8859-1 latin-iso8859-2
                    cyrillic-iso8859-5 greek-iso8859-7
                    latin-jisx0201 japanese-jisx0208-1978
                    chinese-gb2312 japanese-jisx0208
                    korean-ksc5601 japanese-jisx0212
                    chinese-cns11643-1 chinese-cns11643-2
                    chinese-cns11643-3 chinese-cns11643-4
                    chinese-cns11643-5 chinese-cns11643-6
                    chinese-cns11643-7)
    (iso-2022-jp-3 latin-jisx0201 japanese-jisx0208-1978 japanese-jisx0208
                   japanese-jisx0213-1 japanese-jisx0213-2)
    (shift_jis latin-jisx0201 katakana-jisx0201 japanese-jisx0208)
    ;; The utf-8 entry is computed: its charset list depends on which
    ;; Unicode machinery this Emacs provides.
    ,(cons 'utf-8
           (cond
            ((fboundp 'unicode-precedence-list)
             (delq 'ascii (mapcar 'charset-name
                                  (unicode-precedence-list))))
            ((or (not (fboundp 'charsetp)) ;; non-Mule case
                 (charsetp 'unicode-a)
                 (not (mm-coding-system-p 'mule-utf-8)))
             '(unicode-a unicode-b unicode-c unicode-d unicode-e))
            (t ;; If we have utf-8 we're in Mule 5+.
             (delete 'ascii
                     (coding-system-get 'mule-utf-8 'safe-charsets))))))
  "Alist mapping MIME charsets to Mule charsets.
Each element has the form (MIME-CHARSET MULE-CHARSET...).")
772 | ||
(defun mm-enrich-utf-8-by-mule-ucs ()
  "Make the `utf-8' MIME charset usable by the Mule-UCS package.
This function will run when the `un-define' module is loaded under
XEmacs, and fill the `utf-8' entry in `mm-mime-mule-charset-alist'
with Mule charsets.  It is completely useless for Emacs.

Nothing happens unless `unicode-basic-translation-charset-order-list'
is bound.  Any error raised while updating the alist is ignored."
  (if (boundp 'unicode-basic-translation-charset-order-list)
      (condition-case nil
          (let* ((order (symbol-value
                         'unicode-basic-translation-charset-order-list))
                 ;; Work on a copy: `delq' is destructive.
                 (charsets (delq 'ascii (copy-sequence order)))
                 (entry (assq 'utf-8 mm-mime-mule-charset-alist)))
            (cond
             (entry
              ;; Replace the charsets of the existing utf-8 entry in place.
              (setcdr entry charsets))
             (t
              ;; No utf-8 entry yet: append a fresh one.
              (setq mm-mime-mule-charset-alist
                    (nconc mm-mime-mule-charset-alist
                           (list (cons 'utf-8 charsets)))))))
        (error nil))))
792 | ||
793 | ;; Correct by construction, but should be unnecessary for Emacs: | |
(cond
 ((featurep 'xemacs)
  ;; XEmacs: enrich the utf-8 entry once Mule-UCS (`un-define') is loaded.
  (eval-after-load "un-define" '(mm-enrich-utf-8-by-mule-ucs)))
 ((and (fboundp 'coding-system-list)
       (fboundp 'sort-coding-systems))
  ;; Emacs: rebuild the alist from the base coding systems, keeping the
  ;; first coding system seen for each MIME charset.
  (let (entries)
    (dolist (coding (sort-coding-systems (coding-system-list 'base-only)))
      (let ((mime-name (or (coding-system-get coding :mime-charset) ; Emacs 23 (unicode)
                           (coding-system-get coding 'mime-charset))))
        (when mime-name
          (let ((safe (coding-system-get coding 'safe-charsets)))
            ;; Skip coding systems whose safe charsets are `t', and MIME
            ;; charsets that already have an entry.
            (unless (or (eq safe t)
                        (assq mime-name entries))
              (push (cons mime-name (delq 'ascii safe)) entries))))))
    (setq mm-mime-mule-charset-alist (nreverse entries)))))
95fa1ff7 | 810 | |
(defvar mm-hack-charsets '(iso-8859-15 iso-2022-jp-2)
  "List of charsets that receive special treatment.
Valid elements include:
`iso-8859-15'    convert ISO-8859-1, -9 to ISO-8859-15 if ISO-8859-15 exists.
`iso-2022-jp-2'  convert ISO-2022-jp to ISO-2022-jp-2 if ISO-2022-jp-2 exists.")
817 | ||
(defvar mm-iso-8859-15-compatible
  (list
   (list 'iso-8859-1 "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE")
   (list 'iso-8859-9 "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE\xD0\xDD\xDE\xF0\xFD\xFE"))
  "Coding systems exchangeable with ISO-8859-15.
Each element is (CODING-SYSTEM INCONVERTIBLE), where INCONVERTIBLE is
a string of the characters that cannot be converted.")
822 | ||
(defvar mm-iso-8859-x-to-15-table
  (when (and (fboundp 'coding-system-p)
             (mm-coding-system-p 'iso-8859-15))
    (mapcar
     (lambda (entry)
       (let ((coding (car entry)))
         (if (not (mm-coding-system-p coding))
             ;; Placeholder for a coding system this Emacs lacks.
             '(gnus-charset 0)
           ;; Decode the same byte (\341) in both coding systems; the
           ;; difference of the resulting characters is the offset that
           ;; `mm-iso-8859-x-to-15-region' adds to convert a character.
           (let ((source-char (string-to-char
                               (decode-coding-string "\341" coding)))
                 (target-char (string-to-char
                               (decode-coding-string "\341" 'iso-8859-15))))
             (cons (char-charset source-char)
                   (cons (- target-char source-char)
                         (string-to-list
                          (decode-coding-string (car (cdr entry))
                                                coding))))))))
     mm-iso-8859-15-compatible))
  "Table of character differences between ISO-8859-X and ISO-8859-15.
Each element is (CHARSET OFFSET INCONVERTIBLE-CHAR...), built from
`mm-iso-8859-15-compatible'.  The value is nil when `iso-8859-15' is
not an available coding system.")
840 | ||
(defcustom mm-coding-system-priorities
  ;; `current-language-environment' is unbound in XEmacs without Mule
  ;; but with `file-coding'; the default is then nil.
  (let ((environment (and (boundp 'current-language-environment)
                          (symbol-value 'current-language-environment))))
    ;; In XEmacs 21.5 the name may be something like "Japanese (UTF-8)",
    ;; hence the prefix match.
    (when (and environment
               (string-match "\\`Japanese" environment))
      ;; Japanese users prefer iso-2022-jp to euc-japan or
      ;; shift_jis, however iso-8859-1 should be used when
      ;; there are only ASCII text and Latin-1 characters.
      '(iso-8859-1 iso-2022-jp iso-2022-jp-2 shift_jis utf-8)))
  "Preferred coding systems for encoding outgoing messages.

Several coding systems may be able to encode a given text.  Unless
this variable is set, the coding system with the highest priority
according to `sort-coding-systems' is used for outgoing messages;
a non-nil value here overrides that default priority."
  :version "21.2"
  :type '(repeat (symbol :tag "Coding system"))
  :group 'mime)
861 | ||
862 | ;; ?? | |
(defvar mm-use-find-coding-systems-region
  (fboundp 'find-coding-systems-region)
  "Non-nil means use `find-coding-systems-region' to pick coding systems.
The default is non-nil exactly when that function is defined.

Setting it to nil is useful on Emacsen supporting Unicode if sending
mail with multiple parts is preferred to sending a Unicode one.")
1f7d2e14 | 869 | |
(defvar mm-extra-numeric-entities
  (let (table)
    ;; Each pair is (ENTITY-NUMBER . UCS-CODE-POINT); convert the code
    ;; point to a character and keep the original order.
    (dolist (pair
             '((#x80 . #x20AC) (#x82 . #x201A) (#x83 . #x0192) (#x84 . #x201E)
               (#x85 . #x2026) (#x86 . #x2020) (#x87 . #x2021) (#x88 . #x02C6)
               (#x89 . #x2030) (#x8A . #x0160) (#x8B . #x2039) (#x8C . #x0152)
               (#x8E . #x017D) (#x91 . #x2018) (#x92 . #x2019) (#x93 . #x201C)
               (#x94 . #x201D) (#x95 . #x2022) (#x96 . #x2013) (#x97 . #x2014)
               (#x98 . #x02DC) (#x99 . #x2122) (#x9A . #x0161) (#x9B . #x203A)
               (#x9C . #x0153) (#x9E . #x017E) (#x9F . #x0178))
             (nreverse table))
      (push (cons (car pair) (mm-ucs-to-char (cdr pair))) table)))
  "*Alist of extra numeric entities and characters other than ISO 10646.
Each element is (NUMBER . CHARACTER).  The table is used for decoding
extra numeric entities to characters, e.g. \"&#128;\" to the euro sign,
mainly in html messages.")
884 | ||
c113de23 GM |
885 | ;;; Internal variables: |
886 | ||
887 | ;;; Functions: | |
888 | ||
(defun mm-mule-charset-to-mime-charset (charset)
  "Return the MIME charset corresponding to the given Mule CHARSET.
When `find-coding-systems-for-charsets' and `sort-coding-systems' are
available, the answer is the MIME charset of the first suitable coding
system, ordered by `mm-sort-coding-systems-predicate'.  Otherwise
`mm-mime-mule-charset-alist' is searched in that same order.
Return nil when nothing matches."
  (cond
   ((and (fboundp 'find-coding-systems-for-charsets)
         (fboundp 'sort-coding-systems))
    (catch 'found
      (dolist (coding (sort (sort-coding-systems
                             (find-coding-systems-for-charsets
                              (list charset)))
                            'mm-sort-coding-systems-predicate))
        (let ((mime-name
               (and coding
                    (or (coding-system-get coding :mime-charset)
                        (coding-system-get coding 'mime-charset)))))
          (if mime-name
              (throw 'found mime-name))))
      nil))
   (t
    (catch 'found
      (dolist (name (sort (mapcar 'car mm-mime-mule-charset-alist)
                          'mm-sort-coding-systems-predicate))
        (let ((entry (assq name mm-mime-mule-charset-alist)))
          (if (memq charset (cdr entry))
              (throw 'found (car entry)))))
      nil))))
c113de23 | 914 | |
(eval-and-compile
  (cond
   ((featurep 'xemacs)
    ;; XEmacs buffers have no multibyte flag: both operations do nothing.
    (defalias 'mm-enable-multibyte 'ignore)
    (defalias 'mm-disable-multibyte 'ignore))
   (t
    (defun mm-enable-multibyte ()
      "Set the multibyte flag of the current buffer.
This calls `set-buffer-multibyte' with the argument `to'.
It is a no-op in XEmacs."
      (set-buffer-multibyte 'to))

    (defun mm-disable-multibyte ()
      "Unset the multibyte flag of the current buffer.
This calls `set-buffer-multibyte' with nil.
It is a no-op in XEmacs."
      (set-buffer-multibyte nil)))))
052802c1 | 930 | |
(defun mm-preferred-coding-system (charset)
  "Return the preferred coding system recorded for Mule CHARSET, or nil.
The property is looked up under `preferred-coding-system' first and
then under the misspelled `prefered-coding-system', which some Emacs
versions use."
  (let ((coding (get-charset-property charset 'preferred-coding-system)))
    (if coding
        coding
      ;; A typo in some Emacs versions.
      (get-charset-property charset 'prefered-coding-system))))
c113de23 | 935 | |
23f87bed MB |
936 | ;; Mule charsets shouldn't be used. |
(defsubst mm-guess-charset ()
  "Guess Mule charset from the language environment.
The result is cached in `mail-parse-mule-charset'; a non-nil cached
value is returned as is.  Otherwise the last charset listed for the
current language environment is used, unless that is missing or
`ascii', in which case the last Mule charset recorded for
`mail-parse-charset' in `mm-mime-mule-charset-alist' is used, with
`latin-iso8859-1' as the final default."
  (unless mail-parse-mule-charset
    ;; First guess: the language environment's charset list.
    (setq mail-parse-mule-charset
          (and (boundp 'current-language-environment)
               (car (last
                     (assq 'charset
                           (assoc current-language-environment
                                  language-info-alist))))))
    ;; Second guess when the first one is useless.
    (when (memq mail-parse-mule-charset '(nil ascii))
      (setq mail-parse-mule-charset
            (or (car (last (assq mail-parse-charset
                                 mm-mime-mule-charset-alist)))
                ;; default
                'latin-iso8859-1))))
  mail-parse-mule-charset)
956 | ||
(defun mm-charset-after (&optional pos)
  "Return charset of a character in current buffer at position POS.
If POS is nil, it defaults to the current point.
If POS is out of range, the value is nil.
If the charset is `composition', return the actual one."
  (let ((char (char-after pos)))
    (cond
     ;; `char-after' returns nil when POS is outside the accessible
     ;; region; honor the documented contract instead of comparing nil
     ;; with a number, which would signal an error.
     ((null char)
      nil)
     ((< (mm-char-int char) 128)
      'ascii)
     (t
      ;; charset-after is fake in some Emacsen.
      (let ((charset (and (fboundp 'char-charset) (char-charset char))))
        (cond
         ((eq charset 'composition)     ; Mule 4
          ;; Report the second charset found at that position.
          (let ((p (or pos (point))))
            (cadr (find-charset-region p (1+ p)))))
         ((and charset
               (not (memq charset '(ascii eight-bit-control
                                          eight-bit-graphic))))
          charset)
         (t
          ;; No usable charset: guess from the environment.
          (mm-guess-charset))))))))
c113de23 GM |
974 | |
(defun mm-mime-charset (charset)
  "Return the MIME charset corresponding to the given Mule CHARSET.
Signal an error when CHARSET is the symbol `unknown'."
  (when (eq charset 'unknown)
    (error "The message contains non-printable characters, please use attachment"))
  (if (not (and (fboundp 'coding-system-get)
                (fboundp 'get-charset-property)))
      ;; This is for XEmacs.
      (mm-mule-charset-to-mime-charset charset)
    (let ((preferred (mm-preferred-coding-system charset)))
      (or
       ;; MIME name of the charset's preferred coding system, if any.
       (and preferred
            (or (coding-system-get preferred :mime-charset)
                (coding-system-get preferred 'mime-charset)))
       (and (eq charset 'ascii)
            'us-ascii)
       ;; The preferred coding system itself, when it has no MIME name.
       preferred
       (mm-mule-charset-to-mime-charset charset)))))
992 | ||
ed797193 | 993 | ;; `delete-dups' is not available in XEmacs 21.4. |
(cond
 ((fboundp 'delete-dups)
  (defalias 'mm-delete-duplicates 'delete-dups))
 (t
  (defun mm-delete-duplicates (list)
    "Destructively remove `equal' duplicates from LIST.
Store the result in LIST and return it.  LIST must be a proper list.
When an element occurs several times, only its first occurrence is
kept.

This is a compatibility function for Emacsen without `delete-dups'."
    ;; Code from `subr.el' in Emacs 22:
    ;; for each cell, purge later copies of its element from the rest.
    (let ((cell list))
      (while cell
        (setcdr cell (delete (car cell) (cdr cell)))
        (setq cell (cdr cell))))
    list)))
c113de23 | 1009 | |
23f87bed MB |
1010 | ;; Fixme: This is used in places when it should be testing the |
1011 | ;; default multibyteness. See mm-default-multibyte-p. | |
(eval-and-compile
  (if (or (featurep 'xemacs)
          (not (boundp 'enable-multibyte-characters)))
      ;; No per-buffer flag available: multibyte means "Mule is present".
      (defun mm-multibyte-p () (featurep 'mule))
    (defun mm-multibyte-p ()
      "Non-nil if multibyte is enabled in the current buffer."
      enable-multibyte-characters)))
1019 | ||
(defun mm-default-multibyte-p ()
  "Return non-nil if the session is multibyte.
This affects whether coding conversion should be attempted generally.
The value is nil without the `mule' feature; with it, the value is the
default value of `enable-multibyte-characters', or t when that
variable is not bound."
  (and (featurep 'mule)
       (or (not (boundp 'enable-multibyte-characters))
           (default-value 'enable-multibyte-characters))))
c113de23 | 1027 | |
(defun mm-iso-8859-x-to-15-region (&optional b e)
  "Convert ISO-8859-X characters between B and E to ISO-8859-15.
When E is nil the whole accessible buffer is processed.  Characters
whose charset is listed in `mm-iso-8859-x-to-15-table' are replaced by
their shifted counterpart, except those the table marks as
inconvertible.  Return t if no inconvertible character was met, nil
otherwise (also nil when `char-charset' is unavailable).  Point is
moved."
  (when (fboundp 'char-charset)
    (let ((all-convertible t))
      (save-restriction
        (when e
          (narrow-to-region b e))
        (goto-char (point-min))
        ;; ASCII never needs conversion; hop over it.
        (skip-chars-forward "\0-\177")
        (while (not (eobp))
          (let* ((ch (char-after))
                 (entry (assq (char-charset ch) mm-iso-8859-x-to-15-table)))
            (cond
             ((null entry)
              ;; Charset not covered by the table: leave the character.
              (forward-char))
             ((memq ch (nthcdr 2 entry))
              (setq all-convertible nil)
              (forward-char))
             (t
              ;; Compute the replacement, delete the old character, then
              ;; insert the replacement in its place.
              (insert-before-markers (prog1 (+ ch (nth 1 entry))
                                       (delete-char 1))))))
          (skip-chars-forward "\0-\177")))
      all-convertible)))
1048 | ||
(defun mm-sort-coding-systems-predicate (a b)
  "Return non-nil if coding system A should sort before B.
The order follows `mm-coding-system-priorities': a coding system that
appears earlier in that list wins.  An invalid A never sorts first; a
valid A always sorts before an invalid B."
  (let* ((ranked
          ;; Base coding systems in priority order.
          ;; Note: invalid entries are dropped silently
          (mapcar (lambda (entry)
                    (let ((valid (mm-coding-system-p entry)))
                      (and valid (coding-system-base valid))))
                  mm-coding-system-priorities))
         (valid-a (mm-coding-system-p a)))
    (when valid-a
      (let ((valid-b (mm-coding-system-p b)))
        (or (not valid-b)
            ;; A longer `memq' tail means an earlier position in RANKED.
            (> (length (memq (coding-system-base valid-a) ranked))
               (length (memq (coding-system-base valid-b) ranked))))))))
47b63dfa | 1061 | |
aa0a8561 MB |
1062 | (eval-when-compile |
1063 | (autoload 'latin-unity-massage-name "latin-unity") | |
1064 | (autoload 'latin-unity-maybe-remap "latin-unity") | |
1065 | (autoload 'latin-unity-representations-feasible-region "latin-unity") | |
9efa445f DN |
1066 | (autoload 'latin-unity-representations-present-region "latin-unity")) |
1067 | ||
1068 | (defvar latin-unity-coding-systems) | |
1069 | (defvar latin-unity-ucs-list) | |
aa0a8561 MB |
1070 | |
(defun mm-xemacs-find-mime-charset-1 (begin end)
  "Determine which MIME charset to use to send region as message.
This uses the XEmacs-specific latin-unity package to better handle the
case where identical characters from diverse ISO-8859-? character sets
can be encoded using a single one of the corresponding coding systems.

It treats `mm-coding-system-priorities' as the list of preferred
coding systems; a useful example setting for this list in Western
Europe would be '(iso-8859-1 iso-8859-15 utf-8), which would default
to the very standard Latin 1 coding system, and only move to coding
systems that are less supported as is necessary to encode the
characters that exist in the buffer.

Latin Unity doesn't know about those non-ASCII Roman characters that
are available in various East Asian character sets.  As such, its
behavior if you have a JIS 0212 LATIN SMALL LETTER A WITH ACUTE in a
buffer and it can otherwise be encoded as Latin 1, won't be ideal.
But this is very much a corner case, so don't worry about it.

The value is a one-element list holding the chosen coding system, or
nil when latin-unity is unavailable or cannot decide."
  (let ((candidates mm-coding-system-priorities))
    ;; Load the Latin Unity library, if available.
    (unless (or (featurep 'latin-unity)
                (not (locate-library "latin-unity")))
      (require 'latin-unity))

    ;; Without latin-unity the caller takes its default action, which
    ;; equally applies to GNU Emacs.
    (when (featurep 'latin-unity)
      (let ((feasible (latin-unity-representations-feasible-region begin end))
            (present (latin-unity-representations-present-region begin end)))
        (catch 'done
          ;; Pass back the first coding system in the preferred list
          ;; that can encode the whole region.
          (dolist (candidate candidates)
            (let ((name (latin-unity-massage-name 'buffer-default candidate)))
              (cond
               ;; A universal coding system can certainly encode all the
               ;; characters in the region.
               ((memq name latin-unity-ucs-list)
                (throw 'done (list name)))
               ;; Neither universal nor known to latin unity: we can't
               ;; decide here, so leave it to the caller,
               ;; `mm-find-mime-charset-region'.
               ((not (memq name latin-unity-coding-systems))
                (throw 'done nil))
               ;; A known coding system that may conceivably encode all
               ;; the characters in the region.
               ((latin-unity-maybe-remap begin end name feasible present t)
                (throw 'done (list name))))))
          ;; Nothing from `mm-coding-system-priorities' can encode the
          ;; region; the caller does the rest of the work.
          nil)))))
1135 | ||
(defmacro mm-xemacs-find-mime-charset (begin end)
  "Expand to a latin-unity charset lookup for BEGIN..END under XEmacs.
Outside XEmacs the expansion is nil.  Under XEmacs it calls
`mm-xemacs-find-mime-charset-1' provided the `mule' feature is present."
  (if (featurep 'xemacs)
      (list 'and
            '(featurep 'mule)
            (list 'mm-xemacs-find-mime-charset-1 begin end))))
aa0a8561 | 1139 | |
b5000590 GM |
1140 | (declare-function mm-delete-duplicates "mm-util" (list)) |
1141 | ||
(defun mm-find-mime-charset-region (b e &optional hack-charsets)
  "Return the MIME charsets needed to encode the region between B and E.
nil means ASCII, a single-element list represents an appropriate MIME
charset, and a longer list means no appropriate charset.
HACK-CHARSETS is a list that may contain `iso-8859-15' and
`iso-2022-jp-2'; each one enables the matching reduction step below."
  ;; The return possibilities of this function are a mess...
  (let ((charsets
         (or
          ;; Find the mime-charset of the most preferred coding
          ;; system that has one.
          (and (mm-multibyte-p)
               mm-use-find-coding-systems-region
               (let ((candidates (find-coding-systems-region b e)))
                 (if mm-coding-system-priorities
                     (setq candidates
                           (sort candidates
                                 'mm-sort-coding-systems-predicate)))
                 (setq candidates (delq 'compound-text candidates))
                 (if (equal candidates '(undecided))
                     nil
                   (catch 'chosen
                     (dolist (coding candidates)
                       (let ((mime (or (coding-system-get coding :mime-charset)
                                       (coding-system-get coding 'mime-charset))))
                         ;; The mime-charset (`x-ctext') of
                         ;; `compound-text' is not in the IANA list.  We
                         ;; shouldn't normally use anything here with a
                         ;; mime-charset having an `x-' prefix.
                         ;; Fixme:  Allow this to be overridden, since
                         ;; there is existing use of x-ctext.
                         ;; Also people apparently need the coding system
                         ;; `iso-2022-jp-3' (which Mule-UCS defines with
                         ;; mime-charset, though it's not valid).
                         ;;
                         ;; UTF-16 of any variety is invalid for text
                         ;; parts and, unfortunately, has mime-charset
                         ;; defined both in Mule-UCS and versions of
                         ;; Emacs.  (The name might be `mule-utf-16...'
                         ;; or `utf-16...'.)
                         (and mime
                              (not (string-match "^[Xx]-" (symbol-name mime)))
                              (not (string-match "utf-16" (symbol-name mime)))
                              (throw 'chosen (list mime)))))
                     nil))))
          ;; If we're XEmacs, and some coding system is appropriate,
          ;; mm-xemacs-find-mime-charset will return an appropriate list.
          ;; Otherwise, we'll get nil, and the next alternative is used.
          (mm-xemacs-find-mime-charset b e)

          ;; Fixme: won't work for unibyte Emacs 23:

          ;; We're not multibyte, or a single coding system won't cover it.
          (mm-delete-duplicates
           (mapcar 'mm-mime-charset
                   (delq 'ascii
                         (mm-find-charset-region b e)))))))
    ;; With several charsets including iso-8859-15, try converting the
    ;; compatible ISO-8859-X text in place and drop those charsets.
    (when (and (> (length charsets) 1)
               (memq 'iso-8859-15 charsets)
               (memq 'iso-8859-15 hack-charsets)
               (save-excursion (mm-iso-8859-x-to-15-region b e)))
      (let ((compatible mm-iso-8859-15-compatible))
        (while compatible
          (setq charsets (delq (car (car compatible)) charsets)
                compatible (cdr compatible)))))
    ;; iso-2022-jp-2 makes a separate iso-2022-jp entry redundant.
    (when (and (memq 'iso-2022-jp-2 charsets)
               (memq 'iso-2022-jp-2 hack-charsets))
      (setq charsets (delq 'iso-2022-jp charsets)))
    ;; Attempt to reduce the number of charsets if utf-8 is available.
    (when (and (featurep 'xemacs)
               (> (length charsets) 1)
               (mm-coding-system-p 'utf-8))
      (let ((mm-coding-system-priorities
             (cons 'utf-8 mm-coding-system-priorities)))
        (setq charsets
              (mm-delete-duplicates
               (mapcar 'mm-mime-charset
                       (delq 'ascii
                             (mm-find-charset-region b e)))))))
    charsets))
95fa1ff7 | 1217 | |
(defmacro mm-with-unibyte-buffer (&rest forms)
  "Evaluate FORMS like `progn' inside a fresh temporary buffer.
The buffer is switched to unibyte mode by `mm-disable-multibyte'
before FORMS run."
  (append '(with-temp-buffer
             (mm-disable-multibyte))
          forms))
(put 'mm-with-unibyte-buffer 'lisp-indent-function 0)
(put 'mm-with-unibyte-buffer 'edebug-form-spec '(body))
1226 | ||
(defmacro mm-with-multibyte-buffer (&rest forms)
  "Evaluate FORMS like `progn' inside a fresh temporary buffer.
The buffer is switched to multibyte mode by `mm-enable-multibyte'
before FORMS run."
  (append '(with-temp-buffer
             (mm-enable-multibyte))
          forms))
(put 'mm-with-multibyte-buffer 'lisp-indent-function 0)
(put 'mm-with-multibyte-buffer 'edebug-form-spec '(body))
1235 | ||
(defmacro mm-with-unibyte-current-buffer (&rest forms)
  "Evaluate FORMS with current buffer temporarily made unibyte.
Equivalent to `progn' in XEmacs.

The value is that of the last form.  The buffer is made multibyte
again afterwards only if it was multibyte before and FORMS returned
normally; a non-local exit from FORMS skips that step.

Note: We recommend not using this macro any more; there should be
better ways to do a similar thing.  The previous version of this macro
bound the default value of `enable-multibyte-characters' to nil while
evaluating FORMS but it is no longer done.  So, some programs assuming
it if any may malfunction."
  (if (featurep 'xemacs)
      (cons 'progn forms)
    ;; An uninterned symbol keeps the saved flag from clashing with
    ;; variables used in FORMS.
    (let ((was-multibyte (make-symbol "multibyte")))
      `(let ((,was-multibyte enable-multibyte-characters))
         (if ,was-multibyte
             (set-buffer-multibyte nil))
         (prog1
             (progn ,@forms)
           (if ,was-multibyte
               (set-buffer-multibyte t)))))))
(put 'mm-with-unibyte-current-buffer 'lisp-indent-function 0)
(put 'mm-with-unibyte-current-buffer 'edebug-form-spec '(body))
1257 | ||
(defun mm-find-charset-region (b e)
  "Return a list of Emacs charsets in the region B to E."
  (if (and (mm-multibyte-p)
           (fboundp 'find-charset-region))
      ;; Remove composition since the base charsets have been included.
      ;; Remove eight-bit-*, treat them as ascii.
      (delq 'control-1
            (delq 'eight-bit-graphic
                  (delq 'eight-bit-control
                        (delq 'composition
                              (find-charset-region b e)))))
    ;; We are in a unibyte buffer or XEmacs non-mule, so we futz around a bit.
    (save-excursion
      (save-restriction
        (narrow-to-region b e)
        (goto-char (point-min))
        (skip-chars-forward "\0-\177")
        (if (eobp)
            ;; Nothing but ASCII in the region.
            '(ascii)
          ;; Some 8-bit text: attribute it to the language environment's
          ;; last charset, else to the one for `mail-parse-charset',
          ;; else to Latin-1.
          (let ((guess
                 (and (boundp 'current-language-environment)
                      (car (last (assq 'charset
                                       (assoc current-language-environment
                                              language-info-alist)))))))
            (when (eq guess 'ascii)
              (setq guess nil))
            (list 'ascii
                  (or guess
                      (car (last (assq mail-parse-charset
                                       mm-mime-mule-charset-alist)))
                      'latin-iso8859-1))))))))
c113de23 | 1291 | |
(defun mm-auto-mode-alist ()
  "Return an `auto-mode-alist' with only the .gz (etc) thingies.
That is, keep, in their original order, the entries of
`auto-mode-alist' whose cdr is a list rather than a plain mode
function symbol."
  (let (kept)
    (dolist (entry auto-mode-alist)
      (if (listp (cdr entry))
          (push entry kept)))
    (nreverse kept)))
1301 | ||
(defvar mm-inhibit-file-name-handlers
  '(jka-compr-handler image-file-handler epa-file-handler)
  "File name handlers doing (un)compression and similar transformations.
Functions taking an INHIBIT argument add these to
`inhibit-file-name-handlers' when INHIBIT is non-nil.")
1305 | ||
(defun mm-insert-file-contents (filename &optional visit beg end replace
                                         inhibit)
  "Like `insert-file-contents', but only reads in the file.
A buffer may be modified in several ways after reading into the buffer due
to advanced Emacs features, such as file-name-handlers, format decoding,
`find-file-hooks', etc.
If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'.
This function ensures that none of these modifications will take place.
FILENAME, VISIT, BEG, END and REPLACE are passed on to
`insert-file-contents'."
  (letf* ((format-alist nil)
          ;; Keep only the (un)compression entries, or nothing at all
          ;; when INHIBIT is set.
          (auto-mode-alist (unless inhibit (mm-auto-mode-alist)))
          ((default-value 'major-mode) 'fundamental-mode)
          (enable-local-variables nil)
          (after-insert-file-functions nil)
          (enable-local-eval nil)
          (inhibit-file-name-operation
           (or (and inhibit 'insert-file-contents)
               inhibit-file-name-operation))
          (inhibit-file-name-handlers
           (append (and inhibit mm-inhibit-file-name-handlers)
                   inhibit-file-name-handlers)))
    ;; The hook variable is named `find-file-hook' or `find-file-hooks'
    ;; depending on the Emacs version; empty it for the duration of the
    ;; read and restore it afterwards, even on error.
    (let* ((hook-symbol (if (boundp 'find-file-hook)
                            'find-file-hook
                          'find-file-hooks))
           (saved-hooks (symbol-value hook-symbol)))
      (set hook-symbol nil)
      (unwind-protect
          (insert-file-contents filename visit beg end replace)
        (set hook-symbol saved-hooks)))))
c113de23 GM |
1336 | |
(defun mm-append-to-file (start end filename &optional codesys inhibit)
  "Append the contents of the region to the end of file FILENAME.
When called from a function, expects three arguments,
START, END and FILENAME.  START and END are buffer positions
saying what text to write.
Optional fourth argument CODESYS specifies the coding system to use
when encoding the file; it defaults to
`mm-text-coding-system-for-write', then `mm-text-coding-system'.
If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'."
  (let ((coding-system-for-write
         (cond (codesys)
               (mm-text-coding-system-for-write)
               (t mm-text-coding-system)))
        (inhibit-file-name-operation
         (or (and inhibit 'append-to-file)
             inhibit-file-name-operation))
        (inhibit-file-name-handlers
         (append (and inhibit mm-inhibit-file-name-handlers)
                 inhibit-file-name-handlers)))
    ;; Append quietly, then report once ourselves.
    (write-region start end filename t 'no-message)
    (message "Appended to %s" filename)))
c113de23 | 1358 | |
95fa1ff7 | 1359 | (defun mm-write-region (start end filename &optional append visit lockname |
c113de23 GM |
1360 | coding-system inhibit) |
1361 | ||
1362 | "Like `write-region'. | |
23f87bed | 1363 | If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'." |
95fa1ff7 SZ |
1364 | (let ((coding-system-for-write |
1365 | (or coding-system mm-text-coding-system-for-write | |
c113de23 | 1366 | mm-text-coding-system)) |
95fa1ff7 | 1367 | (inhibit-file-name-operation (if inhibit |
c113de23 GM |
1368 | 'write-region |
1369 | inhibit-file-name-operation)) | |
1370 | (inhibit-file-name-handlers | |
1371 | (if inhibit | |
95fa1ff7 | 1372 | (append mm-inhibit-file-name-handlers |
c113de23 GM |
1373 | inhibit-file-name-handlers) |
1374 | inhibit-file-name-handlers))) | |
1375 | (write-region start end filename append visit lockname))) | |
1376 | ||
b5000590 GM |
1377 | (autoload 'gmm-write-region "gmm-utils") |
1378 | ||
cf5a5c38 MB |
;; It is not a MIME function, but some MIME functions use it.
;; Use the native `make-temp-file' only when it is byte-compiled and its
;; fourth argument-list element is `suffix'; otherwise define our own.
(if (and (fboundp 'make-temp-file)
         (ignore-errors
           (let* ((fn (symbol-function 'make-temp-file))
                  (arglist (and (byte-code-function-p fn)
                                (if (fboundp 'compiled-function-arglist)
                                    ;; XEmacs
                                    (eval (list 'compiled-function-arglist
                                                fn))
                                  (aref fn 0)))))
             (and arglist
                  (>= (length arglist) 4)
                  (eq (nth 3 arglist) 'suffix)))))
    (defalias 'mm-make-temp-file 'make-temp-file)
  ;; Stolen (and modified for XEmacs) from Emacs 22.
  (defun mm-make-temp-file (prefix &optional dir-flag suffix)
    "Create a temporary file and return its name.
The name is made by appending some random characters to PREFIX and
expanding the result against `temporary-file-directory' if necessary.
It is guaranteed to point to a newly created empty file, which you can
then fill with `write-region'.

If DIR-FLAG is non-nil, create a new empty directory instead of a file.

If SUFFIX is non-nil, add that at the end of the file name."
    (let ((saved-modes (default-file-modes))
          name)
      (unwind-protect
          (progn
            ;; Create temp files with strict access rights (448 is
            ;; octal 700).  It's easy to loosen them later, whereas
            ;; it's impossible to close the time-window of loose
            ;; permissions otherwise.
            (set-default-file-modes 448)
            ;; The condition form yields non-nil only when the chosen
            ;; name turned out to be taken, which makes the loop pick
            ;; another one.
            (while (condition-case err
                       (progn
                         ;; `concat' ignores a nil SUFFIX.
                         (setq name
                               (concat
                                (make-temp-name
                                 (expand-file-name
                                  prefix
                                  (if (fboundp 'temp-directory)
                                      ;; XEmacs
                                      (temp-directory)
                                    temporary-file-directory)))
                                suffix))
                         (cond
                          (dir-flag
                           (make-directory name))
                          (t
                           ;; NOTE: This is unsafe if Emacs 20
                           ;; users and XEmacs users don't use
                           ;; a secure temp directory.
                           (gmm-write-region "" nil name nil 'silent
                                             nil 'excl)))
                         nil)
                     (file-already-exists t)
                     ;; The XEmacs version of `make-directory' issues
                     ;; `file-error'.
                     (file-error (or (and (featurep 'xemacs)
                                          (file-exists-p name))
                                     (signal (car err) (cdr err)))))
              ;; the file was somehow created by someone else between
              ;; `make-temp-name' and `write-region', let's try again.
              nil)
            name)
        ;; Reset the umask.
        (set-default-file-modes saved-modes)))))
1442 | ||
eecdcaf5 LMI |
(defvar mm-image-load-path-cache nil
  "Cache of the last result computed by `mm-image-load-path'.
When non-nil, a cons cell (KEY . DIRECTORIES) where KEY is a list whose
car is the image subdirectory name and whose cdr is a copy of the
`load-path' that DIRECTORIES was computed from.")

(defun mm-image-load-path (&optional package)
  "Return a list of existing image directories for PACKAGE.
PACKAGE is a directory name relative to \"etc/images/\" and ending in a
slash; it defaults to \"gnus/\".  For every non-nil element of
`load-path', the directory \"etc/images/PACKAGE\" next to it, i.e. under
its parent directory, is included when it exists.  The result keeps
the order of `load-path'.

The result is cached, and recomputed whenever PACKAGE or `load-path'
differs from what the cached value was computed for."
  (let* ((subdir (or package "gnus/"))
         ;; The package is part of the key; otherwise a request for one
         ;; package would be answered with another package's
         ;; directories.
         (key (cons subdir load-path)))
    (if (and mm-image-load-path-cache
             (equal key (car mm-image-load-path-cache)))
        (cdr mm-image-load-path-cache)
      (let (dir result)
        (dolist (path load-path)
          (when (and path
                     (file-directory-p
                      (setq dir (concat (file-name-directory
                                         (directory-file-name path))
                                        "etc/images/" subdir))))
            (push dir result)))
        (setq result (nreverse result)
              ;; Store a copy of `load-path' so that destructive
              ;; changes to the live list still invalidate the cache.
              mm-image-load-path-cache
              (cons (cons subdir (copy-sequence load-path)) result))
        result))))
95fa1ff7 | 1460 | |
23f87bed MB |
;; Fixme: This doesn't look useful where it's used.
(if (fboundp 'detect-coding-region)
    (defun mm-detect-coding-region (start end)
      "Detect the coding system of the text between START and END.
This is `detect-coding-region', except that when it returns a list
only the first, i.e. the best, candidate is returned."
      (let ((detected (detect-coding-region start end)))
        (or (car-safe detected)
            detected)))
  (defun mm-detect-coding-region (start end)
    "Guess the charset of the text between START and END.
Return `ascii' if skipping over ASCII characters from START reaches
END, and whatever `mm-guess-charset' returns otherwise.  Point is put
back where it was."
    (let ((origin (point))
          guess)
      (goto-char start)
      (skip-chars-forward "\0-\177" end)
      (setq guess (if (eq (point) end) 'ascii (mm-guess-charset)))
      (goto-char origin)
      guess)))
1476 | ||
b5000590 GM |
1477 | (declare-function mm-detect-coding-region "mm-util" (start end)) |
1478 | ||
23f87bed MB |
(if (fboundp 'coding-system-get)
    (defun mm-detect-mime-charset-region (start end)
      "Detect MIME charset of the text in the region between START and END.
The coding system found by `mm-detect-coding-region' is mapped to its
`:mime-charset' property or, failing that, its `mime-charset' property."
      (let ((detected (mm-detect-coding-region start end)))
        (or (coding-system-get detected :mime-charset)
            (coding-system-get detected 'mime-charset))))
  (defun mm-detect-mime-charset-region (start end)
    "Detect MIME charset of the text in the region between START and END.
Without `coding-system-get', this is simply the value returned by
`mm-detect-coding-region'."
    (mm-detect-coding-region start end)))
1489 | ||
01c52d31 MB |
1490 | (eval-when-compile |
1491 | (unless (fboundp 'coding-system-to-mime-charset) | |
1492 | (defalias 'coding-system-to-mime-charset 'ignore))) | |
1493 | ||
(defun mm-coding-system-to-mime-charset (coding-system)
  "Return the MIME charset corresponding to CODING-SYSTEM.
Return nil if CODING-SYSTEM is nil or no charset is found for it.
To make this function work with XEmacs, the APEL package is required."
  (cond
   ((null coding-system) nil)
   ;; Preferred source: the coding system's own property list.  A
   ;; clause consisting of a test only returns the value of that test.
   ((and (fboundp 'coding-system-get)
         (or (coding-system-get coding-system :mime-charset)
             (coding-system-get coding-system 'mime-charset))))
   ;; XEmacs: rely on `coding-system-to-mime-charset', loading the
   ;; `mcharset' feature if the function is missing or is merely the
   ;; compile-time `ignore' placeholder.
   ((featurep 'xemacs)
    (and (or (and (fboundp 'coding-system-to-mime-charset)
                  (not (eq (symbol-function 'coding-system-to-mime-charset)
                           'ignore)))
             (and (ignore-errors (require 'mcharset))
                  (fboundp 'coding-system-to-mime-charset)))
         (coding-system-to-mime-charset coding-system)))))
1510 | ||
1511 | (eval-when-compile | |
1512 | (require 'jka-compr)) | |
1513 | ||
(defun mm-decompress-buffer (filename &optional inplace force)
  "Decompress buffer's contents, depending on jka-compr.
Only when FORCE is t or `auto-compression-mode' is enabled and FILENAME
agrees with `jka-compr-compression-info-list', decompression is done.
Signal an error if FORCE is neither nil nor t and compressed data are
not decompressed because `auto-compression-mode' is disabled.
If INPLACE is nil, return decompressed data or nil without modifying
the buffer.  Otherwise, replace the buffer's contents with the
decompressed data and return nil.  The buffer's multibyteness must be
turned off.

If the uncompress program exits with a status that is not in
`jka-compr-acceptable-retval-list', the buffer is left alone and a
message naming the failed command is displayed; when INPLACE is nil
the program's error output, collapsed onto a single line, is what gets
returned."
  (when (and filename
             (if force
                 (prog1 t (require 'jka-compr))
               (and (fboundp 'jka-compr-installed-p)
                    (jka-compr-installed-p))))
    (let ((info (jka-compr-get-compression-info filename)))
      (when info
        (unless (or (memq force (list nil t))
                    (jka-compr-installed-p))
          (error "Compressed data not decompressed: `auto-compression-mode' is disabled"))
        (let ((prog (jka-compr-info-uncompress-program info))
              (args (jka-compr-info-uncompress-args info))
              (msg (format "%s %s..."
                           (jka-compr-info-uncompress-message info)
                           filename))
              ;; File receiving the standard error of the program.
              (err-file (jka-compr-make-temp-name))
              (cur (current-buffer))
              (coding-system-for-read mm-binary-coding-system)
              (coding-system-for-write mm-binary-coding-system)
              retval err-msg)
          (message "%s" msg)
          (unwind-protect
              ;; Work on a copy so that the original buffer stays
              ;; untouched until the result is known.
              (mm-with-unibyte-buffer
                (insert-buffer-substring cur)
                (condition-case err
                    (progn
                      (unless (memq (apply 'call-process-region
                                           (point-min) (point-max)
                                           prog t (list t err-file) nil args)
                                    jka-compr-acceptable-retval-list)
                        (erase-buffer)
                        ;; Flatten the error output into one line.
                        ;; With the default separators `split-string'
                        ;; splits at whitespace and drops empty
                        ;; strings.  (Its second argument is the
                        ;; separator regexp, not an omit-nulls flag.)
                        (insert (mapconcat 'identity
                                           (split-string
                                            (prog2
                                                (insert-file-contents err-file)
                                                (buffer-string)
                                              (erase-buffer)))
                                           " ")
                                "\n")
                        (setq err-msg
                              (format "Error while executing \"%s %s < %s\""
                                      prog (mapconcat 'identity args " ")
                                      filename)))
                      (setq retval (buffer-string)))
                  (error
                   (setq err-msg (error-message-string err)))))
            ;; Remove the error file even on a non-local exit.
            (when (file-exists-p err-file)
              (ignore-errors (delete-file err-file))))
          (when inplace
            (unless err-msg
              (delete-region (point-min) (point-max))
              (insert retval))
            (setq retval nil))
          (message "%s" (or err-msg (concat msg "done")))
          retval)))))
1577 | ||
1578 | (eval-when-compile | |
1579 | (unless (fboundp 'coding-system-name) | |
1580 | (defalias 'coding-system-name 'ignore)) | |
1581 | (unless (fboundp 'find-file-coding-system-for-read-from-filename) | |
1582 | (defalias 'find-file-coding-system-for-read-from-filename 'ignore)) | |
1583 | (unless (fboundp 'find-operation-coding-system) | |
1584 | (defalias 'find-operation-coding-system 'ignore))) | |
1585 | ||
(defun mm-find-buffer-file-coding-system (&optional filename)
  "Find coding system used to decode the contents of the current buffer.
This function looks for the coding system magic cookie or examines the
coding system specified by `file-coding-system-alist' being associated
with FILENAME which defaults to `buffer-file-name'.  Data compressed by
gzip, bzip2, etc. are allowed: unless FILENAME looks like a tar
archive, the contents are first run through `mm-decompress-buffer' and,
if that yields data, the search is done on that data in a temporary
buffer, with the last extension removed from FILENAME."
  (unless filename
    (setq filename buffer-file-name))
  (save-excursion
    (let ((decomp (unless ;; Not worth examining the charset of tar files.
                      (and filename
                           (string-match
                            "\\.\\(?:tar\\.[^.]+\\|tbz\\|tgz\\)\\'"
                            filename))
                    (mm-decompress-buffer filename nil t))))
      ;; DECOMP is non-nil only if `mm-decompress-buffer' returned data;
      ;; in that case continue in a scratch buffer holding that data.
      (when decomp
        (set-buffer (generate-new-buffer " *temp*"))
        (mm-disable-multibyte)
        (insert decomp)
        (setq filename (file-name-sans-extension filename)))
      (goto-char (point-min))
      (unwind-protect
          (cond
           ((boundp 'set-auto-coding-function) ;; Emacs
            (if filename
                ;; Ask `set-auto-coding-function' first, then fall back
                ;; to what `find-operation-coding-system' gives for
                ;; reading FILENAME.
                (or (funcall (symbol-value 'set-auto-coding-function)
                             filename (- (point-max) (point-min)))
                    (car (find-operation-coding-system 'insert-file-contents
                                                       filename)))
              ;; No file name: call the function with `auto-coding-alist'
              ;; bound to nil, and treat an error as "nothing found".
              (let (auto-coding-alist)
                (condition-case nil
                    (funcall (symbol-value 'set-auto-coding-function)
                             nil (- (point-max) (point-min)))
                  (error nil)))))
           ((and (featurep 'xemacs) (featurep 'file-coding)) ;; XEmacs
            (let ((case-fold-search t)
                  (end (point-at-eol))
                  codesys start)
              ;; Try four sources in turn; the first one that names a
              ;; coding system known to `find-coding-system' wins.
              (or
               ;; 1. A "-*- ... coding: NAME ... -*-" cookie on the
               ;;    first line.
               (and (re-search-forward "-\\*-+[\t ]*" end t)
                    (progn
                      (setq start (match-end 0))
                      (re-search-forward "[\t ]*-+\\*-" end t))
                    (progn
                      (setq end (match-beginning 0))
                      (goto-char start)
                      (or (looking-at "coding:[\t ]*\\([^\t ;]+\\)")
                          (re-search-forward
                           "[\t ;]+coding:[\t ]*\\([^\t ;]+\\)"
                           end t)))
                    (find-coding-system (setq codesys
                                              (intern (match-string 1))))
                    codesys)
               ;; 2. A "coding:" entry between ";"-prefixed
               ;;    "Local Variables:" and "End:" lines.
               (and (re-search-forward "^[\t ]*;+[\t ]*Local[\t ]+Variables:"
                                       nil t)
                    (progn
                      (setq start (match-end 0))
                      (re-search-forward "^[\t ]*;+[\t ]*End:" nil t))
                    (progn
                      (setq end (match-beginning 0))
                      (goto-char start)
                      (re-search-forward
                       "^[\t ]*;+[\t ]*coding:[\t ]*\\([^\t\n\r ]+\\)"
                       end t))
                    (find-coding-system (setq codesys
                                              (intern (match-string 1))))
                    codesys)
               ;; 3. A ";;;coding system: NAME" line anywhere in the
               ;;    buffer, matched case-sensitively.
               (and (progn
                      (goto-char (point-min))
                      (setq case-fold-search nil)
                      (re-search-forward "^;;;coding system: "
                                         ;;(+ (point-min) 3000) t))
                                         nil t))
                    (looking-at "[^\t\n\r ]+")
                    (find-coding-system
                     (setq codesys (intern (match-string 0))))
                    codesys)
               ;; 4. Whatever the file name alone implies.
               (and filename
                    (setq codesys
                          (find-file-coding-system-for-read-from-filename
                           filename))
                    (coding-system-name (coding-system-base codesys)))))))
        ;; Dispose of the scratch buffer created for decompressed data.
        (when decomp
          (kill-buffer (current-buffer)))))))
3efe5554 | 1670 | |
c113de23 GM |
1671 | (provide 'mm-util) |
1672 | ||
1673 | ;;; mm-util.el ends here |