Commit | Line | Data |
---|---|---|
95fa1ff7 | 1 | ;;; mm-util.el --- Utility functions for Mule and low level things |
e84b4b86 | 2 | |
ba318903 | 3 | ;; Copyright (C) 1998-2014 Free Software Foundation, Inc. |
c113de23 GM |
4 | |
5 | ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | |
6 | ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> | |
7 | ;; This file is part of GNU Emacs. | |
8 | ||
5e809f55 | 9 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
c113de23 | 10 | ;; it under the terms of the GNU General Public License as published by |
5e809f55 GM |
11 | ;; the Free Software Foundation, either version 3 of the License, or |
12 | ;; (at your option) any later version. | |
c113de23 GM |
13 | |
14 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
15 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
5e809f55 | 16 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
c113de23 GM |
17 | ;; GNU General Public License for more details. |
18 | ||
19 | ;; You should have received a copy of the GNU General Public License | |
5e809f55 | 20 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
c113de23 GM |
21 | |
22 | ;;; Commentary: | |
23 | ||
24 | ;;; Code: | |
25 | ||
f0b7f5a8 | 26 | ;; For Emacs <22.2 and XEmacs. |
b5000590 GM |
27 | (eval-and-compile |
28 | (unless (fboundp 'declare-function) (defmacro declare-function (&rest r)))) | |
29 | ||
23f87bed | 30 | (eval-when-compile (require 'cl)) |
c113de23 GM |
31 | (require 'mail-prsvr) |
32 | ||
f53b2875 | 33 | (eval-and-compile |
01c52d31 MB |
34 | (if (featurep 'xemacs) |
35 | (unless (ignore-errors | |
36 | (require 'timer-funcs)) | |
37 | (require 'timer)) | |
38 | (require 'timer))) | |
39 | ||
9efa445f | 40 | (defvar mm-mime-mule-charset-alist ) |
4c188c5a GM |
41 | ;; Note this is not presently used on Emacs >= 23, which is good, |
42 | ;; since it means standalone message-mode (which requires mml and | |
43 | ;; hence mm-util) does not load gnus-util. | |
44 | (autoload 'gnus-completing-read "gnus-util") | |
9efa445f | 45 | |
e3e955fe MB |
46 | ;; Emulate functions that are not available in every (X)Emacs version. |
47 | ;; The name of a function is prefixed with mm-, like `mm-char-int' for | |
48 | ;; `char-int' that is a native XEmacs function, not available in Emacs. | |
49 | ;; Gnus programs all should use mm- functions, not the original ones. | |
01c52d31 MB |
(eval-and-compile
  ;; Each entry below is (NAME . FALLBACK).  `mm-NAME' is defined as an
  ;; alias of NAME when NAME is fbound in the running (X)Emacs, and as an
  ;; alias of FALLBACK otherwise.
  (dolist (entry
           `(;; `coding-system-list' is not available in XEmacs 21.4 built
             ;; without the `file-coding' feature.
             (coding-system-list . ignore)
             ;; `char-int' is an XEmacs function, not available in Emacs.
             (char-int . identity)
             ;; `coding-system-equal' is an Emacs function, not available
             ;; in XEmacs.
             (coding-system-equal . equal)
             ;; `annotationp' is an XEmacs function, not available in Emacs.
             (annotationp . ignore)
             ;; `set-buffer-file-coding-system' is not available in
             ;; XEmacs 21.4 built without the `file-coding' feature.
             (set-buffer-file-coding-system . ignore)
             ;; `read-charset' is an Emacs function, not available in XEmacs.
             (read-charset
              . ,(lambda (prompt)
                   "Return a charset."
                   (let ((names (mapcar (lambda (e) (symbol-name (car e)))
                                        mm-mime-mule-charset-alist)))
                     (intern (gnus-completing-read prompt names t)))))
             ;; `subst-char-in-string' is not available in XEmacs 21.4.
             (subst-char-in-string
              . ,(lambda (from to string &optional inplace)
                   ;; stolen (and renamed) from nnheader.el
                   "Replace characters in STRING from FROM to TO.
Unless optional argument INPLACE is non-nil, return a new string."
                   (let ((result (if inplace string (copy-sequence string))))
                     ;; Replace all occurrences of FROM with TO.
                     (dotimes (i (length result))
                       (when (= (aref result i) from)
                         (aset result i to)))
                     result)))
             ;; `replace-in-string' is an XEmacs function, not available
             ;; in Emacs.
             (replace-in-string
              . ,(lambda (string regexp rep &optional literal)
                   "See `replace-regexp-in-string', only the order of args differs."
                   (replace-regexp-in-string regexp rep string nil literal)))
             ;; `string-as-unibyte' is an Emacs function, not available
             ;; in XEmacs.
             (string-as-unibyte . identity)
             ;; `string-make-unibyte' is an Emacs function, not available
             ;; in XEmacs.
             (string-make-unibyte . identity)
             ;; string-as-multibyte often doesn't really do what you think
             ;; it does.
             ;; Example:
             ;;    (aref (string-as-multibyte "\201") 0) -> 129 (aka ?\201)
             ;;    (aref (string-as-multibyte "\300") 0) -> 192 (aka ?\300)
             ;;    (aref (string-as-multibyte "\300\201") 0) -> 192 (aka ?\300)
             ;;    (aref (string-as-multibyte "\300\201") 1) -> 129 (aka ?\201)
             ;; but
             ;;    (aref (string-as-multibyte "\201\300") 0) -> 2240
             ;;    (aref (string-as-multibyte "\201\300") 1) -> <error>
             ;; Better use string-to-multibyte or encode-coding-string.
             ;; If you really need string-as-multibyte somewhere it's usually
             ;; because you're using the internal emacs-mule representation
             ;; (maybe because you're using string-as-unibyte somewhere),
             ;; which is generally a problem in itself.
             ;; Here is an approximate equivalence table to help think
             ;; about it:
             ;; (string-as-multibyte s)   ~= (decode-coding-string s 'emacs-mule)
             ;; (string-to-multibyte s)   ~= (decode-coding-string s 'binary)
             ;; (string-make-multibyte s) ~= (decode-coding-string s locale-coding-system)
             ;; `string-as-multibyte' is an Emacs function, not available
             ;; in XEmacs.
             (string-as-multibyte . identity)
             ;; `multibyte-string-p' is an Emacs function, not available
             ;; in XEmacs.
             (multibyte-string-p . ignore)
             ;; `insert-byte' is available only in Emacs 23.1 or greater.
             (insert-byte . insert-char)
             ;; `multibyte-char-to-unibyte' is an Emacs function, not
             ;; available in XEmacs.
             (multibyte-char-to-unibyte . identity)
             ;; `set-buffer-multibyte' is an Emacs function, not available
             ;; in XEmacs.
             (set-buffer-multibyte . ignore)
             ;; `substring-no-properties' is available only in Emacs 22.1
             ;; or greater.
             (substring-no-properties
              . ,(lambda (string &optional from to)
                   "Return a substring of STRING, without text properties.
It starts at index FROM and ending before TO.
TO may be nil or omitted; then the substring runs to the end of STRING.
If FROM is nil or omitted, the substring starts at the beginning of STRING.
If FROM or TO is negative, it counts from the end.

With one argument, just copy STRING without its properties."
                   (let ((piece (substring string (or from 0) to)))
                     (set-text-properties 0 (length piece) nil piece)
                     piece)))
             ;; `line-number-at-pos' is available only in Emacs 22.1 or
             ;; greater and XEmacs 21.5.
             (line-number-at-pos
              . ,(lambda (&optional pos)
                   "Return (narrowed) buffer line number at position POS.
If POS is nil, use current buffer location.
Counting starts at (point-min), so the value refers
to the contents of the accessible portion of the buffer."
                   (let ((target (or pos (point))))
                     (save-excursion
                       (goto-char target)
                       (forward-line 0)
                       (1+ (count-lines (point-min) (point)))))))))
    (let ((native (car entry)))
      (defalias (intern (format "mm-%s" native))
        (if (fboundp native)
            native
          (cdr entry))))))
f53b2875 | 160 | |
9ab16aab KY |
161 | ;; `special-display-p' is an Emacs function, not available in XEmacs. |
(defalias 'mm-special-display-p
  (if (featurep 'emacs)
      'special-display-p
    ;; Fallback used when the `emacs' feature is absent: consult
    ;; `special-display-buffer-names' first, then `special-display-regexps'.
    (lambda (buffer-name)
      "Returns non-nil if a buffer named BUFFER-NAME gets a special frame."
      (when special-display-function
        (cond
         ;; BUFFER-NAME is listed verbatim.
         ((member buffer-name special-display-buffer-names) t)
         ;; BUFFER-NAME is the car of an entry; return the rest of the entry.
         ((cdr (assoc buffer-name special-display-buffer-names)))
         ;; Otherwise the first matching regexp entry decides.
         (t
          (catch 'return
            (dolist (entry special-display-regexps)
              (cond
               ((and (stringp entry)
                     (string-match entry buffer-name))
                (throw 'return t))
               ((and (consp entry)
                     (stringp (car entry))
                     (string-match (car entry) buffer-name))
                (throw 'return (cdr entry))))))))))))
179 | ||
e3e955fe MB |
180 | ;; `decode-coding-string', `encode-coding-string', `decode-coding-region' |
181 | ;; and `encode-coding-region' are available in Emacs and XEmacs built with | |
182 | ;; the `file-coding' feature, but the XEmacs versions treat nil, that is | |
183 | ;; given as the `coding-system' argument, as the `binary' coding system. | |
82fe1aed MB |
(eval-and-compile
  (cond
   ;; Emacs: the native functions can be used directly.
   ((not (featurep 'xemacs))
    (defalias 'mm-decode-coding-string 'decode-coding-string)
    (defalias 'mm-encode-coding-string 'encode-coding-string)
    (defalias 'mm-decode-coding-region 'decode-coding-region)
    (defalias 'mm-encode-coding-region 'encode-coding-region))
   ;; XEmacs with `file-coding': wrap the native functions so that a nil
   ;; CODING-SYSTEM means "do nothing" instead of being passed through.
   ((featurep 'file-coding)
    (defun mm-decode-coding-string (str coding-system)
      (if (null coding-system)
          str
        (decode-coding-string str coding-system)))
    (defun mm-encode-coding-string (str coding-system)
      (if (null coding-system)
          str
        (encode-coding-string str coding-system)))
    (defun mm-decode-coding-region (start end coding-system)
      (when coding-system
        (decode-coding-region start end coding-system)))
    (defun mm-encode-coding-region (start end coding-system)
      (when coding-system
        (encode-coding-region start end coding-system))))
   ;; XEmacs without `file-coding': no conversion is possible, so the
   ;; string variants return STR unchanged and the region variants do
   ;; nothing.
   (t
    (defun mm-decode-coding-string (str coding-system) str)
    (defun mm-encode-coding-string (str coding-system) str)
    (defalias 'mm-decode-coding-region 'ignore)
    (defalias 'mm-encode-coding-region 'ignore))))
210 | ||
ed1d182d GM |
211 | ;; `string-to-multibyte' is available only in Emacs. |
;; Outside XEmacs this is the native `string-to-multibyte'; under XEmacs
;; the argument is returned unchanged.
(defalias 'mm-string-to-multibyte
  (if (not (featurep 'xemacs))
      'string-to-multibyte
    'identity))
e8f0f70d | 215 | |
e3e955fe | 216 | ;; `char-or-char-int-p' is an XEmacs function, not available in Emacs. |
c113de23 GM |
(eval-and-compile
  ;; Prefer `char-or-char-int-p', then `char-valid-p'; when neither is
  ;; defined, fall back to `identity'.
  (defalias 'mm-char-or-char-int-p
    (if (fboundp 'char-or-char-int-p)
        'char-or-char-int-p
      (if (fboundp 'char-valid-p)
          'char-valid-p
        'identity))))
223 | ||
e3e955fe | 224 | ;; `ucs-to-char' is a function that Mule-UCS provides. |
99139556 KY |
(eval-and-compile
  ;; Define `mm-ucs-to-char' for the running (X)Emacs.  Every function
  ;; variant below returns ?# when the conversion yields nil or (where
  ;; guarded) signals an error.
  (cond
   ;; Emacs.
   ((not (featurep 'xemacs))
    (if (let ((char (make-char 'japanese-jisx0208 36 34)))
          (eq char (decode-char 'ucs char)))
        ;; Emacs 23.
        (defalias 'mm-ucs-to-char 'identity)
      (defun mm-ucs-to-char (codepoint)
        "Convert Unicode codepoint to character."
        (or (decode-char 'ucs codepoint) ?#))))
   ;; XEmacs 21.5: `unicode-to-char' is a built-in.
   ((and (fboundp 'unicode-to-char)
         (subrp (symbol-function 'unicode-to-char)))
    (if (featurep 'mule)
        (defalias 'mm-ucs-to-char 'unicode-to-char)
      (defun mm-ucs-to-char (codepoint)
        "Convert Unicode codepoint to character."
        (or (unicode-to-char codepoint) ?#))))
   ;; XEmacs with Mule.
   ((featurep 'mule)
    (defun mm-ucs-to-char (codepoint)
      "Convert Unicode codepoint to character."
      (cond
       ((fboundp 'ucs-to-char) ;; Mule-UCS is loaded.
        ;; Replace this definition with one that calls `ucs-to-char'
        ;; directly, then delegate the current call to it.
        (defalias 'mm-ucs-to-char
          (lambda (codepoint)
            "Convert Unicode codepoint to character."
            (or (ignore-errors (ucs-to-char codepoint)) ?#)))
        (mm-ucs-to-char codepoint))
       (t
        (or (ignore-errors (int-to-char codepoint)) ?#)))))
   ;; XEmacs without Mule.
   (t
    (defun mm-ucs-to-char (codepoint)
      "Convert Unicode codepoint to character."
      (or (ignore-errors (int-to-char codepoint)) ?#)))))
e3e955fe | 262 | |
23f87bed MB |
263 | ;; Fixme: This seems always to be used to read a MIME charset, so it |
264 | ;; should be re-named and fixed (in Emacs) to offer completion only on | |
265 | ;; proper charset names (base coding systems which have a | |
266 | ;; mime-charset defined). XEmacs doesn't believe in mime-charset; | |
267 | ;; test with | |
268 | ;; `(or (coding-system-get 'iso-8859-1 'mime-charset) | |
269 | ;; (coding-system-get 'iso-8859-1 :mime-charset))' | |
270 | ;; Actually, there should be an `mm-coding-system-mime-charset'. | |
95fa1ff7 SZ |
(eval-and-compile
  (defalias 'mm-read-coding-system
    (cond
     ;; Emacs: use the native function.
     ((featurep 'emacs) 'read-coding-system)
     ;; No native `read-coding-system': complete over the charset names
     ;; in `mm-mime-mule-charset-alist' instead.
     ((not (fboundp 'read-coding-system))
      (lambda (prompt &optional default-coding-system)
        "Prompt the user for a coding system."
        (gnus-completing-read
         prompt (mapcar (lambda (s) (symbol-name (car s)))
                        mm-mime-mule-charset-alist))))
     ;; XEmacs whose version number is at most 21.1: call the native
     ;; function with PROMPT only, dropping DEFAULT-CODING-SYSTEM.
     ((and (featurep 'xemacs)
           (<= (string-to-number emacs-version) 21.1))
      (lambda (prompt &optional default-coding-system)
        (read-coding-system prompt)))
     (t 'read-coding-system))))
95fa1ff7 | 286 | |
c113de23 GM |
(defvar mm-coding-system-list nil
  "Cache for `mm-get-coding-system-list'; nil until first computed.")

(defun mm-get-coding-system-list ()
  "Get the coding system list.
The result of calling the function `mm-coding-system-list' is cached
in the variable of the same name and reused while it is non-nil."
  (unless mm-coding-system-list
    (setq mm-coding-system-list (mm-coding-system-list)))
  mm-coding-system-list)
292 | ||
23f87bed MB |
(defun mm-coding-system-p (cs)
  "Return non-nil if CS is a symbol naming a coding system.
In XEmacs, also return non-nil if CS is a coding system object.
If CS is available, return CS itself in Emacs, and return a coding
system object in XEmacs."
  (cond
   ;; `find-coding-system' is available: let it resolve a non-nil CS.
   ((fboundp 'find-coding-system)
    (and cs (find-coding-system cs)))
   ;; `coding-system-p' is available: return CS itself on success.
   ((fboundp 'coding-system-p)
    (and (coding-system-p cs) cs))
   ;; no-MULE XEmacs:
   (t
    (car (memq cs (mm-get-coding-system-list))))))
95fa1ff7 | 305 | |
(defvar mm-charset-synonym-alist
  ;; Each group contributes its pair only when the alias name is not
  ;; itself a coding system here (and, where checked, the target is).
  (append
   ;; Not in XEmacs, but it's not a proper MIME charset anyhow.
   (and (not (mm-coding-system-p 'x-ctext))
        '((x-ctext . ctext)))
   ;; ISO-8859-15 is very similar to ISO-8859-1.  But it's _different_ in 8
   ;; positions!
   (and (not (mm-coding-system-p 'iso-8859-15))
        '((iso-8859-15 . iso-8859-1)))
   ;; BIG-5HKSCS is similar to, but different than, BIG-5.
   (and (not (mm-coding-system-p 'big5-hkscs))
        '((big5-hkscs . big5)))
   ;; A Microsoft misunderstanding.
   (and (not (mm-coding-system-p 'unicode))
        (mm-coding-system-p 'utf-16-le)
        '((unicode . utf-16-le)))
   ;; A Microsoft misunderstanding.
   (and (not (mm-coding-system-p 'ks_c_5601-1987))
        (if (mm-coding-system-p 'cp949)
            '((ks_c_5601-1987 . cp949))
          '((ks_c_5601-1987 . euc-kr))))
   ;; Windows-31J is Windows Codepage 932.
   (and (not (mm-coding-system-p 'windows-31j))
        (mm-coding-system-p 'cp932)
        '((windows-31j . cp932)))
   ;; Charset name: GBK, Charset aliases: CP936, MS936, windows-936
   ;; http://www.iana.org/assignments/charset-reg/GBK
   ;; Emacs 22.1 has cp936, but not gbk, so we alias it:
   (and (not (mm-coding-system-p 'gbk))
        (mm-coding-system-p 'cp936)
        '((gbk . cp936)))
   ;; UTF8 is a bogus name for UTF-8
   (and (not (mm-coding-system-p 'utf8))
        (mm-coding-system-p 'utf-8)
        '((utf8 . utf-8)))
   ;; ISO8859-1 is a bogus name for ISO-8859-1
   (and (not (mm-coding-system-p 'iso8859-1))
        (mm-coding-system-p 'iso-8859-1)
        '((iso8859-1 . iso-8859-1)))
   ;; ISO_8859-1 is a bogus name for ISO-8859-1
   (and (not (mm-coding-system-p 'iso_8859-1))
        (mm-coding-system-p 'iso-8859-1)
        '((iso_8859-1 . iso-8859-1))))
  "A mapping from unknown or invalid charset names to the real charset names.

See `mm-codepage-iso-8859-list' and `mm-codepage-ibm-list'.")
353 | ||
ddf6fd30 GM |
(defun mm-codepage-setup (number &optional alias)
  "Create a coding system cpNUMBER.
The coding system is created using `codepage-setup'.  If ALIAS is
non-nil, an alias is created and added to
`mm-charset-synonym-alist'.  If ALIAS is a string, it's used as
the alias.  Else windows-NUMBER is used."
  (interactive
   (progn
     ;; Removed in Emacs 23 (unicode), so signal an error:
     (unless (fboundp 'cp-supported-codepages)
       (error "`codepage-setup' not present in this Emacs version"))
     (let ((completion-ignore-case t)
           (candidates (cp-supported-codepages)))
       (list (gnus-completing-read "Setup DOS Codepage" candidates
                                   t nil nil "437")))))
  (let ((alias-symbol (cond ((null alias) nil)
                            ((stringp alias) (intern alias))
                            (t (intern (format "windows-%s" number)))))
        (cp (intern (format "cp%s" number))))
    ;; Create the coding system unless it already exists.
    (unless (mm-coding-system-p cp)
      (if (fboundp 'codepage-setup)     ; silence compiler
          (codepage-setup number)
        (error "`codepage-setup' not present in this Emacs version")))
    ;; Don't add alias if setup of cp failed.
    (when (and alias-symbol
               (mm-coding-system-p cp))
      (add-to-list 'mm-charset-synonym-alist (cons alias-symbol cp)))))
381 | ||
ab785936 MB |
(defcustom mm-codepage-iso-8859-list
  '(;; Windows-1250 is a variant of Latin-2 heavily used by Microsoft
    ;; Outlook users in Czech republic.  Use this to allow reading of
    ;; their e-mails.
    1250
    ;; Windows-1252 is a superset of iso-8859-1 (West Europe).  See also
    ;; `gnus-article-dumbquotes-map'.
    (1252 . 1)
    ;; Windows-1254 is a superset of iso-8859-9 (Turkish).
    (1254 . 9)
    ;; Windows-1255 is a superset of iso-8859-8 (Hebrew).
    (1255 . 8))
  "A list of Windows codepage numbers and iso-8859 charset numbers.

If an element is a number corresponding to a supported windows
codepage, appropriate entries to `mm-charset-synonym-alist' are
added by `mm-setup-codepage-iso-8859'. An element may also be a
cons cell where the car is a codepage number and the cdr is the
corresponding number of an iso-8859 charset."
  :type '(list (set :inline t
                    (const 1250 :tag "Central and East European")
                    (const (1252 . 1) :tag "West European")
                    (const (1254 . 9) :tag "Turkish")
                    (const (1255 . 8) :tag "Hebrew"))
               (repeat :inline t
                       :tag "Other options"
                       (choice
                        (integer :tag "Windows codepage number")
                        (cons (integer :tag "Windows codepage number")
                              (integer :tag "iso-8859 charset number")))))
  :version "22.1" ;; Gnus 5.10.9
  :group 'mime)
410 | ||
(defcustom mm-codepage-ibm-list
  '(437 ;; (US etc.)
    860 ;; (Portugal)
    861 ;; (Iceland)
    862 ;; (Israel)
    863 ;; (Canadian French)
    865 ;; (Nordic)
    852 ;;
    850 ;; (Latin 1)
    855 ;; (Cyrillic)
    866 ;; (Cyrillic - Russian)
    857 ;; (Turkish)
    864 ;; (Arabic)
    869 ;; (Greek)
    874) ;; (Thai)
  ;; In Emacs 23 (unicode), cp... and ibm... are aliases.
  ;; Cf. http://thread.gmane.org/v9lkng5nwy.fsf@marauder.physik.uni-ulm.de
  "List of IBM codepage numbers.

The codepage mappings slightly differ between IBM and other vendors.
See \"ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/IBM/README.TXT\".

If an element is a number corresponding to a supported windows
codepage, appropriate entries to `mm-charset-synonym-alist' are
added by `mm-setup-codepage-ibm'."
  :type '(list (set :inline t
                    (const 437 :tag "US etc.")
                    (const 860 :tag "Portugal")
                    (const 861 :tag "Iceland")
                    (const 862 :tag "Israel")
                    (const 863 :tag "Canadian French")
                    (const 865 :tag "Nordic")
                    (const 852)
                    (const 850 :tag "Latin 1")
                    (const 855 :tag "Cyrillic")
                    (const 866 :tag "Cyrillic - Russian")
                    (const 857 :tag "Turkish")
                    (const 864 :tag "Arabic")
                    (const 869 :tag "Greek")
                    (const 874 :tag "Thai"))
               (repeat :inline t
                       :tag "Other options"
                       (integer :tag "Codepage number")))
  :version "22.1" ;; Gnus 5.10.9
  :group 'mime)
456 | ||
(defun mm-setup-codepage-iso-8859 (&optional list)
  "Add appropriate entries to `mm-charset-synonym-alist'.
Unless LIST is given, `mm-codepage-iso-8859-list' is used.

Each element of LIST is either a codepage NUMBER or a cons cell
\(NUMBER . ISO), where ISO is the number of an iso-8859 charset.
For every element whose windows-NUMBER is not already a coding
system, windows-NUMBER is mapped to cpNUMBER if that coding system
exists, and otherwise to iso-8859-ISO if ISO was given.  A bare
NUMBER for which cpNUMBER is unavailable adds nothing."
  (dolist (i (or list mm-codepage-iso-8859-list))
    (let* ((number (if (consp i) (car i) i))
           (cp (intern (format "cp%d" number)))
           (windows (intern (format "windows-%d" number)))
           ;; Only cons-cell elements name an iso-8859 fallback.
           (iso (and (consp i)
                     (intern (format "iso-8859-%d" (cdr i))))))
      (unless (mm-coding-system-p windows)
        (cond
         ((mm-coding-system-p cp)
          (add-to-list 'mm-charset-synonym-alist (cons windows cp)))
         ;; Without an iso fallback there is nothing sensible to map to;
         ;; never add a (windows-NUMBER . nil) entry.
         (iso
          (add-to-list 'mm-charset-synonym-alist (cons windows iso))))))))
474 | ||
(defun mm-setup-codepage-ibm (&optional list)
  "Add appropriate entries to `mm-charset-synonym-alist'.
Unless LIST is given, `mm-codepage-ibm-list' is used."
  (dolist (number (or list mm-codepage-ibm-list))
    (let ((ibm-name (intern (format "ibm%d" number)))
          (cp-name (intern (format "cp%d" number))))
      ;; Map ibmNUMBER to cpNUMBER only when the former is missing and
      ;; the latter exists.
      (unless (or (mm-coding-system-p ibm-name)
                  (not (mm-coding-system-p cp-name)))
        (add-to-list 'mm-charset-synonym-alist (cons ibm-name cp-name))))))
486 | ||
487 | ;; Initialize: | |
488 | (mm-setup-codepage-iso-8859) | |
489 | (mm-setup-codepage-ibm) | |
bd29ba20 | 490 | |
96a22201 KY |
491 | ;; Note: this has to be defined before `mm-charset-to-coding-system'. |
(defcustom mm-charset-eval-alist
  (if (featurep 'xemacs)
      nil ;; I don't know what would be useful for XEmacs.
    '(;; Emacs 22 provides autoloads for 1250-1258
      ;; (i.e. `mm-codepage-setup' does nothing).
      (windows-1250 . (mm-codepage-setup 1250 t))
      (windows-1251 . (mm-codepage-setup 1251 t))
      (windows-1253 . (mm-codepage-setup 1253 t))
      (windows-1257 . (mm-codepage-setup 1257 t))))
  "An alist of (CHARSET . FORM) pairs.
If an article is encoded in an unknown CHARSET, FORM is
evaluated. This allows to load additional libraries providing
charsets on demand. If supported by your Emacs version, you
could use `autoload-coding-system' here."
  :version "22.1" ;; Gnus 5.10.9
  :type '(list (set :inline t
                    (const (windows-1250 . (mm-codepage-setup 1250 t)))
                    (const (windows-1251 . (mm-codepage-setup 1251 t)))
                    (const (windows-1253 . (mm-codepage-setup 1253 t)))
                    (const (windows-1257 . (mm-codepage-setup 1257 t)))
                    (const (cp850 . (mm-codepage-setup 850 nil))))
               (repeat :inline t
                       :tag "Other options"
                       ;; FORM is an arbitrary Lisp expression to evaluate
                       ;; (see the defaults above), not a symbol.
                       (cons (symbol :tag "charset")
                             (sexp :tag "form"))))
  :group 'mime)
;; FORM is evaluated, so the variable must not be settable from file-local
;; variables without confirmation.
(put 'mm-charset-eval-alist 'risky-local-variable t)
519 | ||
58d8c5cd GM |
520 | (defvar mm-charset-override-alist) |
521 | ||
96a22201 KY |
522 | ;; Note: this function has to be defined before `mm-charset-override-alist' |
523 | ;; since it will use this function in order to determine its default value | |
524 | ;; when loading mm-util.elc. | |
(defun mm-charset-to-coding-system (charset &optional lbt
                                            allow-override silent)
  "Return coding-system corresponding to CHARSET.
CHARSET is a symbol or a string naming a MIME charset; a string is
downcased and interned first.  Return nil if CHARSET is nil or if no
coding system can be found for it.
If optional argument LBT (`unix', `dos' or `mac') is specified, it is
used as the line break code type of the coding system.

If ALLOW-OVERRIDE is given, use `mm-charset-override-alist' to
map undesired charset names to their replacement.  This should
only be used for decoding, not for encoding.

A non-nil value of SILENT means don't issue a warning even if CHARSET
is not available."
  ;; OVERRIDE is used (only) in `mm-decode-body' and `mm-decode-string'.
  (when (stringp charset)
    (setq charset (intern (downcase charset))))
  ;; Only decorate a real charset with LBT; a nil CHARSET must stay nil so
  ;; the first `cond' clause below can return it (otherwise it would turn
  ;; into a bogus symbol such as `nil-unix').
  (when (and charset lbt)
    (setq charset (intern (format "%s-%s" charset lbt))))
  (cond
   ((null charset)
    charset)
   ;; Running in a non-MULE environment.
   ((or (null (mm-get-coding-system-list))
        (not (fboundp 'coding-system-get)))
    charset)
   ;; Check override list quite early.  Should only be used for decoding,
   ;; not for encoding!
   ((and allow-override
         (let ((cs (cdr (assq charset mm-charset-override-alist))))
           (and cs (mm-coding-system-p cs) cs))))
   ;; ascii
   ((or (eq charset 'us-ascii)
        (string-match "ansi.x3.4" (symbol-name charset)))
    'ascii)
   ;; Check to see whether we can handle this charset.  (This depends
   ;; on there being some coding system matching each `mime-charset'
   ;; property defined, as there should be.)
   ((and (mm-coding-system-p charset)
;;; Doing this would potentially weed out incorrect charsets.
;;;      charset
;;;      (eq charset (coding-system-get charset 'mime-charset))
         )
    charset)
   ;; Use coding system Emacs knows.
   ((and (fboundp 'coding-system-from-name)
         (coding-system-from-name charset)))
   ;; Eval expressions from `mm-charset-eval-alist'
   ((let* ((el (assq charset mm-charset-eval-alist))
           (cs (car el))
           (form (cdr el)))
      (and cs
           form
           (progn
             ;; Avoid errors...
             (condition-case nil (eval form) (error nil))
             ;; (message "Failed to eval `%s'" form))
             (mm-coding-system-p cs))
           ;; Announce the charset only when evaluating FORM really made
           ;; it available.
           (progn
             (message "Added charset `%s' via `mm-charset-eval-alist'" cs)
             cs))))
   ;; Translate invalid charsets.
   ((let ((cs (cdr (assq charset mm-charset-synonym-alist))))
      (and cs
           (mm-coding-system-p cs)
           ;; (message
           ;;  "Using synonym `%s' from `mm-charset-synonym-alist' for `%s'"
           ;;  cs charset)
           cs)))
   ;; Last resort: search the coding system list for entries which
   ;; have the right mime-charset in case the canonical name isn't
   ;; defined (though it should be).
   ((let (cs)
      ;; mm-get-coding-system-list returns a list of cs without lbt.
      ;; Do we need -lbt?
      (dolist (c (mm-get-coding-system-list))
        (if (and (null cs)
                 (eq charset (or (coding-system-get c :mime-charset)
                                 (coding-system-get c 'mime-charset))))
            (setq cs c)))
      (unless (or silent cs)
        ;; Warn the user about unknown charset:
        (if (fboundp 'gnus-message)
            (gnus-message 7 "Unknown charset: %s" charset)
          (message "Unknown charset: %s" charset)))
      cs))))
609 | ||
610 | ;; Note: `mm-charset-to-coding-system' has to be defined before this. | |
(defcustom mm-charset-override-alist
  ;; Note: pairs that cannot be used in the Emacs version currently running
  ;; will be removed.
  '((gb2312 . gbk)
    (iso-8859-1 . windows-1252)
    (iso-8859-8 . windows-1255)
    (iso-8859-9 . windows-1254))
  "A mapping from undesired charset names to their replacement.

You may add pairs like (iso-8859-1 . windows-1252) here,
i.e. treat iso-8859-1 as windows-1252. windows-1252 is a
superset of iso-8859-1."
  :type
  '(list
    :convert-widget
    (lambda (widget)
      ;; Build the widget from the usable predefined pairs plus whatever
      ;; the variable's default value currently holds.
      (let ((defaults
              (delq nil
                    (mapcar (lambda (pair)
                              (and (mm-charset-to-coding-system (cdr pair)
                                                                nil nil t)
                                   pair))
                            '((gb2312 . gbk)
                              (iso-8859-1 . windows-1252)
                              (iso-8859-8 . windows-1255)
                              (iso-8859-9 . windows-1254)
                              (undecided . windows-1252)))))
            (rest nil))
        ;; One item per pair of the default value: a `const' when it equals
        ;; a predefined pair, an editable cons otherwise.  A predefined
        ;; pair with the same car is consumed either way.
        (dolist (entry (copy-sequence
                        (default-value 'mm-charset-override-alist)))
          (let ((known (assq (car entry) defaults)))
            (setq defaults (delq known defaults))
            (push (if (and known (equal entry known))
                      `(const ,known)
                    `(cons :format "%v"
                           (const :format "(%v" ,(car entry))
                           (symbol :size 3 :format " . %v)\n" ,(cdr entry))))
                  rest)))
        ;; Offer the remaining predefined pairs as well.
        (dolist (leftover defaults)
          (push `(const ,leftover) rest))
        (widget-convert
         'list
         `(set :inline t :format "%v" ,@(nreverse rest))
         `(repeat :inline t :tag "Other options"
                  (cons :format "%v"
                        (symbol :size 3 :format "(%v")
                        (symbol :size 3 :format " . %v)\n")))))))
  ;; Remove pairs that cannot be used in the Emacs version currently
  ;; running.  Note that this section will be evaluated when loading
  ;; mm-util.elc.
  :set (lambda (symbol value)
         (let ((usable
                (delq nil
                      (mapcar (lambda (pair)
                                (and (mm-charset-to-coding-system (cdr pair)
                                                                  nil nil t)
                                     pair))
                              value))))
           (custom-set-default symbol usable)))
  :version "22.1" ;; Gnus 5.10.9
  :group 'mime)
673 | ||
;; Prefer `binary'; fall back to `no-conversion'; nil when neither is
;; accepted by `mm-coding-system-p'.
(defvar mm-binary-coding-system
  (if (mm-coding-system-p 'binary)
      'binary
    (and (mm-coding-system-p 'no-conversion)
         'no-conversion))
  "100% binary coding system.")
680 | ||
;; On windows-nt and ms-dos the `-dos' variant of `raw-text' is the
;; candidate.  If the candidate is not a known coding system, use
;; `mm-binary-coding-system' instead.
(defvar mm-text-coding-system
  (let ((candidate (if (memq system-type '(windows-nt ms-dos))
                       'raw-text-dos
                     'raw-text)))
    (if (mm-coding-system-p candidate)
        candidate
      mm-binary-coding-system))
  "Text-safe coding system (For removing ^M).")
687 | ||
;; When non-nil this takes precedence over `mm-text-coding-system' in
;; `mm-append-to-file' and `mm-write-region' (an explicit coding
;; system argument still wins).
(defvar mm-text-coding-system-for-write
  nil
  "Text coding system for write.")
690 | ||
;; Candidates are tried in the order `utf-8-emacs' (Mule 7),
;; `emacs-mule', `escape-quoted'.  On windows-nt and ms-dos the `-dos'
;; variant of the first two is required; when that variant is missing
;; the result is `mm-binary-coding-system'.
(defvar mm-auto-save-coding-system
  (let ((dos-like (memq system-type '(windows-nt ms-dos))))
    (cond
     ((mm-coding-system-p 'utf-8-emacs)
      (cond ((not dos-like) 'utf-8-emacs)
            ((mm-coding-system-p 'utf-8-emacs-dos) 'utf-8-emacs-dos)
            (t mm-binary-coding-system)))
     ((mm-coding-system-p 'emacs-mule)
      (cond ((not dos-like) 'emacs-mule)
            ((mm-coding-system-p 'emacs-mule-dos) 'emacs-mule-dos)
            (t mm-binary-coding-system)))
     ((mm-coding-system-p 'escape-quoted)
      'escape-quoted)
     (t
      mm-binary-coding-system)))
  "Coding system of auto save file.")
706 | ||
;; Initialized from the value `mm-auto-save-coding-system' has when
;; this form is evaluated.
(defvar mm-universal-coding-system
  mm-auto-save-coding-system
  "The universal coding system.")
95fa1ff7 SZ |
709 | |
;; Fixme: some of the cars here aren't valid MIME charsets.  That
;; should only matter with XEmacs, though.
;;
;; Each element has the form (MIME-CHARSET MULE-CHARSET...).  The last
;; element, for `utf-8', is computed when this form is evaluated.
(defvar mm-mime-mule-charset-alist
  `((us-ascii ascii)
    (iso-8859-1 latin-iso8859-1)
    (iso-8859-2 latin-iso8859-2)
    (iso-8859-3 latin-iso8859-3)
    (iso-8859-4 latin-iso8859-4)
    (iso-8859-5 cyrillic-iso8859-5)
    ;; Non-mule (X)Emacs uses the last mule-charset for 8bit characters.
    ;; The fake mule-charset, gnus-koi8-r, tells Gnus that the default
    ;; charset is koi8-r, not iso-8859-5.
    (koi8-r cyrillic-iso8859-5 gnus-koi8-r)
    (iso-8859-6 arabic-iso8859-6)
    (iso-8859-7 greek-iso8859-7)
    (iso-8859-8 hebrew-iso8859-8)
    (iso-8859-9 latin-iso8859-9)
    (iso-8859-14 latin-iso8859-14)
    (iso-8859-15 latin-iso8859-15)
    (viscii vietnamese-viscii-lower)
    (iso-2022-jp latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978)
    (euc-kr korean-ksc5601)
    (gb2312 chinese-gb2312)
    (gbk chinese-gbk)
    (gb18030 gb18030-2-byte
             gb18030-4-byte-bmp gb18030-4-byte-smp
             gb18030-4-byte-ext-1 gb18030-4-byte-ext-2)
    (big5 chinese-big5-1 chinese-big5-2)
    (tibetan tibetan)
    (thai-tis620 thai-tis620)
    (windows-1251 cyrillic-iso8859-5)
    (iso-2022-7bit ethiopic arabic-1-column arabic-2-column)
    (iso-2022-jp-2 latin-iso8859-1 greek-iso8859-7
                   latin-jisx0201 japanese-jisx0208-1978
                   chinese-gb2312 japanese-jisx0208
                   korean-ksc5601 japanese-jisx0212)
    (iso-2022-int-1 latin-iso8859-1 greek-iso8859-7
                    latin-jisx0201 japanese-jisx0208-1978
                    chinese-gb2312 japanese-jisx0208
                    korean-ksc5601 japanese-jisx0212
                    chinese-cns11643-1 chinese-cns11643-2)
    ;; NOTE(review): this key repeats the previous one, so `assq' can
    ;; never reach this element -- confirm whether another charset
    ;; name was intended.
    (iso-2022-int-1 latin-iso8859-1 latin-iso8859-2
                    cyrillic-iso8859-5 greek-iso8859-7
                    latin-jisx0201 japanese-jisx0208-1978
                    chinese-gb2312 japanese-jisx0208
                    korean-ksc5601 japanese-jisx0212
                    chinese-cns11643-1 chinese-cns11643-2
                    chinese-cns11643-3 chinese-cns11643-4
                    chinese-cns11643-5 chinese-cns11643-6
                    chinese-cns11643-7)
    (iso-2022-jp-3 latin-jisx0201 japanese-jisx0208-1978 japanese-jisx0208
                   japanese-jisx0213-1 japanese-jisx0213-2)
    (shift_jis latin-jisx0201 katakana-jisx0201 japanese-jisx0208)
    ,(cond
      ((fboundp 'unicode-precedence-list)
       (cons 'utf-8
             (delq 'ascii
                   (mapcar 'charset-name (unicode-precedence-list)))))
      ((or (not (fboundp 'charsetp))    ; non-Mule case
           (charsetp 'unicode-a)
           (not (mm-coding-system-p 'mule-utf-8)))
       '(utf-8 unicode-a unicode-b unicode-c unicode-d unicode-e))
      (t                                ; If we have utf-8 we're in Mule 5+.
       (cons 'utf-8
             (delete 'ascii
                     (coding-system-get 'mule-utf-8 'safe-charsets))))))
  "Alist of MIME-charset/MULE-charsets.")
775 | ||
0683d241 MB |
(defun mm-enrich-utf-8-by-mule-ucs ()
  "Make the `utf-8' MIME charset usable by the Mule-UCS package.
This function will run when the `un-define' module is loaded under
XEmacs, and fill the `utf-8' entry in `mm-mime-mule-charset-alist'
with Mule charsets.  It is completely useless for Emacs."
  (when (boundp 'unicode-basic-translation-charset-order-list)
    ;; Any error raised while updating the alist is swallowed.
    (condition-case nil
        (let* ((order (symbol-value
                       'unicode-basic-translation-charset-order-list))
               ;; Work on a copy so the Mule-UCS list itself is untouched.
               (charsets (delq 'ascii (copy-sequence order)))
               (entry (assq 'utf-8 mm-mime-mule-charset-alist)))
          (cond
           ;; An existing `utf-8' element is updated in place.
           (entry
            (setcdr entry charsets))
           ;; Otherwise a new element is appended at the end.
           (t
            (setq mm-mime-mule-charset-alist
                  (nconc mm-mime-mule-charset-alist
                         (list (cons 'utf-8 charsets)))))))
      (error nil))))
795 | ||
;; Correct by construction, but should be unnecessary for Emacs:
;; on XEmacs, enrich the `utf-8' entry once "un-define" is loaded;
;; elsewhere, rebuild `mm-mime-mule-charset-alist' from the sorted
;; base coding systems.
(if (featurep 'xemacs)
    (eval-after-load "un-define" '(mm-enrich-utf-8-by-mule-ucs))
  (when (and (fboundp 'coding-system-list)
             (fboundp 'sort-coding-systems))
    (let (alist)
      (dolist (cs (sort-coding-systems (coding-system-list 'base-only)))
        (let ((mime (or (coding-system-get cs :mime-charset) ; Emacs 23 (unicode)
                        (coding-system-get cs 'mime-charset))))
          (when mime
            (let ((mule (coding-system-get cs 'safe-charsets)))
              ;; Skip coding systems whose safe-charsets is t, and keep
              ;; only the first coding system seen per MIME charset.
              (unless (or (eq mule t)
                          (assq mime alist))
                (push (cons mime (delq 'ascii mule)) alist))))))
      (setq mm-mime-mule-charset-alist (nreverse alist)))))
95fa1ff7 | 813 | |
f5490ddc MB |
;; `mm-find-mime-charset-region' looks these symbols up in its
;; HACK-CHARSETS argument.
(defvar mm-hack-charsets
  (list 'iso-8859-15 'iso-2022-jp-2)
  "A list of special charsets.
Valid elements include:
`iso-8859-15' convert ISO-8859-1, -9 to ISO-8859-15 if ISO-8859-15 exists.
`iso-2022-jp-2' convert ISO-2022-jp to ISO-2022-jp-2 if ISO-2022-jp-2 exists.")
820 | ||
;; Each element is (CODING-SYSTEM STRING); STRING lists the bytes of
;; CODING-SYSTEM that are treated as inconvertible.
(defvar mm-iso-8859-15-compatible
  '((iso-8859-1
     "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE")
    (iso-8859-9
     "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE\xD0\xDD\xDE\xF0\xFD\xFE"))
  "ISO-8859-15 exchangeable coding systems and inconvertible characters.")
825 | ||
;; One element per entry of `mm-iso-8859-15-compatible', of the form
;; (MULE-CHARSET DELTA . INCONVERTIBLE-CHARS).  DELTA is the difference
;; between the decoded values of byte \341 in ISO-8859-15 and in the
;; entry's coding system.  Unknown coding systems yield the
;; placeholder (gnus-charset 0).
(defvar mm-iso-8859-x-to-15-table
  (when (and (fboundp 'coding-system-p)
             (mm-coding-system-p 'iso-8859-15))
    (mapcar
     (lambda (entry)
       (let ((coding (car entry)))
         (if (not (mm-coding-system-p coding))
             '(gnus-charset 0)
           (let ((probe (string-to-char
                         (decode-coding-string "\341" coding)))
                 (probe-15 (string-to-char
                            (decode-coding-string "\341" 'iso-8859-15))))
             (cons (char-charset probe)
                   (cons (- probe-15 probe)
                         (string-to-list
                          (decode-coding-string (car (cdr entry))
                                                coding))))))))
     mm-iso-8859-15-compatible))
  "A table of the difference character between ISO-8859-X and ISO-8859-15.")
843 | ||
(defcustom mm-coding-system-priorities
  (let ((lang (and (boundp 'current-language-environment)
                   (symbol-value 'current-language-environment))))
    ;; LANG is nil on XEmacs without Mule but with `file-coding'.
    ;; In XEmacs 21.5 it may be the one like "Japanese (UTF-8)".
    (when (and lang
               (string-match "\\`Japanese" lang))
      ;; Japanese users prefer iso-2022-jp to others usually used
      ;; for `buffer-file-coding-system', however iso-8859-1 should
      ;; be used when there are only ASCII and Latin-1 characters.
      '(iso-8859-1 iso-2022-jp utf-8)))
  "Preferred coding systems for encoding outgoing messages.

More than one suitable coding system may be found for some text.
By default, the coding system with the highest priority is used
to encode outgoing messages (see `sort-coding-systems').  If this
variable is set, it overrides the default priority."
  :version "24.4"
  :type '(repeat (symbol :tag "Coding system"))
  :group 'mime)
864 | ||
865 | ;; ?? | |
1f7d2e14 SZ |
;; Defaults to non-nil exactly when `find-coding-systems-region' is
;; defined as a function at load time.
(defvar mm-use-find-coding-systems-region
  (fboundp 'find-coding-systems-region)
  "Use `find-coding-systems-region' to find proper coding systems.

Setting it to nil is useful on Emacsen supporting Unicode if sending
mail with multiple parts is preferred to sending a Unicode one.")
1f7d2e14 | 872 | |
c912b478 KY |
;; Each element is (CODE . CHAR): CODE is the number found in a numeric
;; entity, CHAR is `mm-ucs-to-char' applied to the Unicode code point
;; it stands for.  All CODEs listed are in the range #x80-#x9F.
;; NOTE(review): the pairs look like the Windows-1252 assignments for
;; that range -- confirm before relying on it.
(defvar mm-extra-numeric-entities
  (mapcar
   (lambda (item)
     (cons (car item) (mm-ucs-to-char (cdr item))))
   '((#x80 . #x20AC) (#x82 . #x201A) (#x83 . #x0192) (#x84 . #x201E)
     (#x85 . #x2026) (#x86 . #x2020) (#x87 . #x2021) (#x88 . #x02C6)
     (#x89 . #x2030) (#x8A . #x0160) (#x8B . #x2039) (#x8C . #x0152)
     (#x8E . #x017D) (#x91 . #x2018) (#x92 . #x2019) (#x93 . #x201C)
     (#x94 . #x201D) (#x95 . #x2022) (#x96 . #x2013) (#x97 . #x2014)
     (#x98 . #x02DC) (#x99 . #x2122) (#x9A . #x0161) (#x9B . #x203A)
     (#x9C . #x0153) (#x9E . #x017E) (#x9F . #x0178)))
  "*Alist of extra numeric entities and characters other than ISO 10646.
This table is used for decoding extra numeric entities to characters,
like \"&#128;\" to the euro sign, mainly in html messages.")
887 | ||
c113de23 GM |
888 | ;;; Internal variables: |
889 | ||
890 | ;;; Functions: | |
891 | ||
(defun mm-mule-charset-to-mime-charset (charset)
  "Return the MIME charset corresponding to the given Mule CHARSET."
  (if (not (and (fboundp 'find-coding-systems-for-charsets)
                (fboundp 'sort-coding-systems)))
      ;; No coding-system queries available: walk the MIME names of
      ;; `mm-mime-mule-charset-alist' in priority order and take the
      ;; first one whose entry lists CHARSET.
      (let ((names (sort (mapcar 'car mm-mime-mule-charset-alist)
                         'mm-sort-coding-systems-predicate))
            found)
        (while (and names (not found))
          (let ((entry (assq (pop names) mm-mime-mule-charset-alist)))
            (when (memq charset (cdr entry))
              (setq found (car entry)))))
        found)
    ;; Ask Emacs which coding systems can encode CHARSET, order them by
    ;; priority, and return the first MIME charset any of them declares.
    (let ((candidates (sort (sort-coding-systems
                             (find-coding-systems-for-charsets
                              (list charset)))
                            'mm-sort-coding-systems-predicate))
          mime)
      (while (and candidates (not mime))
        (let ((cs (pop candidates)))
          (when cs
            (setq mime (or (coding-system-get cs :mime-charset)
                           (coding-system-get cs 'mime-charset))))))
      mime)))
c113de23 | 917 | |
;; Both functions are aliases for `ignore' on XEmacs; on Emacs they
;; call `set-buffer-multibyte' on the current buffer.
(eval-and-compile
  (if (featurep 'xemacs)
      (defalias 'mm-enable-multibyte 'ignore)
    (defun mm-enable-multibyte ()
      "Make the current buffer multibyte.
This unconditionally calls `set-buffer-multibyte' with the `to' flag.
This is a no-op in XEmacs."
      (set-buffer-multibyte 'to)))

  (if (featurep 'xemacs)
      (defalias 'mm-disable-multibyte 'ignore)
    (defun mm-disable-multibyte ()
      "Unset the multibyte flag of the current buffer.
This is a no-op in XEmacs."
      (set-buffer-multibyte nil))))
052802c1 | 933 | |
c113de23 GM |
(defun mm-preferred-coding-system (charset)
  "Return the preferred coding system recorded for Mule CHARSET, or nil.
The value is read from the charset property `preferred-coding-system',
or from `prefered-coding-system' when the former is nil."
  ;; The second spelling is a typo in some Emacs versions.
  (let ((properties '(preferred-coding-system prefered-coding-system))
        found)
    (while (and properties (not found))
      (setq found (get-charset-property charset (pop properties))))
    found))
c113de23 | 938 | |
23f87bed MB |
;; Mule charsets shouldn't be used.
(defsubst mm-guess-charset ()
  "Guess Mule charset from the language environment."
  ;; `mail-parse-mule-charset' doubles as the cache: compute only
  ;; while it is still nil.
  (unless mail-parse-mule-charset
    ;; First guess: last charset listed for the language environment.
    (setq mail-parse-mule-charset
          (and (boundp 'current-language-environment)
               (car (last
                     (assq 'charset
                           (assoc current-language-environment
                                  language-info-alist))))))
    ;; No guess, or plain ASCII: fall back to the last Mule charset
    ;; listed for `mail-parse-charset', then to the default.
    (when (memq mail-parse-mule-charset '(nil ascii))
      (setq mail-parse-mule-charset
            (or (car (last (assq mail-parse-charset
                                 mm-mime-mule-charset-alist)))
                'latin-iso8859-1))))
  mail-parse-mule-charset)
959 | ||
c113de23 GM |
(defun mm-charset-after (&optional pos)
  "Return charset of a character in current buffer at position POS.
If POS is nil, it defaults to the current point.
If POS is out of range, the value is nil.
If the charset is `composition', return the actual one."
  (let ((char (char-after pos)))
    (cond
     ;; `char-after' yields nil when there is no character at POS;
     ;; return nil as documented instead of comparing nil with 128.
     ((null char) nil)
     ((< (mm-char-int char) 128) 'ascii)
     (t
      ;; charset-after is fake in some Emacsen.
      (let ((charset (and (fboundp 'char-charset) (char-charset char))))
        (cond
         ((eq charset 'composition)     ; Mule 4
          (let ((p (or pos (point))))
            (cadr (find-charset-region p (1+ p)))))
         ;; A real, non-8-bit charset is returned as is.
         ((and charset
               (not (memq charset '(ascii eight-bit-control
                                          eight-bit-graphic))))
          charset)
         ;; Otherwise guess from the language environment.
         (t (mm-guess-charset))))))))
c113de23 GM |
977 | |
(defun mm-mime-charset (charset)
  "Return the MIME charset corresponding to the given Mule CHARSET.
Signal an error if CHARSET is the symbol `unknown'."
  (when (eq charset 'unknown)
    (error "The message contains non-printable characters, please use attachment"))
  (if (not (and (fboundp 'coding-system-get)
                (fboundp 'get-charset-property)))
      ;; This is for XEmacs.
      (mm-mule-charset-to-mime-charset charset)
    (let ((preferred (mm-preferred-coding-system charset)))
      (or
       ;; MIME charset declared by the preferred coding system.
       (and preferred
            (or (coding-system-get preferred :mime-charset)
                (coding-system-get preferred 'mime-charset)))
       (and (eq charset 'ascii)
            'us-ascii)
       ;; The preferred coding system itself.
       preferred
       (mm-mule-charset-to-mime-charset charset)))))
995 | ||
;; `delete-dups' is not available in XEmacs 21.4.
(if (fboundp 'delete-dups)
    (defalias 'mm-delete-duplicates 'delete-dups)
  (defun mm-delete-duplicates (list)
    "Destructively remove `equal' duplicates from LIST.
Store the result in LIST and return it.  LIST must be a proper list.
Of several `equal' occurrences of an element in LIST, the first
one is kept.

This is a compatibility function for Emacsen without `delete-dups'."
    ;; Code from `subr.el' in Emacs 22:
    ;; for every cell, drop later elements `equal' to its car.
    (let ((cell list))
      (while cell
        (let ((item (car cell)))
          (setcdr cell (delete item (cdr cell))))
        (setq cell (cdr cell))))
    list))
c113de23 | 1012 | |
23f87bed MB |
;; Fixme: This is used in places when it should be testing the
;; default multibyteness.  See mm-default-multibyte-p.
(eval-and-compile
  (if (or (featurep 'xemacs)
          (not (boundp 'enable-multibyte-characters)))
      ;; No per-buffer flag to consult: report the `mule' feature.
      (defun mm-multibyte-p ()
        "Non-nil if the `mule' feature is present."
        (featurep 'mule))
    (defun mm-multibyte-p ()
      "Non-nil if multibyte is enabled in the current buffer."
      enable-multibyte-characters)))
1022 | ||
(defun mm-default-multibyte-p ()
  "Return non-nil if the session is multibyte.
This affects whether coding conversion should be attempted generally."
  ;; Without the `mule' feature the answer is always nil.  With it,
  ;; use the default value of `enable-multibyte-characters' when that
  ;; variable exists, and t otherwise.
  (and (featurep 'mule)
       (or (not (boundp 'enable-multibyte-characters))
           (default-value 'enable-multibyte-characters))))
c113de23 | 1030 | |
f5490ddc MB |
(defun mm-iso-8859-x-to-15-region (&optional b e)
  "Shift characters listed in `mm-iso-8859-x-to-15-table' in the buffer.
When E is non-nil, only the region between B and E is processed.
Characters whose charset has no entry in the table are skipped.
Characters marked inconvertible in their entry are left alone.
Return non-nil if no inconvertible character was met, and nil when
`char-charset' is not available."
  (when (fboundp 'char-charset)
    (let (inconvertible)
      (save-restriction
        (when e
          (narrow-to-region b e))
        (goto-char (point-min))
        ;; Only non-ASCII characters are of interest.
        (skip-chars-forward "\0-\177")
        (while (not (eobp))
          (let* ((c (char-after))
                 (item (assq (char-charset c) mm-iso-8859-x-to-15-table)))
            (cond
             ((null item)
              (forward-char))
             ((memq c (cdr (cdr item)))
              (setq inconvertible t)
              (forward-char))
             (t
              ;; Replace the character by itself plus the entry's delta.
              (let ((replacement (+ c (car (cdr item)))))
                (delete-char 1)
                (insert-before-markers replacement)))))
          (skip-chars-forward "\0-\177")))
      (not inconvertible))))
1051 | ||
(defun mm-sort-coding-systems-predicate (a b)
  "Return non-nil if coding system A should sort before B.
The order follows `mm-coding-system-priorities': a coding system
whose base appears earlier in that list wins.  The value is nil when
A is not a valid coding system, and t when A is valid but B is not."
  (let ((priorities
         (mapcar (lambda (cs)
                   ;; Note: invalid entries are dropped silently
                   (let ((valid (mm-coding-system-p cs)))
                     (and valid (coding-system-base valid))))
                 mm-coding-system-priorities))
        (cs-a (mm-coding-system-p a)))
    (when cs-a
      (let ((cs-b (mm-coding-system-p b)))
        (or (not cs-b)
            ;; A longer `memq' tail means an earlier position in the
            ;; list; a coding system not listed has a tail length of 0.
            (> (length (memq (coding-system-base cs-a) priorities))
               (length (memq (coding-system-base cs-b) priorities))))))))
47b63dfa | 1064 | |
aa0a8561 MB |
1065 | (eval-when-compile |
1066 | (autoload 'latin-unity-massage-name "latin-unity") | |
1067 | (autoload 'latin-unity-maybe-remap "latin-unity") | |
1068 | (autoload 'latin-unity-representations-feasible-region "latin-unity") | |
9efa445f DN |
1069 | (autoload 'latin-unity-representations-present-region "latin-unity")) |
1070 | ||
1071 | (defvar latin-unity-coding-systems) | |
1072 | (defvar latin-unity-ucs-list) | |
aa0a8561 MB |
1073 | |
(defun mm-xemacs-find-mime-charset-1 (begin end)
  "Determine which MIME charset to use to send region as message.
This uses the XEmacs-specific latin-unity package to better handle the
case where identical characters from diverse ISO-8859-? character sets
can be encoded using a single one of the corresponding coding systems.

It treats `mm-coding-system-priorities' as the list of preferred
coding systems; a useful example setting for this list in Western
Europe would be '(iso-8859-1 iso-8859-15 utf-8), which would default
to the very standard Latin 1 coding system, and only move to coding
systems that are less supported as is necessary to encode the
characters that exist in the buffer.

Latin Unity doesn't know about those non-ASCII Roman characters that
are available in various East Asian character sets.  As such, its
behavior if you have a JIS 0212 LATIN SMALL LETTER A WITH ACUTE in a
buffer and it can otherwise be encoded as Latin 1, won't be ideal.
But this is very much a corner case, so don't worry about it.

The value is a one-element list holding the chosen coding system, or
nil when no decision is made here."
  (let ((preferences mm-coding-system-priorities))
    ;; Load the Latin Unity library, if available.
    (unless (featurep 'latin-unity)
      (when (locate-library "latin-unity")
        (require 'latin-unity)))

    ;; Without latin unity the answer is nil, so that
    ;; `mm-find-charset-region' takes its default action, which equally
    ;; applies to GNU Emacs.
    (when (featurep 'latin-unity)
      (let ((feasible (latin-unity-representations-feasible-region begin end))
            (present (latin-unity-representations-present-region begin end)))
        (catch 'done
          ;; Pass back the first coding system in the preferred list
          ;; that can encode the whole region.
          (dolist (preference preferences)
            (let ((coding (latin-unity-massage-name 'buffer-default
                                                    preference)))
              (cond
               ;; A universal coding system can certainly encode all
               ;; the characters in the region.
               ((memq coding latin-unity-ucs-list)
                (throw 'done (list coding)))
               ;; Not universal and unknown to latin unity: we can't
               ;; decide here.  Leave that until later in
               ;; `mm-find-mime-charset-region', whence we have been
               ;; called.
               ((not (memq coding latin-unity-coding-systems))
                (throw 'done nil))
               ;; A known coding system that may conceivably be able
               ;; to encode all the characters in the region.
               ((latin-unity-maybe-remap begin end coding
                                         feasible present t)
                (throw 'done (list coding))))))
          ;; Can't encode using anything from the
          ;; `mm-coding-system-priorities' list.
          ;; Leave `mm-find-mime-charset' to do most of the work.
          nil)))))
1138 | ||
(defmacro mm-xemacs-find-mime-charset (begin end)
  "Expand to a latin-unity charset lookup for the region BEGIN to END.
On XEmacs the expansion calls `mm-xemacs-find-mime-charset-1' when the
`mule' feature is present; elsewhere the macro expands to nil."
  (if (featurep 'xemacs)
      (list 'and
            '(featurep 'mule)
            (list 'mm-xemacs-find-mime-charset-1 begin end))))
aa0a8561 | 1142 | |
b5000590 GM |
1143 | (declare-function mm-delete-duplicates "mm-util" (list)) |
1144 | ||
(defun mm-find-mime-charset-region (b e &optional hack-charsets)
  "Return the MIME charsets needed to encode the region between B and E.
nil means ASCII, a single-element list represents an appropriate MIME
charset, and a longer list means no appropriate charset.
HACK-CHARSETS is a list of symbols; `iso-8859-15' and `iso-2022-jp-2'
enable the corresponding reductions of the result."
  (let (charsets)
    ;; The return possibilities of this function are a mess...
    (or
     ;; 1. Find the mime-charset of the most preferred coding system
     ;;    that has one.
     (when (and (mm-multibyte-p)
                mm-use-find-coding-systems-region)
       (let ((candidates (find-coding-systems-region b e)))
         (if mm-coding-system-priorities
             (setq candidates
                   (sort candidates 'mm-sort-coding-systems-predicate)))
         (setq candidates (delq 'compound-text candidates))
         (unless (equal candidates '(undecided))
           (while candidates
             (let* ((coding (pop candidates))
                    (mime (or (coding-system-get coding :mime-charset)
                              (coding-system-get coding 'mime-charset)))
                    (name (and mime (symbol-name mime))))
               ;; The mime-charset (`x-ctext') of `compound-text' is
               ;; not in the IANA list.  We shouldn't normally use
               ;; anything here with a mime-charset having an `x-'
               ;; prefix.
               ;; Fixme: Allow this to be overridden, since there is
               ;; existing use of x-ctext.
               ;; Also people apparently need the coding system
               ;; `iso-2022-jp-3' (which Mule-UCS defines with
               ;; mime-charset, though it's not valid).
               (when (and mime
                          (not (string-match "^[Xx]-" name))
                          ;; UTF-16 of any variety is invalid for text
                          ;; parts and, unfortunately, has mime-charset
                          ;; defined both in Mule-UCS and versions of
                          ;; Emacs.  (The name might be
                          ;; `mule-utf-16...' or `utf-16...'.)
                          (not (string-match "utf-16" name)))
                 ;; First acceptable charset: record it, stop the loop.
                 (setq charsets (list mime)
                       candidates nil)))))
         charsets))
     ;; 2. If we're XEmacs, and some coding system is appropriate,
     ;;    mm-xemacs-find-mime-charset will return an appropriate list.
     ;;    Otherwise, we'll get nil, and the next setq will get invoked.
     (setq charsets (mm-xemacs-find-mime-charset b e))

     ;; Fixme: won't work for unibyte Emacs 23:

     ;; 3. We're not multibyte, or a single coding system won't cover it.
     (setq charsets
           (mm-delete-duplicates
            (mapcar 'mm-mime-charset
                    (delq 'ascii
                          (mm-find-charset-region b e))))))
    ;; Fold ISO-8859-1/-9 into ISO-8859-15 when asked to and possible.
    (when (and (> (length charsets) 1)
               (memq 'iso-8859-15 charsets)
               (memq 'iso-8859-15 hack-charsets)
               (save-excursion (mm-iso-8859-x-to-15-region b e)))
      (dolist (compatible mm-iso-8859-15-compatible)
        (setq charsets (delq (car compatible) charsets))))
    ;; iso-2022-jp-2 makes iso-2022-jp redundant.
    (when (and (memq 'iso-2022-jp-2 charsets)
               (memq 'iso-2022-jp-2 hack-charsets))
      (setq charsets (delq 'iso-2022-jp charsets)))
    ;; Attempt to reduce the number of charsets if utf-8 is available.
    (when (and (featurep 'xemacs)
               (> (length charsets) 1)
               (mm-coding-system-p 'utf-8))
      (let ((mm-coding-system-priorities
             (cons 'utf-8 mm-coding-system-priorities)))
        (setq charsets
              (mm-delete-duplicates
               (mapcar 'mm-mime-charset
                       (delq 'ascii
                             (mm-find-charset-region b e)))))))
    charsets))
95fa1ff7 | 1220 | |
c113de23 GM |
(defmacro mm-with-unibyte-buffer (&rest forms)
  "Create a temporary buffer, and evaluate FORMS there like `progn'.
Use unibyte mode for this."
  ;; Expands to (with-temp-buffer (mm-disable-multibyte) FORMS...).
  (cons 'with-temp-buffer
        (cons '(mm-disable-multibyte)
              forms)))
(put 'mm-with-unibyte-buffer 'lisp-indent-function 0)
(put 'mm-with-unibyte-buffer 'edebug-form-spec '(body))
1229 | ||
23f87bed MB |
(defmacro mm-with-multibyte-buffer (&rest forms)
  "Create a temporary buffer, and evaluate FORMS there like `progn'.
Use multibyte mode for this."
  ;; Expands to (with-temp-buffer (mm-enable-multibyte) FORMS...).
  (cons 'with-temp-buffer
        (cons '(mm-enable-multibyte)
              forms)))
(put 'mm-with-multibyte-buffer 'lisp-indent-function 0)
(put 'mm-with-multibyte-buffer 'edebug-form-spec '(body))
1238 | ||
(defmacro mm-with-unibyte-current-buffer (&rest forms)
  "Evaluate FORMS with current buffer temporarily made unibyte.
Equivalent to `progn' in XEmacs.

If the buffer was multibyte on entry, it is made multibyte again when
FORMS finish, including when they exit non-locally (error, `throw').
The flag is restored in the buffer that was current on entry, even if
FORMS leave another buffer current.

Note: We recommend not using this macro any more; there should be
better ways to do a similar thing.  The previous version of this macro
bound the default value of `enable-multibyte-characters' to nil while
evaluating FORMS but it is no longer done.  So, some programs assuming
it if any may malfunction."
  (if (featurep 'xemacs)
      `(progn ,@forms)
    ;; Uninterned symbols keep the expansion from capturing variables
    ;; used inside FORMS.
    (let ((multibyte (make-symbol "multibyte"))
          (buffer (make-symbol "buffer")))
      `(let ((,multibyte enable-multibyte-characters)
             (,buffer (current-buffer)))
         (when ,multibyte
           (set-buffer-multibyte nil))
         ;; `unwind-protect' rather than `prog1': the buffer must not
         ;; stay unibyte when FORMS signal an error.
         (unwind-protect
             (progn ,@forms)
           (when (and ,multibyte (buffer-live-p ,buffer))
             (with-current-buffer ,buffer
               (set-buffer-multibyte t))))))))
(put 'mm-with-unibyte-current-buffer 'lisp-indent-function 0)
(put 'mm-with-unibyte-current-buffer 'edebug-form-spec '(body))
1260 | ||
(defun mm-find-charset-region (b e)
  "Return a list of Emacs charsets in the region B to E."
  (if (and (mm-multibyte-p)
           (fboundp 'find-charset-region))
      ;; Remove composition since the base charsets have been included.
      ;; Remove eight-bit-*, treat them as ascii.
      (let ((found (find-charset-region b e)))
        (dolist (ignored '(composition eight-bit-control
                                       eight-bit-graphic control-1))
          (setq found (delq ignored found)))
        found)
    ;; We are in a unibyte buffer or XEmacs non-mule, so we futz around a bit.
    (save-excursion
      (save-restriction
        (narrow-to-region b e)
        (goto-char (point-min))
        (skip-chars-forward "\0-\177")
        (if (eobp)
            ;; Nothing but ASCII in the region.
            '(ascii)
          ;; Guess one charset for the 8-bit text: the last charset of
          ;; the language environment unless it is `ascii', else the
          ;; last Mule charset listed for `mail-parse-charset', else
          ;; `latin-iso8859-1'.
          (let ((charset
                 (and (boundp 'current-language-environment)
                      (car (last (assq 'charset
                                       (assoc current-language-environment
                                              language-info-alist)))))))
            (when (eq charset 'ascii)
              (setq charset nil))
            (list 'ascii
                  (or charset
                      (car (last (assq mail-parse-charset
                                       mm-mime-mule-charset-alist)))
                      'latin-iso8859-1))))))))
c113de23 | 1294 | |
c113de23 GM |
(defun mm-auto-mode-alist ()
  "Return an `auto-mode-alist' with only the .gz (etc) thingies."
  ;; Keep, in their original order, the elements whose cdr is a list.
  (let (kept)
    (dolist (entry auto-mode-alist)
      (if (listp (cdr entry))
          (setq kept (cons entry kept))))
    (nreverse kept)))
1304 | ||
;; Prepended to `inhibit-file-name-handlers' by the mm file functions
;; when their INHIBIT argument is non-nil.
(defvar mm-inhibit-file-name-handlers
  (list 'jka-compr-handler 'image-file-handler 'epa-file-handler)
  "A list of handlers doing (un)compression (etc) thingies.")
1308 | ||
(defun mm-insert-file-contents (filename &optional visit beg end replace
                                         inhibit)
  "Like `insert-file-contents', but only reads in the file.
A buffer may be modified in several ways after reading into the buffer due
to advanced Emacs features, such as file-name-handlers, format decoding,
`find-file-hooks', etc.
This function ensures that none of these modifications will take place.
If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'.
FILENAME, VISIT, BEG, END and REPLACE are passed on to
`insert-file-contents'."
  ;; The find-file hook variable has two names; use the one that exists.
  (let* ((ffh (if (boundp 'find-file-hook)
                  'find-file-hook
                'find-file-hooks))
         (val (symbol-value ffh)))
    (letf* ((format-alist nil)
            (auto-mode-alist (unless inhibit (mm-auto-mode-alist)))
            ((default-value 'major-mode) 'fundamental-mode)
            (enable-local-variables nil)
            (after-insert-file-functions nil)
            (enable-local-eval nil)
            (inhibit-file-name-operation
             (if inhibit 'insert-file-contents inhibit-file-name-operation))
            (inhibit-file-name-handlers
             (if inhibit
                 (append mm-inhibit-file-name-handlers
                         inhibit-file-name-handlers)
               inhibit-file-name-handlers)))
      ;; Empty the hook for the duration of the read; put the saved
      ;; value back even if reading fails.
      (set ffh nil)
      (unwind-protect
          (insert-file-contents filename visit beg end replace)
        (set ffh val)))))
c113de23 GM |
1339 | |
(defun mm-append-to-file (start end filename &optional codesys inhibit)
  "Append the contents of the region to the end of file FILENAME.
When called from a function, expects three arguments,
START, END and FILENAME.  START and END are buffer positions
saying what text to write.
Optional fourth argument specifies the coding system to use when
encoding the file.
If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'."
  (let ((inhibit-file-name-handlers
         (if inhibit
             (append mm-inhibit-file-name-handlers
                     inhibit-file-name-handlers)
           inhibit-file-name-handlers))
        (inhibit-file-name-operation
         (or (and inhibit 'append-to-file)
             inhibit-file-name-operation))
        ;; First non-nil of: CODESYS, the write-specific default, the
        ;; general text coding system.
        (coding-system-for-write
         (or codesys
             mm-text-coding-system-for-write
             mm-text-coding-system)))
    ;; Write quietly, then report with our own message.
    (write-region start end filename t 'no-message)
    (message "Appended to %s" filename)))
c113de23 | 1361 | |
(defun mm-write-region (start end filename &optional append visit lockname
                              coding-system inhibit)
  "Like `write-region'.
START, END, FILENAME, APPEND, VISIT and LOCKNAME are passed on to
`write-region' unchanged.  CODING-SYSTEM, if non-nil, is used for
writing; otherwise `mm-text-coding-system-for-write' or
`mm-text-coding-system' is used.
If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'."
  (let ((coding-system-for-write
         (cond (coding-system)
               (mm-text-coding-system-for-write)
               (t mm-text-coding-system)))
        (inhibit-file-name-operation
         (or (and inhibit 'write-region)
             inhibit-file-name-operation))
        ;; `append' with a nil first argument yields the existing list.
        (inhibit-file-name-handlers
         (append (and inhibit mm-inhibit-file-name-handlers)
                 inhibit-file-name-handlers)))
    (write-region start end filename append visit lockname)))
1379 | ||
;; `gmm-write-region' is only needed by the fallback definition of
;; `mm-make-temp-file' below, so it is autoloaded rather than required.
(autoload 'gmm-write-region "gmm-utils")
(declare-function help-function-arglist "help-fns"
                  (def &optional preserve-names))

;; It is not a MIME function, but some MIME functions use it.
;; Use the native `make-temp-file' when it exists and its argument list
;; has `suffix' in the fourth position; otherwise define a replacement.
;; Any error while inspecting the argument list selects the replacement.
(if (and (fboundp 'make-temp-file)
         (ignore-errors
           (let ((def (if (fboundp 'compiled-function-arglist) ;; XEmacs
                          (eval (list 'compiled-function-arglist
                                      (symbol-function 'make-temp-file)))
                        (require 'help-fns)
                        (help-function-arglist 'make-temp-file t))))
             (and (>= (length def) 4)
                  (eq (nth 3 def) 'suffix)))))
    (defalias 'mm-make-temp-file 'make-temp-file)
  ;; Stolen (and modified for XEmacs) from Emacs 22.
  (defun mm-make-temp-file (prefix &optional dir-flag suffix)
    "Create a temporary file.
The returned file name (created by appending some random characters at the end
of PREFIX, and expanding against `temporary-file-directory' if necessary),
is guaranteed to point to a newly created empty file.
You can then use `write-region' to write new data into the file.

If DIR-FLAG is non-nil, create a new empty directory instead of a file.

If SUFFIX is non-nil, add that at the end of the file name."
    (let ((umask (default-file-modes))
          file)
      (unwind-protect
          (progn
            ;; Create temp files with strict access rights.  It's easy to
            ;; loosen them later, whereas it's impossible to close the
            ;; time-window of loose permissions otherwise.
            ;; 448 is octal 700.
            (set-default-file-modes 448)
            ;; The `condition-case' form yields nil on success, which ends
            ;; the loop, and non-nil when the name was already taken, which
            ;; makes the loop pick another name.
            (while (condition-case err
                       (progn
                         (setq file
                               (make-temp-name
                                (expand-file-name
                                 prefix
                                 (if (fboundp 'temp-directory)
                                     ;; XEmacs
                                     (temp-directory)
                                   temporary-file-directory))))
                         (if suffix
                             (setq file (concat file suffix)))
                         (if dir-flag
                             (make-directory file)
                           ;; NOTE: This is unsafe if Emacs 20
                           ;; users and XEmacs users don't use
                           ;; a secure temp directory.
                           (gmm-write-region "" nil file nil 'silent
                                             nil 'excl))
                         nil)
                     (file-already-exists t)
                     ;; The XEmacs version of `make-directory' issues
                     ;; `file-error'.
                     ;; Any other `file-error' is re-signaled unchanged.
                     (file-error (or (and (featurep 'xemacs)
                                          (file-exists-p file))
                                     (signal (car err) (cdr err)))))
              ;; the file was somehow created by someone else between
              ;; `make-temp-name' and `write-region', let's try again.
              nil)
            file)
        ;; Reset the umask.
        (set-default-file-modes umask)))))
1446 | ||
eecdcaf5 LMI |
(defvar mm-image-load-path-cache nil
  "Cache for `mm-image-load-path'.
Either nil or a cons ((LOAD-PATH . SUBDIR) . DIRECTORIES), where
DIRECTORIES is the result computed for that `load-path' value and that
image subdirectory.")

(defun mm-image-load-path (&optional package)
  "Return the existing image directories derived from `load-path'.
For each non-nil element of `load-path', the directory
\"etc/images/PACKAGE\" below that element's parent directory is
checked, and kept if it exists.  PACKAGE is a directory name ending in
a slash and defaults to \"gnus/\".
The result is cached and reused as long as neither `load-path' nor
PACKAGE changes."
  (let* ((subdir (or package "gnus/"))
         ;; The key includes SUBDIR so that a result computed for one
         ;; package is never returned for another.
         (key (cons load-path subdir)))
    (if (and mm-image-load-path-cache
             (equal key (car mm-image-load-path-cache)))
        (cdr mm-image-load-path-cache)
      (let (dir result)
        (dolist (path load-path)
          (when (and path
                     (file-directory-p
                      (setq dir (concat (file-name-directory
                                         (directory-file-name path))
                                        "etc/images/" subdir))))
            (push dir result)))
        (setq result (nreverse result)
              mm-image-load-path-cache (cons key result))
        result))))
95fa1ff7 | 1464 | |
23f87bed MB |
;; Fixme: This doesn't look useful where it's used.
(if (fboundp 'detect-coding-region)
    (defun mm-detect-coding-region (start end)
      "Like `detect-coding-region' except returning the best one.
START and END delimit the region to examine."
      (let ((detected (detect-coding-region start end)))
        (or (car-safe detected)
            detected)))
  (defun mm-detect-coding-region (start end)
    "Return `ascii' if the text between START and END is all ASCII.
Otherwise return the value of `mm-guess-charset'.  Point is moved
back to where it was before returning."
    (let ((origin (point))
          guess)
      (goto-char start)
      (skip-chars-forward "\0-\177" end)
      (setq guess (if (eq (point) end) 'ascii (mm-guess-charset)))
      (goto-char origin)
      guess)))
1480 | ||
b5000590 GM |
(declare-function mm-detect-coding-region "mm-util" (start end))

(if (fboundp 'coding-system-get)
    (defun mm-detect-mime-charset-region (start end)
      "Detect MIME charset of the text in the region between START and END.
The coding system found by `mm-detect-coding-region' is mapped to its
`:mime-charset' property, or failing that its `mime-charset' property."
      (let ((detected (mm-detect-coding-region start end)))
        (or (coding-system-get detected :mime-charset)
            (coding-system-get detected 'mime-charset))))
  (defun mm-detect-mime-charset-region (start end)
    "Detect MIME charset of the text in the region between START and END.
Without `coding-system-get', the value of `mm-detect-coding-region'
is returned as is."
    (mm-detect-coding-region start end)))
1493 | ||
01c52d31 MB |
;; Give the byte compiler a definition to see; the function below tests
;; for this `ignore' placeholder before calling the real thing.
(eval-when-compile
  (unless (fboundp 'coding-system-to-mime-charset)
    (defalias 'coding-system-to-mime-charset 'ignore)))

(defun mm-coding-system-to-mime-charset (coding-system)
  "Return the MIME charset corresponding to CODING-SYSTEM.
Return nil if CODING-SYSTEM is nil or no charset can be determined.
To make this function work with XEmacs, the APEL package is required."
  (cond
   ((null coding-system) nil)
   ;; A clause without a body returns the value of its test.
   ((and (fboundp 'coding-system-get)
         (or (coding-system-get coding-system :mime-charset)
             (coding-system-get coding-system 'mime-charset))))
   ((featurep 'xemacs)
    (and (or (and (fboundp 'coding-system-to-mime-charset)
                  (not (eq (symbol-function 'coding-system-to-mime-charset)
                           'ignore)))
             (and (condition-case nil
                      (require 'mcharset)
                    (error nil))
                  (fboundp 'coding-system-to-mime-charset)))
         (coding-system-to-mime-charset coding-system)))))
1514 | ||
c7641e3c GM |
(defvar jka-compr-acceptable-retval-list)
(declare-function jka-compr-make-temp-name "jka-compr" (&optional local))

(defun mm-decompress-buffer (filename &optional inplace force)
  "Decompress buffer's contents, depending on jka-compr.
Only when FORCE is t or `auto-compression-mode' is enabled and FILENAME
agrees with `jka-compr-compression-info-list', decompression is done.
Signal an error if FORCE is neither nil nor t and compressed data are
not decompressed because `auto-compression-mode' is disabled.
If INPLACE is nil, return decompressed data or nil without modifying
the buffer.  Otherwise, replace the buffer's contents with the
decompressed data.  The buffer's multibyteness must be turned off.
If the uncompress program fails, the failure is reported with
`message', the buffer is left alone and nil is returned."
  (when (and filename
             (if force
                 (prog1 t (require 'jka-compr))
               (and (fboundp 'jka-compr-installed-p)
                    (jka-compr-installed-p))))
    (let ((info (jka-compr-get-compression-info filename)))
      (when info
        (unless (or (memq force (list nil t))
                    (jka-compr-installed-p))
          (error "Not decompressing %s: `auto-compression-mode' is disabled"
                 filename))
        (let ((prog (jka-compr-info-uncompress-program info))
              (args (jka-compr-info-uncompress-args info))
              (msg (format "%s %s..."
                           (jka-compr-info-uncompress-message info)
                           filename))
              (err-file (jka-compr-make-temp-name))
              (cur (current-buffer))
              (coding-system-for-read mm-binary-coding-system)
              (coding-system-for-write mm-binary-coding-system)
              retval err-msg)
          (message "%s" msg)
          ;; Run the program on a copy so the original buffer stays
          ;; untouched until the outcome is known.
          (mm-with-unibyte-buffer
            (insert-buffer-substring cur)
            (condition-case err
                (if (memq (apply 'call-process-region
                                 (point-min) (point-max)
                                 prog t (list t err-file) nil args)
                          jka-compr-acceptable-retval-list)
                    (setq retval (buffer-string))
                  ;; Failure: collapse what the program wrote to ERR-FILE
                  ;; into a single line and attach it to the message.
                  (erase-buffer)
                  (insert-file-contents err-file)
                  (let ((stderr (mapconcat 'identity
                                           (delete ""
                                                   (split-string
                                                    (buffer-string)))
                                           " ")))
                    (erase-buffer)
                    (setq err-msg
                          (concat
                           (format "Error while executing \"%s %s < %s\""
                                   prog (mapconcat 'identity args " ")
                                   filename)
                           (if (equal stderr "")
                               ""
                             (concat ": " stderr))))))
              (error
               (setq err-msg (error-message-string err)))))
          (when (file-exists-p err-file)
            (ignore-errors (delete-file err-file)))
          (when inplace
            (unless err-msg
              (delete-region (point-min) (point-max))
              (insert retval))
            (setq retval nil))
          (message "%s" (or err-msg (concat msg "done")))
          retval)))))
1581 | ||
;; Define placeholders at compile time for functions that are missing
;; from the compiling Emacs.
(eval-when-compile
  (unless (fboundp 'coding-system-name)
    (defalias 'coding-system-name 'ignore))
  (unless (fboundp 'find-file-coding-system-for-read-from-filename)
    (defalias 'find-file-coding-system-for-read-from-filename 'ignore))
  (unless (fboundp 'find-operation-coding-system)
    (defalias 'find-operation-coding-system 'ignore)))

(defun mm-find-buffer-file-coding-system (&optional filename)
  "Find coding system used to decode the contents of the current buffer.
This function looks for the coding system magic cookie or examines the
coding system specified by `file-coding-system-alist' being associated
with FILENAME which defaults to `buffer-file-name'.  Data compressed by
gzip, bzip2, etc. are allowed."
  (unless filename
    (setq filename buffer-file-name))
  (save-excursion
    (let ((decomp (unless ;; Not worth it to examine charset of tar files.
                      (and filename
                           (string-match
                            "\\.\\(?:tar\\.[^.]+\\|tbz\\|tgz\\)\\'"
                            filename))
                    (mm-decompress-buffer filename nil t))))
      ;; When decompressed data came back, examine it in a scratch
      ;; buffer, with the last extension removed from FILENAME.
      (when decomp
        (set-buffer (generate-new-buffer " *temp*"))
        (mm-disable-multibyte)
        (insert decomp)
        (setq filename (file-name-sans-extension filename)))
      (goto-char (point-min))
      (unwind-protect
          (cond
           ((boundp 'set-auto-coding-function) ;; Emacs
            (if filename
                (or (funcall (symbol-value 'set-auto-coding-function)
                             filename (- (point-max) (point-min)))
                    (car (find-operation-coding-system 'insert-file-contents
                                                       filename)))
              ;; No file name: bind `auto-coding-alist' to nil and treat
              ;; any error as "no coding system found".
              (let (auto-coding-alist)
                (condition-case nil
                    (funcall (symbol-value 'set-auto-coding-function)
                             nil (- (point-max) (point-min)))
                  (error nil)))))
           ((and (featurep 'xemacs) (featurep 'file-coding)) ;; XEmacs
            (let ((case-fold-search t)
                  (end (point-at-eol))
                  codesys start)
              ;; Try four sources in turn; the first one that names a
              ;; coding system known to `find-coding-system' wins.
              (or
               ;; 1. A "-*- ... coding: X ... -*-" cookie on the first line.
               (and (re-search-forward "-\\*-+[\t ]*" end t)
                    (progn
                      (setq start (match-end 0))
                      (re-search-forward "[\t ]*-+\\*-" end t))
                    (progn
                      (setq end (match-beginning 0))
                      (goto-char start)
                      (or (looking-at "coding:[\t ]*\\([^\t ;]+\\)")
                          (re-search-forward
                           "[\t ;]+coding:[\t ]*\\([^\t ;]+\\)"
                           end t)))
                    (find-coding-system (setq codesys
                                              (intern (match-string 1))))
                    codesys)
               ;; 2. A "coding:" line inside a ";"-commented
               ;;    "Local Variables:" ... "End:" section.
               (and (re-search-forward "^[\t ]*;+[\t ]*Local[\t ]+Variables:"
                                       nil t)
                    (progn
                      (setq start (match-end 0))
                      (re-search-forward "^[\t ]*;+[\t ]*End:" nil t))
                    (progn
                      (setq end (match-beginning 0))
                      (goto-char start)
                      (re-search-forward
                       "^[\t ]*;+[\t ]*coding:[\t ]*\\([^\t\n\r ]+\\)"
                       end t))
                    (find-coding-system (setq codesys
                                              (intern (match-string 1))))
                    codesys)
               ;; 3. A ";;;coding system: X" line, matched case-sensitively
               ;;    from the start of the buffer.
               (and (progn
                      (goto-char (point-min))
                      (setq case-fold-search nil)
                      (re-search-forward "^;;;coding system: "
                                         ;;(+ (point-min) 3000) t))
                                         nil t))
                    (looking-at "[^\t\n\r ]+")
                    (find-coding-system
                     (setq codesys (intern (match-string 0))))
                    codesys)
               ;; 4. Whatever the file name alone suggests.
               (and filename
                    (setq codesys
                          (find-file-coding-system-for-read-from-filename
                           filename))
                    (coding-system-name (coding-system-base codesys)))))))
        ;; Dispose of the scratch buffer created above.
        (when decomp
          (kill-buffer (current-buffer)))))))
3efe5554 | 1674 | |
c113de23 GM |
1675 | (provide 'mm-util) |
1676 | ||
1677 | ;;; mm-util.el ends here |