Commit | Line | Data |
---|---|---|
07513d64 | 1 | ;;; mule.el --- basic commands for multilingual environment |
4ed46869 | 2 | |
ba318903 | 3 | ;; Copyright (C) 1997-2014 Free Software Foundation, Inc. |
7976eda0 | 4 | ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
5df4f04c | 5 | ;; 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
2fd125a3 KH |
6 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
7 | ;; Registration Number H14PRO021 | |
8f924df7 | 8 | ;; Copyright (C) 2003 |
c1841772 KH |
9 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
10 | ;; Registration Number H13PRO009 | |
4ed46869 KH |
11 | |
12 | ;; Keywords: mule, multilingual, character set, coding system | |
13 | ||
14 | ;; This file is part of GNU Emacs. | |
15 | ||
4936186e | 16 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
4ed46869 | 17 | ;; it under the terms of the GNU General Public License as published by |
4936186e GM |
18 | ;; the Free Software Foundation, either version 3 of the License, or |
19 | ;; (at your option) any later version. | |
4ed46869 KH |
20 | |
21 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
22 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
23 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
24 | ;; GNU General Public License for more details. | |
25 | ||
26 | ;; You should have received a copy of the GNU General Public License | |
4936186e | 27 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
4ed46869 | 28 | |
60370d40 PJ |
29 | ;;; Commentary: |
30 | ||
4ed46869 KH |
31 | ;;; Code: |
32 | ||
03794570 | 33 | ;; FIXME? Are these still relevant? Nothing uses them AFAICS. |
(defconst mule-version
  "6.0 (HANACHIRUSATO)"
  "Version number and name of this version of MULE (multilingual environment).")
36 | ||
(defconst mule-version-date
  "2003.9.1"
  "Distribution date of this version of MULE (multilingual environment).")
39 | ||
c1841772 KH |
40 | \f |
41 | ;;; CHARSET | |
42 | ||
6d2b6635 KH |
;; Backward compatibility code for handling emacs-mule charsets.
;;
;; The two ranges below bound the character codes from which
;; `convert-define-charset-argument' takes the :code-offset of an
;; old-style charset.  Each allocation advances the corresponding
;; `-min' variable by the size of the charset, so the `-min' values
;; change as old-style charsets are defined.
(defvar private-char-area-1-min #xF0000)
(defvar private-char-area-1-max #xFFFFE)
(defvar private-char-area-2-min #x100000)
(defvar private-char-area-2-max #x10FFFE)

;; Table of emacs-mule charsets indexed by their emacs-mule ID.
;; Slot 0 is preset to `ascii'.  `define-charset' stores the charset
;; name in the slot of every charset defined with an integer
;; :emacs-mule-id, and `make-char-internal' looks charsets up here.
(defvar emacs-mule-charset-table (make-vector 256 nil))
(aset emacs-mule-charset-table 0 'ascii)
52 | ||
3dabda23 | 53 | ;; Convert the argument of old-style call of define-charset to a |
6d2b6635 KH |
54 | ;; property list used by the new-style. |
55 | ;; INFO-VECTOR is a vector of the format: | |
56 | ;; [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE | |
57 | ;; SHORT-NAME LONG-NAME DESCRIPTION] | |
58 | ||
(defun convert-define-charset-argument (emacs-mule-id info-vector)
  "Convert an old-style `define-charset' argument to a new-style property list.
EMACS-MULE-ID is either an integer (0, or 129..255), or a non-integer,
in which case the first unused ID is taken from
`emacs-mule-charset-table' (160..223 for a one-dimensional charset,
224..254 otherwise).
INFO-VECTOR is a vector of the format:
  [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
   SHORT-NAME LONG-NAME DESCRIPTION]
of which only DIMENSION, CHARS and ISO-FINAL-CHAR are used here.

Return a list with the properties `:dimension', `:code-space',
`:iso-final-char', `:code-offset' and `:emacs-mule-id'.  The code
offset is taken from the start of the first private character area
that has enough room, and the start of that area is advanced by the
number of characters of the charset.

Signal an error if EMACS-MULE-ID is an invalid integer, if no
emacs-mule ID is free, or if neither private area has enough room."
  (let* ((dim (aref info-vector 0))
         (chars (aref info-vector 1))
         ;; Number of character codes to reserve for the charset.
         (total (if (= dim 1) chars (* chars chars)))
         (code-space (if (= dim 1) (if (= chars 96) [32 127] [33 126])
                       (if (= chars 96) [32 127 32 127] [33 126 33 126])))
         code-offset)
    (if (integerp emacs-mule-id)
        (or (= emacs-mule-id 0)
            (and (>= emacs-mule-id 129) (< emacs-mule-id 256))
            (error "Invalid CHARSET-ID: %d" emacs-mule-id))
      (let (from-id to-id)
        (if (= dim 1) (setq from-id 160 to-id 224)
          (setq from-id 224 to-id 255))
        ;; Skip the IDs that are already in use; a nil slot is free.
        ;; (The loop used to test `not', which skipped the free slots
        ;; and stopped on the first occupied one.)
        (while (and (< from-id to-id)
                    (aref emacs-mule-charset-table from-id))
          (setq from-id (1+ from-id)))
        (if (= from-id to-id)
            (error "No more room for the new Emacs-mule charset"))
        (setq emacs-mule-id from-id)))
    ;; Reserve TOTAL codes in the first private area that can hold them.
    (if (> (- private-char-area-1-max private-char-area-1-min) total)
        (setq code-offset private-char-area-1-min
              private-char-area-1-min (+ private-char-area-1-min total))
      (if (> (- private-char-area-2-max private-char-area-2-min) total)
          (setq code-offset private-char-area-2-min
                private-char-area-2-min (+ private-char-area-2-min total))
        (error "No more space for a new charset")))
    (list :dimension dim
          :code-space code-space
          :iso-final-char (aref info-vector 4)
          :code-offset code-offset
          :emacs-mule-id emacs-mule-id)))
91 | ||
c1841772 KH |
(defun define-charset (name docstring &rest props)
  "Define NAME (symbol) as a charset with DOCSTRING.
The remaining arguments must come in pairs ATTRIBUTE VALUE.  ATTRIBUTE
may be any symbol.  The following have special meanings, and one of
`:code-offset', `:map', `:subset', `:superset' must be specified.

`:short-name'

VALUE must be a short string to identify the charset.  If omitted,
NAME is used.

`:long-name'

VALUE must be a string longer than `:short-name' to identify the
charset.  If omitted, the value of the `:short-name' attribute is used.

`:dimension'

VALUE must be an integer 1, 2, 3, or 4, specifying the dimension of
code-points of the charset.  If omitted, it is calculated from the
value of the `:code-space' attribute (or is 4 if that is omitted too).

`:code-space'

VALUE must be a vector of length at most 8 specifying the byte code
range of each dimension in this format:
  [ MIN-1 MAX-1 MIN-2 MAX-2 ... ]
where MIN-N is the minimum byte value of Nth dimension of code-point,
MAX-N is the maximum byte value of that.  If omitted, the range 0..255
is used for each dimension.

`:min-code'

VALUE must be an integer specifying the minimum code point of the
charset.  If omitted, it is calculated from `:code-space'.  VALUE may
be a cons (HIGH . LOW), where HIGH is the most significant 16 bits of
the code point and LOW is the least significant 16 bits.

`:max-code'

VALUE must be an integer specifying the maximum code point of the
charset.  If omitted, it is calculated from `:code-space'.  VALUE may
be a cons (HIGH . LOW), where HIGH is the most significant 16 bits of
the code point and LOW is the least significant 16 bits.

`:iso-final-char'

VALUE must be a character in the range 32 to 127 (inclusive)
specifying the final char of the charset for ISO-2022 encoding.  If
omitted, the charset can't be encoded by ISO-2022 based
coding-systems.

`:iso-revision-number'

VALUE must be an integer in the range 0..63, specifying the revision
number of the charset for ISO-2022 encoding.

`:emacs-mule-id'

VALUE must be an integer of 0, 129..255.  If omitted, the charset
can't be encoded by coding-systems of type `emacs-mule'.

`:ascii-compatible-p'

VALUE must be nil or t (default nil).  If VALUE is t, the charset is
compatible with ASCII, i.e. the first 128 code points map to ASCII.

`:supplementary-p'

VALUE must be nil or t.  If the VALUE is t, the charset is
supplementary, which means it is used only as a parent or a
subset of some other charset, or it is provided just for backward
compatibility.

`:invalid-code'

VALUE must be a nonnegative integer that can be used as an invalid
code point of the charset.  If the minimum code is 0 and the maximum
code is greater than Emacs's maximum integer value, `:invalid-code'
should not be omitted.

`:code-offset'

VALUE must be an integer added to the index number of a character to
get the corresponding character code.

`:map'

VALUE must be a vector or a string.

If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
where CODE-n is a code-point of the charset, and CHAR-n is the
corresponding character code.

If it is a string, it is a name of file that contains the above
information.  Each line of the file must be this format:
  0xXXX 0xYYY
where XXX is a hexadecimal representation of CODE-n and YYY is a
hexadecimal representation of CHAR-n.  A line starting with `#' is a
comment line.

`:subset'

VALUE must be a list:
  ( PARENT MIN-CODE MAX-CODE OFFSET )
PARENT is a parent charset.  MIN-CODE and MAX-CODE specify the range
of characters inherited from the parent.  OFFSET is an integer value
to add to a code point of the parent charset to get the corresponding
code point of this charset.

`:superset'

VALUE must be a list of parent charsets.  The charset inherits
characters from them.  Each element of the list may be a cons (PARENT
. OFFSET), where PARENT is a parent charset, and OFFSET is an offset
value to add to a code point of PARENT to get the corresponding code
point of this charset.

`:unify-map'

VALUE must be a vector or a string.

If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
where CODE-n is a code-point of the charset, and CHAR-n is the
corresponding Unicode character code.

If it is a string, it is a name of file that contains the above
information.  The file format is the same as that described for the
`:map' attribute."
  (when (vectorp (car props))
    ;; Old style code:
    ;;   (define-charset CHARSET-ID CHARSET-SYMBOL INFO-VECTOR)
    ;; Convert the argument to make it fit with the current style.
    ;; In that call NAME holds the ID and DOCSTRING holds the symbol.
    (let ((vec (car props)))
      (setq props (convert-define-charset-argument name vec)
            name docstring
            docstring (aref vec 8))))
  ;; ATTRS is an alist whose values, in this exact order, become the
  ;; arguments of `define-charset-internal' after NAME.
  (let ((attrs (mapcar 'list '(:dimension
                               :code-space
                               :min-code
                               :max-code
                               :iso-final-char
                               :iso-revision-number
                               :emacs-mule-id
                               :ascii-compatible-p
                               :supplementary-p
                               :invalid-code
                               :code-offset
                               :map
                               :subset
                               :superset
                               :unify-map
                               :plist))))

    ;; If :dimension is omitted, get the dimension from :code-space.
    (let ((dimension (plist-get props :dimension)))
      (or dimension
          (let ((code-space (plist-get props :code-space)))
            (setq dimension (if code-space (/ (length code-space) 2) 4))
            (setq props (plist-put props :dimension dimension)))))

    ;; If :code-space is omitted, use the full byte range 0..255 for
    ;; each of the charset's dimensions.
    (let ((code-space (plist-get props :code-space)))
      (or code-space
          (let ((dimension (plist-get props :dimension)))
            (setq code-space (make-vector 8 0))
            (dotimes (i dimension)
              (aset code-space (1+ (* i 2)) #xFF))
            (setq props (plist-put props :code-space code-space)))))

    ;; If :emacs-mule-id is specified, update emacs-mule-charset-table.
    (let ((emacs-mule-id (plist-get props :emacs-mule-id)))
      (if (integerp emacs-mule-id)
          (aset emacs-mule-charset-table emacs-mule-id name)))

    (dolist (slot attrs)
      (setcdr slot (purecopy (plist-get props (car slot)))))

    ;; Make sure that the value of :code-space is a vector of 8
    ;; elements.
    (let* ((slot (assq :code-space attrs))
           (val (cdr slot))
           (len (length val)))
      (if (< len 8)
          (setcdr slot
                  (vconcat val (make-vector (- 8 len) 0)))))

    ;; Add :name and :docstring properties to PROPS.
    (setq props
          (cons :name (cons name (cons :docstring (cons (purecopy docstring) props)))))
    ;; Keep the value returned by `plist-put' instead of relying on it
    ;; modifying PROPS in place.
    (or (plist-get props :short-name)
        (setq props (plist-put props :short-name (symbol-name name))))
    (or (plist-get props :long-name)
        (setq props (plist-put props :long-name
                               (plist-get props :short-name))))
    (setq props (plist-put props :base name))
    ;; We can probably get a worthwhile amount in purespace.
    (setq props
          (mapcar (lambda (elt)
                    (if (stringp elt)
                        (purecopy elt)
                      elt))
                  props))
    (setcdr (assq :plist attrs) props)

    (apply 'define-charset-internal name (mapcar 'cdr attrs))))
295 | ||
296 | ||
(defun load-with-code-conversion (fullname file &optional noerror nomessage)
  "Execute a file of Lisp code named FILE whose absolute name is FULLNAME.
The file contents are decoded before evaluation if necessary.
If optional third arg NOERROR is non-nil,
report no error if FILE doesn't exist.
Print messages at start and end of loading unless
optional fourth arg NOMESSAGE is non-nil.
Return t if file exists."
  ;; Unreadable file: signal `file-error' unless NOERROR is non-nil, in
  ;; which case the `and' form makes the function return nil.
  (if (null (file-readable-p fullname))
      (and (null noerror)
           (signal 'file-error (list "Cannot open load file" file)))
    ;; Read file with code conversion, and then eval.
    (let* ((buffer
            ;; We can't use `generate-new-buffer' because files.el
            ;; is not yet loaded.
            (get-buffer-create (generate-new-buffer-name " *load*")))
           (load-in-progress t)
           ;; Non-nil when FULLNAME ends in ".el"; only used to choose
           ;; the wording of the progress messages.
           (source (save-match-data (string-match "\\.el\\'" fullname))))
      (unless nomessage
        (if source
            (message "Loading %s (source)..." file)
          (message "Loading %s..." file)))
      ;; While `purify-flag' is non-nil, record FILE in
      ;; `preloaded-file-list'.
      (when purify-flag
        (push (purecopy file) preloaded-file-list))
      ;; The cleanup form kills the temporary buffer even if reading or
      ;; evaluating the file signals an error.
      (unwind-protect
          (let ((load-file-name fullname)
                (set-auto-coding-for-load t)
                (inhibit-file-name-operation nil))
            (with-current-buffer buffer
              ;; So that we don't get completely screwed if the
              ;; file is encoded in some complicated character set,
              ;; read it with real decoding, as a multibyte buffer.
              (set-buffer-multibyte t)
              ;; Don't let deactivate-mark remain set.
              (let (deactivate-mark)
                (insert-file-contents fullname))
              ;; If the loaded file was inserted with no-conversion or
              ;; raw-text coding system, make the buffer unibyte.
              ;; Otherwise, eval-buffer might try to interpret random
              ;; binary junk as multibyte characters.
              ;; NOTE(review): only the coding-system type `raw-text' is
              ;; tested here (inside a one-argument `or'); presumably
              ;; no-conversion also has that type -- confirm.
              (if (and enable-multibyte-characters
                       (or (eq (coding-system-type last-coding-system-used)
                               'raw-text)))
                  (set-buffer-multibyte nil))
              ;; Make `kill-buffer' quiet.
              (set-buffer-modified-p nil))
            ;; Have the original buffer current while we eval.
            (eval-buffer buffer nil
                         ;; This is compatible with what `load' does.
                         (if purify-flag file fullname)
                         nil t))
        ;; Bind the kill hooks to nil so that killing the temporary
        ;; buffer runs no hook and asks no question.
        (let (kill-buffer-hook kill-buffer-query-functions)
          (kill-buffer buffer)))
      (do-after-load-evaluation fullname)

      ;; Unlike the start message, the completion message is also
      ;; suppressed when `noninteractive' is non-nil.
      (unless (or nomessage noninteractive)
        (if source
            (message "Loading %s (source)...done" file)
          (message "Loading %s...done" file)))
      t)))
357 | ||
(defun charset-info (charset)
  "Return a vector of information of CHARSET.
This function is provided for backward compatibility.

The elements of the vector are:
  CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION,
  LEADING-CODE-BASE, LEADING-CODE-EXT,
  ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE,
  REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION,
  PLIST.
where
CHARSET-ID is always 0.
BYTES is always 0.
DIMENSION is the number of bytes of a code-point of the charset:
  1, 2, 3, or 4.
CHARS is the number of characters in a dimension:
  94, 96, 128, or 256.
WIDTH is always 0.
DIRECTION is always 0.
LEADING-CODE-BASE is always 0.
LEADING-CODE-EXT is always 0.
ISO-FINAL-CHAR (character) is the final character of the
  corresponding ISO 2022 charset.  If the charset is not assigned
  any final character, the value is -1.
ISO-GRAPHIC-PLANE is always 0.
REVERSE-CHARSET is always -1.
SHORT-NAME (string) is the short name to refer to the charset.
LONG-NAME (string) is the long name to refer to the charset.
DESCRIPTION (string) is the description string of the charset.
PLIST (property list) may contain any type of information a user
  wants to put and get by functions `put-charset-property' and
  `get-charset-property' respectively."
  (vector 0                             ; CHARSET-ID
          0                             ; BYTES
          (charset-dimension charset)
          (charset-chars charset)
          0                             ; WIDTH
          0                             ; DIRECTION
          0                             ; LEADING-CODE-BASE
          0                             ; LEADING-CODE-EXT
          (charset-iso-final-char charset)
          0                             ; ISO-GRAPHIC-PLANE
          -1                            ; REVERSE-CHARSET
          (get-charset-property charset :short-name)
          ;; LONG-NAME: this slot used to repeat the `:short-name'
          ;; property.
          (get-charset-property charset :long-name)
          (charset-description charset)
          (charset-plist charset)))
4ed46869 | 405 | |
40c81f74 PE |
406 | ;; It is better not to use backquote in this file, |
407 | ;; because that makes a bootstrapping problem | |
408 | ;; if you need to recompile all the Lisp files using interpreted code. | |
409 | ||
(defun charset-id (_charset)
  "Return 0, whatever the argument is.
This function exists only for backward compatibility."
  (declare (obsolete nil "23.1"))
  0)
4ed46869 | 414 | |
(defmacro charset-bytes (_charset)
  "Expand to the constant 0; the argument is never evaluated.
This macro exists only for backward compatibility."
  (declare (obsolete nil "23.1"))
  0)
c1841772 KH |
419 | |
(defun get-charset-property (charset propname)
  "Look up PROPNAME in the property list of CHARSET and return its value.
The result is the value most recently stored for that property by
`put-charset-property', or nil if the property is not present."
  (let ((properties (charset-plist charset)))
    (plist-get properties propname)))
425 | ||
(defun put-charset-property (charset propname value)
  "Store VALUE as the PROPNAME property of CHARSET.
A string VALUE is passed through `purecopy' before it is stored.
The value can be read back with `(get-charset-property CHARSET PROPNAME)'."
  (let* ((properties (charset-plist charset))
         (stored (if (stringp value) (purecopy value) value)))
    (set-charset-plist charset (plist-put properties propname stored))))
c1841772 | 434 | |
c1841772 KH |
(defun charset-description (charset)
  "Return the description of CHARSET.
This is the value of the `:docstring' property of CHARSET."
  (let ((properties (charset-plist charset)))
    (plist-get properties :docstring)))
438 | ||
(defun charset-dimension (charset)
  "Return the dimension of CHARSET.
This is the value of the `:dimension' property of CHARSET."
  (let ((properties (charset-plist charset)))
    (plist-get properties :dimension)))
442 | ||
(defun charset-chars (charset &optional dimension)
  "Return the number of characters in one dimension of CHARSET.
DIMENSION selects the dimension, counting from 1; when it is nil or
omitted, the first dimension is used.  The count is derived from the
minimum and maximum byte values stored for that dimension in the
`:code-space' property."
  (let* ((dim (or dimension 1))
         (ranges (plist-get (charset-plist charset) :code-space))
         ;; The `:code-space' vector is [MIN-1 MAX-1 MIN-2 MAX-2 ...].
         (max-index (1- (* 2 dim)))
         (min-index (1- max-index))
         (highest (aref ranges max-index))
         (lowest (aref ranges min-index)))
    (1+ (- highest lowest))))
c1841772 KH |
450 | |
(defun charset-iso-final-char (charset)
  "Return the ISO-2022 final character of CHARSET.
If CHARSET has no `:iso-final-char' property, return -1."
  (let ((final-char (plist-get (charset-plist charset) :iso-final-char)))
    (if final-char
        final-char
      -1)))
4ed46869 KH |
456 | |
(defmacro charset-short-name (charset)
  "Return short name of CHARSET.
CHARSET is a form that is evaluated at run time and must yield a
charset, just like the argument of `charset-dimension'."
  ;; Return a form instead of computing the value at expansion time:
  ;; the old body handed the unevaluated argument form to
  ;; `charset-plist', which fails for (charset-short-name 'ascii) and
  ;; for any variable argument.  The form is built with `list' because
  ;; this file avoids backquote.
  (list 'plist-get (list 'charset-plist charset) :short-name))
4ed46869 KH |
460 | |
(defmacro charset-long-name (charset)
  "Return long name of CHARSET.
CHARSET is a form that is evaluated at run time and must yield a
charset, just like the argument of `charset-dimension'."
  ;; Return a form instead of computing the value at expansion time:
  ;; the old body handed the unevaluated argument form to
  ;; `charset-plist', which fails for (charset-long-name 'ascii) and
  ;; for any variable argument.  The form is built with `list' because
  ;; this file avoids backquote.
  (list 'plist-get (list 'charset-plist charset) :long-name))
4ed46869 | 464 | |
(defun charset-list ()
  "Return the value of the variable `charset-list'.
That variable lists all charsets ever defined; use it directly instead
of calling this function."
  (declare (obsolete charset-list "23.1"))
  charset-list)
469 | ||
6d2b6635 KH |
470 | \f |
471 | ;;; CHARACTER | |
;; `char-valid-p' is kept only as an obsolete alias (since 23.1) of
;; `characterp'.
(define-obsolete-function-alias 'char-valid-p 'characterp "23.1")
6d2b6635 | 473 | |
(defun generic-char-p (_char)
  "Return nil, whatever the argument is.
This function exists only for backward compatibility."
  (declare (obsolete nil "23.1"))
  nil)
6d2b6635 KH |
478 | |
(defun make-char-internal (charset-id &optional code1 code2)
  "Make a character from an emacs-mule charset ID and code points.
CHARSET-ID is an index into `emacs-mule-charset-table'.  CODE1 and
CODE2 are passed on to `make-char' together with the charset found
there.  Signal an error if no charset is recorded for CHARSET-ID."
  (let ((charset (aref emacs-mule-charset-table charset-id)))
    (unless charset
      (error "Invalid Emacs-mule charset ID: %d" charset-id))
    (make-char charset code1 code2)))
0269ddfb | 484 | \f |
bd72e34f CY |
485 | ;; Save the ASCII case table in case we need it later. Some locales |
486 | ;; (such as Turkish) modify the case behavior of ASCII characters, | |
487 | ;; which can interfere with networking code that uses ASCII strings. | |
488 | ||
(defvar ascii-case-table
  ;; This mirrors what `copy-case-table' does, written out here so that
  ;; case-table.el does not have to be loaded.
  (let* ((standard (standard-case-table))
         (copy (copy-sequence standard))
         (up-table (char-table-extra-slot standard 0)))
    ;; Give the copy its own up-case table when the standard table has
    ;; one, and clear the two remaining extra slots.
    (when up-table
      (set-char-table-extra-slot copy 0 (copy-sequence up-table)))
    (set-char-table-extra-slot copy 1 nil)
    (set-char-table-extra-slot copy 2 nil)
    copy)
  "Case table for the ASCII character set.")
498 | \f | |
e76938e7 | 499 | ;; Coding system stuff |
4ed46869 | 500 | |
c1841772 KH |
501 | ;; Coding system is a symbol that has been defined by the function |
502 | ;; `define-coding-system'. | |
4ed46869 | 503 | |
c1841772 KH |
(defconst coding-system-iso-2022-flags
  '(long-form
    ascii-at-eol
    ascii-at-cntl
    7-bit
    locking-shift
    single-shift
    designation
    revision
    direction
    init-at-bol
    designate-at-bol
    safe
    latin-extra
    composition
    euc-tw-shift
    use-roman
    use-oldjis
    8-bit-level-4)
  "List of symbols that control ISO-2022 encoder/decoder.

The value of the `:flags' attribute in the argument of the function
`define-coding-system' must be a list of symbols, each of which is a
member of this list.

If `long-form' is specified, use a long designation sequence on
encoding for the charsets `japanese-jisx0208-1978', `chinese-gb2312',
and `japanese-jisx0208'.  The long designation sequence doesn't
conform to ISO 2022, but is used by such coding systems as
`compound-text'.

If `ascii-at-eol' is specified, designate ASCII to g0 at end of line
on encoding.

If `ascii-at-cntl' is specified, designate ASCII to g0 before control
codes and SPC on encoding.

If `7-bit' is specified, use 7-bit code only on encoding.

If `locking-shift' is specified, decode locking-shift code correctly
on decoding, and use locking-shift to invoke a graphic element on
encoding.

If `single-shift' is specified, decode single-shift code
correctly on decoding, and use single-shift to invoke a graphic
element on encoding.  See also `8-bit-level-4' specification.

If `designation' is specified, decode designation code correctly on
decoding, and use designation to designate a charset to a graphic
element on encoding.

If `revision' is specified, produce an escape sequence to specify
revision number of a charset on encoding.  Such an escape sequence is
always correctly decoded on decoding.

If `direction' is specified, decode ISO6429's code for specifying
direction correctly, and produce the code on encoding.

If `init-at-bol' is specified, on encoding, it is assumed that
invocation and designation statuses are reset at each beginning of
line even if `ascii-at-eol' is not specified; thus no codes for
resetting them are produced.

If `safe' is specified, on encoding, characters not supported by a
coding are replaced with `?'.

If `latin-extra' is specified, the code-detection routine assumes that a
code specified in `latin-extra-code-table' (which see) is valid.

If `composition' is specified, an escape sequence to specify
composition sequence is correctly decoded on decoding, and is produced
on encoding.

If `euc-tw-shift' is specified, the EUC-TW specific shifting code is
correctly decoded on decoding, and is produced on encoding.

If `use-roman' is specified, JIS0201-1976-Roman is designated instead
of ASCII.

If `use-oldjis' is specified, JIS0208-1976 is designated instead of
JIS0208-1983.

If `8-bit-level-4' is specified, the decoder assumes the
implementation level \"4\" for 8-bit codes which means that GL is
identified as the single-shift area.  The default implementation
level for 8-bit code is \"4A\" which means that GR is identified
as the single-shift area.")
12504f57 | 590 | |
c1841772 | 591 | (defun define-coding-system (name docstring &rest props) |
12504f57 | 592 | "Define NAME (a symbol) as a coding system with DOCSTRING and attributes. |
c1841772 KH |
593 | The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE |
594 | may be any symbol. | |
595 | ||
12504f57 | 596 | The following attributes have special meanings. Those labeled as |
ed039e6c | 597 | \"(required)\" should not be omitted. |
c1841772 KH |
598 | |
599 | `:mnemonic' (required) | |
600 | ||
601 | VALUE is a character to display on mode line for the coding system. | |
602 | ||
603 | `:coding-type' (required) | |
604 | ||
605 | VALUE must be one of `charset', `utf-8', `utf-16', `iso-2022', | |
1bfd603c | 606 | `emacs-mule', `shift-jis', `ccl', `raw-text', `undecided'. |
c1841772 | 607 | |
12504f57 | 608 | `:eol-type' |
c1841772 | 609 | |
12504f57 | 610 | VALUE is the EOL (end-of-line) format of the coding system. It must be |
c1841772 KH |
611 | one of `unix', `dos', `mac'. The symbol `unix' means Unix-like EOL |
612 | \(i.e. single LF), `dos' means DOS-like EOL \(i.e. sequence of CR LF), | |
9e8014c6 EZ |
613 | and `mac' means Mac-like EOL \(i.e. single CR). If omitted, Emacs |
614 | detects the EOL format automatically when decoding. | |
c1841772 | 615 | |
736345cb | 616 | `:charset-list' |
c1841772 KH |
617 | |
618 | VALUE must be a list of charsets supported by the coding system. On | |
619 | encoding by the coding system, if a character belongs to multiple | |
620 | charsets in the list, a charset that comes earlier in the list is | |
736345cb KH |
621 | selected. If `:coding-type' is `iso-2022', VALUE may be `iso-2022', |
622 | which indicates that the coding system supports all ISO-2022 based | |
623 | charsets. If `:coding-type' is `emacs-mule', VALUE may be | |
624 | `emacs-mule', which indicates that the coding system supports all | |
1d839a14 | 625 | charsets that have the `:emacs-mule-id' property. |
c1841772 | 626 | |
12504f57 | 627 | `:ascii-compatible-p' |
c1841772 KH |
628 | |
629 | If VALUE is non-nil, the coding system decodes all 7-bit bytes into | |
07513d64 | 630 | the corresponding ASCII characters, and encodes all ASCII characters |
12504f57 | 631 | back to the corresponding 7-bit bytes. VALUE defaults to nil. |
c1841772 | 632 | |
12504f57 | 633 | `:decode-translation-table' |
c1841772 KH |
634 | |
635 | VALUE must be a translation table to use on decoding. | |
636 | ||
12504f57 | 637 | `:encode-translation-table' |
c1841772 KH |
638 | |
639 | VALUE must be a translation table to use on encoding. | |
640 | ||
12504f57 | 641 | `:post-read-conversion' |
c1841772 KH |
642 | |
643 | VALUE must be a function to call after some text is inserted and | |
644 | decoded by the coding system itself and before any functions in | |
368b3544 KH |
645 | `after-insert-functions' are called. This function is passed one |
646 | argument; the number of characters in the text to convert, with | |
647 | point at the start of the text. The function should leave point | |
648 | the same, and return the new character count. | |
c1841772 KH |
649 | |
650 | `:pre-write-conversion' | |
651 | ||
652 | VALUE must be a function to call after all functions in | |
368b3544 KH |
653 | `write-region-annotate-functions' and `buffer-file-format' are |
654 | called, and before the text is encoded by the coding system | |
655 | itself. This function should convert the whole text in the | |
c0943d3d | 656 | current buffer. For backward compatibility, this function is |
368b3544 | 657 | passed two arguments which can be ignored. |
c1841772 KH |
658 | |
659 | `:default-char' | |
660 | ||
661 | VALUE must be a character. On encoding, a character not supported by | |
662 | the coding system is replaced with VALUE. | |
663 | ||
8f924df7 KH |
664 | `:for-unibyte' |
665 | ||
666 | VALUE non-nil means that visiting a file with the coding system | |
667 | results in a unibyte buffer. | |
668 | ||
c1841772 KH |
669 | `:mime-charset' |
670 | ||
12504f57 DL |
671 | VALUE must be a symbol whose name is that of a MIME charset converted |
672 | to lower case. | |
c1841772 | 673 | |
1bfd603c DL |
674 | `:mime-text-unsuitable' |
675 | ||
676 | VALUE non-nil means the `:mime-charset' property names a charset which | |
1894d108 | 677 | is unsuitable for the top-level media type \"text\". |
1bfd603c | 678 | |
c1841772 KH |
679 | `:flags' |
680 | ||
12504f57 DL |
681 | VALUE must be a list of symbols that control the ISO-2022 converter. |
682 | Each must be a member of the list `coding-system-iso-2022-flags' | |
1b796d6b | 683 | \(which see). This attribute is meaningful only when `:coding-type' |
c1841772 KH |
684 | is `iso-2022'. |
685 | ||
686 | `:designation' | |
687 | ||
12504f57 | 688 | VALUE must be a vector [G0-USAGE G1-USAGE G2-USAGE G3-USAGE]. |
c1841772 KH |
689 | GN-USAGE specifies the usage of graphic register GN as follows. |
690 | ||
691 | If it is nil, no charset can be designated to GN. | |
692 | ||
07513d64 | 693 | If it is a charset, the charset is initially designated to GN, and |
c1841772 KH |
694 | never used by the other charsets. |
695 | ||
696 | If it is a list, the elements must be charsets, nil, 94, or 96. GN | |
12504f57 DL |
697 | can be used by all the listed charsets. If the list contains 94, any |
698 | iso-2022 charset whose code-space ranges are 94 long can be designated | |
699 | to GN. If the list contains 96, any charsets whose whose ranges are | |
700 | 96 long can be designated to GN. If the first element is a charset, | |
701 | that charset is initially designated to GN. | |
c1841772 | 702 | |
1b796d6b | 703 | This attribute is meaningful only when `:coding-type' is `iso-2022'. |
c1841772 KH |
704 | |
705 | `:bom' | |
706 | ||
12504f57 | 707 | This attributes specifies whether the coding system uses a `byte order |
ed039e6c | 708 | mark'. VALUE must be nil, t, or cons of coding systems whose |
e4821482 | 709 | `:coding-type' is `utf-16' or `utf-8'. |
c1841772 | 710 | |
0ea1a6ca KH |
711 | If the value is nil, on decoding, don't treat the first two-byte as |
712 | BOM, and on encoding, don't produce BOM bytes. | |
713 | ||
714 | If the value is t, on decoding, skip the first two-byte as BOM, and on | |
e4920bc9 | 715 | encoding, produce BOM bytes according to the value of `:endian'. |
0ea1a6ca | 716 | |
3dabda23 | 717 | If the value is cons, on decoding, check the first two-byte. If they |
0ea1a6ca | 718 | are 0xFE 0xFF, use the car part coding system of the value. If they |
3dabda23 | 719 | are 0xFF 0xFE, use the cdr part coding system of the value. |
0ea1a6ca | 720 | Otherwise, treat them as bytes for a normal character. On encoding, |
e4920bc9 | 721 | produce BOM bytes according to the value of `:endian'. |
0ea1a6ca | 722 | |
1b796d6b | 723 | This attribute is meaningful only when `:coding-type' is `utf-16' or |
ed039e6c | 724 | `utf-8'. |
c1841772 KH |
725 | |
726 | `:endian' | |
727 | ||
0ea1a6ca KH |
728 | VALUE must be `big' or `little' specifying big-endian and |
729 | little-endian respectively. The default value is `big'. | |
c1841772 | 730 | |
1b796d6b | 731 | This attribute is meaningful only when `:coding-type' is `utf-16'. |
c1841772 KH |
732 | |
733 | `:ccl-decoder' | |
734 | ||
12504f57 | 735 | VALUE is a symbol representing the registered CCL program used for |
1b796d6b | 736 | decoding. This attribute is meaningful only when `:coding-type' is |
12504f57 | 737 | `ccl'. |
c1841772 KH |
738 | |
739 | `:ccl-encoder' | |
740 | ||
12504f57 | 741 | VALUE is a symbol representing the registered CCL program used for |
1b796d6b | 742 | encoding. This attribute is meaningful only when `:coding-type' is |
50960f36 KH |
743 | `ccl'. |
744 | ||
1b796d6b | 745 | `:inhibit-null-byte-detection' |
50960f36 KH |
746 | |
747 | VALUE non-nil means Emacs ignore null bytes on code detection. | |
748 | See the variable `inhibit-null-byte-detection'. This attribute | |
1b796d6b | 749 | is meaningful only when `:coding-type' is `undecided'. |
50960f36 | 750 | |
1b796d6b | 751 | `:inhibit-iso-escape-detection' |
50960f36 KH |
752 | |
753 | VALUE non-nil means Emacs ignores ISO-2022 escape sequences on | |
754 | code detection. See the variable `inhibit-iso-escape-detection'. | |
1b796d6b | 755 | This attribute is meaningful only when `:coding-type' is |
50960f36 KH |
756 | `undecided'. |
757 | ||
1b796d6b | 758 | `:prefer-utf-8' |
50960f36 KH |
759 | |
760 | VALUE non-nil means Emacs prefers UTF-8 on code detection for | |
1b796d6b | 761 | non-ASCII files. This attribute is meaningful only when |
50960f36 | 762 | `:coding-type' is `undecided'." |
c1841772 KH |
763 | (let* ((common-attrs (mapcar 'list |
764 | '(:mnemonic | |
765 | :coding-type | |
766 | :charset-list | |
767 | :ascii-compatible-p | |
1a9db556 | 768 | :decode-translation-table |
c1841772 KH |
769 | :encode-translation-table |
770 | :post-read-conversion | |
771 | :pre-write-conversion | |
772 | :default-char | |
7e742024 | 773 | :for-unibyte |
c1841772 KH |
774 | :plist |
775 | :eol-type))) | |
776 | (coding-type (plist-get props :coding-type)) | |
777 | (spec-attrs (mapcar 'list | |
778 | (cond ((eq coding-type 'iso-2022) | |
779 | '(:initial | |
780 | :reg-usage | |
781 | :request | |
782 | :flags)) | |
736c9276 KH |
783 | ((eq coding-type 'utf-8) |
784 | '(:bom)) | |
c1841772 KH |
785 | ((eq coding-type 'utf-16) |
786 | '(:bom | |
787 | :endian)) | |
788 | ((eq coding-type 'ccl) | |
789 | '(:ccl-decoder | |
790 | :ccl-encoder | |
50960f36 KH |
791 | :valids)) |
792 | ((eq coding-type 'undecided) | |
793 | '(:inhibit-null-byte-detection | |
794 | :inhibit-iso-escape-detection | |
795 | :prefer-utf-8)))))) | |
c1841772 KH |
796 | |
797 | (dolist (slot common-attrs) | |
798 | (setcdr slot (plist-get props (car slot)))) | |
799 | ||
800 | (dolist (slot spec-attrs) | |
801 | (setcdr slot (plist-get props (car slot)))) | |
802 | ||
803 | (if (eq coding-type 'iso-2022) | |
804 | (let ((designation (plist-get props :designation)) | |
805 | (flags (plist-get props :flags)) | |
806 | (initial (make-vector 4 nil)) | |
807 | (reg-usage (cons 4 4)) | |
808 | request elt) | |
809 | (dotimes (i 4) | |
810 | (setq elt (aref designation i)) | |
811 | (cond ((charsetp elt) | |
812 | (aset initial i elt) | |
813 | (setq request (cons (cons elt i) request))) | |
814 | ((consp elt) | |
815 | (aset initial i (car elt)) | |
816 | (if (charsetp (car elt)) | |
817 | (setq request (cons (cons (car elt) i) request))) | |
818 | (dolist (e (cdr elt)) | |
819 | (cond ((charsetp e) | |
820 | (setq request (cons (cons e i) request))) | |
821 | ((eq e 94) | |
822 | (setcar reg-usage i)) | |
823 | ((eq e 96) | |
824 | (setcdr reg-usage i)) | |
825 | ((eq e t) | |
826 | (setcar reg-usage i) | |
827 | (setcdr reg-usage i))))))) | |
828 | (setcdr (assq :initial spec-attrs) initial) | |
829 | (setcdr (assq :reg-usage spec-attrs) reg-usage) | |
830 | (setcdr (assq :request spec-attrs) request) | |
831 | ||
832 | ;; Change :flags value from a list to a bit-mask. | |
833 | (let ((bits 0) | |
834 | (i 0)) | |
835 | (dolist (elt coding-system-iso-2022-flags) | |
836 | (if (memq elt flags) | |
837 | (setq bits (logior bits (lsh 1 i)))) | |
838 | (setq i (1+ i))) | |
839 | (setcdr (assq :flags spec-attrs) bits)))) | |
840 | ||
841 | ;; Add :name and :docstring properties to PROPS. | |
842 | (setq props | |
e1e529fa DL |
843 | (cons :name (cons name (cons :docstring (cons (purecopy docstring) |
844 | props))))) | |
c1841772 | 845 | (setcdr (assq :plist common-attrs) props) |
f5d3a630 | 846 | (apply 'define-coding-system-internal |
c1841772 | 847 | name (mapcar 'cdr (append common-attrs spec-attrs))))) |
4ed46869 | 848 | |
(defun coding-system-doc-string (coding-system)
  "Return the documentation string recorded for CODING-SYSTEM.
The value is the `:docstring' entry of CODING-SYSTEM's property list."
  (let ((props (coding-system-plist coding-system)))
    (plist-get props :docstring)))
4ed46869 | 852 | |
(defun coding-system-mnemonic (coding-system)
  "Return the mnemonic character of CODING-SYSTEM.
This is the character shown in the mode line to identify the coding
system; it is read from the `:mnemonic' entry of the property list.
If CODING-SYSTEM is nil, return ?=."
  (let ((props (coding-system-plist coding-system)))
    (plist-get props :mnemonic)))
4ed46869 | 858 | |
c1841772 KH |
(defun coding-system-type (coding-system)
  "Return the coding type of CODING-SYSTEM.
The coding type is a symbol naming the encoding method, taken from the
`:coding-type' entry of the property list.
See the function `define-coding-system' for more detail."
  (let ((props (coding-system-plist coding-system)))
    (plist-get props :coding-type)))
d3675a42 | 864 | |
(defun coding-system-charset-list (coding-system)
  "Return list of charsets supported by CODING-SYSTEM.
The value is the `:charset-list' entry of the property list.
If CODING-SYSTEM supports all ISO-2022 charsets, return `iso-2022'.
If CODING-SYSTEM supports all emacs-mule charsets, return `emacs-mule'."
  (let ((props (coding-system-plist coding-system)))
    (plist-get props :charset-list)))
0269ddfb | 870 | |
2f1e746b KH |
(defun coding-system-category (coding-system)
  "Return a category symbol of CODING-SYSTEM.
The value is the `:category' entry of the property list."
  (let ((props (coding-system-plist coding-system)))
    (plist-get props :category)))
0269ddfb KH |
874 | |
(defun coding-system-get (coding-system prop)
  "Extract a value from CODING-SYSTEM's property list for property PROP.
For compatibility with Emacs 20/21, this accepts old-style symbols
like `mime-charset' as well as the current style like `:mime-charset'.
The old-style symbol `ascii-incompatible' yields the negation of the
`:ascii-compatible-p' property."
  (let ((plist (coding-system-plist coding-system)))
    (cond
     ;; A direct hit under PROP itself wins.
     ((plist-get plist prop))
     ;; A keyword has no legacy spelling to fall back on.
     ((keywordp prop) nil)
     ;; For backward compatibility.
     ((eq prop 'ascii-incompatible)
      (not (plist-get plist :ascii-compatible-p)))
     (t
      (plist-get plist (intern (concat ":" (symbol-name prop))))))))
0269ddfb | 887 | |
2e729bfa JB |
(defun coding-system-eol-type-mnemonic (coding-system)
  "Return the string indicating end-of-line format of CODING-SYSTEM.
The mnemonic comes from `eol-mnemonic-unix', `eol-mnemonic-dos' or
`eol-mnemonic-mac' when the eol type is 0, 1 or 2 respectively, and
from `eol-mnemonic-undecided' otherwise.  A character mnemonic is
converted to a one-character string."
  (let* ((kind (coding-system-eol-type coding-system))
         (mnemonic (if (memq kind '(0 1 2))
                       (nth kind (list eol-mnemonic-unix
                                       eol-mnemonic-dos
                                       eol-mnemonic-mac))
                     eol-mnemonic-undecided)))
    (cond ((stringp mnemonic) mnemonic)
          (t (char-to-string mnemonic)))))
898 | ||
857ea15c AS |
(defun coding-system-lessp (x y)
  "Return non-nil if coding system X should sort before coding system Y.
`no-conversion', `emacs-mule' and `undecided' sort first, in that
order.  Other coding systems are ordered by their mnemonic characters
compared case-insensitively, with the raw character code breaking
ties."
  (catch 'decided
    ;; The first special name matching X or Y settles the comparison;
    ;; X is checked before Y for each name.
    (dolist (special '(no-conversion emacs-mule undecided))
      (cond ((eq x special) (throw 'decided t))
            ((eq y special) (throw 'decided nil))))
    (let* ((mx (coding-system-mnemonic x))
           (my (coding-system-mnemonic y))
           (folded-x (downcase mx))
           (folded-y (downcase my)))
      (cond ((< folded-x folded-y) t)
            ((> folded-x folded-y) nil)
            (t (< mx my))))))
911 | ||
5e2e859a KH |
(defun coding-system-equal (coding-system-1 coding-system-2)
  "Return t if and only if CODING-SYSTEM-1 and CODING-SYSTEM-2 are identical.
Two coding systems are identical if both symbols are equal
or one is an alias of the other.  Concretely, they must either be the
same symbol, or have `equal' property lists together with end-of-line
types that are either `eq' or both vectors."
  (cond
   ((eq coding-system-1 coding-system-2) t)
   ((not (equal (coding-system-plist coding-system-1)
                (coding-system-plist coding-system-2)))
    nil)
   (t
    (let ((eol-1 (coding-system-eol-type coding-system-1))
          (eol-2 (coding-system-eol-type coding-system-2)))
      (if (eq eol-1 eol-2)
          t
        (and (vectorp eol-1) (vectorp eol-2)))))))
923 | ||
(defun add-to-coding-system-list (coding-system)
  "Add CODING-SYSTEM to variable `coding-system-list' while keeping it sorted.
The ordering is the one defined by `coding-system-lessp'.  The
insertion point is found by repeatedly halving the remaining part of
the list."
  (cond
   ;; Empty list, or the new element belongs at the very front.
   ((or (null coding-system-list)
        (coding-system-lessp coding-system (car coding-system-list)))
    (push coding-system coding-system-list))
   (t
    (let ((remaining (length coding-system-list))
          (tail coding-system-list)
          half probe)
      ;; TAIL always starts with an element that is not after
      ;; CODING-SYSTEM; REMAINING is how many cells of TAIL are still
      ;; candidates for the insertion point.
      (while (> remaining 1)
        (setq half (/ remaining 2)
              probe (nthcdr half tail))
        (if (coding-system-lessp (car probe) coding-system)
            (setq tail probe
                  remaining (- remaining half))
          (setq remaining half)))
      ;; Splice the new element in right after the head of TAIL.
      (setcdr tail (cons coding-system (cdr tail)))))))
938 | ||
(defun coding-system-list (&optional base-only)
  "Return a list of all existing non-subsidiary coding systems.
If optional arg BASE-ONLY is non-nil, only base coding systems are
listed.  Otherwise every alias of each base coding system is listed.
The value doesn't include subsidiary coding systems which are
made from bases and aliases automatically for various end-of-line
formats (e.g. iso-latin-1-unix, koi8-r-dos)."
  (let (result)
    (dolist (candidate coding-system-list)
      ;; Only entries that are their own base are considered; everything
      ;; else is an alias or a subsidiary.
      (when (eq (coding-system-base candidate) candidate)
        (if base-only
            (push candidate result)
          (dolist (alias (coding-system-aliases candidate))
            (push alias result)))))
    result))
953 | ||
(defconst char-coding-system-table nil
  "Placeholder kept only for backward compatibility; always nil.")
;; Declared obsolete since 23.1, with no replacement.
(make-obsolete-variable 'char-coding-system-table nil "23.1")
c11a8f77 | 957 | |
50c29104 KH |
(defun transform-make-coding-system-args (name type &optional doc-string props)
  "For internal use only.
Transform XEmacs style args for `make-coding-system' to Emacs style.

NAME is the coding system symbol.  TYPE is the XEmacs coding type
symbol: one of `shift-jis', `iso2022', `big5' or `ccl'.  DOC-STRING is
the documentation string and PROPS is an XEmacs style property list.

Value is a list of transformed arguments, in the order
\(NAME TYPE MNEMONIC DOC-STRING FLAGS PROPERTIES EOL-TYPE), where TYPE
is the numeric Emacs type and PROPERTIES is an alist of
\(KEY . VALUE) pairs, which is the form `make-coding-system' iterates
over.

Signal an error if TYPE is not supported or if PROPS requests
`escape-quoted'."
  (let ((mnemonic (string-to-char (or (plist-get props 'mnemonic) "?")))
        (eol-type (plist-get props 'eol-type))
        properties value)
    (cond
     ((eq eol-type 'lf) (setq eol-type 'unix))
     ((eq eol-type 'crlf) (setq eol-type 'dos))
     ((eq eol-type 'cr) (setq eol-type 'mac)))
    ;; Collect the conversion functions as an alist.  The caller reads
    ;; each element with `car'/`cdr', so a plist here would make it
    ;; signal a wrong-type-argument error.  Pushing in this order leaves
    ;; `post-read-conversion' first.
    (dolist (key '(pre-write-conversion post-read-conversion))
      (if (setq value (plist-get props key))
          (push (cons key value) properties)))
    (cond
     ((eq type 'shift-jis)
      `(,name 1 ,mnemonic ,doc-string () ,properties ,eol-type))
     ((eq type 'iso2022)                ; This is not perfect.
      (if (plist-get props 'escape-quoted)
          (error "escape-quoted is not supported: %S"
                 `(,name ,type ,doc-string ,props)))
      (let ((designations
             ;; One entry per graphic register G0..G3.  A charset is
             ;; wrapped as (nil CHARSET) when output is forced for that
             ;; register and as (CHARSET t) otherwise; any other value
             ;; is passed through untouched.
             (mapcar (lambda (register)
                       (let ((charset (plist-get props (car register))))
                         (cond ((not (charsetp charset)) charset)
                               ((plist-get props (cdr register))
                                `(nil ,charset))
                               (t `(,charset t)))))
                     '((charset-g0 . force-g0-on-output)
                       (charset-g1 . force-g1-on-output)
                       (charset-g2 . force-g2-on-output)
                       (charset-g3 . force-g3-on-output))))
            (use-roman
             (and
              (eq (cadr (assoc 'latin-jisx0201
                               (plist-get props 'input-charset-conversion)))
                  'ascii)
              (eq (cadr (assoc 'ascii
                               (plist-get props 'output-charset-conversion)))
                  'latin-jisx0201)))
            (use-oldjis
             (and
              (eq (cadr (assoc 'japanese-jisx0208-1978
                               (plist-get props 'input-charset-conversion)))
                  'japanese-jisx0208)
              (eq (cadr (assoc 'japanese-jisx0208
                               (plist-get props 'output-charset-conversion)))
                  'japanese-jisx0208-1978))))
        `(,name 2 ,mnemonic ,doc-string
          (,@designations
           ,(plist-get props 'short)
           ,(not (plist-get props 'no-ascii-eol))
           ,(not (plist-get props 'no-ascii-cntl))
           ,(plist-get props 'seven)
           t
           ,(not (plist-get props 'lock-shift))
           ,use-roman
           ,use-oldjis
           ,(plist-get props 'no-iso6429)
           nil nil nil nil)
          ,properties ,eol-type)))
     ((eq type 'big5)
      `(,name 3 ,mnemonic ,doc-string () ,properties ,eol-type))
     ((eq type 'ccl)
      `(,name 4 ,mnemonic ,doc-string
        (,(plist-get props 'decode) . ,(plist-get props 'encode))
        ,properties ,eol-type))
     (t
      (error "unsupported XEmacs style make-coding-system arguments: %S"
             `(,name ,type ,doc-string ,props))))))
1038 | ||
(defun make-coding-system (coding-system type mnemonic doc-string
                                         &optional
                                         flags
                                         properties
                                         eol-type)
  "Define a new coding system CODING-SYSTEM (symbol).
This function is provided for backward compatibility.

TYPE is an integer from 0 to 5 selecting the coding type (0:
`emacs-mule', 1: `shift-jis', 2: `iso-2022', 3: `big5', 4: `ccl',
5: `raw-text').  MNEMONIC and DOC-STRING become the `:mnemonic'
property and the documentation string.  FLAGS is type specific: for
type 2 it is a list whose first four elements describe the graphic
registers and whose remaining elements are ISO-2022 flags; for type 4
it is a cons (DECODER . ENCODER).  PROPERTIES is an alist of
\(KEY . VALUE) pairs; old-style keys are translated to keywords.
EOL-TYPE is 0, 1 or 2 for unix, dos or mac; a vector is treated as
nil.

If TYPE is a symbol, the call is assumed to use XEmacs style
arguments (NAME TYPE DOC-STRING PROPS), which are first converted by
`transform-make-coding-system-args'.

The result is passed on to `define-coding-system'."
  (declare (obsolete define-coding-system "23.1"))
  ;; For compatibility with XEmacs, we check the type of TYPE.  If it
  ;; is a symbol, perhaps, this function is called with XEmacs-style
  ;; arguments.  Here, try to transform that kind of arguments to
  ;; Emacs style.
  (if (symbolp type)
      (let ((args (transform-make-coding-system-args coding-system type
                                                     mnemonic doc-string)))
        (setq coding-system (car args)
              type (nth 1 args)
              mnemonic (nth 2 args)
              doc-string (nth 3 args)
              flags (nth 4 args)
              properties (nth 5 args)
              eol-type (nth 6 args))))

  ;; Map the legacy numeric type onto a `:coding-type' symbol.  The
  ;; ISO-2022 type must be spelled `iso-2022', which is the spelling
  ;; `define-coding-system' tests for.
  (setq type
        (cond ((eq type 0) 'emacs-mule)
              ((eq type 1) 'shift-jis)
              ((eq type 2) 'iso-2022)
              ((eq type 3) 'big5)
              ((eq type 4) 'ccl)
              ((eq type 5) 'raw-text)
              (t
               (error "Invalid coding system type: %s" type))))

  ;; Turn the PROPERTIES alist into a plist with keyword keys.
  (setq properties
        (let ((plist nil) key)
          (dolist (elt properties)
            (setq key (car elt))
            (cond ((eq key 'post-read-conversion)
                   (setq key :post-read-conversion))
                  ((eq key 'pre-write-conversion)
                   (setq key :pre-write-conversion))
                  ((eq key 'translation-table-for-decode)
                   (setq key :decode-translation-table))
                  ((eq key 'translation-table-for-encode)
                   (setq key :encode-translation-table))
                  ((eq key 'safe-charsets)
                   (setq key :charset-list))
                  ((eq key 'mime-charset)
                   (setq key :mime-charset))
                  ((eq key 'valid-codes)
                   (setq key :valids)))
            (setq plist (plist-put plist key (cdr elt))))
          plist))
  ;; Always keep the value returned by `plist-put' instead of relying
  ;; on it modifying the list in place.
  (setq properties (plist-put properties :mnemonic mnemonic))
  (setq properties (plist-put properties :coding-type type))
  (cond ((eq eol-type 0) (setq eol-type 'unix))
        ((eq eol-type 1) (setq eol-type 'dos))
        ((eq eol-type 2) (setq eol-type 'mac))
        ((vectorp eol-type) (setq eol-type nil)))
  (setq properties (plist-put properties :eol-type eol-type))

  (cond
   ((eq type 'iso-2022)
    (setq properties
          (plist-put properties :flags
                     (list (and (or (consp (nth 0 flags))
                                    (consp (nth 1 flags))
                                    (consp (nth 2 flags))
                                    (consp (nth 3 flags))) 'designation)
                           (or (nth 4 flags) 'long-form)
                           (and (nth 5 flags) 'ascii-at-eol)
                           (and (nth 6 flags) 'ascii-at-cntl)
                           (and (nth 7 flags) '7-bit)
                           (and (nth 8 flags) 'locking-shift)
                           (and (nth 9 flags) 'single-shift)
                           (and (nth 10 flags) 'use-roman)
                           (and (nth 11 flags) 'use-oldjis)
                           (or (nth 12 flags) 'direction)
                           (and (nth 13 flags) 'init-at-bol)
                           (and (nth 14 flags) 'designate-at-bol)
                           (and (nth 15 flags) 'safe)
                           (and (nth 16 flags) 'latin-extra))))
    (setq properties
          (plist-put properties :designation
                     (let ((vec (make-vector 4 nil)))
                       ;; NOTE(review): a register spec that is a bare
                       ;; charset symbol is not copied into the vector
                       ;; here -- confirm whether that is intended.
                       (dotimes (i 4)
                         (let ((spec (nth i flags)))
                           (cond
                            ((eq spec t)
                             (aset vec i '(94 96)))
                            ((consp spec)
                             ;; `remq' leaves the caller's FLAGS list
                             ;; untouched.
                             (if (memq t spec)
                                 (setq spec (append (remq t spec) '(94 96))))
                             (aset vec i spec)))))
                       vec))))

   ((eq type 'ccl)
    (setq properties (plist-put properties :ccl-decoder (car flags)))
    (setq properties (plist-put properties :ccl-encoder (cdr flags)))))

  (apply 'define-coding-system coding-system doc-string properties))
4ed46869 | 1138 | |
bbdea948 RS |
(defun merge-coding-systems (first second)
  "Fill in any unspecified aspects of coding system FIRST from SECOND.
The text conversion of FIRST counts as unspecified when its base is
`undecided'; its end-of-line conversion counts as unspecified when its
eol type is a vector.  Return the resulting coding system."
  (let ((second-base (coding-system-base second))
        (second-eol (coding-system-eol-type second))
        (merged first))
    ;; Borrow the text conversion of SECOND when FIRST has none.
    (when (eq (coding-system-base merged) 'undecided)
      (setq merged (coding-system-change-text-conversion merged second-base)))
    ;; Borrow the eol conversion of SECOND when FIRST has none and
    ;; SECOND's is one of 0, 1 or 2.
    (when (and (vectorp (coding-system-eol-type merged))
               (numberp second-eol)
               (<= 0 second-eol)
               (>= 2 second-eol))
      (setq merged (coding-system-change-eol-conversion merged second-eol)))
    merged))
1153 | ||
2da14137 KH |
(defun autoload-coding-system (symbol form)
  "Define SYMBOL as a coding-system that is defined on demand.

FORM is a form to evaluate to define the coding-system.  It is stored
in the `coding-system-define-form' property of SYMBOL and of the
symbols made by appending \"-unix\", \"-dos\" and \"-mac\" to its name.
Each of these names is also added to `coding-system-alist'."
  (let ((base-name (symbol-name symbol)))
    (put symbol 'coding-system-define-form form)
    (push (list base-name) coding-system-alist)
    (dolist (suffix '("-unix" "-dos" "-mac"))
      (let ((variant (concat base-name suffix)))
        (put (intern variant) 'coding-system-define-form form)
        (push (list variant) coding-system-alist)))))
1165 | ||
0ccdf61e | 1166 | ;; This variable is set in these two cases: |
4c549102 | 1167 | ;; (1) A file is read by a coding system specified explicitly. |
0ccdf61e CY |
1168 | ;; `after-insert-file-set-coding' sets the car of this value to |
1169 | ;; `coding-system-for-read', and sets the cdr to nil. | |
1170 | ;; (2) `set-buffer-file-coding-system' is called. | |
4c549102 | 1171 | ;; The cdr of this value is set to the specified coding system. |
0ccdf61e CY |
1172 | ;; This variable is used for decoding in `revert-buffer' and encoding |
1173 | ;; in `select-safe-coding-system'. | |
1174 | ;; | |
1175 | ;; When saving a buffer, if `buffer-file-coding-system-explicit' is | |
1176 | ;; already non-nil, `basic-save-buffer-1' sets its CAR to the value of | |
1177 | ;; `last-coding-system-used'. (It used to set it unconditionally, but | |
1178 | ;; that seems unnecessary; see Bug#4533.) | |
1179 | ||
4c549102 KH |
(defvar-local buffer-file-coding-system-explicit nil
  "The file coding system explicitly specified for the current buffer.
The value is a cons whose car is the coding system for reading
\(decoding) and whose cdr is the coding system for writing (encoding).
Internal use only.")
;; Keep the value across major mode changes.
(put 'buffer-file-coding-system-explicit 'permanent-local t)
1187 | ||
87926e02 SM |
(defun read-buffer-file-coding-system ()
  "Read a coding system for saving the current buffer from the minibuffer.
Completion offers first the coding systems whose base is reported by
`find-coding-systems-region' for the whole accessible buffer, plus
\"dos\", \"unix\" and \"mac\", and then everything in
`coding-system-alist'.  The default is whatever
`set-auto-coding-function' returns for the buffer, if anything, and
otherwise the first preferred coding system whose base differs from
that of the current `buffer-file-coding-system'.
Return the chosen coding system as a symbol, or nil for empty input."
  (let* ((safe-bases (find-coding-systems-region (point-min) (point-max)))
         ;; Non-nil when `find-coding-systems-region' placed no
         ;; restriction on the choice.
         (unrestricted (equal safe-bases '(undecided)))
         (safe-names
          (unless unrestricted
            (let (names)
              (dolist (candidate coding-system-list)
                (when (memq (coding-system-base candidate) safe-bases)
                  (push (symbol-name candidate) names)))
              (append '("dos" "unix" "mac") (nreverse names)))))
         (combined-table
          (cond (safe-names
                 (completion-table-in-turn safe-names coding-system-alist))
                (t coding-system-alist)))
         (auto-cs
          (unless find-file-literally
            (save-excursion
              (save-restriction
                (widen)
                (goto-char (point-min))
                (funcall set-auto-coding-function
                         (or buffer-file-name "") (buffer-size))))))
         (preferred
          (let ((global-default (default-value 'buffer-file-coding-system)))
            (cons (and (or unrestricted
                           (memq (coding-system-base global-default)
                                 safe-bases))
                       global-default)
                  (mapcar (lambda (candidate)
                            (and (coding-system-p candidate)
                                 (coding-system-get candidate :mime-charset)
                                 (or unrestricted
                                     (memq (coding-system-base candidate)
                                           safe-bases))
                                 candidate))
                          (coding-system-priority-list)))))
         (default
           (let ((current (coding-system-base buffer-file-coding-system))
                 chosen)
             ;; Generally use as a default the first preferred coding-system
             ;; different from the current coding-system, except for
             ;; the case of auto-cs since choosing anything else is asking
             ;; for trouble (would lead to using a different coding
             ;; system than specified in the coding tag).
             (or auto-cs
                 (progn
                   (dolist (candidate preferred)
                     (when (and candidate
                                (null chosen)
                                (not (eq current
                                         (coding-system-base candidate))))
                       (setq chosen candidate)))
                   chosen))))
         (answer
          (let ((completion-ignore-case t)
                (completion-pcm--delim-wild-regex ; Let "u8" complete to "utf-8".
                 (concat completion-pcm--delim-wild-regex
                         "\\|\\([[:alpha:]]\\)[[:digit:]]")))
            (completing-read
             (format "Coding system for saving file (default %s): " default)
             combined-table
             nil t nil 'coding-system-history
             (if default (symbol-name default))))))
    (if (zerop (length answer))
        nil
      (intern answer))))
1245 | ||
(defun set-buffer-file-coding-system (coding-system &optional force nomodify)
  "Set the file coding-system of the current buffer to CODING-SYSTEM.
This means that when you save the buffer, it will be converted
according to CODING-SYSTEM.  For a list of possible values of
CODING-SYSTEM, use \\[list-coding-systems].

If CODING-SYSTEM leaves the text conversion unspecified, or if it leaves
the end-of-line conversion unspecified, FORCE controls what to do.
If FORCE is nil, get the unspecified aspect (or aspects) from the buffer's
previous `buffer-file-coding-system' value (if it is specified there).
Otherwise, leave it unspecified.

When called interactively, also check that the buffer text can be
encoded by CODING-SYSTEM, and ask for a different coding system if
it cannot.

This marks the buffer modified so that the succeeding \\[save-buffer]
surely saves the buffer with CODING-SYSTEM.  From a program, if you
don't want to mark the buffer modified, specify t for NOMODIFY.
If you know exactly what coding system you want to use,
just set the variable `buffer-file-coding-system' directly."
  (interactive
   (list (read-buffer-file-coding-system)
         current-prefix-arg))
  (check-coding-system coding-system)
  (if (and coding-system buffer-file-coding-system (null force))
      (setq coding-system
            (merge-coding-systems coding-system buffer-file-coding-system)))
  (when (and (called-interactively-p 'interactive)
             (let ((charsets (coding-system-get coding-system
                                                :charset-list)))
               ;; The `:charset-list' property may be a symbol such as
               ;; `iso-2022' or `emacs-mule' rather than a list, and
               ;; `memq' signals an error on a non-list.
               (not (and (listp charsets) (memq 'emacs charsets)))))
    ;; Check whether save would succeed, and jump to the offending char(s)
    ;; if not.
    (let ((css (find-coding-systems-region (point-min) (point-max))))
      (unless (or (eq (car css) 'undecided)
                  (memq (coding-system-base coding-system) css))
        (setq coding-system (select-safe-coding-system-interactively
                             (point-min) (point-max) css
                             (list coding-system))))))
  (setq buffer-file-coding-system coding-system)
  ;; Record the choice as the explicit coding system for writing,
  ;; keeping any previously recorded coding system for reading.
  (if buffer-file-coding-system-explicit
      (setcdr buffer-file-coding-system-explicit coding-system)
    (setq buffer-file-coding-system-explicit (cons nil coding-system)))
  (unless nomodify
    (set-buffer-modified-p t))
  (force-mode-line-update))
1288 | ||
bbdea948 RS |
(defun revert-buffer-with-coding-system (coding-system &optional force)
  "Visit the current buffer's file again using coding system CODING-SYSTEM.
For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].

If CODING-SYSTEM leaves the text conversion unspecified, or if it leaves
the end-of-line conversion unspecified, FORCE controls what to do.
If FORCE is nil, get the unspecified aspect (or aspects) from the buffer's
previous `buffer-file-coding-system' value (if it is specified there).
Otherwise, determine it from the file contents as usual for visiting a file."
  (interactive "zCoding system for visited file (default nil): \nP")
  (check-coding-system coding-system)
  (let ((coding-system-for-read
         (if (and coding-system buffer-file-coding-system (null force))
             (merge-coding-systems coding-system buffer-file-coding-system)
           coding-system)))
    ;; `revert-buffer' re-reads the file while this binding is in effect.
    (revert-buffer)))
1305 | ||
701414e3 KH |
(defun set-file-name-coding-system (coding-system)
  "Set coding system for decoding and encoding file names to CODING-SYSTEM.
This just sets the variable `file-name-coding-system' (which see)
to CODING-SYSTEM.  Signal an error if CODING-SYSTEM is non-nil and
has neither the `:ascii-compatible-p' nor the
`:suitable-for-file-name' property."
  (interactive "zCoding system for file names (default nil): ")
  (check-coding-system coding-system)
  (when coding-system
    (unless (or (coding-system-get coding-system :ascii-compatible-p)
                (coding-system-get coding-system :suitable-for-file-name))
      (error "%s is not suitable for file names" coding-system)))
  (setq file-name-coding-system coding-system))
1317 | ||
358d28fb RS |
(defvar default-terminal-coding-system nil
  "Default value for the terminal coding system.
Normally this is set according to the selected language environment.
The command `set-terminal-coding-system' also records its argument
here; see that command for details.")
1322 | ||
(defun set-terminal-coding-system (coding-system &optional terminal)
  "Set coding system of terminal output to CODING-SYSTEM.
All text output to TERMINAL will be encoded
with the specified coding system.

For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
The default is determined by the selected language environment
or by the previous use of this command.

If CODING-SYSTEM is nil and no terminal coding system is in effect,
use `default-terminal-coding-system' instead.  A non-nil coding
system is also stored in `default-terminal-coding-system'.

TERMINAL may be a terminal object, a frame, or nil for the
selected frame's terminal.  The setting has no effect on
graphical terminals."
  (interactive
   (let ((default (and (not (terminal-coding-system))
                       default-terminal-coding-system)))
     (list (read-coding-system
            (format "Coding system for terminal display (default %s): "
                    default)
            default))))
  ;; Fall back on the remembered default only when nothing is set at all.
  (unless (or coding-system (terminal-coding-system))
    (setq coding-system default-terminal-coding-system))
  (when coding-system
    (setq default-terminal-coding-system coding-system))
  (set-terminal-coding-system-internal coding-system terminal)
  (redraw-frame))
df100398 | 1350 | |
(defvar default-keyboard-coding-system nil
  "Fallback coding system for keyboard input.
The selected language environment normally sets this variable.
The command `set-keyboard-coding-system' offers it as the interactive
default when keyboard decoding is currently disabled.")
1355 | ||
(defun set-keyboard-coding-system (coding-system &optional terminal)
  "Decode keyboard input on TERMINAL using CODING-SYSTEM.

For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
The default is determined by the selected language environment
or by the previous use of this command.

If CODING-SYSTEM is nil or the coding-type of CODING-SYSTEM is
`raw-text', the decoding of keyboard input is disabled.

An error is signaled when CODING-SYSTEM is neither marked suitable for
the keyboard nor ASCII compatible, or when its coding-type is not
supported for keyboard input.

TERMINAL may be a terminal object, a frame, or nil for the
selected frame's terminal.  The setting has no effect on
graphical terminals."
  (interactive
   (let* ((current (keyboard-coding-system nil))
          ;; Only suggest the remembered default while decoding is off.
          (fallback (and (eq (coding-system-type current) 'raw-text)
                         default-keyboard-coding-system)))
     (list (read-coding-system
            (format "Coding system for keyboard input (default %s): "
                    fallback)
            fallback))))
  (let ((type (coding-system-type coding-system))
        (saved-meta
         (terminal-parameter terminal 'keyboard-coding-saved-meta-mode)))
    (cond
     ((eq type 'raw-text)
      ;; Decoding is being disabled: put back the meta mode that was
      ;; in effect before an 8-bit coding system replaced it.
      (when saved-meta
        (set-input-meta-mode (car saved-meta) terminal)
        (set-terminal-parameter terminal
                                'keyboard-coding-saved-meta-mode
                                nil)))
     (t
      (unless (or (coding-system-get coding-system :suitable-for-keyboard)
                  (coding-system-get coding-system :ascii-compatible-p))
        (error "Unsuitable coding system for keyboard: %s" coding-system))
      (let ((accept-8-bit
             (cond ((memq type '(charset utf-8 shift-jis big5 ccl))
                    t)
                   ((eq type 'iso-2022)
                    ;; A 7-bit ISO-2022 variant never needs the 8th bit.
                    (not (memq '7-bit
                               (coding-system-get coding-system :flags))))
                   (t
                    (error "Unsupported coding system for keyboard: %s"
                           coding-system)))))
        (cond
         (accept-8-bit
          ;; Save the current meta mode once, then switch to 8-bit input.
          (unless saved-meta
            (set-terminal-parameter terminal
                                    'keyboard-coding-saved-meta-mode
                                    (list (nth 2 (current-input-mode)))))
          (set-input-meta-mode 8 terminal))
         (saved-meta
          (set-input-meta-mode (car saved-meta) terminal)
          (set-terminal-parameter terminal
                                  'keyboard-coding-saved-meta-mode
                                  nil))))
      ;; Avoid end-of-line conversion.
      (setq coding-system
            (coding-system-change-eol-conversion coding-system 'unix)))))
  (set-keyboard-coding-system-internal coding-system terminal)
  (setq keyboard-coding-system coding-system))
df100398 | 1418 | |
(defmacro defcustom (var val &rest ignore)
  "Define VAR as a plain variable whose initial value is VAL.
This expands to a two-argument `defvar'; every remaining argument,
including the docstring and all keyword options, is discarded."
  ;; NOTE(review): this shadows the standard `defcustom' for everything
  ;; evaluated afterwards -- confirm that this is intended here.
  (list 'defvar var val))
1421 | ||
(defcustom keyboard-coding-system nil
  "Coding system used to decode keyboard input.
If you set this on a terminal which can't distinguish Meta keys from
8-bit characters, you will have to use ESC to type Meta characters.
See Info node `Terminal Coding' and Info node `Unibyte Mode'.

On non-windowing terminals, this is set from the locale by default.

Setting this variable directly does not take effect;
use either \\[customize] or \\[set-keyboard-coding-system]."
  :type '(coding-system :tag "Coding system")
  :link '(info-link "(emacs)Terminal Coding")
  :link '(info-link "(emacs)Unibyte Mode")
  :set (lambda (_symbol coding)
         (cond
          ;; Go through the command only when there is a value to
          ;; install or the encoded-kbd support is already present, so
          ;; that it is not loaded unnecessarily.
          ((or coding (boundp 'encoded-kbd-setup-display))
           (set-keyboard-coding-system coding))
          (t
           ;; The variable must still be initialized.
           (set-default 'keyboard-coding-system nil))))
  :version "22.1"
  :group 'keyboard
  :group 'mule)
1443 | ||
(defun set-buffer-process-coding-system (decoding encoding)
  "Set the coding systems of the process attached to the current buffer.
DECODING is the coding system used to decode input from the process,
ENCODING is the coding system used to encode output to the process.
Signal an error if the current buffer has no process.

For a list of possible coding systems, use \\[list-coding-systems]."
  (interactive
   "zCoding-system for output from the process: \nzCoding-system for input to the process: ")
  (let ((process (get-buffer-process (current-buffer))))
    (unless process
      (error "No process"))
    ;; Validate both coding systems before touching the process.
    (check-coding-system decoding)
    (check-coding-system encoding)
    (set-process-coding-system process decoding encoding))
  (force-mode-line-update))
1459 | ||
;; `set-clipboard-coding-system' is another name for
;; `set-selection-coding-system'.
(defalias 'set-clipboard-coding-system #'set-selection-coding-system)
1461 | ||
(defun set-selection-coding-system (coding-system)
  "Use CODING-SYSTEM when communicating with other X clients.
Text sent or received via cut_buffer, selection, and clipboard is
encoded or decoded by CODING-SYSTEM.  CODING-SYSTEM is validated with
`check-coding-system' before it is stored in `selection-coding-system'."
  (interactive "zCoding system for X selection: ")
  ;; `check-coding-system' hands back its argument when it is valid.
  (setq selection-coding-system (check-coding-system coding-system)))
b25eef20 | 1469 | |
(defvar last-next-selection-coding-system nil
  "Coding system most recently specified by the command
`set-next-selection-coding-system'.")
1473 | ||
(defun set-next-selection-coding-system (coding-system)
  "Use CODING-SYSTEM for the next exchange with other window system clients.
The setting is effective for the next communication only.
When CODING-SYSTEM is nil, the coding system given to the previous
invocation of this command is used again."
  (interactive
   (let* ((previous last-next-selection-coding-system)
          (prompt (if previous
                      (format "Coding system for the next selection (default %S): "
                              previous)
                    "Coding system for the next selection: ")))
     (list (read-coding-system prompt previous))))
  (cond (coding-system
         ;; Remember an explicit choice for later invocations.
         (setq last-next-selection-coding-system coding-system))
        (t
         (setq coding-system last-next-selection-coding-system)))
  (check-coding-system coding-system)

  (setq next-selection-coding-system coding-system))
1490 | ||
(defun set-coding-priority (arg)
  "Set priority of coding categories according to ARG.
ARG is a list of coding categories ordered by priority.  The value of
each category symbol is passed, in order, to `set-coding-system-priority'.

This function is provided for backward compatibility."
  (declare (obsolete set-coding-system-priority "23.1"))
  (apply #'set-coding-system-priority (mapcar #'symbol-value arg)))
4ed46869 | 1499 | |
835cbadb EZ |
1500 | ;;; X selections |
1501 | ||
(defvar ctext-non-standard-encodings-alist
  (mapcar #'purecopy
          '(("big5-0" big5 2 big5)
            ("ISO8859-14" iso-8859-14 1 latin-iso8859-14)
            ("ISO8859-15" iso-8859-15 1 latin-iso8859-15)
            ("gbk-0" gbk 2 chinese-gbk)
            ("koi8-r" koi8-r 1 koi8-r)
            ("microsoft-cp1251" windows-1251 1 windows-1251)))
  "Alist mapping non-standard encoding names to their usage in CTEXT.

It controls how the coding system `compound-text-with-extensions'
handles the extended segments of a compound text.

Each element has the form (ENCODING-NAME CODING-SYSTEM N-OCTET CHARSET).

ENCODING-NAME is the encoding name of an \"extended segment\".

CODING-SYSTEM is the coding-system used to encode the characters into
the extended segment, or to decode them from it.

N-OCTET is the number of octets (bytes) that encodes a character
in the segment.  It can be 0 (meaning the number of octets per
character is variable), 1, 2, 3, or 4.

CHARSET is a character set containing characters that are encoded
in the segment.  It can be a list of character sets.

On decoding CTEXT, all encoding names listed here are recognized.

On encoding CTEXT, encoding names in the variable
`ctext-non-standard-encodings' (which see) and in the information
listed for the current language environment under the key
`ctext-non-standard-encodings' are used.")
1535 | ||
(defvar ctext-non-standard-encodings nil
  "Non-standard encoding names to use in extended segments of CTEXT.
Every element must be one of the encoding names that appear in the
variable `ctext-non-standard-encodings-alist' (which see).")
cc926903 KH |
1540 | |
(defvar ctext-non-standard-encodings-regexp
  (purecopy
   (string-to-multibyte
    (concat
     ;; Header of an extended segment for a non-standard encoding.
     "\\(\e%/[0-4][\200-\377][\200-\377]\\([^\002]+\\)\002\\)"
     "\\|"
     ;; A complete UTF-8 section, including both delimiters.
     "\\(\e%G[^\e]*\e%@\\)")))
  "Regexp matching the extension sequences of Compound Text.
Group 1 matches the header of an extended segment and group 2 the
encoding name inside that header.  Group 3 matches a whole UTF-8
section together with its opening and closing escape sequences.")
835cbadb EZ |
1550 | |
1551 | ;; Functions to support "Non-Standard Character Set Encodings" defined | |
6b61353c KH |
1552 | ;; by the COMPOUND-TEXT spec. They also support "The UTF-8 encoding" |
1553 | ;; described in the section 7 of the documentation of COMPOUND-TEXT | |
1554 | ;; distributed with XFree86. | |
5c88a01e | 1555 | |
(defun ctext-post-read-conversion (len)
  "Decode LEN characters encoded as Compound Text with Extended Segments.
The text starting at point is first decoded as `ctext'; any extended
segment or UTF-8 section that remains is then decoded in place.
Leave point at the start of the decoded text and return its length."
  ;; Undo recording is deliberately left enabled: this function is
  ;; mainly used for decoding X selections, which are not that big.
  (save-match-data
    (save-restriction
      (narrow-to-region (point) (+ (point) len))
      (let ((case-fold-search nil)
            last-coding-system-used
            start remaining)
        (decode-coding-region (point-min) (point-max) 'ctext)
        (while (re-search-forward ctext-non-standard-encodings-regexp
                                  nil 'move)
          (setq start (match-beginning 0))
          (cond
           ((match-beginning 1)
            ;; ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
            (let* ((hi (multibyte-char-to-unibyte (char-after (+ start 4))))
                   (lo (multibyte-char-to-unibyte (char-after (+ start 5))))
                   (name (match-string 2))
                   (entry (assoc-string name
                                        ctext-non-standard-encodings-alist
                                        t))
                   ;; Unlisted names are tried as coding system names.
                   (coding (if entry
                               (nth 1 entry)
                             (setq name (intern (downcase name)))
                             (and (coding-system-p name)
                                  name))))
              ;; M and L give the length of everything after them; take
              ;; off the part of the header that was already scanned.
              (setq remaining (- (+ (* (- hi 128) 128) (- lo 128))
                                 (- (point) (+ start 6))))
              (when coding
                (delete-region start (point))
                (forward-char remaining)
                (decode-coding-region (- (point) remaining) (point)
                                      coding))))
           (t
            ;; ESC % G --UTF-8-BYTES-- ESC % @
            (delete-char -3)
            (delete-region start (+ start 3))
            (decode-coding-region start (point) 'utf-8)))))
      (goto-char (point-min))
      (- (point-max) (point)))))
835cbadb | 1597 | |
(defvar ctext-standard-encodings
  '(ascii
    latin-jisx0201 katakana-jisx0201
    latin-iso8859-1 latin-iso8859-2 latin-iso8859-3 latin-iso8859-4
    greek-iso8859-7 arabic-iso8859-6 hebrew-iso8859-8 cyrillic-iso8859-5
    latin-iso8859-9
    chinese-gb2312 japanese-jisx0208 korean-ksc5601)
  "List of approved standard encodings (i.e. charsets) of X's Compound Text.
The coding system `compound-text-with-extensions' encodes a character
of any of these charsets with the normal ISO2022 designation sequence,
unless the current language environment or the variable
`ctext-non-standard-encodings' decides to use an extended segment of
CTEXT for that character.  See also the documentation of
`ctext-non-standard-encodings-alist'.")
1611 | ||
e0bd7bb9 KH |
1612 | ;; Return an alist of CHARSET vs CTEXT-USAGE-INFO generated from |
1613 | ;; `ctext-non-standard-encodings' and a list specified by the key | |
c7015153 | 1614 | ;; `ctext-non-standard-encodings' for the current language |
e0bd7bb9 KH |
1615 | ;; environment. CTEXT-USAGE-INFO is one of the element of |
1616 | ;; `ctext-non-standard-encodings-alist' or nil. In the former case, a | |
1617 | ;; character in CHARSET is encoded using extended segment. In the | |
1618 | ;; latter case, a character in CHARSET is encoded using normal ISO2022 | |
1619 | ;; designation sequence. If a character is not in any of CHARSETs, it | |
cd1181db | 1620 | ;; is encoded using UTF-8 encoding extension. |
6b61353c KH |
1621 | |
(defun ctext-non-standard-encodings-table ()
  "Return an alist of (CHARSET . CTEXT-USAGE-INFO) for encoding CTEXT.
The alist is built from the variable `ctext-non-standard-encodings'
and from the `ctext-non-standard-encodings' entry of the current
language environment.  CTEXT-USAGE-INFO is the matching element of
`ctext-non-standard-encodings-alist', or nil when no element matches.
When an element names a list of charsets, one alist entry is produced
for each charset of the list, all sharing that element."
  (let* ((table (append ctext-non-standard-encodings
                        (copy-sequence
                         (get-language-info current-language-environment
                                            'ctext-non-standard-encodings))))
         (tail table)
         elt)
    ;; TABLE is a fresh list of encoding names; each cell is rewritten
    ;; in place into a (CHARSET . SLOT) pair.
    (while tail
      (setq elt (car tail))
      (let* ((slot (assoc elt ctext-non-standard-encodings-alist))
             (charset (nth 3 slot)))
        (if (charsetp charset)
            (setcar tail
                    (cons (plist-get (charset-plist charset) :base) slot))
          ;; CHARSET is a list of charsets (or nil when ELT is unknown).
          (setcar tail (cons (car charset) slot))
          (dolist (cs (cdr charset))
            ;; CS is itself a charset, so it is passed to `charset-plist'
            ;; directly; taking its `car' would signal an error.
            (setcdr tail
                    (cons (cons (plist-get (charset-plist cs) :base) slot)
                          (cdr tail)))
            (setq tail (cdr tail))))
        (setq tail (cdr tail))))
    table))
835cbadb EZ |
1644 | |
(defun ctext-pre-write-conversion (from to)
  "Encode characters between FROM and TO as Compound Text w/Extended Segments.

If FROM is a string, generate a new temp buffer, insert the text,
and convert it in the temporary buffer.  Otherwise, convert
in-place.

Each run of non-ASCII characters is split by charset.  A run whose
charset has an entry in the table returned by
`ctext-non-standard-encodings-table' becomes an extended segment, a run
in another known charset is tagged with a `charset' text property, and
any other run is written as a UTF-8 section.

Always return nil."
  (save-match-data
    ;; Setup a working buffer if necessary.
    (when (stringp from)
      (set-buffer (generate-new-buffer " *temp"))
      (set-buffer-multibyte (multibyte-string-p from))
      (insert from)
      (setq from (point-min) to (point-max)))
    (save-restriction
      (narrow-to-region from to)
      (goto-char from)
      (let ((encoding-table (ctext-non-standard-encodings-table))
            (charset-list (sort-charsets
                           (copy-sequence ctext-standard-encodings)))
            ;; One marker is enough; it is repositioned for every run
            ;; and keeps tracking the run's end across insertions.
            (end-pos (point-marker))
            last-coding-system-used
            last-pos charset encoding-info)
        (dolist (elt encoding-table)
          (push (car elt) charset-list))
        (while (re-search-forward "[^\0-\177]+" nil t)
          ;; Found a sequence of non-ASCII characters.
          (set-marker end-pos (match-end 0))
          (goto-char (match-beginning 0))
          (setq last-pos (point)
                charset (char-charset (following-char) charset-list))
          (forward-char 1)
          ;; Extend the range over the following characters that belong
          ;; to the same charset.
          (while (and (< (point) end-pos)
                      (eq charset (char-charset (following-char) charset-list)))
            (forward-char 1))
          (if charset
              (if (setq encoding-info (cdr (assq charset encoding-table)))
                  ;; Encode this range using an extended segment.
                  (let ((encoding-name (car encoding-info))
                        (coding-system (nth 1 encoding-info))
                        (noctets (nth 2 encoding-info))
                        len)
                    (encode-coding-region last-pos (point) coding-system)
                    ;; The length covers the encoding name, the \002
                    ;; separator and the encoded bytes.
                    (setq len (+ (length encoding-name) 1
                                 (- (point) last-pos)))
                    ;; According to the spec of CTEXT, it is not
                    ;; necessary to produce this extra designation
                    ;; sequence, but some buggy application
                    ;; (e.g. crxvt-gb) requires it.
                    (insert "\e(B")
                    (save-excursion
                      (goto-char last-pos)
                      (insert (format "\e%%/%d" noctets))
                      (insert-byte (+ (/ len 128) 128) 1)
                      (insert-byte (+ (% len 128) 128) 1)
                      (insert encoding-name)
                      (insert 2)))
                ;; Encode this range as characters in CHARSET.
                (put-text-property last-pos (point) 'charset charset))
            ;; Encode this range using UTF-8 encoding extension.
            (encode-coding-region last-pos (point) 'mule-utf-8)
            (save-excursion
              (goto-char last-pos)
              (insert "\e%G"))
            (insert "\e%@")))
        ;; Detach the marker so that it no longer has to be relocated
        ;; by later edits to this buffer.
        (set-marker end-pos nil)
        (goto-char (point-min)))))
  ;; Must return nil, as build_annotations_2 expects that.
  nil)
1713 | ||
4ed46869 KH |
1714 | ;;; FILE I/O |
1715 | ||
117a9ea1 | 1716 | ;; TODO many elements of this list are also in inhibit-local-variables-regexps. |
(defcustom auto-coding-alist
  ;; .exe and .EXE are added to support archive-mode looking at DOS
  ;; self-extracting exe archives.
  (mapcar (lambda (entry) (cons (purecopy (car entry)) (cdr entry)))
          '(("\\.\\(\
arc\\|zip\\|lzh\\|lha\\|zoo\\|[jew]ar\\|xpi\\|rar\\|7z\\|\
ARC\\|ZIP\\|LZH\\|LHA\\|ZOO\\|[JEW]AR\\|XPI\\|RAR\\|7Z\\)\\'"
             . no-conversion-multibyte)
            ("\\.\\(exe\\|EXE\\)\\'" . no-conversion)
            ("\\.\\(sx[dmicw]\\|odt\\|tar\\|t[bg]z\\)\\'" . no-conversion)
            ("\\.\\(gz\\|Z\\|bz\\|bz2\\|xz\\|gpg\\)\\'" . no-conversion)
            ("\\.\\(jpe?g\\|png\\|gif\\|tiff?\\|p[bpgn]m\\)\\'" . no-conversion)
            ("\\.pdf\\'" . no-conversion)
            ("/#[^/]+#\\'" . utf-8-emacs-unix)))
  "Alist of file name patterns vs the corresponding coding systems.
Each element looks like (REGEXP . CODING-SYSTEM).
A file whose name matches REGEXP is decoded by CODING-SYSTEM on reading.

The settings in this alist take priority over `coding:' tags
in the file (see the function `set-auto-coding')
and the contents of `file-coding-system-alist'."
  :version "24.1"                       ; added xz
  :group 'files
  :group 'mule
  :type '(repeat (cons (regexp :tag "File name regexp")
                       (symbol :tag "Coding system"))))
835f49b8 | 1743 | |
(defcustom auto-coding-regexp-alist
  (mapcar (lambda (entry) (cons (purecopy (car entry)) (cdr entry)))
          '(("\\`BABYL OPTIONS:[ \t]*-\\*-[ \t]*rmail[ \t]*-\\*-" . no-conversion)
            ("\\`\xFE\xFF" . utf-16be-with-signature)
            ("\\`\xFF\xFE" . utf-16le-with-signature)
            ("\\`\xEF\xBB\xBF" . utf-8-with-signature)
            ("\\`;ELC\024\0\0\0" . emacs-mule))) ; Emacs 20-compiled
  "Alist of content patterns vs the corresponding coding systems.
Each element looks like (REGEXP . CODING-SYSTEM).
A file whose first bytes match REGEXP is decoded by CODING-SYSTEM on reading.

The settings in this alist take priority over `coding:' tags
in the file (see the function `set-auto-coding')
and the contents of `file-coding-system-alist'."
  :group 'files
  :group 'mule
  :type '(repeat (cons (regexp :tag "Regexp")
                       (symbol :tag "Coding system"))))
1762 | ||
(defun auto-coding-regexp-alist-lookup (from to)
  "Look up `auto-coding-regexp-alist' for the contents of the current buffer.
Search the region between FROM and TO with each regexp of the alist in
turn.  Return the coding system paired with the first regexp that is
found, or nil if none is.  Point is left unchanged."
  (save-excursion
    (goto-char from)
    (let ((entries auto-coding-regexp-alist)
          found)
      (while (and entries (not found))
        (let ((pattern (car (car entries))))
          ;; The patterns are written as unibyte strings; convert them
          ;; when the buffer being searched is multibyte.
          (when enable-multibyte-characters
            (setq pattern (string-to-multibyte pattern)))
          (if (re-search-forward pattern to t)
              (setq found (cdr (car entries)))
            (setq entries (cdr entries)))))
      found)))
1779 | ||
d9f6dfe6 | 1780 | ;; See the bottom of this file for built-in auto coding functions. |
(defcustom auto-coding-functions
  '(sgml-xml-auto-coding-function
    sgml-html-meta-auto-coding-function)
  "A list of functions which attempt to determine a coding system.

Each function in this list should be written to operate on the
current buffer, but should not modify it in any way.  The buffer
will contain undecoded text of parts of the file.  Each function
should take one argument, SIZE, which says how many characters
\(starting from point) it should look at.

A function that succeeds in determining a coding system should
return that coding system.  Otherwise, it should return nil.

If a file has a `coding:' tag, that takes precedence over these
functions, so they won't be called at all."
  :group 'files
  :group 'mule
  :type '(repeat function))
1800 | ||
(defvar set-auto-coding-for-load nil
  "Non-nil means honor a \"unibyte: t\" entry in file local variables.
Emacs binds this variable to t while it loads or byte-compiles
Emacs Lisp files.")
1c4cc63a | 1805 | |
(defun auto-coding-alist-lookup (filename)
  "Return the coding system specified by `auto-coding-alist' for FILENAME.
The value comes from the first element whose regexp matches FILENAME
and whose coding system is non-nil; it is nil if there is no such
element.  Matching ignores case on the windows-nt, ms-dos and cygwin
system types."
  (let ((alist auto-coding-alist)
        (case-fold-search (memq system-type '(windows-nt ms-dos cygwin)))
        coding-system)
    (while (and alist (not coding-system))
      (when (string-match (car (car alist)) filename)
        (setq coding-system (cdr (car alist))))
      ;; Always move on to the next element.  A matching element whose
      ;; coding system is nil would otherwise be examined forever.
      (setq alist (cdr alist)))
    coding-system))
1816 | ||
;; Set the `permanent-local' property of `enable-character-translation',
;; and use `booleanp' as its `safe-local-variable' predicate.
(put 'enable-character-translation 'permanent-local t)
(put 'enable-character-translation 'safe-local-variable #'booleanp)
1819 | ||
(defun find-auto-coding (filename size)
  "Find a coding system for a file FILENAME of which SIZE bytes follow point.
These bytes should include at least the first 1k of the file
and the last 3k of the file, but the middle may be omitted.

The following sources are consulted in order, stopping at the first
one that specifies a coding system:
  1. `auto-coding-alist', matched against FILENAME;
  2. `auto-coding-regexp-alist', matched against the text after point;
  3. a `coding:' tag in the first one or two lines following point, or
     in a local variables list in the last 3K bytes of the SIZE bytes;
  4. the functions in `auto-coding-functions'.

If a coding system is specified, the return value is a cons
\(CODING . SOURCE), where CODING is the specified coding system and
SOURCE is a symbol `auto-coding-alist', `auto-coding-regexp-alist',
`:coding', or `auto-coding-functions' indicating by what CODING is
specified.  Note that the validity of CODING is not checked;
it's the caller's responsibility to check it.

If nothing is specified, the return value is nil."
  (or
   ;; First, the file name.
   (let ((by-name (auto-coding-alist-lookup filename)))
     (and by-name
          (cons by-name 'auto-coding-alist)))
   ;; Try using `auto-coding-regexp-alist'.
   (let ((by-content (auto-coding-regexp-alist-lookup (point)
                                                      (+ (point) size))))
     (and by-content
          (cons by-content 'auto-coding-regexp-alist)))
   ;; Then the tags written in the file itself.
   (let* ((case-fold-search t)
          (head-start (point))
          (head-end (+ head-start (min size 1024)))
          (tail-start (+ head-start (max (- size 3072) 0)))
          (tail-end (+ head-start size))
          coding head-found tail-found translation)
     ;; Try a short cut by searching for the string "coding:"
     ;; and for "unibyte:" at the head and tail of SIZE bytes.
     (setq head-found (or (search-forward "coding:" head-end t)
                          (search-forward "unibyte:" head-end t)
                          (search-forward "enable-character-translation:"
                                          head-end t)))
     (cond
      ((and head-found (> head-found tail-start))
       ;; Head and tail are overlapped.
       (setq tail-found head-found))
      (t
       (goto-char tail-start)
       (setq tail-found (or (search-forward "coding:" tail-end t)
                            (search-forward "unibyte:" tail-end t)
                            (search-forward "enable-character-translation:"
                                            tail-end t)))))

     ;; At first check the head.
     (when head-found
       (goto-char head-start)
       (setq head-end (set-auto-mode-1))
       (setq head-start (point))
       (when (and head-end (< head-found head-end))
         (goto-char head-start)
         (when (and set-auto-coding-for-load
                    (re-search-forward
                     "\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)"
                     head-end t))
           (display-warning 'mule
                            (format "\"unibyte: t\" (in %s) is obsolete; \
use \"coding: 'raw-text\" instead."
                                    (file-relative-name filename))
                            :warning)
           (setq coding 'raw-text))
         (when (and (null coding)
                    (re-search-forward
                     "\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)"
                     head-end t))
           (setq coding (intern (match-string 2))))
         (when (re-search-forward
                "\\(.*;\\)?[ \t]*enable-character-translation:[ \t]*\\([^ ;]+\\)"
                head-end t)
           (setq translation (match-string 2)))))

     ;; If no coding: tag in the head, check the tail.
     ;; Here we must pay attention to the case that the end-of-line
     ;; is just "\r" and we can't use "^" nor "$" in regexp.
     (when (and tail-found (not (and coding translation)))
       (goto-char tail-start)
       (re-search-forward "[\r\n]\^L" tail-end t)
       (when (re-search-forward
              "[\r\n]\\([^[\r\n]*\\)[ \t]*Local Variables:[ \t]*\\([^\r\n]*\\)[\r\n]"
              tail-end t)
         ;; The prefix is what comes before "local variables:" in its
         ;; line.  The suffix is what comes after "local variables:"
         ;; in its line.
         (let* ((prefix (regexp-quote (match-string 1)))
                (suffix (regexp-quote (match-string 2)))
                (re-coding
                 (concat
                  "[\r\n]" prefix
                  ;; N.B. without the \n below, the regexp can
                  ;; eat newlines.
                  "[ \t]*coding[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
                  suffix "[\r\n]"))
                (re-unibyte
                 (concat
                  "[\r\n]" prefix
                  "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
                  suffix "[\r\n]"))
                (re-translation
                 (concat
                  "[\r\n]" prefix
                  "[ \t]*enable-character-translation[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
                  suffix "[\r\n]"))
                (re-end
                 (concat "[\r\n]" prefix "[ \t]*End *:[ \t]*" suffix
                         "[\r\n]?"))
                (list-start (1- (point))))
           (forward-char -1)            ; skip back \r or \n.
           ;; Restrict the remaining searches to the local variables list.
           (re-search-forward re-end tail-end 'move)
           (setq tail-end (point))
           (goto-char list-start)
           (when (and set-auto-coding-for-load
                      (re-search-forward re-unibyte tail-end t))
             (display-warning 'mule "`unibyte: t' is obsolete; \
use \"coding: 'raw-text\" instead." :warning)
             (setq coding 'raw-text))
           (when (and (null coding)
                      (re-search-forward re-coding tail-end t))
             (setq coding (intern (match-string 1))))
           (when (and (null translation)
                      (re-search-forward re-translation tail-end t))
             (setq translation (match-string 1))))))
     (when coding
       ;; If the coding-system name ends with "!", remove it and
       ;; set the translation flag to "nil".
       (let ((name (symbol-name coding)))
         (when (= (aref name (1- (length name))) ?!)
           (setq coding (intern (substring name 0 -1))
                 translation "nil"))))
     ;; A translation value that reads as nil turns character
     ;; translation off in the current buffer.
     (when (and translation
                (null (setq translation (intern translation))))
       (make-local-variable 'enable-character-translation)
       (setq enable-character-translation nil))
     (and coding
          (cons coding :coding)))
   ;; Finally, try all the `auto-coding-functions'.
   (let ((candidates auto-coding-functions)
         guess)
     (while (and candidates (null guess))
       (setq guess (ignore-errors
                     (save-excursion
                       (goto-char (point-min))
                       (funcall (pop candidates) size)))))
     (and guess
          (cons guess 'auto-coding-functions)))))
1972 | ||
(defun set-auto-coding (filename size)
  "Return coding system for a file FILENAME of which SIZE bytes follow point.
The lookup itself is done by `find-auto-coding' (which see); this
function keeps only the coding system part of its result.
Return nil if nothing is found or if what is found is not a valid
coding system.

The variable `set-auto-coding-function' (which see) is set to this
function by default."
  ;; `find-auto-coding' returns nil or a cons whose car is the coding
  ;; system; taking the car of nil is harmless.
  (let ((candidate (car (find-auto-coding filename size))))
    (and candidate
         (coding-system-p candidate)
         candidate)))
63561304 KH |
1983 | |
;; Make `set-auto-coding' the value of `set-auto-coding-function',
;; as the docstring of `set-auto-coding' states.
(setq set-auto-coding-function 'set-auto-coding)
87aba788 | 1985 | |
(defun after-insert-file-set-coding (inserted &optional visit)
  "Set `buffer-file-coding-system' of current buffer after text is inserted.
INSERTED is the number of characters that were inserted, as figured
in the situation before this function.  Return the number of characters
inserted, as figured in the situation after.  The two numbers can be
different if the buffer has become unibyte.
The optional second arg VISIT non-nil means that we are visiting a file."
  ;; When visiting with an explicit `coding-system-for-read' (other
  ;; than the auto-save one), record it in
  ;; `buffer-file-coding-system-explicit'.
  (when (and visit
             (not (memq coding-system-for-read '(nil auto-save-coding))))
    (setq buffer-file-coding-system-explicit
          (list coding-system-for-read)))
  ;; Update `buffer-file-coding-system' only if the last decoding
  ;; produced something worth storing.
  (let ((new-coding
         (and last-coding-system-used
              (find-new-buffer-file-coding-system last-coding-system-used))))
    (when new-coding
      (setq buffer-file-coding-system new-coding)))
  inserted)
4ed46869 | 2004 | |
;; The coding-spec and the eol-type of the coding system returned are
;; decided independently, in the following order of precedence:
;;   1. That of buffer-file-coding-system locally bound.
;;   2. That of CODING.
2009 | ||
(defun find-new-buffer-file-coding-system (coding)
  "Return a coding system for a buffer when a file of CODING is inserted.
The value is meant to become the buffer's `buffer-file-coding-system';
this function only computes it.
Return nil if there's no need to set `buffer-file-coding-system'.

The text conversion and the eol-type are chosen independently: a
buffer-local `buffer-file-coding-system' takes precedence over CODING,
otherwise CODING takes precedence over the non-local value."
  (when coding
    (let* ((local-eol
            ;; Only a numeric eol-type counts as already decided.
            (let ((eol (coding-system-eol-type buffer-file-coding-system)))
              (and (numberp eol) eol)))
           (local-coding
            (and buffer-file-coding-system
                 (not (eq (coding-system-type buffer-file-coding-system)
                          'undecided))
                 (coding-system-base buffer-file-coding-system)))
           (locally-bound (local-variable-p 'buffer-file-coding-system)))
      ;; A buffer that already has a complete local coding system is
      ;; left alone.
      (unless (and locally-bound local-eol local-coding)
        (let ((found-eol
               (let ((eol (coding-system-eol-type coding)))
                 (cond ((numberp eol) eol)
                       ;; No eol-type found; force the unix eol-type
                       ;; when EOL conversion is inhibited.
                       (inhibit-eol-conversion 0))))
              (found-coding (coding-system-base coding)))
          ;; Without an eol-type and with an undecided text conversion
          ;; there is no valid information to use.
          (unless (and (null found-eol) (eq found-coding 'undecided))
            (let* ((new-coding (if locally-bound
                                   (or local-coding found-coding)
                                 (or found-coding local-coding)))
                   (new-eol (if locally-bound
                                (or local-eol found-eol)
                              (or found-eol local-eol)))
                   (eol-variants (coding-system-eol-type new-coding)))
              ;; Pick the eol-specific variant when both an eol-type
              ;; and a vector of variants are available.
              (if (and (numberp new-eol) (vectorp eol-variants))
                  (aref eol-variants new-eol)
                new-coding))))))))
4ed46869 | 2064 | |
fe831d33 GV |
(defun modify-coding-system-alist (target-type regexp coding-system)
  "Modify one of look up tables for finding a coding system on I/O operation.
There are three of such tables, `file-coding-system-alist',
`process-coding-system-alist', and `network-coding-system-alist'.

TARGET-TYPE specifies which of them to modify.
If it is `file', it affects `file-coding-system-alist' (which see).
If it is `process', it affects `process-coding-system-alist' (which see).
If it is `network', it affects `network-coding-system-alist' (which see).

REGEXP is a regular expression matching a target of I/O operation.
The target is a file name if TARGET-TYPE is `file', a program name if
TARGET-TYPE is `process', or a network service name or a port number
to connect to if TARGET-TYPE is `network'.

CODING-SYSTEM is a coding system to perform code conversion on the I/O
operation, or a cons cell (DECODING . ENCODING) specifying the coding
systems for decoding and encoding respectively, or a function symbol
which, when called, returns such a cons cell.

If the table already has an entry for REGEXP, that entry is updated in
place; otherwise a new entry is added at the front of the table."
  (unless (memq target-type '(file process network))
    (error "Invalid target type: %s" target-type))
  (unless (or (stringp regexp)
              (and (eq target-type 'network) (integerp regexp)))
    (error "Invalid regular expression: %s" regexp))
  ;; Validate CODING-SYSTEM.  A plain coding system symbol is expanded
  ;; to a (DECODING . ENCODING) pair; a function symbol is kept as is.
  (cond ((not (symbolp coding-system))
         (check-coding-system (car coding-system))
         (check-coding-system (cdr coding-system)))
        ((not (fboundp coding-system))
         (check-coding-system coding-system)
         (setq coding-system (cons coding-system coding-system))))
  (let* ((alist-var (cond ((eq target-type 'file)
                           'file-coding-system-alist)
                          ((eq target-type 'process)
                           'process-coding-system-alist)
                          (t
                           'network-coding-system-alist)))
         (entry (assoc regexp (symbol-value alist-var))))
    (if entry
        (setcdr entry coding-system)
      (set alist-var
           (cons (cons regexp coding-system)
                 (symbol-value alist-var))))))
2117 | ||
db046b7d KH |
(defun decode-coding-inserted-region (from to filename
                                           &optional visit beg end replace)
  "Decode the region between FROM and TO as if it is read from file FILENAME.
The idea is that the text between FROM and TO was just inserted somehow.
Optional arguments VISIT, BEG, END, and REPLACE are the same as those
of the function `insert-file-contents'.
Part of the job of this function is setting `buffer-undo-list' appropriately.

The coding system is taken from `coding-system-for-read' if that is
non-nil, else from `set-auto-coding-function', else from
`find-operation-coding-system'.  `last-coding-system-used' is set to
nil when no usable coding system is found."
  (save-excursion
    (save-restriction
      (let ((coding coding-system-for-read)
            undo-list-saved
            ;; Non-nil once the undo record of the insertion has been
            ;; taken off `buffer-undo-list'.  This must be tracked
            ;; separately from UNDO-LIST-SAVED, which is legitimately
            ;; nil when that record was the only entry in the list.
            undo-entry-removed)
        (if visit
            ;; Temporarily turn off undo recording, if we're decoding the
            ;; text of a visited file.
            (setq buffer-undo-list t)
          ;; Otherwise, if we can recognize the undo elt for the insertion,
          ;; remove it and get ready to replace it later.
          ;; In the mean time, turn off undo recording.
          (let ((last (car-safe buffer-undo-list)))
            (when (and (consp last)
                       (eql (car last) from)
                       (eql (cdr last) to))
              (setq undo-list-saved (cdr buffer-undo-list)
                    undo-entry-removed t
                    buffer-undo-list t))))
        (narrow-to-region from to)
        (goto-char (point-min))
        (or coding
            (setq coding (funcall set-auto-coding-function
                                  filename (- (point-max) (point-min)))))
        (or coding
            (setq coding (car (find-operation-coding-system
                               'insert-file-contents
                               (cons filename (current-buffer))
                               visit beg end replace))))
        (if (coding-system-p coding)
            ;; In a unibyte buffer, keep only the eol handling of CODING.
            (or enable-multibyte-characters
                (setq coding
                      (coding-system-change-text-conversion coding 'raw-text)))
          (setq coding nil))
        (if coding
            (decode-coding-region (point-min) (point-max) coding)
          (setq last-coding-system-used coding))
        ;; If we're decoding the text of a visited file,
        ;; the undo list should start out empty.
        (if visit
            (setq buffer-undo-list nil)
          ;; If we removed the undo entry for the insertion, put back an
          ;; entry covering the decoded text.  This also re-enables undo
          ;; recording, which was switched off above.
          (if undo-entry-removed
              (setq buffer-undo-list
                    (cons (cons from (point-max)) undo-list-saved))))))))
f29387e8 | 2167 | |
27a91cf7 KH |
(defun recode-region (start end new-coding coding)
  "Re-decode the region (previously decoded by CODING) by NEW-CODING.
The text between START and END is encoded back with CODING and then
decoded with NEW-CODING.  Signal an error if either coding system is
nil.  The mark is deactivated afterwards if the region is active."
  (interactive
   (list (region-beginning) (region-end)
         (read-coding-system "Text was really in: ")
         (let* ((default (or buffer-file-coding-system
                             last-coding-system-used))
                (suffix (if default
                            (format " (default %S): " default)
                          ": ")))
           (read-coding-system (concat "But was interpreted as" suffix)
                               default))))
  (unless (and new-coding coding)
    (error "Coding system not specified"))
  ;; Validate NEW-CODING up front, before the region gets encoded.
  (check-coding-system new-coding)
  (save-restriction
    (narrow-to-region start end)
    (encode-coding-region (point-min) (point-max) coding)
    (decode-coding-region (point-min) (point-max) new-coding))
  (when (region-active-p)
    (deactivate-mark)))
f29387e8 | 2188 | |
(defun make-translation-table (&rest args)
  "Make a translation table from arguments.
A translation table is a char table intended for character
translation in CCL programs.

Each argument is a list of elements of the form (FROM . TO), where FROM
is a character to be translated to TO.

The arguments and forms in each argument are processed in the given
order, and if a previous form already translates TO to some other
character, say TO-ALT, FROM is also translated to TO-ALT."
  (let ((result (make-char-table 'translation-table))
        ;; Alist of (TARGET SOURCE...): the characters currently
        ;; translated to TARGET.
        (sources-by-target nil))
    (dolist (pairs args)
      (dolist (pair pairs)
        (let* ((src (car pair))
               ;; If TO is itself already translated, follow it.
               (dst (or (aref result (cdr pair)) (cdr pair)))
               (src-entry (assq src sources-by-target))
               ;; Characters that were translated to SRC so far.
               (inherited (cdr src-entry))
               dst-entry)
          (aset result src dst)
          ;; Whatever used to map to SRC must now map to DST as well.
          (when src-entry
            (dolist (ch inherited)
              (aset result ch dst))
            (setq sources-by-target (delq src-entry sources-by-target)))
          ;; Record SRC (and what it inherited) as sources of DST.
          (setq dst-entry (assq dst sources-by-target))
          (if dst-entry
              (setcdr dst-entry (cons src (cdr dst-entry)))
            (setq dst-entry (list dst src))
            (push dst-entry sources-by-target))
          (when inherited
            (setcdr dst-entry (append inherited (cdr dst-entry)))))))
    ;; Mark the table as needing a lookup of one character at a time.
    (set-char-table-extra-slot result 1 1)
    result))
2230 | ||
c76b5c99 KH |
(defun make-translation-table-from-vector (vec)
  "Make translation table from decoding vector VEC.
VEC is an array of 256 elements to map unibyte codes to multibyte
characters.  Elements may be nil for undefined code points.

The first extra slot of the returned table holds the reverse table,
which maps each character of VEC whose code is 256 or greater back to
its index in VEC."
  (let ((forward (make-char-table 'translation-table))
        (backward (make-char-table 'translation-table)))
    (dotimes (code 256)
      (let ((char (aref vec code)))
        (when char
          (aset forward code char)
          ;; Only characters outside the 0..255 range get a reverse
          ;; mapping.
          (when (>= char 256)
            (aset backward char code)))))
    (set-char-table-extra-slot forward 0 backward)
    (set-char-table-extra-slot forward 1 1)
    (set-char-table-extra-slot backward 1 1)
    forward))
2248 | ||
a6d1872e KH |
(defun make-translation-table-from-alist (alist)
  "Make translation table from N<->M mapping in ALIST.
ALIST is an alist, each element has the form (FROM . TO).
FROM and TO are a character or a vector of characters.
If FROM is a character, that character is translated to TO.
If FROM is a vector of characters, that sequence is translated to TO.
The first extra-slot of the value is a translation table for reverse mapping."
  (let ((forward (make-char-table 'translation-table))
        (backward (make-char-table 'translation-table)))
    ;; Fill the forward table from (FROM . TO) and the backward table
    ;; from the same elements read as (TO . FROM).
    (dolist (spec (list (cons forward nil) (cons backward t)))
      (let ((table (car spec))
            (reversed (cdr spec))
            (max-lookup 1))
        (dolist (elt alist)
          (let* ((from (if reversed (cdr elt) (car elt)))
                 (to (if reversed (car elt) (cdr elt)))
                 (single (characterp from))
                 ;; Entries are indexed by the first character of FROM.
                 (idx (if single from (aref from 0)))
                 (existing (aref table idx)))
            (unless single
              (setq max-lookup (max max-lookup (length from))))
            (aset table idx
                  (if (null existing)
                      ;; First mapping for IDX: a bare target for a
                      ;; single character, else a one-element alist.
                      (if single to (list (cons from to)))
                    ;; IDX already has a mapping: make sure it is in
                    ;; alist form, then append the new rule.
                    (nconc (if (consp existing)
                               existing
                             (list (cons (vector idx) existing)))
                           (list (cons (if single (vector from) from)
                                       to)))))))
        ;; The second extra slot records the longest FROM sequence.
        (set-char-table-extra-slot table 1 max-lookup)))
    (set-char-table-extra-slot forward 0 backward)
    forward))
a6d1872e | 2285 | |
(defun define-translation-table (symbol &rest args)
  "Define SYMBOL as the name of translation table made by ARGS.
This sets up information so that the table can be used for
translations in a CCL program.

If the first element of ARGS is a char-table whose purpose is
`translation-table', just define SYMBOL to name it.  (Note that this
function does not bind SYMBOL.)

Any other ARGS should be suitable as arguments of the function
`make-translation-table' (which see).

This function sets properties `translation-table' and
`translation-table-id' of SYMBOL to the created table itself and the
identification number of the table respectively.  It also registers
the table in `translation-table-vector'.

The value is the identification number."
  (let* ((first-arg (car args))
         (table (if (and (char-table-p first-arg)
                         (eq (char-table-subtype first-arg)
                             'translation-table))
                    first-arg
                  (apply 'make-translation-table args)))
         (capacity (length translation-table-vector))
         (id 0))
    (put symbol 'translation-table table)
    ;; Find the first slot that is free or already owned by SYMBOL.
    (while (and (< id capacity)
                (let ((slot (aref translation-table-vector id)))
                  (and slot (not (eq (car slot) symbol)))))
      (setq id (1+ id)))
    ;; Every slot is taken by another symbol: grow the vector by its
    ;; original length and use the first new slot.
    (when (>= id capacity)
      (setq translation-table-vector
            (vconcat translation-table-vector (make-vector capacity nil))))
    (aset translation-table-vector id (cons symbol table))
    (put symbol 'translation-table-id id)
    id))
2324 | ||
16431d3c KH |
(defun translate-region (start end table)
  "From START to END, translate characters according to TABLE.
TABLE is a string or a char-table.
If TABLE is a string, the Nth character in it is the mapping
for the character with code N.
If TABLE is a char-table, the element for character N is the mapping
for the character with code N.
TABLE may also be a symbol whose `translation-table' property is a
char-table; interactively, such a symbol is read with completion
among the names registered in `translation-table-vector'.
It returns the number of characters changed."
  (interactive
   (list (region-beginning)
         (region-end)
         (let ((candidates nil)
               (choice nil))
           ;; Collect the names of all registered translation tables.
           (dolist (slot (append translation-table-vector nil))
             (when (consp slot)
               (push (list (symbol-name (car slot))) candidates)))
           (unless candidates
             (error "No translation table defined"))
           (while (null choice)
             (setq choice (completing-read "Translation table: "
                                           candidates nil t)))
           (intern choice))))
  (translate-region-internal
   start end
   (if (not (symbolp table))
       table
     ;; Resolve a table name to the char-table it stands for.
     (let ((named (get table 'translation-table)))
       (if (char-table-p named)
           named
         (error "Invalid translation table name: %s" table))))))
2352 | ||
(defmacro with-category-table (table &rest body)
  "Execute BODY like `progn' with TABLE the current category table.
The category table of the current buffer is saved, BODY is evaluated,
then the saved table is restored, even in case of an abnormal exit.
The table is restored in the buffer that was current on entry, even
if BODY leaves another buffer current.
Value is what BODY returns."
  (declare (indent 1) (debug t))
  ;; Uninterned symbols keep the bindings invisible to BODY.
  (let ((saved-table (make-symbol "saved-table"))
        (home-buffer (make-symbol "home-buffer")))
    `(let ((,saved-table (category-table))
           (,home-buffer (current-buffer)))
       (unwind-protect
           (progn
             (set-category-table ,table)
             ,@body)
         (save-current-buffer
           (set-buffer ,home-buffer)
           (set-category-table ,saved-table))))))
35554641 | 2369 | |
394e4eb0 DL |
(defun define-translation-hash-table (symbol table)
  "Define SYMBOL as the name of the hash translation TABLE for use in CCL.

Analogous to `define-translation-table', but updates
`translation-hash-table-vector' and the table is for use in the CCL
`lookup-integer' and `lookup-character' functions.

SYMBOL gets the properties `translation-hash-table' (TABLE itself)
and `translation-hash-table-id' (the index of its slot in the
vector); the value is that index."
  (unless (and (symbolp symbol)
               (hash-table-p table))
    (error "Bad args to define-translation-hash-table"))
  (let ((capacity (length translation-hash-table-vector))
        (id 0))
    (put symbol 'translation-hash-table table)
    ;; Find the first slot that is free or already owned by SYMBOL.
    (while (and (< id capacity)
                (let ((slot (aref translation-hash-table-vector id)))
                  (and slot (not (eq (car slot) symbol)))))
      (setq id (1+ id)))
    ;; No usable slot: append exactly one new slot and use it.
    (when (>= id capacity)
      (setq translation-hash-table-vector
            (vconcat translation-hash-table-vector [nil])))
    (aset translation-hash-table-vector id (cons symbol table))
    (put symbol 'translation-hash-table-id id)
    id))
2396 | ||
69eba008 KH |
2397 | ;;; Initialize some variables. |
2398 | ||
;; Declare the `use-default-ascent' and `ignore-relative-composition'
;; char-table subtypes with zero extra slots, and set each variable
;; to a fresh char-table of the corresponding subtype.
(put 'use-default-ascent 'char-table-extra-slots 0)
(setq use-default-ascent (make-char-table 'use-default-ascent))
(put 'ignore-relative-composition 'char-table-extra-slots 0)
(setq ignore-relative-composition
      (make-char-table 'ignore-relative-composition))
69eba008 | 2404 | |
d9f6dfe6 CW |
2405 | ;;; Built-in auto-coding-functions: |
2406 | ||
(defun sgml-xml-auto-coding-function (size)
  "Determine whether the buffer is XML, and if so, its encoding.
SIZE is the number of characters after point to examine.
Return the coding system named by the encoding attribute of the XML
declaration, nil if that name is not a known coding system or if no
complete XML declaration is found.  When the declaration has no
encoding attribute, return `utf-8' or, with a warning, whatever
coding system the contents are detected as.
This function is intended to be added to `auto-coding-functions'."
  (let ((limit (+ (point) size)))
    (when (re-search-forward "\\`[[:space:]\n]*<\\?xml" limit t)
      (let ((decl-end (save-excursion
                        ;; This is a hack: take the first quote followed
                        ;; by "?>" as the end of the declaration.
                        (re-search-forward "[\"']\\s-*\\?>" limit t))))
        (cond
         ((null decl-end) nil)
         ((re-search-forward "encoding=[\"']\\(.+?\\)[\"']" decl-end t)
          (let* ((name (match-string 1))
                 (candidate (intern (downcase name))))
            (cond ((coding-system-p candidate) candidate)
                  (t (message "Warning: unknown coding system \"%s\"" name)
                     nil))))
         (t
          ;; Files without an encoding tag should be UTF-8.  But users
          ;; may be naive about encodings, and have saved the file from
          ;; another editor that does not help them get the encoding
          ;; right.  Detect the encoding and warn the user if it is
          ;; detected as something other than UTF-8.
          (let ((detected
                 (with-coding-priority '(utf-8)
                   (coding-system-base
                    (detect-coding-region (point-min) limit t)))))
            ;; Pure ASCII always comes back as undecided.
            (cond ((memq detected '(utf-8 undecided)) 'utf-8)
                  (t (warn "File contents detected as %s.
Consider adding an encoding attribute to the xml declaration,
or saving as utf-8, as mandated by the xml specification." detected)
                     detected)))))))))
d9f6dfe6 | 2439 | |
447404a3 CW |
(defun sgml-html-meta-auto-coding-function (size)
  "If the buffer has an HTML meta tag, use it to determine encoding.
SIZE is the number of characters after point to examine; the search
is further limited to the HTML header, or to 10 lines if there is no
\"</head>\".
Return the coding system named by the charset of the meta tag, or
nil if there is no such tag, if the buffer does not look like HTML,
or if the name is not a known coding system.
This function is intended to be added to `auto-coding-functions'."
  (let ((case-fold-search t))
    (setq size (min (+ (point) size)
                    (save-excursion
                      ;; Limit the search by the end of the HTML header.
                      (or (search-forward "</head>" (+ (point) size) t)
                          ;; In case of no header, search only 10 lines.
                          (forward-line 10))
                      (point))))
    ;; Make sure that the buffer really contains an HTML document, by
    ;; checking that it starts with a doctype or a <HTML> start tag
    ;; (allowing for whitespace at bob).  Note: 'DOCTYPE NETSCAPE' is
    ;; useful for Mozilla bookmark files.
    (when (and (re-search-forward "\\`[[:space:]\n]*\\(<!doctype[[:space:]\n]+\\(html\\|netscape\\)\\|<html\\)" size t)
               ;; The charset value ends at a quote, whitespace, `/' or
               ;; `>'.  Whitespace must be spelled as a character class
               ;; here: inside a bracket expression a backslash
               ;; construct such as \\s- is not a syntax class, so it
               ;; would fail to terminate unquoted values as in
               ;; <meta charset=utf-8 />.
               (re-search-forward "<meta\\s-+\\(http-equiv=[\"']?content-type[\"']?\\s-+content=[\"']text/\\sw+;\\s-*\\)?charset=[\"']?\\(.+?\\)[\"'[:space:]\n/>]" size t))
      (let* ((match (match-string 2))
             (sym (intern (downcase match))))
        (if (coding-system-p sym)
            sym
          (message "Warning: unknown coding system \"%s\"" match)
          nil)))))
0bca779a | 2463 | |
c6578617 JR |
(defun xml-find-file-coding-system (args)
  "Determine the coding system of an XML file without a declaration.
Strictly speaking, the file should be utf-8, but mistakes are
made, and there are genuine cases where XML fragments are saved,
with the encoding properly specified in a master document, or
added by processing software.

ARGS is a list whose first element names the I/O operation.  For
`insert-file-contents' the accessible part of the current buffer is
examined; for any other operation the value is `undecided'."
  (if (not (eq (car args) 'insert-file-contents))
      ;; Don't interfere with the user's wishes for saving the buffer.
      ;; We did what we could when the buffer was created to ensure the
      ;; correct encoding was used, or the user was warned, so any
      ;; non-conformity here is deliberate on the part of the user.
      'undecided
    (let ((detected
           (with-coding-priority '(utf-8)
             (coding-system-base
              (detect-coding-region (point-min) (point-max) t)))))
      (cond
       ;; Pure ASCII always comes back as undecided.
       ((memq detected '(utf-8 undecided))
        'utf-8)
       ;; UTF-16 with a signature is accepted without a warning.
       ((memq detected '(utf-16le-with-signature utf-16be-with-signature))
        detected)
       (t
        (warn "File contents detected as %s.
Consider adding an xml declaration with the encoding specified,
or saving as utf-8, as mandated by the xml specification." detected)
        detected)))))
2491 | ||
69eba008 | 2492 | ;;; |
4ed46869 KH |
;; Announce the `mule' feature.
(provide 'mule)
2494 | ||
2495 | ;;; mule.el ends here |