Commit | Line | Data |
---|---|---|
4ed46869 KH |
1 | ;;; mule.el --- basic commands for multilingual environment |
2 | ||
d7a0267c | 3 | ;; Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 |
2fd125a3 | 4 | ;; Free Software Foundation, Inc. |
7976eda0 | 5 | ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
d7a0267c | 6 | ;; 2005, 2006, 2007 |
2fd125a3 KH |
7 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
8 | ;; Registration Number H14PRO021 | |
4ed46869 KH |
9 | |
10 | ;; Keywords: mule, multilingual, character set, coding system | |
11 | ||
12 | ;; This file is part of GNU Emacs. | |
13 | ||
14 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
15 | ;; it under the terms of the GNU General Public License as published by | |
16 | ;; the Free Software Foundation; either version 2, or (at your option) | |
17 | ;; any later version. | |
18 | ||
19 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
20 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
21 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
22 | ;; GNU General Public License for more details. | |
23 | ||
24 | ;; You should have received a copy of the GNU General Public License | |
369314dc | 25 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
3a35cf56 LK |
26 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
27 | ;; Boston, MA 02110-1301, USA. | |
4ed46869 | 28 | |
60370d40 PJ |
29 | ;;; Commentary: |
30 | ||
4ed46869 KH |
31 | ;;; Code: |
32 | ||
(defconst mule-version "5.0 (SAKAKI)"
  "Version number and name of this version of MULE (multilingual environment).")
35 | ||
(defconst mule-version-date "1999.12.7"
  "Distribution date of this version of MULE (multilingual environment).")
38 | ||
(defun load-with-code-conversion (fullname file &optional noerror nomessage)
  "Load the Lisp file FILE, whose absolute name is FULLNAME, decoding it first.
The file is inserted into a temporary buffer with code conversion and
that buffer is then evaluated.

If FULLNAME is not readable, signal a `file-error', unless the optional
third arg NOERROR is non-nil, in which case return nil.
Progress messages are shown at the start and end of loading unless the
optional fourth arg NOMESSAGE is non-nil; the closing message is also
suppressed when `noninteractive' is non-nil.
Return t if the file was loaded."
  (cond
   ((not (file-readable-p fullname))
    (if noerror
        nil
      (signal 'file-error (list "Cannot open load file" file))))
   (t
    ;; Read the file with code conversion, then evaluate the buffer.
    (let* ((load-buffer
            ;; Binding default-major-mode to fundamental-mode avoids
            ;; any autoloading.  The buffer is made multibyte so the
            ;; file is read with real decoding even in a --unibyte
            ;; Emacs session.
            (let ((default-major-mode 'fundamental-mode)
                  (default-enable-multibyte-characters t))
              ;; `generate-new-buffer' cannot be used because files.el
              ;; is not yet loaded.
              (get-buffer-create (generate-new-buffer-name " *load*"))))
           (load-in-progress t)
           (source-p (save-match-data (string-match "\\.el\\'" fullname))))
      (or nomessage
          (message (if source-p
                       "Loading %s (source)..."
                     "Loading %s...")
                   file))
      (if purify-flag
          (setq preloaded-file-list (cons file preloaded-file-list)))
      (unwind-protect
          (let ((load-file-name fullname)
                (set-auto-coding-for-load t)
                (inhibit-file-name-operation nil))
            (with-current-buffer load-buffer
              ;; Don't let deactivate-mark remain set.
              (let (deactivate-mark)
                (insert-file-contents fullname))
              ;; A file inserted with no-conversion or a raw-text
              ;; coding system (coding type 5) gets a unibyte buffer;
              ;; otherwise eval-buffer might try to interpret random
              ;; binary junk as multibyte characters.
              (when (and enable-multibyte-characters
                         (or (eq (coding-system-type last-coding-system-used)
                                 5)
                             (eq last-coding-system-used 'no-conversion)))
                (set-buffer-multibyte nil))
              ;; Make `kill-buffer' quiet.
              (set-buffer-modified-p nil))
            ;; Have the original buffer current while we eval.
            (eval-buffer load-buffer nil
                         ;; This is compatible with what `load' does.
                         (if purify-flag file fullname)
                         nil t))
        ;; Always discard the temporary buffer, without running hooks
        ;; or query functions.
        (let (kill-buffer-hook kill-buffer-query-functions)
          (kill-buffer load-buffer)))
      (or purify-flag
          (do-after-load-evaluation fullname))
      (or nomessage
          noninteractive
          (message (if source-p
                       "Loading %s (source)...done"
                     "Loading %s...done")
                   file))
      t))))
109 | ||
110 | ;; API (Application Program Interface) for charsets. | |
111 | ||
(defsubst charset-quoted-standard-p (obj)
  "Return t if OBJ is a form (quote NAME) naming a standard charset.
NAME counts as a standard charset when its `charset' property is a
vector whose first element is less than 160."
  (when (and (listp obj) (eq (car obj) 'quote))
    (let ((name (car-safe (cdr obj))))
      (when (symbolp name)
        (let ((info (get name 'charset)))
          (and (vectorp info)
               (< (aref info 0) 160)))))))
4ed46869 KH |
119 | |
(defsubst charsetp (object)
  "Return t if OBJECT is a charset.
A charset is a symbol whose `charset' property is a vector."
  (if (symbolp object)
      (vectorp (get object 'charset))
    nil))
123 | ||
(defsubst charset-info (charset)
  "Return the information vector of CHARSET.
The vector is the value of the `charset' property of the symbol CHARSET.
Its elements are, in order:
  CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION,
  LEADING-CODE-BASE, LEADING-CODE-EXT,
  ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE,
  REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION,
  PLIST,
where
CHARSET-ID (integer) is the identification number of the charset.
BYTES (integer) is the length of the multi-byte form of a character
  in the charset: one of 1, 2, 3, and 4.
DIMENSION (integer) is the number of bytes used to represent a
  character of the charset: 1 or 2.
CHARS (integer) is the number of characters in a dimension: 94 or 96.
WIDTH (integer) is the number of columns a character in the charset
  occupies on the screen: one of 0, 1, and 2.
DIRECTION (integer) is the rendering direction of characters in the
  charset.  If 0, render from left to right, else render from right
  to left.
LEADING-CODE-BASE (integer) is the base leading-code for the charset.
LEADING-CODE-EXT (integer) is the extended leading-code for the
  charset.  Every charset whose ID is less than 0xA0 has the value 0.
ISO-FINAL-CHAR (character) is the final character of the
  corresponding ISO 2022 charset.  If the charset is not assigned
  any final character, the value is -1.
ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked while
  encoding to variants of the ISO 2022 coding system, one of the
  following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
  If the charset is not assigned any final character, the value is -1.
REVERSE-CHARSET (integer) is the charset which differs from this one
  only in its LEFT-TO-RIGHT value.  If there is no such charset, the
  value is -1.
SHORT-NAME (string) is the short name used to refer to the charset.
LONG-NAME (string) is the long name used to refer to the charset.
DESCRIPTION (string) is the description string of the charset.
PLIST (property list) may hold any information a user wants to store
  and retrieve with the functions `put-charset-property' and
  `get-charset-property' respectively."
  (get charset 'charset))
165 | ||
40c81f74 PE |
166 | ;; It is better not to use backquote in this file, |
167 | ;; because that makes a bootstrapping problem | |
168 | ;; if you need to recompile all the Lisp files using interpreted code. | |
169 | ||
4ed46869 KH |
(defmacro charset-id (charset)
  "Return the identification number of CHARSET.
If CHARSET is a quoted standard charset name, the value is looked up
while the macro is expanded; otherwise the expansion looks it up at
run time.  See the function `charset-info' for more detail."
  (let ((slot 0))
    (cond ((charset-quoted-standard-p charset)
           (aref (charset-info (car (cdr charset))) slot))
          (t
           (list 'aref (list 'charset-info charset) slot)))))
4ed46869 KH |
175 | |
(defmacro charset-bytes (charset)
  "Return the BYTES element of CHARSET's information vector.
If CHARSET is a quoted standard charset name, the value is looked up
while the macro is expanded; otherwise the expansion looks it up at
run time.  See the function `charset-info' for more detail."
  (let ((slot 1))
    (cond ((charset-quoted-standard-p charset)
           (aref (charset-info (car (cdr charset))) slot))
          (t
           (list 'aref (list 'charset-info charset) slot)))))
4ed46869 KH |
182 | |
(defmacro charset-dimension (charset)
  "Return the DIMENSION element of CHARSET's information vector.
If CHARSET is a quoted standard charset name, the value is looked up
while the macro is expanded; otherwise the expansion looks it up at
run time.  See the function `charset-info' for more detail."
  (let ((slot 2))
    (cond ((charset-quoted-standard-p charset)
           (aref (charset-info (car (cdr charset))) slot))
          (t
           (list 'aref (list 'charset-info charset) slot)))))
4ed46869 KH |
189 | |
(defmacro charset-chars (charset)
  "Return the number of characters contained in a dimension of CHARSET.
If CHARSET is a quoted standard charset name, the value is looked up
while the macro is expanded; otherwise the expansion looks it up at
run time.  See the function `charset-info' for more detail."
  (let ((slot 3))
    (cond ((charset-quoted-standard-p charset)
           (aref (charset-info (car (cdr charset))) slot))
          (t
           (list 'aref (list 'charset-info charset) slot)))))
4ed46869 KH |
196 | |
(defmacro charset-width (charset)
  "Return the width (number of screen columns occupied) of CHARSET.
If CHARSET is a quoted standard charset name, the value is looked up
while the macro is expanded; otherwise the expansion looks it up at
run time.  See the function `charset-info' for more detail."
  (let ((slot 4))
    (cond ((charset-quoted-standard-p charset)
           (aref (charset-info (car (cdr charset))) slot))
          (t
           (list 'aref (list 'charset-info charset) slot)))))
4ed46869 KH |
203 | |
(defmacro charset-direction (charset)
  "Return the DIRECTION element of CHARSET's information vector.
If CHARSET is a quoted standard charset name, the value is looked up
while the macro is expanded; otherwise the expansion looks it up at
run time.  See the function `charset-info' for more detail."
  (let ((slot 5))
    (cond ((charset-quoted-standard-p charset)
           (aref (charset-info (car (cdr charset))) slot))
          (t
           (list 'aref (list 'charset-info charset) slot)))))
4ed46869 KH |
210 | |
(defmacro charset-iso-final-char (charset)
  "Return the ISO final character of CHARSET.
If CHARSET is a quoted standard charset name, the value is looked up
while the macro is expanded; otherwise the expansion looks it up at
run time.  See the function `charset-info' for more detail."
  (let ((slot 8))
    (cond ((charset-quoted-standard-p charset)
           (aref (charset-info (car (cdr charset))) slot))
          (t
           (list 'aref (list 'charset-info charset) slot)))))
4ed46869 KH |
217 | |
(defmacro charset-iso-graphic-plane (charset)
  "Return the ISO graphic plane of CHARSET.
If CHARSET is a quoted standard charset name, the value is looked up
while the macro is expanded; otherwise the expansion looks it up at
run time.  See the function `charset-info' for more detail."
  (let ((slot 9))
    (cond ((charset-quoted-standard-p charset)
           (aref (charset-info (car (cdr charset))) slot))
          (t
           (list 'aref (list 'charset-info charset) slot)))))
4ed46869 KH |
224 | |
(defmacro charset-reverse-charset (charset)
  "Return the reverse charset of CHARSET.
If CHARSET is a quoted standard charset name, the value is looked up
while the macro is expanded; otherwise the expansion looks it up at
run time.  See the function `charset-info' for more detail."
  (let ((slot 10))
    (cond ((charset-quoted-standard-p charset)
           (aref (charset-info (car (cdr charset))) slot))
          (t
           (list 'aref (list 'charset-info charset) slot)))))
4ed46869 KH |
231 | |
(defmacro charset-short-name (charset)
  "Return the short name of CHARSET.
If CHARSET is a quoted standard charset name, the value is looked up
while the macro is expanded; otherwise the expansion looks it up at
run time.  See the function `charset-info' for more detail."
  (let ((slot 11))
    (cond ((charset-quoted-standard-p charset)
           (aref (charset-info (car (cdr charset))) slot))
          (t
           (list 'aref (list 'charset-info charset) slot)))))
4ed46869 KH |
238 | |
(defmacro charset-long-name (charset)
  "Return the long name of CHARSET.
If CHARSET is a quoted standard charset name, the value is looked up
while the macro is expanded; otherwise the expansion looks it up at
run time.  See the function `charset-info' for more detail."
  (let ((slot 12))
    (cond ((charset-quoted-standard-p charset)
           (aref (charset-info (car (cdr charset))) slot))
          (t
           (list 'aref (list 'charset-info charset) slot)))))
4ed46869 KH |
245 | |
(defmacro charset-description (charset)
  "Return the description string of CHARSET.
If CHARSET is a quoted standard charset name, the value is looked up
while the macro is expanded; otherwise the expansion looks it up at
run time.  See the function `charset-info' for more detail."
  (let ((slot 13))
    (cond ((charset-quoted-standard-p charset)
           (aref (charset-info (car (cdr charset))) slot))
          (t
           (list 'aref (list 'charset-info charset) slot)))))
4ed46869 KH |
252 | |
(defmacro charset-plist (charset)
  "Return the property list of CHARSET.
The expansion always reads the PLIST element at run time.  When
CHARSET is a quoted standard charset name, only the information
vector itself is fetched while the macro is expanded.
See the function `charset-info' for more detail."
  (let ((info-form (if (charset-quoted-standard-p charset)
                       (charset-info (car (cdr charset)))
                     (list 'charset-info charset))))
    (list 'aref info-form 14)))
4ed46869 KH |
261 | |
(defun set-charset-plist (charset plist)
  "Store PLIST as the property list of CHARSET, and return PLIST.
The list is written into element 14 of CHARSET's information vector."
  (let ((info (charset-info charset)))
    (aset info 14 plist)))
265 | ||
d3e7e7cf EZ |
(defun make-char (charset &optional code1 code2)
  "Return a character of CHARSET whose position codes are CODE1 and CODE2.
CODE1 and CODE2 are optional.  If you don't supply enough position
codes, the value is a generic character that stands for all the
characters, or a group of characters, in the character set.
A generic character can be used to index a char table (e.g. `syntax-table').

Such character sets as ascii, eight-bit-control, and eight-bit-graphic
don't have corresponding generic characters.  If CHARSET is one of
them and you don't supply CODE1, return the character of the smallest
code in CHARSET.

If CODE1 or CODE2 are invalid (out of range), this function signals an
error.  However, the eighth bit of both CODE1 and CODE2 is zeroed
before they are used to index CHARSET.  Thus you may use, say, the
actual ISO 8859 character code rather than subtracting 128, as you
would need to index the corresponding Emacs charset."
  (let ((id (charset-id charset)))
    (make-char-internal id code1 code2)))
a6acd8a2 KH |
284 | |
;; Byte-compile handler for `make-char': compile a call to it as a
;; direct call to `make-char-internal'.  When the charset argument is
;; a quoted standard charset name, its ID is computed here; otherwise
;; a `charset-id' form is emitted in its place.
(put 'make-char 'byte-compile
     (lambda (form)
       (let* ((charset-arg (car (cdr form)))
              (id-form (if (charset-quoted-standard-p charset-arg)
                           (charset-id (car (cdr charset-arg)))
                         (list 'charset-id charset-arg))))
         (byte-compile-normal-call
          (append (list 'make-char-internal id-form)
                  (nthcdr 2 form))))))
4ed46869 | 294 | |
(defun charset-list ()
  "Return the list of all charsets defined so far.

This function exists only for backward compatibility; the same value
is available as the variable `charset-list'."
  charset-list)
301 | ||
13d5617d KH |
(defsubst generic-char-p (char)
  "Return t if and only if CHAR is a generic character.
See also the documentation of `make-char'."
  ;; NOTE(review): the literal 0400 looks like C-style octal (256);
  ;; confirm how the Lisp reader interprets it and whether that
  ;; matches the intent.
  (when (>= char 0400)
    (let* ((parts (split-char char))
           (code1 (nth 1 parts))
           (code2 (nth 2 parts)))
      ;; A zero position code marks a generic character, except in
      ;; the `composition' charset.
      (and (or (= code1 0) (eq code2 0))
           (not (eq (car parts) 'composition))))))
69eba008 | 309 | |
(defun decode-char (ccs code-point &optional restriction)
  "Return the character that CODE-POINT denotes in coded character set CCS.
Return nil if such a character is not supported.
Currently the only supported coded character set is `ucs' (ISO/IEC
10646: Universal Multi-Octet Coded Character Set).  The result is
translated through the translation-table named
`utf-translation-table-for-decode', or, if `utf-translate-cjk-mode'
is non-nil, through the translation-hash-table named
`utf-subst-table-for-decode'.

Optional argument RESTRICTION specifies a way to map the pair of CCS
and CODE-POINT to a character.  Currently not supported and just ignored."
  (when (eq ccs 'ucs)
    (or (and utf-translate-cjk-mode
             (utf-lookup-subst-table-for-decode code-point))
        (let* ((offset nil)
               (char
                (cond
                 ((< code-point 160)
                  code-point)
                 ((< code-point 256)
                  (make-char 'latin-iso8859-1 code-point))
                 ;; The remaining ranges map onto two-dimensional
                 ;; charsets with 96 characters per row; position
                 ;; codes start at 32.
                 ((< code-point #x2500)
                  (setq offset (- code-point #x0100))
                  (make-char 'mule-unicode-0100-24ff
                             (+ (/ offset 96) 32)
                             (+ (% offset 96) 32)))
                 ((< code-point #x3400)
                  (setq offset (- code-point #x2500))
                  (make-char 'mule-unicode-2500-33ff
                             (+ (/ offset 96) 32)
                             (+ (% offset 96) 32)))
                 ((and (>= code-point #xe000) (< code-point #x10000))
                  (setq offset (- code-point #xe000))
                  (make-char 'mule-unicode-e000-ffff
                             (+ (/ offset 96) 32)
                             (+ (% offset 96) 32))))))
          (and char
               (or (aref (get 'utf-translation-table-for-decode
                              'translation-table)
                         char)
                   char))))))
cc57cc54 KH |
348 | |
(defun encode-char (char ccs &optional restriction)
  "Return the code-point in coded character set CCS that corresponds to CHAR.
Return nil if CHAR is not included in CCS.
Currently the only supported coded character set is `ucs' (ISO/IEC
10646: Universal Multi-Octet Coded Character Set).  CHAR is first
translated through the translation-table named
`utf-translation-table-for-encode', or, if `utf-translate-cjk-mode'
is non-nil, through the translation-hash-table named
`utf-subst-table-for-encode'.

CHAR should be in one of these charsets:
  ascii, latin-iso8859-1, mule-unicode-0100-24ff, mule-unicode-2500-33ff,
  mule-unicode-e000-ffff, eight-bit-control
Otherwise, return nil.

Optional argument RESTRICTION specifies a way to map CHAR to a
code-point in CCS.  Currently not supported and just ignored."
  (let ((parts (split-char char)))
    (when (eq ccs 'ucs)
      (or (and utf-translate-cjk-mode
               (utf-lookup-subst-table-for-encode char))
          (let* ((table (get 'utf-translation-table-for-encode
                             'translation-table))
                 (trans (aref table char)))
            ;; If the table maps CHAR, classify the translated
            ;; character instead.
            (if trans
                (setq parts (split-char trans)))
            (let ((charset (car parts))
                  (code1 (nth 1 parts))
                  (code2 (nth 2 parts)))
              (cond
               ((eq charset 'ascii)
                (or trans char))
               ((eq charset 'latin-iso8859-1)
                (+ code1 128))
               ((eq charset 'eight-bit-control)
                char)
               (t
                ;; Two-dimensional mule-unicode charsets: 96 characters
                ;; per row, position codes starting at 32.
                (let ((base (cdr (assq charset
                                       '((mule-unicode-0100-24ff . #x0100)
                                         (mule-unicode-2500-33ff . #x2500)
                                         (mule-unicode-e000-ffff . #xe000))))))
                  (and base
                       (+ base (+ (* (- code1 32) 96)
                                  (- code2 32)))))))))))))
cc57cc54 | 393 | |
0269ddfb | 394 | \f |
e76938e7 | 395 | ;; Coding system stuff |
4ed46869 | 396 | |
8057896b | 397 | ;; Coding system is a symbol that has the property `coding-system'. |
4ed46869 | 398 | ;; |
8057896b KH |
399 | ;; The value of the property `coding-system' is a vector of the |
400 | ;; following format: | |
d3675a42 | 401 | ;; [TYPE MNEMONIC DOC-STRING PLIST FLAGS] |
8057896b | 402 | ;; We call this vector the coding-spec. See comments in src/coding.c
521d4010 | 403 | ;; for more detail. |
8057896b KH |
404 | |
(defconst coding-spec-type-idx 0
  "Index of the TYPE element in a coding-spec vector.")
(defconst coding-spec-mnemonic-idx 1
  "Index of the MNEMONIC element in a coding-spec vector.")
(defconst coding-spec-doc-string-idx 2
  "Index of the DOC-STRING element in a coding-spec vector.")
(defconst coding-spec-plist-idx 3
  "Index of the PLIST element in a coding-spec vector.")
(defconst coding-spec-flags-idx 4
  "Index of the FLAGS element in a coding-spec vector.")
410 | ||
a6acd8a2 KH |
411 | ;; PLIST is a property list of a coding system. To share PLIST among |
412 | ;; alias coding systems, a coding system has PLIST in coding-spec | |
413 | ;; instead of having it in normal property list of Lisp symbol. | |
414 | ;; Here's a list of coding system properties currently being used. | |
0269ddfb KH |
415 | ;; |
416 | ;; o coding-category | |
417 | ;; | |
418 | ;; The value is a coding category the coding system belongs to. The | |
521d4010 | 419 | ;; function `make-coding-system' sets this value automatically |
1b46a680 | 420 | ;; unless its argument PROPERTIES specifies this property. |
4ed46869 | 421 | ;; |
0269ddfb | 422 | ;; o alias-coding-systems |
4ed46869 | 423 | ;; |
0269ddfb KH |
424 | ;; The value is a list of coding systems of the same alias group. The |
425 | ;; first element is the coding system made at first, which we call as | |
1b46a680 KH |
426 | ;; `base coding system'. The function `make-coding-system' sets this |
427 | ;; value automatically and `define-coding-system-alias' updates it. | |
0269ddfb | 428 | ;; |
3c948dde KH |
429 | ;; See the documentation of make-coding-system for the meanings of the |
430 | ;; following properties. | |
4ed46869 | 431 | ;; |
3c948dde | 432 | ;; o post-read-conversion |
0269ddfb | 433 | ;; o pre-write-conversion |
f967223b | 434 | ;; o translation-table-for-decode |
f967223b | 435 | ;; o translation-table-for-encode |
c11a8f77 | 436 | ;; o safe-chars |
a6acd8a2 | 437 | ;; o safe-charsets |
b25eef20 | 438 | ;; o mime-charset |
c76b5c99 | 439 | ;; o valid-codes (meaningful only for a coding system based on CCL) |
c76b5c99 | 440 | |
0269ddfb | 441 | |
(defsubst coding-system-spec (coding-system)
  "Return the coding-spec of CODING-SYSTEM.
CODING-SYSTEM is validated with `check-coding-system' first; the spec
is the value of the `coding-system' property of the resulting symbol."
  (let ((checked (check-coding-system coding-system)))
    (get checked 'coding-system)))
4ed46869 | 445 | |
(defun coding-system-type (coding-system)
  "Return the coding type of CODING-SYSTEM.
A coding type is an integer value indicating the encoding method of
CODING-SYSTEM.  See the function `make-coding-system' for more detail."
  (let ((spec (coding-system-spec coding-system)))
    (aref spec coding-spec-type-idx)))
4ed46869 | 451 | |
(defun coding-system-mnemonic (coding-system)
  "Return the mnemonic character of CODING-SYSTEM.
The mnemonic character of a coding system is shown in the mode line
to indicate the coding system.  If CODING-SYSTEM has no coding-spec
\(for instance when it is nil), return ?-."
  (let ((spec (coding-system-spec coding-system)))
    (if (null spec)
        ?-
      (aref spec coding-spec-mnemonic-idx))))
4ed46869 | 458 | |
(defun coding-system-doc-string (coding-system)
  "Return the documentation string of CODING-SYSTEM.
The string is read from the DOC-STRING element of the coding-spec."
  (let ((spec (coding-system-spec coding-system)))
    (aref spec coding-spec-doc-string-idx)))
4ed46869 | 462 | |
(defun coding-system-plist (coding-system)
  "Return the property list of CODING-SYSTEM.
The list is read from the PLIST element of the coding-spec."
  (let ((spec (coding-system-spec coding-system)))
    (aref spec coding-spec-plist-idx)))
d3675a42 | 466 | |
(defun coding-system-flags (coding-system)
  "Return the `flags' of CODING-SYSTEM.
The `flags' of a coding system is a vector of length 32 holding
detailed information about the coding system.  See the function
`make-coding-system' for more detail."
  (let ((spec (coding-system-spec coding-system)))
    (aref spec coding-spec-flags-idx)))
0269ddfb KH |
473 | |
(defun coding-system-get (coding-system prop)
  "Return the value of property PROP in CODING-SYSTEM's property list."
  (let ((plist (coding-system-plist coding-system)))
    (plist-get plist prop)))
477 | ||
(defun coding-system-put (coding-system prop val)
  "Set property PROP in CODING-SYSTEM's property list to VAL."
  (let ((plist (coding-system-plist coding-system)))
    (cond
     ;; There is no property list yet: store a fresh one in the
     ;; coding-spec.
     ((null plist)
      (aset (coding-system-spec coding-system) coding-spec-plist-idx
            (list prop val)))
     ;; Otherwise update the existing list.
     (t
      (plist-put plist prop val)))))
0269ddfb KH |
485 | |
(defun coding-system-category (coding-system)
  "Return the coding category of CODING-SYSTEM.
This is the value of its `coding-category' property.
See also `coding-category-list'."
  (let ((category (coding-system-get coding-system 'coding-category)))
    category))
490 | ||
(defun coding-system-base (coding-system)
  "Return the base coding system of CODING-SYSTEM.
A base coding system is one made by `make-coding-system'.
Neither aliases nor subsidiary coding systems are base coding systems.
The value is the first element of the `alias-coding-systems' property."
  (let ((aliases (coding-system-get coding-system 'alias-coding-systems)))
    (car aliases)))
496 | ||
0269ddfb KH |
497 | ;; Coding system also has a property `eol-type'. |
498 | ;; | |
499 | ;; This property indicates how the coding system handles end-of-line | |
500 | ;; format. The value is integer 0, 1, 2, or a vector of three coding | |
501 | ;; systems. Each integer value 0, 1, and 2 indicates the format of | |
502 | ;; end-of-line LF, CRLF, and CR respectively. A vector value | |
503 | ;; indicates that the format of end-of-line should be detected | |
504 | ;; automatically. Nth element of the vector is the subsidiary coding | |
505 | ;; system whose `eol-type' property is N. | |
4ed46869 | 506 | |
(defun coding-system-eol-type (coding-system)
  "Return the eol-type of CODING-SYSTEM.
The value is the `eol-type' property of the symbol CODING-SYSTEM: an
integer 0, 1, or 2, or a vector of coding systems.

The integers 0, 1, and 2 stand for the end-of-line formats LF, CRLF,
and CR respectively.

A vector means that the end-of-line format should be detected
automatically.  The Nth element of the vector is the subsidiary
coding system whose eol-type is N."
  (get coding-system 'eol-type))
bd882697 | 518 | |
2e729bfa JB |
(defun coding-system-eol-type-mnemonic (coding-system)
  "Return the string indicating the end-of-line format of CODING-SYSTEM.
The mnemonic is taken from `eol-mnemonic-unix', `eol-mnemonic-dos',
or `eol-mnemonic-mac' for eol-types 0, 1, and 2, and from
`eol-mnemonic-undecided' otherwise.  A non-string mnemonic is
converted with `char-to-string'."
  (let* ((kind (coding-system-eol-type coding-system))
         (mnemonic (if (eq kind 0)
                       eol-mnemonic-unix
                     (if (eq kind 1)
                         eol-mnemonic-dos
                       (if (eq kind 2)
                           eol-mnemonic-mac
                         eol-mnemonic-undecided)))))
    (cond ((stringp mnemonic) mnemonic)
          (t (char-to-string mnemonic)))))
529 | ||
857ea15c AS |
(defun coding-system-lessp (x y)
  "Return non-nil if coding system X should sort before coding system Y.
The coding systems `no-conversion', `emacs-mule', and `undecided'
come first, in that order.  All others are compared by mnemonic
character: first ignoring case, then, for equal letters, by the
character code itself."
  (let ((leaders '(no-conversion emacs-mule undecided))
        (verdict 'undecided-yet))
    ;; Walk the privileged names in priority order; for each one, X
    ;; matching wins before Y matching loses.
    (while (and leaders (eq verdict 'undecided-yet))
      (cond ((eq x (car leaders)) (setq verdict t))
            ((eq y (car leaders)) (setq verdict nil)))
      (setq leaders (cdr leaders)))
    (if (not (eq verdict 'undecided-yet))
        verdict
      (let* ((mx (coding-system-mnemonic x))
             (my (coding-system-mnemonic y))
             (lx (downcase mx))
             (ly (downcase my)))
        (or (< lx ly)
            (and (<= lx ly)
                 (< mx my)))))))
542 | ||
5e2e859a KH |
(defun coding-system-equal (coding-system-1 coding-system-2)
  "Return t if and only if CODING-SYSTEM-1 and CODING-SYSTEM-2 are identical.
Two coding systems are identical if the two symbols are the same, or
if one is an alias of the other: their coding-specs are `equal' and
their eol-types are either the same or both vectors."
  (cond
   ((eq coding-system-1 coding-system-2) t)
   ((not (equal (coding-system-spec coding-system-1)
                (coding-system-spec coding-system-2)))
    nil)
   (t
    (let ((eol-1 (coding-system-eol-type coding-system-1))
          (eol-2 (coding-system-eol-type coding-system-2)))
      (if (eq eol-1 eol-2)
          t
        (and (vectorp eol-1) (vectorp eol-2)))))))
554 | ||
(defun add-to-coding-system-list (coding-system)
  "Add CODING-SYSTEM to `coding-system-list' while keeping it sorted.
The sort order is the one defined by `coding-system-lessp'."
  (cond
   ;; An empty list, or an element sorting before the current head:
   ;; put it at the front.
   ((or (null coding-system-list)
        (coding-system-lessp coding-system (car coding-system-list)))
    (setq coding-system-list (cons coding-system coding-system-list)))
   (t
    ;; Bisect the list: CELL is the cons after which the new element
    ;; will go, SPAN the number of conses still under consideration.
    (let ((span (length coding-system-list))
          (cell coding-system-list)
          half probe)
      (while (> span 1)
        (setq half (/ span 2)
              probe (nthcdr half cell))
        (if (coding-system-lessp (car probe) coding-system)
            (setq cell probe
                  span (- span half))
          (setq span half)))
      (setcdr cell (cons coding-system (cdr cell)))))))
569 | ||
(defun coding-system-list (&optional base-only)
  "Return a list of all existing non-subsidiary coding systems.
If optional arg BASE-ONLY is non-nil, only base coding systems are listed.
The value doesn't include subsidiary coding systems, which are made
automatically from bases and aliases for the various end-of-line
formats (e.g. iso-latin-1-unix, koi8-r-dos).
The value is a fresh list; the variable `coding-system-list' is not
modified."
  ;; HEAD is a dummy cons in front of a copy of the list.  Splicing
  ;; through it lets the first real element be removed like any other,
  ;; so the result must be read back from HEAD rather than from the
  ;; original first cons.
  (let* ((head (cons nil (copy-sequence coding-system-list)))
         (tail head))
    ;; Remove subsidiary coding systems (eol variants) and alias
    ;; coding systems (if necessary).
    (while (cdr tail)
      (let* ((coding (car (cdr tail)))
             (aliases (coding-system-get coding 'alias-coding-systems)))
        (if (or
             ;; CODING is an eol variant if not in ALIASES.
             (not (memq coding aliases))
             ;; CODING is an alias if it is not car of ALIASES.
             (and base-only (not (eq coding (car aliases)))))
            (setcdr tail (cdr (cdr tail)))
          (setq tail (cdr tail)))))
    (cdr head)))
591 | ||
6fb7766c DL |
(defun map-charset-chars (func charset)
  "Use FUNC to map over all characters in CHARSET for side effects.
FUNC is a function of two args, the start and end (inclusive) of a
character code range.  Thus FUNC should iterate over [START, END].
For a one-dimensional CHARSET, FUNC is called once; otherwise it is
called once for each value of the first position code."
  (let* ((dimension (charset-dimension charset))
         (count (charset-chars charset))
         ;; Position codes start at 33 for 94-character sets and at 32
         ;; otherwise.
         (first (if (= count 94) 33 32))
         (last (+ first count -1)))
    (if (= dimension 1)
        (funcall func
                 (make-char charset first)
                 (make-char charset last))
      (let ((row 0))
        (while (< row count)
          (funcall func
                   (make-char charset (+ row first) first)
                   (make-char charset (+ row first) last))
          (setq row (1+ row)))))))
609 | ||
;; `register-char-codings' is kept only as an obsolete alias of `ignore'.
(defalias 'register-char-codings (function ignore) "")
(make-obsolete 'register-char-codings
               "it exists just for backward compatibility, and does nothing."
               "21.3")
614 | ||
(defconst char-coding-system-table nil
  "Obsolete variable whose value is always nil.
It is kept only for backward compatibility.")
c11a8f77 | 618 | |
(defun make-subsidiary-coding-system (coding-system)
  "Create the end-of-line variants of CODING-SYSTEM.
The variants are the symbols CODING-SYSTEM-unix, CODING-SYSTEM-dos and
CODING-SYSTEM-mac.  Each of them shares the coding spec of
CODING-SYSTEM, gets its `eol-type' property set to 0, 1 and 2
respectively, and is registered in the coding system list and in
`coding-system-alist'.
Value is a vector of the three symbols, in that order."
  (let ((spec (coding-system-spec coding-system))
        (variants (vector (intern (format "%s-unix" coding-system))
                          (intern (format "%s-dos" coding-system))
                          (intern (format "%s-mac" coding-system))))
        (eol 0))
    (while (< eol 3)
      (let* ((variant (aref variants eol))
             (name (symbol-name variant)))
        (put variant 'coding-system spec)
        ;; The index in the vector doubles as the numeric eol type.
        (put variant 'eol-type eol)
        (put variant 'coding-system-define-form nil)
        (add-to-coding-system-list variant)
        ;; Register the name for completion only once.
        (unless (assoc name coding-system-alist)
          (setq coding-system-alist
                (cons (list name) coding-system-alist))))
      (setq eol (1+ eol)))
    variants))
4ed46869 | 636 | |
50c29104 KH |
(defun transform-make-coding-system-args (name type &optional doc-string props)
  "For internal use only.
Transform XEmacs style args for `make-coding-system' to Emacs style.

NAME is the coding system symbol, TYPE is an XEmacs type symbol
\(`shift-jis', `iso2022', `big5' or `ccl'), DOC-STRING is the
documentation string and PROPS is an XEmacs style property list.

Value is a list of transformed arguments, in the order
\(NAME TYPE MNEMONIC DOC-STRING FLAGS PROPERTIES EOL-TYPE).
PROPERTIES is an alist, which is the format `make-coding-system'
expects; it carries over only `post-read-conversion' and
`pre-write-conversion' from PROPS.

Signal an error if TYPE is not supported, or if TYPE is `iso2022' and
PROPS requests `escape-quoted'."
  (let ((mnemonic (string-to-char (or (plist-get props 'mnemonic) "?")))
        (eol-type (plist-get props 'eol-type))
        properties)
    ;; Map the XEmacs end-of-line names onto the Emacs ones.  Any other
    ;; value (including nil) is passed through unchanged.
    (setq eol-type (cond ((eq eol-type 'lf) 'unix)
                         ((eq eol-type 'crlf) 'dos)
                         ((eq eol-type 'cr) 'mac)
                         (t eol-type)))
    ;; Build PROPERTIES as an alist.  `make-coding-system' treats a
    ;; PROPERTIES list whose first element is not a cons as an old-style
    ;; list of safe charsets, so a plist here would be misinterpreted.
    (dolist (key '(pre-write-conversion post-read-conversion))
      (let ((value (plist-get props key)))
        (if value
            (setq properties (cons (cons key value) properties)))))
    (cond
     ((eq type 'shift-jis)
      `(,name 1 ,mnemonic ,doc-string () ,properties ,eol-type))
     ((eq type 'iso2022)                ; This is not perfect.
      (if (plist-get props 'escape-quoted)
          (error "escape-quoted is not supported: %S"
                 `(,name ,type ,doc-string ,props)))
      (let* ((input-conversion (plist-get props 'input-charset-conversion))
             (output-conversion (plist-get props 'output-charset-conversion))
             ;; JIS X 0201 Roman is used in place of ASCII only when
             ;; both conversion directions ask for it.
             (use-roman
              (and
               (eq (cadr (assoc 'latin-jisx0201 input-conversion))
                   'ascii)
               (eq (cadr (assoc 'ascii output-conversion))
                   'latin-jisx0201)))
             ;; Likewise for JIS X 0208-1978 in place of JIS X 0208.
             (use-oldjis
              (and
               (eq (cadr (assoc 'japanese-jisx0208-1978 input-conversion))
                   'japanese-jisx0208)
               (eq (cadr (assoc 'japanese-jisx0208 output-conversion))
                   'japanese-jisx0208-1978)))
             ;; One designation per graphic register G0..G3.  A charset
             ;; becomes (nil CHARSET) when output is forced for that
             ;; register, and (CHARSET t) otherwise; a value that is
             ;; not a charset is passed through unchanged.
             (designations
              (mapcar
               (lambda (keys)
                 (let ((charset (plist-get props (car keys))))
                   (cond ((not (charsetp charset)) charset)
                         ((plist-get props (cdr keys)) (list nil charset))
                         (t (list charset t)))))
               '((charset-g0 . force-g0-on-output)
                 (charset-g1 . force-g1-on-output)
                 (charset-g2 . force-g2-on-output)
                 (charset-g3 . force-g3-on-output)))))
        `(,name 2 ,mnemonic ,doc-string
                (,@designations
                 ,(plist-get props 'short)
                 ,(not (plist-get props 'no-ascii-eol))
                 ,(not (plist-get props 'no-ascii-cntl))
                 ,(plist-get props 'seven)
                 t
                 ,(not (plist-get props 'lock-shift))
                 ,use-roman
                 ,use-oldjis
                 ,(plist-get props 'no-iso6429)
                 nil nil nil nil)
                ,properties ,eol-type)))
     ((eq type 'big5)
      `(,name 3 ,mnemonic ,doc-string () ,properties ,eol-type))
     ((eq type 'ccl)
      `(,name 4 ,mnemonic ,doc-string
              (,(plist-get props 'decode) . ,(plist-get props 'encode))
              ,properties ,eol-type))
     (t
      (error "unsupported XEmacs style make-coding-system arguments: %S"
             `(,name ,type ,doc-string ,props))))))
717 | ||
(defun make-coding-system (coding-system type mnemonic doc-string
                                         &optional
                                         flags
                                         properties
                                         eol-type)
  "Define a new coding system CODING-SYSTEM (symbol).
Remaining arguments are TYPE, MNEMONIC, DOC-STRING, FLAGS (optional),
and PROPERTIES (optional) which construct a coding-spec of CODING-SYSTEM
in the following format:
	[TYPE MNEMONIC DOC-STRING PLIST FLAGS]

TYPE is an integer value indicating the type of the coding system as follows:
  0: Emacs internal format,
  1: Shift-JIS (or MS-Kanji) used mainly on Japanese PCs,
  2: ISO-2022 including many variants,
  3: Big5 used mainly on Chinese PCs,
  4: private, CCL programs provide encoding/decoding algorithm,
  5: Raw-text, which means that text contains random 8-bit codes.

MNEMONIC is a character to be displayed on mode line for the coding system.

DOC-STRING is a documentation string for the coding system.

FLAGS specifies more detailed information of the coding system as follows:

  If TYPE is 2 (ISO-2022), FLAGS is a list of these elements:
      CHARSET0, CHARSET1, CHARSET2, CHARSET3, SHORT-FORM,
      ASCII-EOL, ASCII-CNTL, SEVEN, LOCKING-SHIFT, SINGLE-SHIFT,
      USE-ROMAN, USE-OLDJIS, NO-ISO6429, INIT-BOL, DESIGNATION-BOL,
      SAFE, ACCEPT-LATIN-EXTRA-CODE.
    CHARSETn are character sets initially designated to Gn graphic registers.
      If CHARSETn is nil, Gn is never used.
      If CHARSETn is t, Gn can be used but nothing designated initially.
      If CHARSETn is a list of character sets, those character sets are
        designated to Gn on output, but nothing designated to Gn initially.
        But, character set `ascii' can be designated only to G0.
    SHORT-FORM non-nil means use short designation sequence on output.
    ASCII-EOL non-nil means designate ASCII to g0 at end of line on output.
    ASCII-CNTL non-nil means designate ASCII to g0 before control codes and
      SPACE on output.
    SEVEN non-nil means use 7-bit code only on output.
    LOCKING-SHIFT non-nil means use locking-shift.
    SINGLE-SHIFT non-nil means use single-shift.
    USE-ROMAN non-nil means designate JIS0201-1976-Roman instead of ASCII.
    USE-OLDJIS non-nil means designate JIS0208-1976 instead of JIS0208-1983.
    NO-ISO6429 non-nil means not use ISO6429's direction specification.
    INIT-BOL non-nil means any designation state is assumed to be reset
      to initial at each beginning of line on output.
    DESIGNATION-BOL non-nil means designation sequences should be placed
      at beginning of line on output.
    SAFE non-nil means convert unsafe characters to `?' on output.
      Characters not specified in the property `safe-charsets' nor
      `safe-chars' are unsafe.
    ACCEPT-LATIN-EXTRA-CODE non-nil means code-detection routine accepts
      a code specified in `latin-extra-code-table' (which see) as a valid
      code of the coding system.

  If TYPE is 4 (private), FLAGS should be a cons of CCL programs, for
    decoding and encoding.  CCL programs should be specified by their
    symbols.

PROPERTIES is an alist of properties vs the corresponding values.  The
following properties are recognized:

  o post-read-conversion

  The value is a function to call after some text is inserted and
  decoded by the coding system itself and before any functions in
  `after-insert-file-functions' are called.  The argument of this
  function is the same as for a function in
  `after-insert-file-functions', i.e. LENGTH of the text inserted,
  with point at the head of the text to be decoded.

  o pre-write-conversion

  The value is a function to call after all functions in
  `write-region-annotate-functions' and `buffer-file-format' are
  called, and before the text is encoded by the coding system itself.
  The arguments to this function are the same as those of a function
  in `write-region-annotate-functions', i.e. FROM and TO, specifying
  a region of text.

  o translation-table-for-decode

  The value is a translation table to be applied on decoding.  See
  the function `make-translation-table' for the format of translation
  table.  This is not applicable to type 4 (CCL-based) coding systems.

  o translation-table-for-encode

  The value is a translation table to be applied on encoding.  This is
  not applicable to type 4 (CCL-based) coding systems.

  o safe-chars

  The value is a char table.  If a character has non-nil value in it,
  the character is safely supported by the coding system.  This
  overrides the specification of safe-charsets.

  o safe-charsets

  The value is a list of charsets safely supported by the coding
  system.  The value t means that all charsets Emacs handles are
  supported.  Even if some charset is not in this list, it doesn't
  mean that the charset can't be encoded in the coding system;
  it just means that some other receiver of text encoded
  in the coding system won't be able to handle that charset.

  o mime-charset

  The value is a symbol whose name is the `MIME-charset' parameter of
  the coding system.

  o mime-text-unsuitable

  A non-nil value means the `mime-charset' property names a charset
  which is unsuitable for the top-level media type \"text\".

  o valid-codes (meaningful only for a coding system based on CCL)

  The value is a list to indicate valid byte ranges of the encoded
  file.  Each element of the list is an integer or a cons of integer.
  In the former case, the integer value is a valid byte code.  In the
  latter case, the integers specify the range of valid byte codes.

  o composition (meaningful only when TYPE is 0 or 2)

  If the value is non-nil, the coding system preserves composition
  information.

  o ascii-incompatible

  If the value is non-nil, the coding system is not compatible
  with ASCII, which means it encodes or decodes ASCII character
  string to the different byte sequence.

These properties are set in PLIST, a property list.  This function
also sets properties `coding-category' and `alias-coding-systems'
automatically.

EOL-TYPE specifies the EOL type of the coding-system in one of the
following formats:

  o symbol (unix, dos, or mac)

	The symbol `unix' means Unix-like EOL (LF), `dos' means
	DOS-like EOL (CRLF), and `mac' means MAC-like EOL (CR).

  o number (0, 1, or 2)

	The number 0, 1, and 2 mean UNIX, DOS, and MAC-like EOL
	respectively.

  o vector of coding-systems of length 3

	The EOL type is detected automatically for the coding system.
	And, according to the detected EOL type, one of the coding
	systems in the vector is selected.  Elements of the vector
	correspond to Unix-like EOL, DOS-like EOL, and Mac-like EOL
	in this order.

If EOL-TYPE is nil, the three subsidiary coding systems are created
with `make-subsidiary-coding-system' and used as that vector.

Kludgy features for backward compatibility:

  1. If TYPE is 4 and car or cdr of FLAGS is a vector, the vector is
  treated as a compiled CCL code.

  2. If PROPERTIES is just a list of character sets, the list is set as
  a value of `safe-charsets' in PLIST.

The value is CODING-SYSTEM."

  ;; For compatibility with XEmacs, we check the type of TYPE.  If it
  ;; is a symbol, perhaps, this function is called with XEmacs-style
  ;; arguments.  Here, try to transform that kind of arguments to
  ;; Emacs style.
  (if (symbolp type)
      (let ((args (transform-make-coding-system-args coding-system type
                                                     mnemonic doc-string)))
        (setq coding-system (car args)
              type (nth 1 args)
              mnemonic (nth 2 args)
              doc-string (nth 3 args)
              flags (nth 4 args)
              properties (nth 5 args)
              eol-type (nth 6 args))))

  ;; Set a value of `coding-system' property.
  (let ((coding-spec (make-vector 5 nil))
        (no-initial-designation t)
        (no-alternative-designation t)
        (accept-latin-extra-code nil)
        coding-category)
    (if (or (not (integerp type)) (< type 0) (> type 5))
        (error "TYPE argument must be 0..5"))
    ;; The mnemonic must be strictly above SPACE and at most 127.
    (if (or (not (integerp mnemonic)) (<= mnemonic ? ) (> mnemonic 127))
        (error "MNEMONIC argument must be an ASCII printable character"))
    (aset coding-spec coding-spec-type-idx type)
    (aset coding-spec coding-spec-mnemonic-idx mnemonic)
    (aset coding-spec coding-spec-doc-string-idx
          (purecopy (if (stringp doc-string) doc-string "")))
    (cond ((= type 0)
           (setq coding-category 'coding-category-emacs-mule))
          ((= type 1)
           (setq coding-category 'coding-category-sjis))
          ((= type 2)                   ; ISO2022
           (let ((i 0)
                 (vec (make-vector 32 nil))
                 (g1-designation nil)
                 (fl flags))
             ;; The first four elements of FLAGS are CHARSET0..CHARSET3.
             ;; Validate them and record what they imply for the
             ;; coding category chosen below.
             (while (< i 4)
               (let ((charset (car fl)))
                 (if (and no-initial-designation
                          (> i 0)
                          (or (charsetp charset)
                              (and (consp charset)
                                   (charsetp (car charset)))))
                     (setq no-initial-designation nil))
                 (if (charsetp charset)
                     (if (= i 1) (setq g1-designation charset))
                   (if (consp charset)
                       (let ((tail charset)
                             elt)
                         (while tail
                           (setq elt (car tail))
                           (if (eq elt t)
                               (setq no-alternative-designation nil)
                             (if (and elt (not (charsetp elt)))
                                 (error "Invalid charset: %s" elt)))
                           (setq tail (cdr tail)))
                         (setq g1-designation (car charset)))
                     (if charset
                         (if (eq charset t)
                             (setq no-alternative-designation nil)
                           (error "Invalid charset: %s" charset)))))
                 (aset vec i charset))
               (setq fl (cdr fl) i (1+ i)))
             ;; Copy the remaining flags verbatim into VEC.
             (while (and (< i 32) fl)
               (aset vec i (car fl))
               (if (and (= i 16)        ; ACCEPT-LATIN-EXTRA-CODE
                        (car fl))
                   (setq accept-latin-extra-code t))
               (setq fl (cdr fl) i (1+ i)))
             (aset coding-spec 4 vec)
             ;; Indices 7, 8 and 9 of VEC are SEVEN, LOCKING-SHIFT and
             ;; SINGLE-SHIFT in the order documented for FLAGS.
             (setq coding-category
                   (if (aref vec 8)     ; Use locking-shift.
                       (or (and (aref vec 7) 'coding-category-iso-7-else)
                           'coding-category-iso-8-else)
                     (if (aref vec 7)   ; 7-bit only.
                         (if (aref vec 9) ; Use single-shift.
                             'coding-category-iso-7-else
                           (if no-alternative-designation
                               'coding-category-iso-7-tight
                             'coding-category-iso-7))
                       (if (or no-initial-designation
                               (not no-alternative-designation))
                           'coding-category-iso-8-else
                         (if (and (charsetp g1-designation)
                                  (= (charset-dimension g1-designation) 2))
                             'coding-category-iso-8-2
                           'coding-category-iso-8-1)))))))
          ((= type 3)
           (setq coding-category 'coding-category-big5))
          ((= type 4)                   ; private
           (setq coding-category 'coding-category-ccl)
           (if (not (consp flags))
               (error "Invalid FLAGS argument for TYPE 4 (CCL)")
             (let ((decoder (check-ccl-program
                             (car flags)
                             (intern (format "%s-decoder" coding-system))))
                   (encoder (check-ccl-program
                             (cdr flags)
                             (intern (format "%s-encoder" coding-system)))))
               (if (and decoder encoder)
                   (aset coding-spec 4 (cons decoder encoder))
                 (error "Invalid FLAGS argument for TYPE 4 (CCL)")))))
          (t                            ; i.e. (= type 5)
           (setq coding-category 'coding-category-raw-text)))

    (let ((plist (list 'coding-category coding-category
                       'alias-coding-systems (list coding-system))))
      (if no-initial-designation
          (plist-put plist 'no-initial-designation t))
      (if (and properties
               (or (eq properties t)
                   (not (consp (car properties)))))
          ;; In the old version, the arg PROPERTIES is a list to be
          ;; set in PLIST as a value of property `safe-charsets'.
          (setq properties (list (cons 'safe-charsets properties))))
      ;; In the current version PROPERTIES is an alist.
      ;; Reflect it into PLIST one by one while handling safe-chars
      ;; specially.
      (let ((safe-charsets (cdr (assq 'safe-charsets properties)))
            (safe-chars (cdr (assq 'safe-chars properties)))
            (l properties)
            prop val)
        ;; If only safe-charsets is specified, make a char-table from
        ;; it, and store that char-table as the value of `safe-chars'.
        (if (and (not safe-chars) safe-charsets)
            (let (charset)
              (if (eq safe-charsets t)
                  (setq safe-chars t)
                (setq safe-chars (make-char-table 'safe-chars))
                (while safe-charsets
                  (setq charset (car safe-charsets)
                        safe-charsets (cdr safe-charsets))
                  (cond ((eq charset 'ascii)) ; just ignore
                        ((eq charset 'eight-bit-control)
                         (let ((i 128))
                           (while (< i 160)
                             (aset safe-chars i t)
                             (setq i (1+ i)))))
                        ((eq charset 'eight-bit-graphic)
                         (let ((i 160))
                           (while (< i 256)
                             (aset safe-chars i t)
                             (setq i (1+ i)))))
                        (t
                         (aset safe-chars (make-char charset) t))))
                ;; Codes 128..159 listed in `latin-extra-code-table'
                ;; are also marked safe when the flag is set.
                (if accept-latin-extra-code
                    (let ((i 128))
                      (while (< i 160)
                        (if (aref latin-extra-code-table i)
                            (aset safe-chars i t))
                        (setq i (1+ i))))))
              (setq l (cons (cons 'safe-chars safe-chars) l))))
        (while l
          (setq prop (car (car l)) val (cdr (car l)) l (cdr l))
          (if (eq prop 'safe-chars)
              (progn
                ;; A symbol with a `translation-table' property stands
                ;; for that table.
                (if (and (symbolp val)
                         (get val 'translation-table))
                    (setq safe-chars (get val 'translation-table)))
                (setq val safe-chars)))
          (plist-put plist prop val)))
      ;; The property `coding-category' may have been set differently
      ;; through PROPERTIES.
      (setq coding-category (plist-get plist 'coding-category))
      (aset coding-spec coding-spec-plist-idx plist))
    (put coding-system 'coding-system coding-spec)
    (put coding-system 'coding-system-define-form nil)
    (put coding-category 'coding-systems
         (cons coding-system (get coding-category 'coding-systems))))

  ;; Next, set a value of `eol-type' property.
  (if (not eol-type)
      ;; If EOL-TYPE is nil, set a vector of subsidiary coding
      ;; systems, each corresponds to a coding system for the detected
      ;; EOL format.
      (setq eol-type (make-subsidiary-coding-system coding-system)))
  ;; NOTE(review): EOL-TYPE looks to be always non-nil at this point
  ;; because of the form above, so the `null' test below appears to be
  ;; unreachable -- confirm before relying on it.
  (setq eol-type
        (cond ((or (eq eol-type 'unix) (null eol-type))
               0)
              ((eq eol-type 'dos)
               1)
              ((eq eol-type 'mac)
               2)
              ((or (and (vectorp eol-type)
                        (= (length eol-type) 3))
                   (and (numberp eol-type)
                        (and (>= eol-type 0)
                             (<= eol-type 2))))
               eol-type)
              (t
               (error "Invalid EOL-TYPE spec:%S" eol-type))))
  (put coding-system 'eol-type eol-type)

  (define-coding-system-internal coding-system)

  ;; At last, register CODING-SYSTEM in `coding-system-list' and
  ;; `coding-system-alist'.
  (add-to-coding-system-list coding-system)
  (or (assoc (symbol-name coding-system) coding-system-alist)
      (setq coding-system-alist (cons (list (symbol-name coding-system))
                                      coding-system-alist)))

  ;; For a coding system of category iso-8-1 and iso-8-2, create
  ;; XXX-with-esc variants.
  (let ((coding-category (coding-system-category coding-system)))
    (if (or (eq coding-category 'coding-category-iso-8-1)
            (eq coding-category 'coding-category-iso-8-2))
        (let ((esc (intern (concat (symbol-name coding-system) "-with-esc")))
              (doc (format "Same as %s but can handle any charsets by ISO's escape sequences." coding-system))
              (safe-charsets (assq 'safe-charsets properties))
              (mime-charset (assq 'mime-charset properties)))
          ;; NOTE(review): the `setcdr' calls below destructively modify
          ;; the cells of the PROPERTIES alist, which may be the list
          ;; object the caller passed in.
          (if safe-charsets
              (setcdr safe-charsets t)
            (setq properties (cons (cons 'safe-charsets t) properties)))
          (if mime-charset
              (setcdr mime-charset nil))
          ;; The variant gets an extra t in the CHARSET0 slot of FLAGS.
          (make-coding-system esc type mnemonic doc
                              (if (listp (car flags))
                                  (cons (append (car flags) '(t)) (cdr flags))
                                (cons (list (car flags) t) (cdr flags)))
                              properties))))

  coding-system)
8057896b | 1112 | |
5470d5b6 KH |
;; Declare the char-table subtype `safe-chars', which is the subtype
;; `make-coding-system' passes to `make-char-table', as having no
;; extra slots.
(put 'safe-chars 'char-table-extra-slots 0)
1114 | ||
(defun define-coding-system-alias (alias coding-system)
  "Define ALIAS as an alias for coding system CODING-SYSTEM.
ALIAS shares the coding spec of CODING-SYSTEM and is registered in the
coding system list and in `coding-system-alist'.

If the eol-type of CODING-SYSTEM is a vector, ALIAS is recorded in the
`alias-coding-systems' property (once, even if this function is
called again for the same ALIAS) and gets its own subsidiary coding
systems.  Otherwise ALIAS simply inherits the eol-type of
CODING-SYSTEM."
  (put alias 'coding-system (coding-system-spec coding-system))
  (put alias 'coding-system-define-form nil)
  (add-to-coding-system-list alias)
  (or (assoc (symbol-name alias) coding-system-alist)
      (setq coding-system-alist (cons (list (symbol-name alias))
                                      coding-system-alist)))
  (let ((eol-type (coding-system-eol-type coding-system)))
    (if (vectorp eol-type)
        (progn
          ;; The alias list is shared through the coding spec, so it is
          ;; extended destructively.  Skip the append when ALIAS is
          ;; already there, otherwise redefining an alias would grow
          ;; the list with duplicates.
          (let ((aliases (coding-system-get alias 'alias-coding-systems)))
            (or (memq alias aliases)
                (nconc aliases (list alias))))
          (put alias 'eol-type (make-subsidiary-coding-system alias)))
      (put alias 'eol-type eol-type))))
4ed46869 | 1129 | |
bbdea948 RS |
(defun merge-coding-systems (first second)
  "Complete coding system FIRST with the aspects only SECOND specifies.
If the base of FIRST is `undecided', use the text conversion of SECOND.
If FIRST does not fix an end-of-line format but SECOND does, use the
end-of-line conversion of SECOND.
Return the resulting coding system."
  (let* ((second-base (coding-system-base second))
         (second-eol (coding-system-eol-type second))
         ;; Non-nil only when SECOND names one definite eol format.
         (second-eol-fixed (and (numberp second-eol)
                                (>= second-eol 0)
                                (<= second-eol 2))))
    ;; Text conversion: inherit it when FIRST leaves it open.
    (when (eq (coding-system-base first) 'undecided)
      (setq first (coding-system-change-text-conversion first second-base)))
    ;; Eol conversion: a vector eol-type means FIRST leaves it open.
    (when (and (vectorp (coding-system-eol-type first))
               second-eol-fixed)
      (setq first (coding-system-change-eol-conversion first second-eol)))
    first))
1144 | ||
2b96ab33 KH |
(defun autoload-coding-system (symbol form)
  "Define SYMBOL as a coding-system that is defined on demand.

FORM is a form to evaluate to define the coding-system.

FORM is stored in the `coding-system-define-form' property of SYMBOL
and of its end-of-line variants SYMBOL-unix, SYMBOL-dos and
SYMBOL-mac.  The four names are also added to `coding-system-alist',
unless they are already there."
  (put symbol 'coding-system-define-form form)
  ;; Like the other registration functions in this file, do not add a
  ;; name that is already registered; a second call for the same
  ;; SYMBOL would otherwise leave duplicate entries.
  (or (assoc (symbol-name symbol) coding-system-alist)
      (setq coding-system-alist (cons (list (symbol-name symbol))
                                      coding-system-alist)))
  (dolist (elt '("-unix" "-dos" "-mac"))
    (let ((name (concat (symbol-name symbol) elt)))
      (put (intern name) 'coding-system-define-form form)
      (or (assoc name coding-system-alist)
          (setq coding-system-alist (cons (list name) coding-system-alist))))))
2b96ab33 | 1156 | |
(defun set-buffer-file-coding-system (coding-system &optional force nomodify)
  "Make CODING-SYSTEM the file coding-system of the current buffer.
The buffer will be converted according to CODING-SYSTEM when it is
saved.  Use \\[list-coding-systems] to see the possible values of
CODING-SYSTEM.

CODING-SYSTEM may leave the text conversion, the end-of-line
conversion, or both unspecified.  When FORCE is nil, each unspecified
aspect is taken from the buffer's previous `buffer-file-coding-system'
value (if it is specified there).  When FORCE is non-nil, it is left
unspecified.

The buffer is marked modified so that the succeeding \\[save-buffer]
surely saves the buffer with CODING-SYSTEM.  From a program, pass t
for NOMODIFY if you don't want the buffer to be marked modified.
If you know exactly what coding system you want to use,
just set the variable `buffer-file-coding-system' directly."
  (interactive "zCoding system for saving file (default nil): \nP")
  (check-coding-system coding-system)
  (let ((previous buffer-file-coding-system))
    ;; Fill in the aspects CODING-SYSTEM leaves open, unless forced.
    (when (and coding-system previous (null force))
      (setq coding-system (merge-coding-systems coding-system previous))))
  (setq buffer-file-coding-system coding-system)
  ;; This is in case of an explicit call.  Normally, `normal-mode' and
  ;; `set-buffer-major-mode-hook' take care of setting the table.
  (when (fboundp 'ucs-set-table-for-input) ; don't lose when building
    (ucs-set-table-for-input))
  (or nomodify
      (set-buffer-modified-p t))
  (force-mode-line-update))
1187 | ||
bbdea948 RS |
(defun revert-buffer-with-coding-system (coding-system &optional force)
  "Revisit the file of the current buffer, decoding it with CODING-SYSTEM.
Use \\[list-coding-systems] to see the possible values of CODING-SYSTEM.

CODING-SYSTEM may leave the text conversion, the end-of-line
conversion, or both unspecified.  When FORCE is nil, each unspecified
aspect is taken from the buffer's previous `buffer-file-coding-system'
value (if it is specified there).  When FORCE is non-nil, it is
determined from the file contents as usual for visiting a file."
  (interactive "zCoding system for visited file (default nil): \nP")
  (check-coding-system coding-system)
  (let ((previous buffer-file-coding-system))
    ;; Fill in the aspects CODING-SYSTEM leaves open, unless forced.
    (when (and coding-system previous (null force))
      (setq coding-system (merge-coding-systems coding-system previous))))
  ;; `revert-buffer' reads the file again while this binding is active.
  (let ((coding-system-for-read coding-system))
    (revert-buffer)))
1205 | ||
701414e3 KH |
(defun set-file-name-coding-system (coding-system)
  "Use CODING-SYSTEM for decoding and encoding file names.
This just sets the variable `file-name-coding-system' (which see) to
CODING-SYSTEM, after checking that CODING-SYSTEM is valid and is not
marked `ascii-incompatible'."
  (interactive "zCoding system for file names (default nil): ")
  (check-coding-system coding-system)
  (when (and coding-system
             (coding-system-get coding-system 'ascii-incompatible))
    (error "%s is not ASCII-compatible" coding-system))
  (setq file-name-coding-system coding-system))
1216 | ||
358d28fb RS |
(defvar default-terminal-coding-system nil
  "Default value for the terminal coding system.
This is normally set according to the selected language environment.
The command `set-terminal-coding-system' falls back to this value when
it is given nil and no terminal coding system is currently set, and it
stores each non-nil coding system it ends up using here.
See also the command `set-terminal-coding-system'.")
1221 | ||
df100398 KH |
(defun set-terminal-coding-system (coding-system)
  "Use CODING-SYSTEM as the coding system of your terminal.
All text output to the terminal will be encoded
with the specified coding system.
Use \\[list-coding-systems] to see the possible values of CODING-SYSTEM.
The default is determined by the selected language environment
or by the previous use of this command."
  (interactive
   (let ((default (and (not (terminal-coding-system))
                       default-terminal-coding-system)))
     (list (read-coding-system
            (format "Coding system for terminal display (default %s): "
                    default)
            default))))
  ;; With no explicit choice and nothing currently in effect, fall back
  ;; to the remembered default.
  (or coding-system
      (terminal-coding-system)
      (setq coding-system default-terminal-coding-system))
  ;; Remember a non-nil choice as the default for later calls.
  (when coding-system
    (setq default-terminal-coding-system coding-system))
  (set-terminal-coding-system-internal coding-system)
  (redraw-frame (selected-frame)))
1244 | ||
358d28fb RS |
(defvar default-keyboard-coding-system nil
  "Default value of the keyboard coding system.
This is normally set according to the selected language environment.
The command `set-keyboard-coding-system' falls back to this value when
it is given nil and no keyboard coding system is currently set, and it
stores each non-nil coding system it ends up using here.
See also the command `set-keyboard-coding-system'.")
1249 | ||
(defun set-keyboard-coding-system (coding-system)
  "Set coding system for keyboard input to CODING-SYSTEM.
In addition, this command enables Encoded-kbd minor mode.
\(If CODING-SYSTEM is nil, Encoded-kbd mode is turned off -- see
`encoded-kbd-mode'.)
For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
The default is determined by the selected language environment
or by the previous use of this command.

Signal an error, without changing any setting, if the coding system
to use is marked `ascii-incompatible'."
  (interactive
   (list (let ((default (if (and (not (keyboard-coding-system))
                                 default-keyboard-coding-system)
                            default-keyboard-coding-system)))
           (read-coding-system
            (format "Coding system for keyboard input (default %s): "
                    default)
            default))))
  ;; With no explicit choice and nothing currently in effect, fall back
  ;; to the remembered default.
  (if (and (not coding-system)
           (not (keyboard-coding-system)))
      (setq coding-system default-keyboard-coding-system))
  ;; Validate before touching any state, so that a rejected coding
  ;; system is not remembered as the default for the next call.
  (if (and coding-system
           (coding-system-get coding-system 'ascii-incompatible))
      (error "%s is not ASCII-compatible" coding-system))
  (if coding-system
      (setq default-keyboard-coding-system coding-system))
  (set-keyboard-coding-system-internal coding-system)
  (setq keyboard-coding-system coding-system)
  (encoded-kbd-mode (if coding-system 1 0)))
1277 | ||
6d34f495 DL |
(defcustom keyboard-coding-system nil
  "Coding system used for keyboard input.
On a terminal that cannot tell Meta keys apart from 8-bit characters,
setting this means you have to type ESC to enter Meta characters.
See Info node `Terminal Coding' and Info node `Unibyte Mode'.

On non-windowing terminals, this is set from the locale by default.

Assigning to this variable directly has no effect;
use either \\[customize] or \\[set-keyboard-coding-system]."
  :type '(coding-system :tag "Coding system")
  :link '(info-link "(emacs)Terminal Coding")
  :link '(info-link "(emacs)Unibyte Mode")
  :set (lambda (symbol value)
         ;; Don't load encoded-kbd-mode unnecessarily: go through the
         ;; command only for a non-nil value or when the mode variable
         ;; is already bound.
         (cond ((or value (boundp 'encoded-kbd-mode))
                (set-keyboard-coding-system value))
               (t
                ;; The option must still be initialized.
                (set-default 'keyboard-coding-system nil))))
  :version "22.1"
  :group 'keyboard
  :group 'mule)
1299 | ||
(defun set-buffer-process-coding-system (decoding encoding)
  "Set the coding systems of the process attached to the current buffer.
DECODING is the coding system used to decode what the process outputs;
ENCODING is the coding system used to encode what is sent to the process.
Signal an error if the current buffer has no process.

For a list of possible values of DECODING and ENCODING, use
\\[list-coding-systems]."
  (interactive
   "zCoding-system for output from the process: \nzCoding-system for input to the process: ")
  (let ((process (get-buffer-process (current-buffer))))
    (unless process
      (error "No process"))
    ;; Both arguments are validated before the process is touched.
    (check-coding-system decoding)
    (check-coding-system encoding)
    (set-process-coding-system process decoding encoding))
  (force-mode-line-update))
1315 | ||
d0b99881 RS |
;; `set-clipboard-coding-system' is another name for
;; `set-selection-coding-system'.
(defalias 'set-clipboard-coding-system #'set-selection-coding-system)
1317 | ||
(defun set-selection-coding-system (coding-system)
  "Use CODING-SYSTEM when exchanging text with other X clients.
Text sent or received via cut_buffer, selection, and clipboard
is encoded or decoded by CODING-SYSTEM.
Signal an error if CODING-SYSTEM is not a valid coding system."
  (interactive "zCoding system for X selection: ")
  ;; `check-coding-system' returns its argument when it is valid.
  (setq selection-coding-system (check-coding-system coding-system)))
b25eef20 | 1325 | |
;; The coding system most recently given to the command
;; `set-next-selection-coding-system'; that command offers it as the
;; default on its next invocation.
(defvar last-next-selection-coding-system nil)
1329 | ||
(defun set-next-selection-coding-system (coding-system)
  "Use CODING-SYSTEM for the next exchange of text with other X clients.
The setting applies to the next communication only.
A nil CODING-SYSTEM means to reuse the coding system given to the
previous call of this command."
  (interactive
   (let ((previous last-next-selection-coding-system))
     (list (read-coding-system
            (if previous
                (format "Coding system for the next X selection (default %S): "
                        previous)
              "Coding system for the next X selection: ")
            previous))))
  (cond (coding-system
         ;; Remember an explicit choice for later invocations.
         (setq last-next-selection-coding-system coding-system))
        (t
         (setq coding-system last-next-selection-coding-system)))
  (check-coding-system coding-system)
  (setq next-selection-coding-system coding-system))
1346 | ||
(defun set-coding-priority (arg)
  "Set the priority of coding categories according to ARG.
ARG is a list of coding categories in decreasing order of priority.
Categories of `coding-category-list' that ARG does not mention keep
their relative order and are placed after those of ARG.
Signal an error if ARG contains an element that is not a coding
category or that appears more than once."
  ;; REMAINING starts as a copy of `coding-category-list', which is
  ;; assumed to contain all coding categories; each category of ARG
  ;; is removed from it as it is validated.
  (let ((remaining (copy-sequence coding-category-list)))
    (dolist (category arg)
      (unless (and (get category 'coding-category-index)
                   (memq category remaining))
        (error "Invalid or duplicated element in argument: %s" arg))
      (setq remaining (delq category remaining)))
    ;; Install the new order, then let the internal function act on it.
    (setq coding-category-list (append arg remaining))
    (set-coding-priority-internal)))
4ed46869 | 1364 | |
835cbadb EZ |
1365 | ;;; X selections |
1366 | ||
(defvar ctext-non-standard-encodings-alist
  '(("big5-0" big5 2 (chinese-big5-1 chinese-big5-2))
    ("ISO8859-14" iso-8859-14 1 latin-iso8859-14)
    ("ISO8859-15" iso-8859-15 1 latin-iso8859-15))
  "Alist mapping non-standard encoding names to their usage in CTEXT.

The coding system `compound-text-with-extensions' consults it to
handle the extended segments of a compound text.

Each element has the form (ENCODING-NAME CODING-SYSTEM N-OCTET CHARSET).

ENCODING-NAME is the encoding name of an \"extended segment\".

CODING-SYSTEM is the coding system that encodes characters into,
or decodes them from, the extended segment.

N-OCTET is the number of octets (bytes) that encode one character
in the segment: 1, 2, 3, or 4, or 0 when the number of octets per
character is variable.

CHARSET is a character set whose characters are encoded in the
segment.  It can also be a list of character sets, or a
char-table, in which case the characters with a non-nil value in
the char-table are the ones concerned.

On decoding CTEXT, all encoding names listed here are recognized.

On encoding CTEXT, the encoding names used are those in the
variable `ctext-non-standard-encodings' (which see) and those
listed for the current language environment under the key
`ctext-non-standard-encodings'.")
1398 | ||
(defvar ctext-non-standard-encodings
  '("big5-0")
  "Names of non-standard encodings used in extended segments of CTEXT.
Every element must be an encoding name that appears in the variable
`ctext-non-standard-encodings-alist' (which see).")
cc926903 KH |
1404 | |
;; Regexp with two alternatives.  Group 1 matches the header of an
;; extended segment, with group 2 holding the encoding name up to the
;; \002 terminator.  Group 3 matches a whole UTF-8 section, from
;; ESC % G through ESC % @.
(defvar ctext-non-standard-encodings-regexp
  (let ((extended-segment
         "\\(\e%/[0-4][\200-\377][\200-\377]\\([^\002]+\\)\002\\)")
        (utf-8-section
         "\\(\e%G[^\e]*\e%@\\)"))
    (string-to-multibyte
     (concat extended-segment "\\|" utf-8-section))))
835cbadb EZ |
1413 | |
1414 | ;; Functions to support "Non-Standard Character Set Encodings" defined | |
6c805023 KH |
1415 | ;; by the COMPOUND-TEXT spec. They also support "The UTF-8 encoding" |
1416 | ;; described in the section 7 of the documentation of COMPOUND-TEXT | |
1417 | ;; distributed with XFree86. | |
5c88a01e | 1418 | |
835cbadb EZ |
(defun ctext-post-read-conversion (len)
  "Decode LEN characters of Compound Text with Extended Segments.
Decoding starts at point.  Extended segments whose encoding name is
known, and sections in the UTF-8 encoding, are replaced by their
decoded text.  Return the size of the region that was processed, as
measured after decoding."
  (save-match-data
    (save-restriction
      (let ((case-fold-search nil)
            (in-workbuf (string= (buffer-name) " *code-converting-work*"))
            last-coding-system-used)
        ;; In the conversion work buffer the whole buffer is processed
        ;; as multibyte; elsewhere only the LEN characters after point.
        (if in-workbuf
            (set-buffer-multibyte t)
          (narrow-to-region (point) (+ (point) len)))
        (while (re-search-forward ctext-non-standard-encodings-regexp
                                  nil 'move)
          (let ((start (match-beginning 0)))
            (cond
             ((match-beginning 1)
              ;; ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
              (let* ((high (char-after (+ start 4)))
                     (low (char-after (+ start 5)))
                     (name (match-string 2))
                     (info (assoc-string
                            name ctext-non-standard-encodings-alist t))
                     ;; Use the alist entry when there is one; otherwise
                     ;; accept NAME itself if it names a coding system.
                     (coding (if info
                                 (nth 1 info)
                               (let ((candidate (intern (downcase name))))
                                 (and (coding-system-p candidate)
                                      candidate))))
                     ;; M and L give the length of what follows them;
                     ;; subtract the encoding name and its terminator,
                     ;; which point has already moved across.
                     (payload (- (+ (* (- high 128) 128) (- low 128))
                                 (- (point) (+ start 6)))))
                (when coding
                  (delete-region start (point))
                  (forward-char payload)
                  (decode-coding-region (- (point) payload) (point)
                                        coding))))
             (t
              ;; ESC % G --UTF-8-BYTES-- ESC % @
              (delete-char -3)
              (delete-region start (+ start 3))
              (decode-coding-region start (point) 'utf-8))))))
      (goto-char (point-min))
      (- (point-max) (point)))))
835cbadb | 1459 | |
6c805023 KH |
;; Build the char table that tells, for each character, how it is put
;; into an extended segment.  A value in the table is nil, an element
;; of `ctext-non-standard-encodings-alist', or the symbol `utf-8'.

(defun ctext-non-standard-encodings-table ()
  "Return a char table of extended segment usage for each character.
Each value is nil, an element of `ctext-non-standard-encodings-alist',
or the symbol `utf-8'."
  (let ((usage (make-char-table 'translation-table)))
    (dolist (unicode-charset '(mule-unicode-0100-24ff
                               mule-unicode-2500-33ff
                               mule-unicode-e000-ffff))
      (aset usage (make-char unicode-charset) 'utf-8))
    ;; The names are processed in reverse order, so where several
    ;; encodings cover the same characters the one listed first is
    ;; stored last.
    (dolist (name (reverse
                   (append
                    (get-language-info current-language-environment
                                       'ctext-non-standard-encodings)
                    ctext-non-standard-encodings)))
      (let* ((entry (assoc name ctext-non-standard-encodings-alist))
             (target (nth 3 entry)))
        (when target
          (cond ((charsetp target)
                 (aset usage (make-char target) entry))
                ((listp target)
                 (dolist (one-charset target)
                   (aset usage (make-char one-charset) entry)))
                ((char-table-p target)
                 (map-char-table
                  #'(lambda (key value)
                      (if (and value (> key 128))
                          (aset usage key entry)))
                  target))))))
    usage))
835cbadb EZ |
1487 | |
(defun ctext-pre-write-conversion (from to)
  "Encode characters between FROM and TO as Compound Text w/Extended Segments.

If FROM is a string, or if the current buffer is not the work buffer
\" *code-conversion-work*\", make that buffer current, insert the
text into it, and convert it there.  Otherwise, convert in-place.

Runs of non-ASCII characters that `ctext-non-standard-encodings-table'
assigns to a non-standard encoding are wrapped in an extended segment
\(ESC % / N M L ENCODING-NAME STX BYTES); runs assigned to `utf-8' are
wrapped in ESC % G ... ESC % @.  Other text is left untouched.

The return value is always nil."
  (save-match-data
    (let ((workbuf (get-buffer-create " *code-conversion-work*")))
      ;; Setup a working buffer if necessary.
      (cond ((stringp from)
             (set-buffer workbuf)
             (erase-buffer)
             (set-buffer-multibyte (multibyte-string-p from))
             (insert from))
            ((not (eq (current-buffer) workbuf))
             (let ((buf (current-buffer))
                   (multibyte enable-multibyte-characters))
               (set-buffer workbuf)
               (erase-buffer)
               (set-buffer-multibyte multibyte)
               (insert-buffer-substring buf from to)))))

    ;; Now we can encode the whole buffer.
    (let ((encoding-table (ctext-non-standard-encodings-table))
          last-coding-system-used
          last-pos last-encoding-info
          encoding-info end-pos)
      (goto-char (setq last-pos (point-min)))
      ;; END-POS is a marker so that it follows the text as the
      ;; insertions and re-encodings below change buffer positions.
      (setq end-pos (point-marker))
      (while (re-search-forward "[^\000-\177]+" nil t)
        ;; Found a sequence of non-ASCII characters.
        (setq last-pos (match-beginning 0)
              last-encoding-info (aref encoding-table (char-after last-pos)))
        (set-marker end-pos (match-end 0))
        (goto-char (1+ last-pos))
        ;; Walk the sequence one character at a time and flush the
        ;; pending range each time the encoding info changes.  At
        ;; END-POS the info is nil, which flushes the last range.
        (catch 'tag
          (while t
            (setq encoding-info
                  (if (< (point) end-pos)
                      (aref encoding-table (following-char))))
            (unless (eq last-encoding-info encoding-info)
              (cond ((consp last-encoding-info)
                     ;; Encode the previous range using an extended
                     ;; segment.
                     (let ((encoding-name (car last-encoding-info))
                           (coding-system (nth 1 last-encoding-info))
                           (noctets (nth 2 last-encoding-info))
                           len)
                       (encode-coding-region last-pos (point) coding-system)
                       ;; LEN covers the encoding name, its one-byte
                       ;; STX terminator, and the encoded bytes.
                       (setq len (+ (length encoding-name) 1
                                    (- (point) last-pos)))
                       (save-excursion
                         (goto-char last-pos)
                         ;; The encoding name must end with STX
                         ;; (\002): that is the byte counted in LEN
                         ;; above and the terminator that
                         ;; `ctext-non-standard-encodings-regexp'
                         ;; looks for when decoding.
                         (insert (string-to-multibyte
                                  (format "\e%%/%d%c%c%s\002"
                                          noctets
                                          (+ (/ len 128) 128)
                                          (+ (% len 128) 128)
                                          encoding-name))))))
                    ((eq last-encoding-info 'utf-8)
                     ;; Encode the previous range using the UTF-8
                     ;; encoding extension.
                     (encode-coding-region last-pos (point) 'mule-utf-8)
                     (save-excursion
                       (goto-char last-pos)
                       (insert "\e%G"))
                     (insert "\e%@")))
              (setq last-pos (point)
                    last-encoding-info encoding-info))
            (if (< (point) end-pos)
                (forward-char 1)
              (throw 'tag nil)))))
      ;; Detach the marker now that it is no longer needed.
      (set-marker end-pos nil)
      (goto-char (point-min))))
  ;; Must return nil, as build_annotations_2 expects that.
  nil)
1564 | ||
4ed46869 KH |
1565 | ;;; FILE I/O |
1566 | ||
(defcustom auto-coding-alist
  '(("\\.\\(arc\\|zip\\|lzh\\|lha\\|zoo\\|[jew]ar\\|xpi\\)\\'" . no-conversion)
    ("\\.\\(ARC\\|ZIP\\|LZH\\|LHA\\|ZOO\\|[JEW]AR\\|XPI\\)\\'" . no-conversion)
    ("\\.\\(sx[dmicw]\\|odt\\|tar\\|tgz\\)\\'" . no-conversion)
    ("\\.\\(gz\\|Z\\|bz\\|bz2\\|gpg\\)\\'" . no-conversion)
    ("\\.\\(jpe?g\\|png\\|gif\\|tiff?\\|p[bpgn]m\\)\\'" . no-conversion)
    ("/#[^/]+#\\'" . emacs-mule))
  "Alist associating file name patterns with coding systems.
Each element has the form (REGEXP . CODING-SYSTEM).
A file whose name matches REGEXP is decoded by CODING-SYSTEM when read.

An entry of this alist overrides any `coding:' tag in the file
\(see the function `set-auto-coding') as well as the contents of
`file-coding-system-alist'."
  :group 'files
  :group 'mule
  :type '(repeat (cons (regexp :tag "File name regexp")
                       (symbol :tag "Coding system"))))
835f49b8 | 1585 | |
(defcustom auto-coding-regexp-alist
  '(("^BABYL OPTIONS:[ \t]*-\\*-[ \t]*rmail[ \t]*-\\*-" . no-conversion)
    ("\\`\xFE\xFF" . utf-16be-with-signature)
    ("\\`\xFF\xFE" . utf-16le-with-signature)
    ("\\`\xEF\xBB\xBF" . utf-8))
  "Alist associating patterns in file contents with coding systems.
Each element has the form (REGEXP . CODING-SYSTEM).
A file whose first bytes match REGEXP is decoded by CODING-SYSTEM when read.

An entry of this alist overrides any `coding:' tag in the file
\(see the function `set-auto-coding') as well as the contents of
`file-coding-system-alist'."
  :group 'files
  :group 'mule
  :type '(repeat (cons (regexp :tag "Regexp")
                       (symbol :tag "Coding system"))))
1602 | ||
0814ca04 KH |
(defun auto-coding-regexp-alist-lookup (from to)
  "Look up `auto-coding-regexp-alist' for the text of the current buffer.
Starting at FROM and searching no further than TO, try the regexps of
`auto-coding-regexp-alist' in order.  Return the coding system paired
with the first regexp that matches, or nil if none matches.
Point is left unchanged."
  (save-excursion
    (goto-char from)
    (let ((entries auto-coding-regexp-alist)
          result)
      (while (and entries (null result))
        (let* ((entry (car entries))
               (pattern (car entry)))
          ;; In a multibyte buffer the pattern is converted with
          ;; `string-to-multibyte' before searching.
          (when enable-multibyte-characters
            (setq pattern (string-to-multibyte pattern)))
          (if (re-search-forward pattern to t)
              (setq result (cdr entry))
            (setq entries (cdr entries)))))
      result)))
1619 | ||
;; The built-in auto coding functions are defined at the bottom of
;; this file.
(defcustom auto-coding-functions '(sgml-xml-auto-coding-function
                                   sgml-html-meta-auto-coding-function)
  "List of functions that try to determine a coding system.

Each function is called with the current buffer holding undecoded
text of parts of the file, and must not modify that buffer in any
way.  It receives one argument, SIZE, the number of
characters (starting from point) that it should examine.

A function that manages to determine a coding system should
return it; otherwise it should return nil.

A `coding:' tag in the file takes precedence over these
functions, in which case they are not called at all."
  :group 'files
  :group 'mule
  :type '(repeat function))
1640 | ||
1c4cc63a KH |
(defvar set-auto-coding-for-load nil
  "Non-nil means to look for a `load-coding' property, not `coding'.
This is meant for loading and byte-compiling Emacs Lisp files.")
1644 | ||
8a592131 RS |
(defun auto-coding-alist-lookup (filename)
  "Return the coding system that `auto-coding-alist' gives for FILENAME.
The regexps of the alist are tried in order and the coding system of
the first one that matches FILENAME is returned; the value is nil if
none matches.  Matching ignores case on the system types `vax-vms',
`windows-nt', `ms-dos' and `cygwin'."
  (let ((entries auto-coding-alist)
        (case-fold-search (memq system-type '(vax-vms windows-nt ms-dos cygwin)))
        result)
    (while (and entries (null result))
      (let ((entry (car entries)))
        (if (string-match (car entry) filename)
            (setq result (cdr entry))
          (setq entries (cdr entries)))))
    result))
1655 | ||
09e5712d KH |
;; Give `enable-character-translation' the `permanent-local' property
;; and register `booleanp' as its `safe-local-variable' predicate.
(dolist (property '((permanent-local . t)
                    (safe-local-variable . booleanp)))
  (put 'enable-character-translation (car property) (cdr property)))
1658 | ||
e9b01d1f KH |
(defun find-auto-coding (filename size)
  "Find a coding system for a file FILENAME of which SIZE bytes follow point.
These bytes should include at least the first 1k of the file
and the last 3k of the file, but the middle may be omitted.

The function checks FILENAME against the variable `auto-coding-alist'.
If FILENAME doesn't match any entries in the variable, it checks the
contents of the current buffer following point against
`auto-coding-regexp-alist'.  If no match is found, it checks for a
`coding:' tag in the first one or two lines following point.  If no
`coding:' tag is found, it checks any local variables list in the last
3K bytes out of the SIZE bytes.  Finally, if none of these methods
succeed, it checks to see if any function in `auto-coding-functions'
gives a match.

While looking for a `coding:' tag, the function also looks for an
`enable-character-translation:' tag, wherever it appears relative to
the `coding:' tag.  If its value is `nil', or if the coding system
name ends with \"!\", `enable-character-translation' is made
buffer-local and set to nil.

If a coding system is specified, the return value is a
cons (CODING . SOURCE), where CODING is the specified coding
system and SOURCE is a symbol `auto-coding-alist',
`auto-coding-regexp-alist', `:coding', or `auto-coding-functions'
indicating by what CODING is specified.  Note that the validity
of CODING is not checked; it's the caller's responsibility to check it.

If nothing is specified, the return value is nil."
  (or (let ((coding-system (auto-coding-alist-lookup filename)))
        (if coding-system
            (cons coding-system 'auto-coding-alist)))
      ;; Try using `auto-coding-regexp-alist'.
      (let ((coding-system (auto-coding-regexp-alist-lookup (point)
                                                            (+ (point) size))))
        (if coding-system
            (cons coding-system 'auto-coding-regexp-alist)))
      (let* ((case-fold-search t)
             (head-start (point))
             (head-end (+ head-start (min size 1024)))
             (tail-start (+ head-start (max (- size 3072) 0)))
             (tail-end (+ head-start size))
             coding-system head-found tail-found char-trans)
        ;; Try a short cut by searching for the strings "coding:",
        ;; "unibyte:" and "enable-character-translation:" at the head
        ;; and tail of SIZE bytes.
        (setq head-found (or (search-forward "coding:" head-end t)
                             (search-forward "unibyte:" head-end t)
                             (search-forward "enable-character-translation:"
                                             head-end t)))
        (if (and head-found (> head-found tail-start))
            ;; Head and tail are overlapped.
            (setq tail-found head-found)
          (goto-char tail-start)
          (setq tail-found (or (search-forward "coding:" tail-end t)
                               (search-forward "unibyte:" tail-end t)
                               (search-forward "enable-character-translation:"
                                               tail-end t))))

        ;; At first check the head.
        (when head-found
          (goto-char head-start)
          ;; From here on HEAD-START and HEAD-END delimit what
          ;; `set-auto-mode-1' found; HEAD-END is nil if it found
          ;; nothing.
          (setq head-end (set-auto-mode-1))
          (setq head-start (point))
          (when (and head-end (< head-found head-end))
            (goto-char head-start)
            (when (and set-auto-coding-for-load
                       (re-search-forward
                        "\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)"
                        head-end t))
              (setq coding-system 'raw-text))
            (when (and (not coding-system)
                       (re-search-forward
                        "\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)"
                        head-end t))
              (setq coding-system (intern (match-string 2))))
            ;; A successful search above has moved point past its tag.
            ;; Go back to the start so that a preceding
            ;; `enable-character-translation:' tag is found too.
            (goto-char head-start)
            (when (re-search-forward
                   "\\(.*;\\)?[ \t]*enable-character-translation:[ \t]*\\([^ ;]+\\)"
                   head-end t)
              (setq char-trans (match-string 2)))))

        ;; If no coding: tag in the head, check the tail.
        ;; Here we must pay attention to the case that the end-of-line
        ;; is just "\r" and we can't use "^" nor "$" in regexp.
        (when (and tail-found (or (not coding-system) (not char-trans)))
          (goto-char tail-start)
          (re-search-forward "[\r\n]\^L" nil t)
          (if (re-search-forward
               "[\r\n]\\([^[\r\n]*\\)[ \t]*Local Variables:[ \t]*\\([^\r\n]*\\)[\r\n]"
               tail-end t)
              ;; The prefix is what comes before "local variables:" in its
              ;; line.  The suffix is what comes after "local variables:"
              ;; in its line.
              (let* ((prefix (regexp-quote (match-string 1)))
                     (suffix (regexp-quote (match-string 2)))
                     (re-coding
                      (concat
                       "[\r\n]" prefix
                       ;; N.B. without the \n below, the regexp can
                       ;; eat newlines.
                       "[ \t]*coding[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
                       suffix "[\r\n]"))
                     (re-unibyte
                      (concat
                       "[\r\n]" prefix
                       "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
                       suffix "[\r\n]"))
                     (re-char-trans
                      (concat
                       "[\r\n]" prefix
                       "[ \t]*enable-character-translation[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
                       suffix "[\r\n]"))
                     (re-end
                      (concat "[\r\n]" prefix "[ \t]*End *:[ \t]*" suffix
                              "[\r\n]?"))
                     (pos (1- (point))))
                (forward-char -1)       ; skip back \r or \n.
                ;; Restrict the searches below to the variables list.
                (re-search-forward re-end tail-end 'move)
                (setq tail-end (point))
                (goto-char pos)
                (when (and set-auto-coding-for-load
                           (re-search-forward re-unibyte tail-end t))
                  (setq coding-system 'raw-text))
                (when (and (not coding-system)
                           (re-search-forward re-coding tail-end t))
                  (setq coding-system (intern (match-string 1))))
                (when (not char-trans)
                  ;; As in the head, search the whole list again so
                  ;; that the order of the entries does not matter.
                  (goto-char pos)
                  (when (re-search-forward re-char-trans tail-end t)
                    (setq char-trans (match-string 1)))))))
        (if coding-system
            ;; If the coding-system name ends with "!", remove it and
            ;; set char-trans to "nil".
            (let ((name (symbol-name coding-system)))
              (if (= (aref name (1- (length name))) ?!)
                  (setq coding-system (intern (substring name 0 -1))
                        char-trans "nil"))))
        ;; Interning the string "nil" yields nil, which is the value
        ;; that turns character translation off.
        (when (and char-trans
                   (not (setq char-trans (intern char-trans))))
          (make-local-variable 'enable-character-translation)
          (setq enable-character-translation nil))
        (if coding-system
            (cons coding-system :coding)))
      ;; Finally, try all the `auto-coding-functions'.
      (let ((funcs auto-coding-functions)
            (coding-system nil))
        (while (and funcs (not coding-system))
          ;; A function that signals an error is treated as having
          ;; found nothing.
          (setq coding-system (condition-case nil
                                  (save-excursion
                                    (goto-char (point-min))
                                    (funcall (pop funcs) size))
                                (error nil))))
        (if coding-system
            (cons coding-system 'auto-coding-functions)))))
1805 | ||
(defun set-auto-coding (filename size)
  "Return coding system for a file FILENAME of which SIZE bytes follow point.
The coding system is looked up by `find-auto-coding' (which see).
Return nil if nothing is found or if what is found is not a valid
coding system.

The variable `set-auto-coding-function' (which see) is set to this
function by default."
  ;; Only the CODING part of the (CODING . SOURCE) result is needed.
  (let ((coding (car (find-auto-coding filename size))))
    (and coding
         (coding-system-p coding)
         coding)))
63561304 KH |
1816 | |
;; Install `set-auto-coding' as the value of `set-auto-coding-function'.
(setq set-auto-coding-function #'set-auto-coding)
87aba788 | 1818 | |
0436cc1b KH |
;; Two events set this variable:
;;   (1) A file is read with an explicitly specified coding system.
;;       `after-insert-file-set-coding' then stores the value of
;;       `coding-system-for-read' here.
;;   (2) A buffer is saved.
;;       After writing, `basic-save-buffer-1' stores the value of
;;       `last-coding-system-used' here.
;; `revert-buffer' uses the variable for decoding.
(defvar buffer-file-coding-system-explicit nil
  "The file coding system explicitly specified for the current buffer.
Internal use only.")
(put 'buffer-file-coding-system-explicit 'permanent-local t)
(make-variable-buffer-local 'buffer-file-coding-system-explicit)
0436cc1b KH |
1832 | |
(defun after-insert-file-set-coding (inserted &optional visit)
  "Set `buffer-file-coding-system' of current buffer after text is inserted.
INSERTED is the number of characters that were inserted, counted as
things stood before this function ran.  The value returned is the
number of characters inserted as counted afterwards; the two can
differ if the buffer has been made unibyte.
The optional second arg VISIT non-nil means that we are visiting a file."
  ;; Remember a coding system that was requested explicitly for a
  ;; visit (anything except `auto-save-coding').
  (when (and visit
             coding-system-for-read
             (not (eq coding-system-for-read 'auto-save-coding)))
    (setq buffer-file-coding-system-explicit coding-system-for-read))
  (when last-coding-system-used
    (let ((new-coding
           (find-new-buffer-file-coding-system last-coding-system-used))
          (was-modified (buffer-modified-p)))
      (when new-coding
        ;; Tell set-buffer-file-coding-system not to mark the file
        ;; as modified; we just read it, and it's supposed to be unmodified.
        ;; Marking it modified would try to lock it, which would
        ;; check the modtime, and we don't want to do that again now.
        (set-buffer-file-coding-system new-coding t t)
        (when (and enable-multibyte-characters
                   (or (eq new-coding 'no-conversion)
                       (eq (coding-system-type new-coding) 5))
                   ;; If buffer was unmodified and the size is the
                   ;; same as INSERTED, we must be visiting it.
                   (not was-modified)
                   (= (buffer-size) inserted))
          ;; For coding systems no-conversion and raw-text...,
          ;; edit the buffer as unibyte.
          (let ((end-marker (copy-marker (+ (point) inserted)))
                ;; Prevent locking.
                (buffer-file-name nil))
            (if visit
                ;; If we're doing this for find-file,
                ;; don't record undo info; this counts as
                ;; part of producing the buffer's initial contents.
                (let ((buffer-undo-list t))
                  (set-buffer-multibyte nil))
              (set-buffer-multibyte nil))
            ;; Recount the inserted text now that the buffer is unibyte.
            (setq inserted (- end-marker (point)))))
        (restore-buffer-modified-p was-modified))))
  inserted)
4ed46869 | 1876 | |
;; The text conversion and the eol-type of the returned coding system
;; are decided independently, each in the following order of preference:
;;   1. That of `buffer-file-coding-system', if it is locally bound.
;;   2. That of CODING.
;; When `buffer-file-coding-system' is not locally bound, CODING comes
;; first and the current value of the variable is only a fallback.
1881 | ||
(defun find-new-buffer-file-coding-system (coding)
  "Return a coding system for a buffer when a file of CODING is inserted.
The value is meant to become the buffer-local value of
`buffer-file-coding-system'; this function only computes it and leaves
the variable untouched.

The text conversion and the eol-type of the result are chosen
independently.  For each of them, a buffer-local
`buffer-file-coding-system' is preferred over CODING; when the variable
is not buffer-local, CODING is preferred over its current value.

Return nil if there's no need to set `buffer-file-coding-system'."
  (when coding
    (let* ((locally-bound (local-variable-p 'buffer-file-coding-system))
           ;; A non-numeric eol-type means the eol format is not yet set.
           (local-eol
            (let ((eol (coding-system-eol-type buffer-file-coding-system)))
              (and (numberp eol) eol)))
           ;; A coding system whose type is t is `undecided'.
           (local-coding
            (and buffer-file-coding-system
                 (not (eq (coding-system-type buffer-file-coding-system) t))
                 (coding-system-base buffer-file-coding-system))))
      ;; If the current buffer has already set a full coding system, we
      ;; had better not change it.
      (unless (and locally-bound local-eol local-coding)
        (let ((found-eol
               (let ((eol (coding-system-eol-type coding)))
                 (cond ((numberp eol) eol)
                       ;; The eol-type is not found.  If EOL conversions
                       ;; are inhibited, force unix eol-type.
                       (inhibit-eol-conversion 0))))
              (found-coding
               (if (eq (coding-system-type coding) t)
                   'undecided
                 (coding-system-base coding))))
          ;; Give up when CODING fixes neither the eol-type nor the text
          ;; conversion.
          (unless (and (not found-eol) (eq found-coding 'undecided))
            (let* ((new-coding (if locally-bound
                                   (or local-coding found-coding)
                                 (or found-coding local-coding)))
                   (new-eol (if locally-bound
                                (or local-eol found-eol)
                              (or found-eol local-eol)))
                   (eol-type (coding-system-eol-type new-coding)))
              ;; Pick the eol variant of NEW-CODING when one is known.
              (if (and (numberp new-eol) (vectorp eol-type))
                  (aref eol-type new-eol)
                new-coding))))))))
4ed46869 | 1938 | |
fe831d33 GV |
(defun modify-coding-system-alist (target-type regexp coding-system)
  "Modify one of look up tables for finding a coding system on I/O operation.
There are three such tables: `file-coding-system-alist',
`process-coding-system-alist', and `network-coding-system-alist'.

TARGET-TYPE specifies which of them to modify.
If it is `file', it affects `file-coding-system-alist' (which see).
If it is `process', it affects `process-coding-system-alist' (which see).
If it is `network', it affects `network-coding-system-alist' (which see).

REGEXP is a regular expression matching a target of I/O operation.
The target is a file name if TARGET-TYPE is `file', a program name if
TARGET-TYPE is `process', or a network service name or a port number
to connect to if TARGET-TYPE is `network'.

CODING-SYSTEM is a coding system to perform code conversion on the I/O
operation, or a cons cell (DECODING . ENCODING) specifying the coding
systems for decoding and encoding respectively, or a function symbol
which, when called, returns such a cons cell.

If the table already has an entry for REGEXP, that entry is updated in
place; otherwise a new entry is added at the front of the table."
  (unless (memq target-type '(file process network))
    (error "Invalid target type: %s" target-type))
  (unless (or (stringp regexp)
              (and (eq target-type 'network) (integerp regexp)))
    (error "Invalid regular expression: %s" regexp))
  ;; Validate CODING-SYSTEM.  A bare coding system name is expanded to
  ;; a (DECODING . ENCODING) pair; a function symbol is stored as is.
  (cond ((not (symbolp coding-system))
         (check-coding-system (car coding-system))
         (check-coding-system (cdr coding-system)))
        ((not (fboundp coding-system))
         (check-coding-system coding-system)
         (setq coding-system (cons coding-system coding-system))))
  (let* ((alist-symbol
          (cond ((eq target-type 'file) 'file-coding-system-alist)
                ((eq target-type 'process) 'process-coding-system-alist)
                (t 'network-coding-system-alist)))
         (entry (assoc regexp (symbol-value alist-symbol))))
    (if entry
        (setcdr entry coding-system)
      (set alist-symbol
           (cons (cons regexp coding-system)
                 (symbol-value alist-symbol))))))
1991 | ||
db046b7d KH |
(defun decode-coding-inserted-region (from to filename
                                           &optional visit beg end replace)
  "Decode the region between FROM and TO as if it is read from file FILENAME.
The idea is that the text between FROM and TO was just inserted somehow.
Optional arguments VISIT, BEG, END, and REPLACE are the same as those
of the function `insert-file-contents'.

The coding system is taken from `coding-system-for-read' if that is
non-nil, else from `set-auto-coding-function', else from
`find-operation-coding-system'.  In a unibyte buffer the text
conversion of the chosen coding system is replaced by `raw-text'.
If no valid coding system is found, the text is left alone and
`last-coding-system-used' is set to nil.

Part of the job of this function is setting `buffer-undo-list'
appropriately: with VISIT non-nil the undo list ends up empty;
otherwise, if the most recent undo entry is the insertion (FROM . TO),
it is replaced by an entry covering the decoded text."
  (save-excursion
    (save-restriction
      (let ((coding coding-system-for-read)
            ;; Non-nil once the undo entry for the insertion has been
            ;; removed.  This must be tracked separately from
            ;; UNDO-LIST-SAVED, which is legitimately nil when that
            ;; entry was the only element of the undo list.
            undo-entry-removed
            undo-list-saved)
        (if visit
            ;; Temporarily turn off undo recording, if we're decoding the
            ;; text of a visited file.
            (setq buffer-undo-list t)
          ;; Otherwise, if we can recognize the undo elt for the insertion,
          ;; remove it and get ready to replace it later.
          ;; In the mean time, turn off undo recording.
          (let ((last (car-safe buffer-undo-list)))
            (if (and (consp last) (eql (car last) from) (eql (cdr last) to))
                (setq undo-list-saved (cdr buffer-undo-list)
                      undo-entry-removed t
                      buffer-undo-list t))))
        (narrow-to-region from to)
        (goto-char (point-min))
        (or coding
            (setq coding (funcall set-auto-coding-function
                                  filename (- (point-max) (point-min)))))
        (or coding
            (setq coding (car (find-operation-coding-system
                               'insert-file-contents
                               (cons filename (current-buffer))
                               visit beg end replace))))
        (if (coding-system-p coding)
            (or enable-multibyte-characters
                (setq coding
                      (coding-system-change-text-conversion coding 'raw-text)))
          (setq coding nil))
        (if coding
            (decode-coding-region (point-min) (point-max) coding)
          (setq last-coding-system-used coding))
        ;; If we're decoding the text of a visited file,
        ;; the undo list should start out empty.
        (if visit
            (setq buffer-undo-list nil)
          ;; If we decided to replace the undo entry for the insertion,
          ;; do so now.  This also turns undo recording back on, so it
          ;; must happen even when the saved tail is empty.
          (if undo-entry-removed
              (setq buffer-undo-list
                    (cons (cons from (point-max)) undo-list-saved))))))))
f29387e8 | 2041 | |
27a91cf7 KH |
(defun recode-region (start end new-coding coding)
  "Re-decode the region (previously decoded by CODING) by NEW-CODING.
The text between START and END is first encoded back with CODING and
the result is then decoded with NEW-CODING.

When called interactively, operate on the region and read both coding
systems from the minibuffer; the default offered for CODING is
`buffer-file-coding-system' or, failing that, `last-coding-system-used'."
  (interactive
   ;; Look at the region first so that a missing mark is reported before
   ;; the user is prompted for anything.
   (let* ((from (region-beginning))
          (to (region-end))
          (actual (read-coding-system "Text was really in: "))
          (assumed-default (or buffer-file-coding-system
                               last-coding-system-used))
          (prompt (concat "But was interpreted as"
                          (if assumed-default
                              (format " (default %S): " assumed-default)
                            ": "))))
     (list from to actual (read-coding-system prompt assumed-default))))
  (unless (and new-coding coding)
    (error "Coding system not specified"))
  ;; Check it before we encode the region.
  (check-coding-system new-coding)
  (save-restriction
    (narrow-to-region start end)
    (encode-coding-region (point-min) (point-max) coding)
    (decode-coding-region (point-min) (point-max) new-coding)))
2060 | ||
(defun make-translation-table (&rest args)
  "Make a translation table from arguments.
A translation table is a char table intended for character
translation in CCL programs.

Each argument is a list of elements of the form (FROM . TO), where FROM
is a character to be translated to TO.

FROM can be a generic character (see `make-char').  In this case, TO is
a generic character containing the same number of characters, or an
ordinary character.  If FROM and TO are both generic characters, all
characters belonging to FROM are translated to characters belonging to TO
without changing their position code(s).

The arguments and forms in each argument are processed in the given
order, and if a previous form already translates TO to some other
character, say TO-ALT, FROM is also translated to TO-ALT.  Likewise,
characters that a previous form translated to FROM are retargeted so
that they translate to TO.

Signal an error if FROM and TO are generic characters of different
degrees of freedom."
  (let ((table (make-char-table 'translation-table))
        ;; Alist of (TO FROM1 FROM2 ...): for each target character, the
        ;; characters currently translated to it.
        revlist)
    (dolist (elts args)
      (dolist (elt elts)
        (let* ((from (car elt))
               (from-i 0)               ; degree of freedom of FROM
               (from-rev (nreverse (split-char from)))
               (to (cdr elt))
               (to-i 0)                 ; degree of freedom of TO
               (to-rev (nreverse (split-char to))))
          ;; Check numbers of heading 0s in FROM-REV and TO-REV.
          (while (eq (car from-rev) 0)
            (setq from-i (1+ from-i) from-rev (cdr from-rev)))
          (while (eq (car to-rev) 0)
            (setq to-i (1+ to-i) to-rev (cdr to-rev)))
          (if (and (/= from-i to-i) (/= to-i 0))
              (error "Invalid character pair (%d . %d)" from to))
          ;; If we have already translated TO to TO-ALT, FROM should
          ;; also be translated to TO-ALT.  But, this is only if TO
          ;; is a generic character or TO-ALT is not a generic
          ;; character.
          (let ((to-alt (aref table to)))
            (if (and to-alt
                     (or (> to-i 0) (not (generic-char-p to-alt))))
                (setq to to-alt)))
          (if (> from-i 0)
              (set-char-table-default table from to)
            (aset table from to))
          ;; If we have already translated some chars to FROM, they
          ;; should also be translated to TO.  Rename the REVLIST entry
          ;; and retarget every character listed in it.  (The old code
          ;; re-set FROM itself on each iteration and never touched
          ;; the listed characters.)
          (let ((l (assq from revlist)))
            (when l
              (setcar l to)
              (dolist (ch (cdr l))
                (aset table ch to))))
          ;; Now update REVLIST.
          (let ((l (assq to revlist)))
            (if l
                (setcdr l (cons from (cdr l)))
              (setq revlist (cons (list to from) revlist)))))))
    ;; Return TABLE just created.
    table))
2126 | ||
c76b5c99 KH |
(defun make-translation-table-from-vector (vec)
  "Make translation table from decoding vector VEC.
VEC is an array of 256 elements to map unibyte codes to multibyte
characters.  Elements may be nil for undefined code points.

The returned table maps each code to its character.  Its extra slot 0
holds a second translation table with the reverse mapping, which
records only characters whose code is 256 or greater.
See also the variable `nonascii-translation-table'."
  (let ((decode-table (make-char-table 'translation-table))
        (encode-table (make-char-table 'translation-table))
        (code 0))
    (while (< code 256)
      (let ((char (aref vec code)))
        (when char
          (aset decode-table code char)
          (if (>= char 256)
              (aset encode-table char code))))
      (setq code (1+ code)))
    (set-char-table-extra-slot decode-table 0 encode-table)
    decode-table))
2143 | ||
(defun define-translation-table (symbol &rest args)
  "Define SYMBOL as the name of translation table made by ARGS.
This sets up information so that the table can be used for
translations in a CCL program.

If the first element of ARGS is a char-table whose purpose is
`translation-table', just define SYMBOL to name it.  (Note that this
function does not bind SYMBOL.)

Any other ARGS should be suitable as arguments of the function
`make-translation-table' (which see).

This function sets properties `translation-table' and
`translation-table-id' of SYMBOL to the created table itself and the
identification number of the table respectively.  It also registers
the table in `translation-table-vector', reusing the slot already
assigned to SYMBOL if there is one, and returns the identification
number."
  (let* ((first (car args))
         (table (if (and (char-table-p first)
                         (eq (char-table-subtype first) 'translation-table))
                    first
                  (apply 'make-translation-table args)))
         (len (length translation-table-vector))
         (id 0)
         slot)
    (put symbol 'translation-table table)
    ;; Advance ID to the first slot that is free or already belongs to
    ;; SYMBOL, enlarging the vector when we run off its original end.
    (while (progn
             (if (>= id len)
                 (setq translation-table-vector
                       (vconcat translation-table-vector
                                (make-vector len nil))))
             (setq slot (aref translation-table-vector id))
             (and slot (not (eq (car slot) symbol))))
      (setq id (1+ id)))
    (aset translation-table-vector id (cons symbol table))
    (put symbol 'translation-table-id id)
    id))
2182 | ||
0e86dca1 KH |
(defun translate-region (start end table)
  "From START to END, translate characters according to TABLE.
TABLE is a string or a char-table.
If TABLE is a string, the Nth character in it is the mapping
for the character with code N.
If TABLE is a char-table, the element for character N is the mapping
for the character with code N.
TABLE may also be a symbol whose `translation-table' property is a
char-table; that char-table is then used.
It returns the number of characters changed.

When called interactively, operate on the region and read the name of
one of the tables registered in `translation-table-vector'."
  (interactive
   (list (region-beginning)
         (region-end)
         (let ((count (length translation-table-vector))
               (index 0)
               candidates choice)
           ;; Collect the names of all registered translation tables.
           (while (< index count)
             (let ((slot (aref translation-table-vector index)))
               (if (consp slot)
                   (setq candidates
                         (cons (list (symbol-name (car slot)))
                               candidates))))
             (setq index (1+ index)))
           (unless candidates
             (error "No translation table defined"))
           (while (not choice)
             (setq choice (completing-read "Translation table: "
                                           candidates nil t)))
           (intern choice))))
  ;; Resolve a table name to the table itself.
  (when (symbolp table)
    (let ((val (get table 'translation-table)))
      (unless (char-table-p val)
        (error "Invalid translation table name: %s" table))
      (setq table val)))
  (translate-region-internal start end table))
2210 | ||
35554641 KH |
;; Indent the body of `with-category-table' like that of `let'.
(put 'with-category-table 'lisp-indent-function 1)

(defmacro with-category-table (table &rest body)
  "Evaluate BODY with category table of current buffer set to TABLE.
The category table of the current buffer is saved, BODY is evaluated,
then the saved table is restored, even in case of an abnormal exit.
The table is restored in the buffer that was current on entry, even if
BODY leaves a different buffer current.
Value is what BODY returns."
  ;; Uninterned symbols keep the temporaries from capturing variables
  ;; used in BODY.
  (let ((saved-table (make-symbol "old-table"))
        (saved-buffer (make-symbol "old-buffer")))
    (list 'let
          (list (list saved-table '(category-table))
                (list saved-buffer '(current-buffer)))
          (list 'unwind-protect
                (cons 'progn
                      (cons (list 'set-category-table table)
                            body))
                (list 'with-current-buffer saved-buffer
                      (list 'set-category-table saved-table))))))
35554641 | 2228 | |
394e4eb0 DL |
(defun define-translation-hash-table (symbol table)
  "Define SYMBOL as the name of the hash translation TABLE for use in CCL.

Analogous to `define-translation-table', but updates
`translation-hash-table-vector' and the table is for use in the CCL
`lookup-integer' and `lookup-character' functions.

Set the properties `translation-hash-table' and
`translation-hash-table-id' of SYMBOL to TABLE and to its index in
`translation-hash-table-vector' respectively, and return that index.
Signal an error unless SYMBOL is a symbol and TABLE is a hash table."
  (or (and (symbolp symbol)
           (hash-table-p table))
      (error "Bad args to define-translation-hash-table"))
  (let ((len (length translation-hash-table-vector))
        (id 0)
        slot)
    (put symbol 'translation-hash-table table)
    ;; Advance ID to the first slot that is free or already belongs to
    ;; SYMBOL, adding one slot when we run off the original end.
    (while (progn
             (if (>= id len)
                 (setq translation-hash-table-vector
                       (vconcat translation-hash-table-vector [nil])))
             (setq slot (aref translation-hash-table-vector id))
             (and slot (not (eq (car slot) symbol))))
      (setq id (1+ id)))
    (aset translation-hash-table-vector id (cons symbol table))
    (put symbol 'translation-hash-table-id id)
    id))
2255 | ||
69eba008 KH |
2256 | ;;; Initialize some variables. |
2257 | ||
;; `use-default-ascent' and `ignore-relative-composition' each hold a
;; char-table whose subtype is the variable's own name and which has no
;; extra slots.  The property must be set before the table is created.
(dolist (subtype '(use-default-ascent ignore-relative-composition))
  (put subtype 'char-table-extra-slots 0)
  (set subtype (make-char-table subtype)))
69eba008 | 2263 | |
d9f6dfe6 CW |
2264 | |
2265 | ;;; Built-in auto-coding-functions: | |
2266 | ||
(defun sgml-xml-auto-coding-function (size)
  "Determine whether the buffer is XML, and if so, its encoding.
This function is intended to be added to `auto-coding-functions'.

Only the SIZE characters following point are examined.  If they start
with an XML declaration, return the coding system named by its
`encoding' attribute, or `utf-8' if the declaration has no such
attribute.  Return nil if there is no complete XML declaration, or if
the named encoding is not a known coding system (in which case a
warning message is also displayed)."
  (let ((limit (+ (point) size)))
    (when (re-search-forward "\\`[[:space:]\n]*<\\?xml" limit t)
      (let ((decl-end (save-excursion
                        ;; This is a hack.
                        (re-search-forward "[\"']\\s-*\\?>" limit t))))
        (cond ((null decl-end)
               nil)
              ((re-search-forward "encoding=[\"']\\(.+?\\)[\"']" decl-end t)
               (let* ((name (match-string 1))
                      (sym (intern (downcase name))))
                 (if (coding-system-p sym)
                     sym
                   (message "Warning: unknown coding system \"%s\"" name)
                   nil)))
              (t
               'utf-8))))))
2284 | ||
447404a3 CW |
(defun sgml-html-meta-auto-coding-function (size)
  "If the buffer has an HTML meta tag, use it to determine encoding.
This function is intended to be added to `auto-coding-functions'.

Only the SIZE characters following point are examined, and no further
than the end of the HTML header (or ten lines, if there is no
\"</head>\").  Return the coding system named by the charset of a
content-type meta tag, or nil if the text does not look like HTML, has
no such tag, or names an unknown coding system (in which case a
warning message is also displayed).

Matching ignores letter case regardless of the current value of
`case-fold-search'."
  ;; The patterns below are written in lower case, so matching must
  ;; fold case to recognize upper-case markup.  Do not rely on the
  ;; ambient value of `case-fold-search', which may be nil.
  (let ((case-fold-search t))
    (setq size (min (+ (point) size)
                    (save-excursion
                      ;; Limit the search by the end of the HTML header.
                      (or (search-forward "</head>" size t)
                          ;; In case of no header, search only 10 lines.
                          (forward-line 10))
                      (point))))
    ;; Make sure that the buffer really contains an HTML document, by
    ;; checking that it starts with a doctype or a <HTML> start tag
    ;; (allowing for whitespace at bob).  Note: 'DOCTYPE NETSCAPE' is
    ;; useful for Mozilla bookmark files.
    (when (and (re-search-forward "\\`[[:space:]\n]*\\(<!doctype[[:space:]\n]+\\(html\\|netscape\\)\\|<html\\)" size t)
               (re-search-forward "<meta\\s-+http-equiv=[\"']?content-type[\"']?\\s-+content=[\"']text/\\sw+;\\s-*charset=\\(.+?\\)[\"']" size t))
      (let* ((match (match-string 1))
             (sym (intern (downcase match))))
        (if (coding-system-p sym)
            sym
          (message "Warning: unknown coding system \"%s\"" match)
          nil)))))
0bca779a | 2307 | |
69eba008 | 2308 | ;;; |
4ed46869 KH |
2309 | (provide 'mule) |
2310 | ||
2850984d | 2311 | ;; arch-tag: 9aebaa6e-0e8a-40a9-b857-cb5d04a39e7c |
4ed46869 | 2312 | ;;; mule.el ends here |