Commit | Line | Data |
---|---|---|
3263d5a2 | 1 | /* Header for charset handler. |
ba318903 | 2 | Copyright (C) 2001-2014 Free Software Foundation, Inc. |
7976eda0 | 3 | Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
5df4f04c | 4 | 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
ce03bf76 KH |
5 | National Institute of Advanced Industrial Science and Technology (AIST) |
6 | Registration Number H14PRO021 | |
4ed46869 | 7 | |
8f924df7 | 8 | Copyright (C) 2003 |
3263d5a2 KH |
9 | National Institute of Advanced Industrial Science and Technology (AIST) |
10 | Registration Number H13PRO009 | |
4ed46869 | 11 | |
369314dc KH |
12 | This file is part of GNU Emacs. |
13 | ||
b9b1cc14 | 14 | GNU Emacs is free software: you can redistribute it and/or modify |
369314dc | 15 | it under the terms of the GNU General Public License as published by |
b9b1cc14 GM |
16 | the Free Software Foundation, either version 3 of the License, or |
17 | (at your option) any later version. | |
4ed46869 | 18 | |
369314dc KH |
19 | GNU Emacs is distributed in the hope that it will be useful, |
20 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
21 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
22 | GNU General Public License for more details. | |
4ed46869 | 23 | |
369314dc | 24 | You should have received a copy of the GNU General Public License |
b9b1cc14 | 25 | along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ |
4ed46869 | 26 | |
aa01a892 KH |
27 | #ifndef EMACS_CHARSET_H |
28 | #define EMACS_CHARSET_H | |
4ed46869 | 29 | |
34206dd2 PE |
30 | #include <verify.h> |
31 | ||
f162bcc3 | 32 | INLINE_HEADER_BEGIN |
f162bcc3 | 33 | |
3263d5a2 KH |
34 | /* Index to arguments of Fdefine_charset_internal. */ |
35 | ||
36 | enum define_charset_arg_index | |
37 | { | |
38 | charset_arg_name, | |
39 | charset_arg_dimension, | |
40 | charset_arg_code_space, | |
315c0139 KH |
41 | charset_arg_min_code, |
42 | charset_arg_max_code, | |
3263d5a2 KH |
43 | charset_arg_iso_final, |
44 | charset_arg_iso_revision, | |
45 | charset_arg_emacs_mule_id, | |
46 | charset_arg_ascii_compatible_p, | |
47 | charset_arg_supplementary_p, | |
48 | charset_arg_invalid_code, | |
49 | charset_arg_code_offset, | |
50 | charset_arg_map, | |
ec7dd615 KH |
51 | charset_arg_subset, |
52 | charset_arg_superset, | |
3263d5a2 KH |
53 | charset_arg_unify_map, |
54 | charset_arg_plist, | |
55 | charset_arg_max | |
56 | }; | |
57 | ||
58 | ||
59 | /* Indices to charset attributes vector. */ | |
60 | ||
61 | enum charset_attr_index | |
62 | { | |
63 | /* ID number of the charset. */ | |
64 | charset_id, | |
4ed46869 | 65 | |
3263d5a2 KH |
66 | /* Name of the charset (symbol). */ |
67 | charset_name, | |
68 | ||
69 | /* Property list of the charset. */ | |
70 | charset_plist, | |
71 | ||
2bc20f0b KH |
72 | /* If the method of the charset is `MAP', the value is a mapping |
73 | vector or a file name that contains mapping vector. Otherwise, | |
74 | nil. */ | |
3263d5a2 KH |
75 | charset_map, |
76 | ||
77 | /* If the method of the charset is `MAP', the value is a vector | |
78 | that maps code points of the charset to characters. The vector | |
79 | is indexed by a character index. A character index is | |
80 | calculated from a code point and the code-space table of the | |
81 | charset. */ | |
82 | charset_decoder, | |
83 | ||
84 | /* If the method of the charset is `MAP', the value is a | |
85 | char-table that maps characters of the charset to code | |
86 | points. */ | |
87 | charset_encoder, | |
88 | ||
ec7dd615 KH |
89 | /* If the method of the charset is `SUBSET', the value is a vector |
90 | that has this form: | |
91 | ||
92 | [ CHARSET-ID MIN-CODE MAX-CODE OFFSET ] | |
93 | ||
94 | CHARSET-ID is an ID number of a parent charset. MIN-CODE and | |
95 | MAX-CODE specify the range of characters inherited from the | |
96 | parent. OFFSET is an integer value to add to a code point of | |
97 | the parent charset to get the corresponding code point of this | |
98 | charset. */ | |
99 | charset_subset, | |
100 | ||
101 | /* If the method of the charset is `SUPERSET', the value is a list | |
102 | whose elements have this form: | |
103 | ||
104 | (CHARSET-ID . OFFSET) | |
105 | ||
924b41ee JB |
106 | CHARSET-IDs are ID numbers of parent charsets. OFFSET is an |
107 | integer value to add to a code point of the parent charset to | |
108 | get the corresponding code point of this charset. */ | |
ec7dd615 | 109 | charset_superset, |
3263d5a2 | 110 | |
7619dee9 DL |
111 | /* The value is a mapping vector or a file name that contains the |
112 | mapping. This defines how characters in the charset should be | |
113 | unified with Unicode. The value of the member | |
3e4abc9e | 114 | `charset_deunifier' is created from this information. */ |
3263d5a2 KH |
115 | charset_unify_map, |
116 | ||
3e4abc9e | 117 | /* If characters in the charset must be unified with Unicode, the value |
1c305bc1 KH |
118 | is a char table that maps a unified Unicode character code to |
119 | the non-unified character code in the charset. */ | |
3263d5a2 KH |
120 | charset_deunifier, |
121 | ||
7619dee9 | 122 | /* The length of the charset attribute vector. */ |
3263d5a2 KH |
123 | charset_attr_max |
124 | }; | |
125 | ||
126 | /* Methods for converting code points and characters of charsets. */ | |
127 | ||
128 | enum charset_method | |
129 | { | |
130 | /* For a charset of this method, a character code is calculated | |
131 | from a character index (which is calculated from a code point) | |
132 | simply by adding an offset value. */ | |
133 | CHARSET_METHOD_OFFSET, | |
134 | ||
135 | /* For a charset of this method, a decoder vector and an encoder | |
136 | char-table are used for code point <-> character code | |
137 | conversion. */ | |
138 | CHARSET_METHOD_MAP, | |
139 | ||
7619dee9 | 140 | /* A charset of this method is a subset of another charset. */ |
ec7dd615 KH |
141 | CHARSET_METHOD_SUBSET, |
142 | ||
7619dee9 | 143 | /* A charset of this method is a superset of other charsets. */ |
ec7dd615 | 144 | CHARSET_METHOD_SUPERSET |
3263d5a2 KH |
145 | }; |
146 | ||
147 | struct charset | |
148 | { | |
3e4abc9e | 149 | /* Index to charset_table. */ |
3263d5a2 | 150 | int id; |
4ed46869 | 151 | |
3e4abc9e | 152 | /* Index to Vcharset_hash_table. */ |
d3411f89 | 153 | ptrdiff_t hash_index; |
3263d5a2 KH |
154 | |
155 | /* Dimension of the charset: 1, 2, 3, or 4. */ | |
156 | int dimension; | |
157 | ||
0b381c7e | 158 | /* Byte code range of each dimension. <code_space>[4N] is a minimum |
ac6a8028 KH |
159 | byte code of the (N+1)th dimension, <code_space>[4N+1] is a |
160 | maximum byte code of the (N+1)th dimension, <code_space>[4N+2] is | |
161 | (<code_space>[4N+1] - <code_space>[4N] + 1), <code_space>[4N+3] | |
c032b5f8 PE |
162 | is the number of characters contained in the first through (N+1)th |
163 | dimensions, except that there is no <code_space>[15]. | |
164 | We get `char-index' of a `code-point' from this | |
ac6a8028 | 165 | information. */ |
c032b5f8 | 166 | int code_space[15]; |
54e15bb9 | 167 | |
ac6a8028 KH |
168 | /* If B is a byte of Nth dimension of a code-point, the (N-1)th bit |
169 | of code_space_mask[B] is set. This array is used to quickly | |
170 | check if a code-point is in a valid range. */ | |
171 | unsigned char *code_space_mask; | |
d3985b78 | 172 | |
d5172d4f | 173 | /* True if there's no gap in code-points. */ |
96c06863 | 174 | bool_bf code_linear_p : 1; |
d3985b78 | 175 | |
d5172d4f PE |
176 | /* True if the charset is treated as 96 chars in ISO-2022 |
177 | as opposed to 94 chars. */ | |
96c06863 | 178 | bool_bf iso_chars_96 : 1; |
d5172d4f PE |
179 | |
180 | /* True if the charset is compatible with ASCII. */ | |
96c06863 | 181 | bool_bf ascii_compatible_p : 1; |
d5172d4f PE |
182 | |
183 | /* True if the charset is supplementary. */ | |
96c06863 | 184 | bool_bf supplementary_p : 1; |
d5172d4f PE |
185 | |
186 | /* True if all the code points are representable by Lisp_Int. */ | |
96c06863 | 187 | bool_bf compact_codes_p : 1; |
d5172d4f PE |
188 | |
189 | /* True if the charset is unified with Unicode. */ | |
96c06863 | 190 | bool_bf unified_p : 1; |
d3985b78 | 191 | |
3e4abc9e KH |
192 | /* ISO final byte of the charset: 48..127. It may be -1 if the |
193 | charset doesn't conform to ISO-2022. */ | |
3263d5a2 | 194 | int iso_final; |
d3985b78 | 195 | |
3e4abc9e | 196 | /* ISO revision number of the charset. */ |
3263d5a2 | 197 | int iso_revision; |
d3985b78 | 198 | |
3263d5a2 KH |
199 | /* If the charset is identical to one supported by Emacs 21 or |
200 | earlier, the identification number of the charset used in those | |
201 | versions. Otherwise, -1. */ | |
202 | int emacs_mule_id; | |
203 | ||
3263d5a2 KH |
204 | /* The method for encoding/decoding characters of the charset. */ |
205 | enum charset_method method; | |
206 | ||
0b381c7e | 207 | /* Minimum and Maximum code points of the charset. */ |
3263d5a2 KH |
208 | unsigned min_code, max_code; |
209 | ||
315c0139 | 210 | /* Offset value used by macros CODE_POINT_TO_INDEX and |
f224e500 | 211 | INDEX_TO_CODE_POINT. */ |
315c0139 KH |
212 | unsigned char_index_offset; |
213 | ||
0b381c7e | 214 | /* Minimum and Maximum character codes of the charset. If the |
3263d5a2 | 215 | charset is compatible with ASCII, min_char is a minimum non-ASCII |
39d992af KH |
216 | character of the charset. If the method of charset is |
217 | CHARSET_METHOD_OFFSET, even if the charset is unified, min_char | |
218 | and max_char don't change. */ | |
3263d5a2 KH |
219 | int min_char, max_char; |
220 | ||
221 | /* The code returned by ENCODE_CHAR if a character is not encodable | |
222 | by the charset. */ | |
223 | unsigned invalid_code; | |
224 | ||
225 | /* If the method of the charset is CHARSET_METHOD_MAP, this is a | |
226 | table of bits used to quickly and roughly guess if a character | |
227 | belongs to the charset. | |
228 | ||
229 | The first 64 elements are 512 bits for characters less than | |
230 | 0x10000. Each bit corresponds to 128-character block. The last | |
231 | 126 elements are 1008 bits for the greater characters | |
232 | (0x10000..0x3FFFFF). Each bit corresponds to 4096-character | |
233 | block. | |
234 | ||
7619dee9 | 235 | If a bit is 1, at least one character in the corresponding block is |
3263d5a2 KH |
236 | in this charset. */ |
237 | unsigned char fast_map[190]; | |
238 | ||
239 | /* Offset value to calculate a character code from code-point, and | |
240 | vice versa. */ | |
241 | int code_offset; | |
3263d5a2 KH |
242 | }; |
243 | ||
bbd240ce | 244 | /* Hash table of charset symbols vs. the corresponding attribute |
3263d5a2 KH |
245 | vectors. */ |
246 | extern Lisp_Object Vcharset_hash_table; | |
247 | ||
248 | /* Table of struct charset. */ | |
249 | extern struct charset *charset_table; | |
3263d5a2 KH |
250 | |
251 | #define CHARSET_FROM_ID(id) (charset_table + (id)) | |
252 | ||
39d992af | 253 | extern Lisp_Object Vcharset_ordered_list; |
370cefff | 254 | extern Lisp_Object Vcharset_non_preferred_head; |
39d992af KH |
255 | |
256 | /* Incremented every time we change the priority of charsets. */ | |
a0b31951 | 257 | extern unsigned short charset_ordered_list_tick; |
39d992af | 258 | |
3263d5a2 KH |
259 | extern Lisp_Object Viso_2022_charset_list; |
260 | extern Lisp_Object Vemacs_mule_charset_list; | |
261 | ||
b84ae584 | 262 | extern int emacs_mule_charset[256]; |
3263d5a2 | 263 | |
3263d5a2 KH |
264 | /* Macros to access information about charset. */ |
265 | ||
266 | /* Return the attribute vector of charset whose symbol is SYMBOL. */ | |
267 | #define CHARSET_SYMBOL_ATTRIBUTES(symbol) \ | |
268 | Fgethash ((symbol), Vcharset_hash_table, Qnil) | |
269 | ||
270 | #define CHARSET_ATTR_ID(attrs) AREF ((attrs), charset_id) | |
271 | #define CHARSET_ATTR_NAME(attrs) AREF ((attrs), charset_name) | |
272 | #define CHARSET_ATTR_PLIST(attrs) AREF ((attrs), charset_plist) | |
273 | #define CHARSET_ATTR_MAP(attrs) AREF ((attrs), charset_map) | |
274 | #define CHARSET_ATTR_DECODER(attrs) AREF ((attrs), charset_decoder) | |
275 | #define CHARSET_ATTR_ENCODER(attrs) AREF ((attrs), charset_encoder) | |
ec7dd615 KH |
276 | #define CHARSET_ATTR_SUBSET(attrs) AREF ((attrs), charset_subset) |
277 | #define CHARSET_ATTR_SUPERSET(attrs) AREF ((attrs), charset_superset) | |
3263d5a2 KH |
278 | #define CHARSET_ATTR_UNIFY_MAP(attrs) AREF ((attrs), charset_unify_map) |
279 | #define CHARSET_ATTR_DEUNIFIER(attrs) AREF ((attrs), charset_deunifier) | |
280 | ||
281 | #define CHARSET_SYMBOL_ID(symbol) \ | |
282 | CHARSET_ATTR_ID (CHARSET_SYMBOL_ATTRIBUTES (symbol)) | |
283 | ||
284 | /* Return an index to Vcharset_hash_table of the charset whose symbol | |
285 | is SYMBOL. */ | |
286 | #define CHARSET_SYMBOL_HASH_INDEX(symbol) \ | |
287 | hash_lookup (XHASH_TABLE (Vcharset_hash_table), symbol, NULL) | |
288 | ||
289 | /* Return the attribute vector of CHARSET. */ | |
290 | #define CHARSET_ATTRIBUTES(charset) \ | |
291 | (HASH_VALUE (XHASH_TABLE (Vcharset_hash_table), (charset)->hash_index)) | |
292 | ||
293 | #define CHARSET_ID(charset) ((charset)->id) | |
294 | #define CHARSET_HASH_INDEX(charset) ((charset)->hash_index) | |
295 | #define CHARSET_DIMENSION(charset) ((charset)->dimension) | |
296 | #define CHARSET_CODE_SPACE(charset) ((charset)->code_space) | |
297 | #define CHARSET_CODE_LINEAR_P(charset) ((charset)->code_linear_p) | |
298 | #define CHARSET_ISO_CHARS_96(charset) ((charset)->iso_chars_96) | |
299 | #define CHARSET_ISO_FINAL(charset) ((charset)->iso_final) | |
300 | #define CHARSET_ISO_PLANE(charset) ((charset)->iso_plane) /* NOTE(review): struct charset as declared in this header has no `iso_plane' member, so any expansion of this macro will fail to compile. */ | |
301 | #define CHARSET_ISO_REVISION(charset) ((charset)->iso_revision) | |
302 | #define CHARSET_EMACS_MULE_ID(charset) ((charset)->emacs_mule_id) | |
303 | #define CHARSET_ASCII_COMPATIBLE_P(charset) ((charset)->ascii_compatible_p) | |
304 | #define CHARSET_COMPACT_CODES_P(charset) ((charset)->compact_codes_p) | |
305 | #define CHARSET_METHOD(charset) ((charset)->method) | |
306 | #define CHARSET_MIN_CODE(charset) ((charset)->min_code) | |
307 | #define CHARSET_MAX_CODE(charset) ((charset)->max_code) | |
308 | #define CHARSET_INVALID_CODE(charset) ((charset)->invalid_code) | |
309 | #define CHARSET_MIN_CHAR(charset) ((charset)->min_char) | |
310 | #define CHARSET_MAX_CHAR(charset) ((charset)->max_char) | |
311 | #define CHARSET_CODE_OFFSET(charset) ((charset)->code_offset) | |
312 | #define CHARSET_UNIFIED_P(charset) ((charset)->unified_p) | |
313 | ||
314 | #define CHARSET_NAME(charset) \ | |
315 | (CHARSET_ATTR_NAME (CHARSET_ATTRIBUTES (charset))) | |
316 | #define CHARSET_MAP(charset) \ | |
317 | (CHARSET_ATTR_MAP (CHARSET_ATTRIBUTES (charset))) | |
318 | #define CHARSET_DECODER(charset) \ | |
319 | (CHARSET_ATTR_DECODER (CHARSET_ATTRIBUTES (charset))) | |
320 | #define CHARSET_ENCODER(charset) \ | |
321 | (CHARSET_ATTR_ENCODER (CHARSET_ATTRIBUTES (charset))) | |
ec7dd615 KH |
322 | #define CHARSET_SUBSET(charset) \ |
323 | (CHARSET_ATTR_SUBSET (CHARSET_ATTRIBUTES (charset))) | |
324 | #define CHARSET_SUPERSET(charset) \ | |
325 | (CHARSET_ATTR_SUPERSET (CHARSET_ATTRIBUTES (charset))) | |
3263d5a2 KH |
326 | #define CHARSET_UNIFY_MAP(charset) \ |
327 | (CHARSET_ATTR_UNIFY_MAP (CHARSET_ATTRIBUTES (charset))) | |
328 | #define CHARSET_DEUNIFIER(charset) \ | |
329 | (CHARSET_ATTR_DEUNIFIER (CHARSET_ATTRIBUTES (charset))) | |
330 | ||
00382e8b | 331 | INLINE void |
4939150c PE |
332 | set_charset_attr (struct charset *charset, enum charset_attr_index idx, |
333 | Lisp_Object val) | |
334 | { | |
335 | ASET (CHARSET_ATTRIBUTES (charset), idx, val); | |
336 | } | |
337 | ||
3263d5a2 | 338 | |
b2e6b10f | 339 | /* Nonzero if OBJ is a valid charset symbol. */ |
3263d5a2 KH |
340 | #define CHARSETP(obj) (CHARSET_SYMBOL_HASH_INDEX (obj) >= 0) |
341 | ||
342 | /* Check if X is a valid charset symbol. If not, signal an error. */ | |
343 | #define CHECK_CHARSET(x) \ | |
384107f2 | 344 | do { \ |
3263d5a2 | 345 | if (! SYMBOLP (x) || CHARSET_SYMBOL_HASH_INDEX (x) < 0) \ |
43d1af23 | 346 | wrong_type_argument (Qcharsetp, (x)); \ |
96c06863 | 347 | } while (false) |
6e4dc3e1 | 348 | |
6e4dc3e1 | 349 | |
3263d5a2 KH |
350 | /* Check if X is a valid charset symbol. If valid, set ID to the id |
351 | number of the charset. Otherwise, signal an error. */ | |
352 | #define CHECK_CHARSET_GET_ID(x, id) \ | |
353 | do { \ | |
d3411f89 | 354 | ptrdiff_t idx; \ |
3263d5a2 KH |
355 | \ |
356 | if (! SYMBOLP (x) || (idx = CHARSET_SYMBOL_HASH_INDEX (x)) < 0) \ | |
43d1af23 | 357 | wrong_type_argument (Qcharsetp, (x)); \ |
73e92b8c DL |
358 | id = XINT (AREF (HASH_VALUE (XHASH_TABLE (Vcharset_hash_table), idx), \ |
359 | charset_id)); \ | |
96c06863 | 360 | } while (false) |
4ed46869 | 361 | |
6e4dc3e1 | 362 | |
3263d5a2 KH |
363 | /* Check if X is a valid charset symbol. If valid, set ATTR to the |
364 | attr vector of the charset. Otherwise, signal an error. */ | |
365 | #define CHECK_CHARSET_GET_ATTR(x, attr) \ | |
366 | do { \ | |
367 | if (!SYMBOLP (x) || NILP (attr = CHARSET_SYMBOL_ATTRIBUTES (x))) \ | |
43d1af23 | 368 | wrong_type_argument (Qcharsetp, (x)); \ |
96c06863 | 369 | } while (false) |
6e4dc3e1 | 370 | |
6e4dc3e1 | 371 | |
3263d5a2 KH |
372 | #define CHECK_CHARSET_GET_CHARSET(x, charset) \ |
373 | do { \ | |
f6095868 PE |
374 | int csid; \ |
375 | CHECK_CHARSET_GET_ID (x, csid); \ | |
376 | charset = CHARSET_FROM_ID (csid); \ | |
96c06863 | 377 | } while (false) |
c399b461 | 378 | |
c399b461 | 379 | |
924b41ee | 380 | /* Look up Vcharset_ordered_list and return the first charset that |
3263d5a2 | 381 | contains the character C. */ |
ec91b3c9 KH |
382 | #define CHAR_CHARSET(c) \ |
383 | ((c) < 0x80 ? CHARSET_FROM_ID (charset_ascii) \ | |
384 | : char_charset ((c), Qnil, NULL)) | |
c399b461 | 385 | |
96c06863 | 386 | #if false |
3263d5a2 KH |
387 | /* Char-table of charset-sets. Each element is a bool vector indexed |
388 | by a charset ID. */ | |
389 | extern Lisp_Object Vchar_charset_set; | |
6e4dc3e1 | 390 | |
3263d5a2 KH |
391 | /* Charset-bag of character C. */ |
392 | #define CHAR_CHARSET_SET(c) \ | |
393 | CHAR_TABLE_REF (Vchar_charset_set, c) | |
6e4dc3e1 | 394 | |
3263d5a2 KH |
395 | /* Check if two characters C1 and C2 belong to the same charset. */ |
396 | #define SAME_CHARSET_P(c1, c2) \ | |
397 | intersection_p (CHAR_CHARSET_SET (c1), CHAR_CHARSET_SET (c2)) | |
398 | ||
399 | #endif | |
400 | ||
401 | ||
bbd240ce | 402 | /* Return a character corresponding to the code-point CODE of CHARSET. |
3263d5a2 KH |
403 | Try some optimization before calling decode_char. */ |
404 | ||
405 | #define DECODE_CHAR(charset, code) \ | |
406 | ((ASCII_BYTE_P (code) && (charset)->ascii_compatible_p) \ | |
407 | ? (code) \ | |
408 | : ((code) < (charset)->min_code || (code) > (charset)->max_code) \ | |
409 | ? -1 \ | |
410 | : (charset)->unified_p \ | |
411 | ? decode_char ((charset), (code)) \ | |
412 | : (charset)->method == CHARSET_METHOD_OFFSET \ | |
413 | ? ((charset)->code_linear_p \ | |
60ad3eab | 414 | ? (int) ((code) - (charset)->min_code) + (charset)->code_offset \ |
3263d5a2 KH |
415 | : decode_char ((charset), (code))) \ |
416 | : (charset)->method == CHARSET_METHOD_MAP \ | |
2bc20f0b KH |
417 | ? (((charset)->code_linear_p \ |
418 | && VECTORP (CHARSET_DECODER (charset))) \ | |
3263d5a2 | 419 | ? XINT (AREF (CHARSET_DECODER (charset), \ |
2bc20f0b | 420 | (code) - (charset)->min_code)) \ |
3263d5a2 KH |
421 | : decode_char ((charset), (code))) \ |
422 | : decode_char ((charset), (code))) | |
423 | ||
ec7dd615 KH |
424 | extern Lisp_Object charset_work; |
425 | ||
3263d5a2 KH |
426 | /* Return the code point of character C in CHARSET. |
427 | Try some optimization before calling encode_char. */ | |
428 | ||
a2a01861 PE |
429 | #define ENCODE_CHAR(charset, c) \ |
430 | (verify_expr \ | |
431 | (sizeof (c) <= sizeof (int), \ | |
432 | (ASCII_CHAR_P (c) && (charset)->ascii_compatible_p \ | |
60ad3eab | 433 | ? (unsigned) (c) \ |
a2a01861 PE |
434 | : ((charset)->unified_p \ |
435 | || (charset)->method == CHARSET_METHOD_SUBSET \ | |
436 | || (charset)->method == CHARSET_METHOD_SUPERSET) \ | |
437 | ? encode_char (charset, c) \ | |
438 | : (c) < (charset)->min_char || (c) > (charset)->max_char \ | |
439 | ? (charset)->invalid_code \ | |
440 | : (charset)->method == CHARSET_METHOD_OFFSET \ | |
441 | ? ((charset)->code_linear_p \ | |
60ad3eab | 442 | ? (unsigned) ((c) - (charset)->code_offset) + (charset)->min_code \ |
a2a01861 PE |
443 | : encode_char (charset, c)) \ |
444 | : (charset)->method == CHARSET_METHOD_MAP \ | |
445 | ? (((charset)->compact_codes_p \ | |
446 | && CHAR_TABLE_P (CHARSET_ENCODER (charset))) \ | |
447 | ? (charset_work = CHAR_TABLE_REF (CHARSET_ENCODER (charset), c), \ | |
448 | (NILP (charset_work) \ | |
449 | ? (charset)->invalid_code \ | |
60ad3eab | 450 | : (unsigned) XFASTINT (charset_work))) \ |
a2a01861 PE |
451 | : encode_char (charset, c)) \ |
452 | : encode_char (charset, c)))) | |
3263d5a2 KH |
453 | |
454 | ||
96c06863 | 455 | /* Set to true when a charset map is loaded, to warn that buffer text |
3263d5a2 | 456 | and string data may be relocated. */ |
d5172d4f | 457 | extern bool charset_map_loaded; |
3263d5a2 KH |
458 | |
459 | ||
460 | /* Set CHARSET to the highest-priority charset that contains C, and | |
461 | CODE to the code-point of C in CHARSET. */ | |
462 | #define SPLIT_CHAR(c, charset, code) \ | |
463 | ((charset) = char_charset ((c), Qnil, &(code))) | |
464 | ||
465 | ||
466 | #define ISO_MAX_DIMENSION 3 | |
467 | #define ISO_MAX_CHARS 2 | |
468 | #define ISO_MAX_FINAL 0x80 /* only 0x30..0x7F are used */ | |
469 | ||
470 | /* Mapping table from ISO2022's charset (specified by DIMENSION, | |
471 | CHARS, and FINAL_CHAR) to Emacs' charset ID. Should be accessed by | |
472 | macro ISO_CHARSET_TABLE (DIMENSION, CHARS, FINAL_CHAR). */ | |
473 | extern int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL]; | |
c399b461 | 474 | |
d5172d4f | 475 | /* A charset of type iso2022 that has DIMENSION, CHARS_96, and FINAL |
3263d5a2 KH |
476 | (final character). */ |
477 | #define ISO_CHARSET_TABLE(dimension, chars_96, final) \ | |
d5172d4f | 478 | iso_charset_table[(dimension) - 1][chars_96][final] |
6e4dc3e1 | 479 | |
b2e6b10f | 480 | /* Nonzero if the charset that has FAST_MAP may contain C. C below 0x10000 is tested per 128-character block, larger C per 4096-character block. C is evaluated more than once. */ |
3263d5a2 KH |
481 | #define CHARSET_FAST_MAP_REF(c, fast_map) \ |
482 | ((c) < 0x10000 \ | |
483 | ? (fast_map)[(c) >> 10] & (1 << (((c) >> 7) & 7)) \ | |
484 | : (fast_map)[((c) >> 15) + 62] & (1 << (((c) >> 12) & 7))) | |
6e4dc3e1 | 485 | |
3263d5a2 | 486 | #define CHARSET_FAST_MAP_SET(c, fast_map) \ |
384107f2 | 487 | do { \ |
3263d5a2 KH |
488 | if ((c) < 0x10000) \ |
489 | (fast_map)[(c) >> 10] |= 1 << (((c) >> 7) & 7); \ | |
384107f2 | 490 | else \ |
3263d5a2 | 491 | (fast_map)[((c) >> 15) + 62] |= 1 << (((c) >> 12) & 7); \ |
96c06863 | 492 | } while (false) |
384107f2 | 493 | |
6e4dc3e1 | 494 | |
6e4dc3e1 | 495 | |
f10fe38f | 496 | /* True if CHARSET may contain the character C. */ |
ec7dd615 KH |
497 | #define CHAR_CHARSET_P(c, charset) \ |
498 | ((ASCII_CHAR_P (c) && (charset)->ascii_compatible_p) \ | |
1c305bc1 KH |
499 | || ((CHARSET_UNIFIED_P (charset) \ |
500 | || (charset)->method == CHARSET_METHOD_SUBSET \ | |
501 | || (charset)->method == CHARSET_METHOD_SUPERSET) \ | |
ec7dd615 KH |
502 | ? encode_char ((charset), (c)) != (charset)->invalid_code \ |
503 | : (CHARSET_FAST_MAP_REF ((c), (charset)->fast_map) \ | |
504 | && ((charset)->method == CHARSET_METHOD_OFFSET \ | |
505 | ? (c) >= (charset)->min_char && (c) <= (charset)->max_char \ | |
506 | : ((charset)->method == CHARSET_METHOD_MAP \ | |
8cc53f96 KH |
507 | && (charset)->compact_codes_p \ |
508 | && CHAR_TABLE_P (CHARSET_ENCODER (charset))) \ | |
ec7dd615 | 509 | ? ! NILP (CHAR_TABLE_REF (CHARSET_ENCODER (charset), (c))) \ |
3263d5a2 | 510 | : encode_char ((charset), (c)) != (charset)->invalid_code)))) |
6e4dc3e1 | 511 | |
39d992af KH |
512 | \f |
513 | /* Special macros for emacs-mule encoding. */ | |
4ed46869 | 514 | |
39d992af KH |
515 | /* Leading-code followed by extended leading-code. DIMENSION/COLUMN */ |
516 | #define EMACS_MULE_LEADING_CODE_PRIVATE_11 0x9A /* 1/1 */ | |
517 | #define EMACS_MULE_LEADING_CODE_PRIVATE_12 0x9B /* 1/2 */ | |
518 | #define EMACS_MULE_LEADING_CODE_PRIVATE_21 0x9C /* 2/1 */ | |
519 | #define EMACS_MULE_LEADING_CODE_PRIVATE_22 0x9D /* 2/2 */ | |
520 | ||
39d992af | 521 | \f |
4ed46869 | 522 | |
3263d5a2 KH |
523 | extern Lisp_Object Qcharsetp; |
524 | ||
955cbe7b | 525 | extern Lisp_Object Qascii; |
21f9c39d | 526 | extern int charset_ascii, charset_eight_bit; |
96f45cf0 | 527 | extern int charset_unicode; |
e73576a3 KH |
528 | extern int charset_jisx0201_roman; |
529 | extern int charset_jisx0208_1978; | |
530 | extern int charset_jisx0208; | |
3c908a57 | 531 | extern int charset_ksc5601; |
3263d5a2 | 532 | |
3fd31366 KH |
533 | extern int charset_unibyte; |
534 | ||
383e0970 J |
535 | extern struct charset *char_charset (int, Lisp_Object, unsigned *); |
536 | extern Lisp_Object charset_attributes (int); | |
3263d5a2 | 537 | |
383e0970 J |
538 | extern int decode_char (struct charset *, unsigned); |
539 | extern unsigned encode_char (struct charset *, int); | |
540 | extern int string_xstring_p (Lisp_Object); | |
3263d5a2 | 541 | |
383e0970 J |
542 | extern void map_charset_chars (void (*) (Lisp_Object, Lisp_Object), |
543 | Lisp_Object, Lisp_Object, | |
544 | struct charset *, unsigned, unsigned); | |
ec7dd615 | 545 | |
f162bcc3 PE |
546 | INLINE_HEADER_END |
547 | ||
aa01a892 | 548 | #endif /* EMACS_CHARSET_H */ |