Commit | Line | Data |
---|---|---|
3263d5a2 | 1 | /* Header for charset handler. |
4a2f9c6a | 2 | Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN. |
75c8c592 | 3 | Licensed to the Free Software Foundation. |
e06aa1f9 | 4 | Copyright (C) 2001 Free Software Foundation, Inc. |
3263d5a2 KH |
5 | Copyright (C) 2001, 2002 |
6 | National Institute of Advanced Industrial Science and Technology (AIST) | |
7 | Registration Number H13PRO009 | |
4ed46869 | 8 | |
369314dc KH |
9 | This file is part of GNU Emacs. |
10 | ||
11 | GNU Emacs is free software; you can redistribute it and/or modify | |
12 | it under the terms of the GNU General Public License as published by | |
13 | the Free Software Foundation; either version 2, or (at your option) | |
14 | any later version. | |
4ed46869 | 15 | |
369314dc KH |
16 | GNU Emacs is distributed in the hope that it will be useful, |
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | GNU General Public License for more details. | |
4ed46869 | 20 | |
369314dc KH |
21 | You should have received a copy of the GNU General Public License |
22 | along with GNU Emacs; see the file COPYING. If not, write to | |
23 | the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
24 | Boston, MA 02111-1307, USA. */ | |
4ed46869 | 25 | |
aa01a892 KH |
26 | #ifndef EMACS_CHARSET_H |
27 | #define EMACS_CHARSET_H | |
4ed46869 | 28 | |
3263d5a2 KH |
29 | /* Index to arguments of Fdefine_charset_internal. */ |
30 | ||
31 | enum define_charset_arg_index | |
32 | { | |
33 | charset_arg_name, | |
34 | charset_arg_dimension, | |
35 | charset_arg_code_space, | |
315c0139 KH |
36 | charset_arg_min_code, |
37 | charset_arg_max_code, | |
3263d5a2 KH |
38 | charset_arg_iso_final, |
39 | charset_arg_iso_revision, | |
40 | charset_arg_emacs_mule_id, | |
41 | charset_arg_ascii_compatible_p, | |
42 | charset_arg_supplementary_p, | |
43 | charset_arg_invalid_code, | |
44 | charset_arg_code_offset, | |
45 | charset_arg_map, | |
ec7dd615 KH |
46 | charset_arg_subset, |
47 | charset_arg_superset, | |
3263d5a2 KH |
48 | charset_arg_unify_map, |
49 | charset_arg_plist, | |
50 | charset_arg_max | |
51 | }; | |
52 | ||
53 | ||
54 | /* Indices to charset attributes vector. */ | |
55 | ||
56 | enum charset_attr_index | |
57 | { | |
58 | /* ID number of the charset. */ | |
59 | charset_id, | |
4ed46869 | 60 | |
3263d5a2 KH |
61 | /* Name of the charset (symbol). */ |
62 | charset_name, | |
63 | ||
64 | /* Property list of the charset. */ | |
65 | charset_plist, | |
66 | ||
67 | /* If the method of the charset is `MAP_DEFERRED', the value is a | |
3e4abc9e | 68 | mapping vector or a file name that contains mapping vector. |
3263d5a2 KH |
69 | Otherwise, nil. */ |
70 | charset_map, | |
71 | ||
72 | /* If the method of the charset is `MAP', the value is a vector | |
73 | that maps code points of the charset to characters. The vector | |
74 | is indexed by a character index. A character index is | |
75 | calculated from a code point and the code-space table of the | |
76 | charset. */ | |
77 | charset_decoder, | |
78 | ||
79 | /* If the method of the charset is `MAP', the value is a | |
80 | char-table that maps characters of the charset to code | |
81 | points. */ | |
82 | charset_encoder, | |
83 | ||
ec7dd615 KH |
84 | /* If the method of the charset is `SUBSET', the value is a vector |
85 | that has this form: | |
86 | ||
87 | [ CHARSET-ID MIN-CODE MAX-CODE OFFSET ] | |
88 | ||
89 | CHARSET-ID is an ID number of a parent charset. MIN-CODE and | |
90 | MAX-CODE specify the range of characters inherited from the | |
91 | parent. OFFSET is an integer value to add to a code point of | |
92 | the parent charset to get the corresponding code point of this | |
93 | charset. */ | |
94 | charset_subset, | |
95 | ||
96 | /* If the method of the charset is `SUPERSET', the value is a list | |
97 | whose elements have this form: | |
98 | ||
99 | (CHARSET-ID . OFFSET) | |
100 | ||
101 | CHARSET-IDs are ID numbers of parent charsets. OFFSET is an | |
102 | integer value to add to a code point of the parent charset to | |
103 | get the corresponding code point of this charset. */ | |
104 | charset_superset, | |
3263d5a2 | 105 | |
7619dee9 DL |
106 | /* The value is a mapping vector or a file name that contains the |
107 | mapping. This defines how characters in the charset should be | |
108 | unified with Unicode. The value of the member | |
3e4abc9e | 109 | `charset_deunifier' is created from this information. */ |
3263d5a2 KH |
110 | charset_unify_map, |
111 | ||
3e4abc9e KH |
112 | /* If characters in the charset must be unified with Unicode, the value |
113 | is a char table that maps a character code in the charset to | |
114 | the corresponding Unicode character. */ | |
3263d5a2 KH |
115 | charset_deunifier, |
116 | ||
7619dee9 | 117 | /* The length of the charset attribute vector. */ |
3263d5a2 KH |
118 | charset_attr_max |
119 | }; | |
120 | ||
121 | /* Methods for converting code points and characters of charsets. */ | |
122 | ||
123 | enum charset_method | |
124 | { | |
125 | /* For a charset of this method, a character code is calculated | |
126 | from a character index (which is calculated from a code point) | |
127 | simply by adding an offset value. */ | |
128 | CHARSET_METHOD_OFFSET, | |
129 | ||
130 | /* For a charset of this method, a decoder vector and an encoder | |
131 | char-table is used for code point <-> character code | |
132 | conversion. */ | |
133 | CHARSET_METHOD_MAP, | |
134 | ||
135 | /* Same as above but decoder and encoder are loaded from a file on | |
136 | demand. Once loaded, the method is changed to | |
137 | CHARSET_METHOD_MAP. */ | |
138 | CHARSET_METHOD_MAP_DEFERRED, | |
139 | ||
7619dee9 | 140 | /* A charset of this method is a subset of another charset. */ |
ec7dd615 KH |
141 | CHARSET_METHOD_SUBSET, |
142 | ||
7619dee9 | 143 | /* A charset of this method is a superset of other charsets. */ |
ec7dd615 | 144 | CHARSET_METHOD_SUPERSET |
3263d5a2 KH |
145 | }; |
146 | ||
147 | struct charset | |
148 | { | |
3e4abc9e | 149 | /* Index to charset_table. */ |
3263d5a2 KH |
150 | int id; |
151 | ||
3e4abc9e | 152 | /* Index to Vcharset_hash_table. */ |
3263d5a2 KH |
153 | int hash_index; |
154 | ||
155 | /* Dimension of the charset: 1, 2, 3, or 4. */ | |
156 | int dimension; | |
157 | ||
ac6a8028 KH |
158 | /* Byte code range of each dimension. <code_space>[4N] is the minimum |
159 | byte code of the (N+1)th dimension, <code_space>[4N+1] is the | |
160 | maximum byte code of the (N+1)th dimension, <code_space>[4N+2] is | |
161 | (<code_space>[4N+1] - <code_space>[4N] + 1), <code_space>[4N+3] | |
162 | is the number of characters contained in the first to (N+1)th | |
163 | dimensions. We get `char-index' of a `code-point' from this | |
164 | information. */ | |
3263d5a2 KH |
165 | int code_space[16]; |
166 | ||
ac6a8028 KH |
167 | /* If B is a byte of Nth dimension of a code-point, the (N-1)th bit |
168 | of code_space_mask[B] is set. This array is used to quickly | |
169 | check if a code-point is in a valid range. */ | |
170 | unsigned char *code_space_mask; | |
171 | ||
3263d5a2 KH |
172 | /* 1 if there's no gap in code-points. */ |
173 | int code_linear_p; | |
174 | ||
175 | /* If the charset is treated as 94-chars in ISO-2022, the value is 0. | |
176 | If the charset is treated as 96-chars in ISO-2022, the value is 1. */ | |
177 | int iso_chars_96; | |
178 | ||
3e4abc9e KH |
179 | /* ISO final byte of the charset: 48..127. It may be -1 if the |
180 | charset doesn't conform to ISO-2022. */ | |
3263d5a2 KH |
181 | int iso_final; |
182 | ||
3e4abc9e | 183 | /* ISO revision number of the charset. */ |
3263d5a2 KH |
184 | int iso_revision; |
185 | ||
186 | /* If the charset is identical to one supported by Emacs 21 and | |
187 | earlier, the identification number of the charset used in those | |
188 | versions. Otherwise, -1. */ | |
189 | int emacs_mule_id; | |
190 | ||
191 | /* Nonzero iff the charset is compatible with ASCII. */ | |
192 | int ascii_compatible_p; | |
193 | ||
194 | /* Nonzero iff the charset is supplementary. */ | |
195 | int supplementary_p; | |
196 | ||
197 | /* Nonzero iff all the code points are representable by Lisp_Int. */ | |
198 | int compact_codes_p; | |
199 | ||
200 | /* The method for encoding/decoding characters of the charset. */ | |
201 | enum charset_method method; | |
202 | ||
203 | /* Minimum and maximum code points of the charset. */ | |
204 | unsigned min_code, max_code; | |
205 | ||
315c0139 KH |
206 | /* Offset value used by macros CODE_POINT_TO_INDEX and |
207 | INDEX_TO_CODE_POINT. */ | |
208 | unsigned char_index_offset; | |
209 | ||
3263d5a2 KH |
210 | /* Minimum and maximum character codes of the charset. If the |
211 | charset is compatible with ASCII, min_char is a minimum non-ASCII | |
212 | character of the charset. */ | |
213 | int min_char, max_char; | |
214 | ||
215 | /* The code returned by ENCODE_CHAR if a character is not encodable | |
216 | by the charset. */ | |
217 | unsigned invalid_code; | |
218 | ||
219 | /* If the method of the charset is CHARSET_METHOD_MAP, this is a | |
220 | table of bits used to quickly and roughly guess if a character | |
221 | belongs to the charset. | |
222 | ||
223 | The first 64 elements are 512 bits for characters less than | |
224 | 0x10000. Each bit corresponds to 128-character block. The last | |
225 | 126 elements are 1008 bits for the greater characters | |
226 | (0x10000..0x3FFFFF). Each bit corresponds to 4096-character | |
227 | block. | |
228 | ||
7619dee9 | 229 | If a bit is 1, at least one character in the corresponding block is |
3263d5a2 KH |
230 | in this charset. */ |
231 | unsigned char fast_map[190]; | |
232 | ||
233 | /* Offset value to calculate a character code from code-point, and | |
234 | vice versa. */ | |
235 | int code_offset; | |
236 | ||
237 | int unified_p; | |
238 | }; | |
239 | ||
240 | /* Hash table of charset symbols vs. the corresponding attribute | |
241 | vectors. */ | |
242 | extern Lisp_Object Vcharset_hash_table; | |
243 | ||
244 | /* Table of struct charset. */ | |
245 | extern struct charset *charset_table; | |
246 | extern int charset_table_used; | |
247 | ||
248 | #define CHARSET_FROM_ID(id) (charset_table + (id)) | |
249 | ||
250 | extern Lisp_Object Vcharset_list; | |
251 | extern Lisp_Object Viso_2022_charset_list; | |
252 | extern Lisp_Object Vemacs_mule_charset_list; | |
253 | ||
254 | extern struct charset *emacs_mule_charset[256]; | |
255 | ||
256 | ||
257 | /* Macros to access information about charset. */ | |
258 | ||
259 | /* Return the attribute vector of charset whose symbol is SYMBOL. */ | |
260 | #define CHARSET_SYMBOL_ATTRIBUTES(symbol) \ | |
261 | Fgethash ((symbol), Vcharset_hash_table, Qnil) | |
262 | ||
263 | #define CHARSET_ATTR_ID(attrs) AREF ((attrs), charset_id) | |
264 | #define CHARSET_ATTR_NAME(attrs) AREF ((attrs), charset_name) | |
265 | #define CHARSET_ATTR_PLIST(attrs) AREF ((attrs), charset_plist) | |
266 | #define CHARSET_ATTR_MAP(attrs) AREF ((attrs), charset_map) | |
267 | #define CHARSET_ATTR_DECODER(attrs) AREF ((attrs), charset_decoder) | |
268 | #define CHARSET_ATTR_ENCODER(attrs) AREF ((attrs), charset_encoder) | |
ec7dd615 KH |
269 | #define CHARSET_ATTR_SUBSET(attrs) AREF ((attrs), charset_subset) |
270 | #define CHARSET_ATTR_SUPERSET(attrs) AREF ((attrs), charset_superset) | |
3263d5a2 KH |
271 | #define CHARSET_ATTR_UNIFY_MAP(attrs) AREF ((attrs), charset_unify_map) |
272 | #define CHARSET_ATTR_DEUNIFIER(attrs) AREF ((attrs), charset_deunifier) | |
273 | ||
274 | #define CHARSET_SYMBOL_ID(symbol) \ | |
275 | CHARSET_ATTR_ID (CHARSET_SYMBOL_ATTRIBUTES (symbol)) | |
276 | ||
277 | /* Return an index to Vcharset_hash_table of the charset whose symbol | |
278 | is SYMBOL. */ | |
279 | #define CHARSET_SYMBOL_HASH_INDEX(symbol) \ | |
280 | hash_lookup (XHASH_TABLE (Vcharset_hash_table), symbol, NULL) | |
281 | ||
282 | /* Return the attribute vector of CHARSET. */ | |
283 | #define CHARSET_ATTRIBUTES(charset) \ | |
284 | (HASH_VALUE (XHASH_TABLE (Vcharset_hash_table), (charset)->hash_index)) | |
285 | ||
286 | #define CHARSET_ID(charset) ((charset)->id) | |
287 | #define CHARSET_HASH_INDEX(charset) ((charset)->hash_index) | |
288 | #define CHARSET_DIMENSION(charset) ((charset)->dimension) | |
289 | #define CHARSET_CODE_SPACE(charset) ((charset)->code_space) | |
290 | #define CHARSET_CODE_LINEAR_P(charset) ((charset)->code_linear_p) | |
291 | #define CHARSET_ISO_CHARS_96(charset) ((charset)->iso_chars_96) | |
292 | #define CHARSET_ISO_FINAL(charset) ((charset)->iso_final) | |
293 | #define CHARSET_ISO_PLANE(charset) ((charset)->iso_plane) /* NOTE(review): struct charset as declared in this file has no `iso_plane' member, so any use of this macro would fail to compile; confirm whether it is obsolete. */ | |
294 | #define CHARSET_ISO_REVISION(charset) ((charset)->iso_revision) | |
295 | #define CHARSET_EMACS_MULE_ID(charset) ((charset)->emacs_mule_id) | |
296 | #define CHARSET_ASCII_COMPATIBLE_P(charset) ((charset)->ascii_compatible_p) | |
297 | #define CHARSET_COMPACT_CODES_P(charset) ((charset)->compact_codes_p) | |
298 | #define CHARSET_METHOD(charset) ((charset)->method) | |
299 | #define CHARSET_MIN_CODE(charset) ((charset)->min_code) | |
300 | #define CHARSET_MAX_CODE(charset) ((charset)->max_code) | |
301 | #define CHARSET_INVALID_CODE(charset) ((charset)->invalid_code) | |
302 | #define CHARSET_MIN_CHAR(charset) ((charset)->min_char) | |
303 | #define CHARSET_MAX_CHAR(charset) ((charset)->max_char) | |
304 | #define CHARSET_CODE_OFFSET(charset) ((charset)->code_offset) | |
305 | #define CHARSET_UNIFIED_P(charset) ((charset)->unified_p) | |
306 | ||
307 | #define CHARSET_NAME(charset) \ | |
308 | (CHARSET_ATTR_NAME (CHARSET_ATTRIBUTES (charset))) | |
309 | #define CHARSET_MAP(charset) \ | |
310 | (CHARSET_ATTR_MAP (CHARSET_ATTRIBUTES (charset))) | |
311 | #define CHARSET_DECODER(charset) \ | |
312 | (CHARSET_ATTR_DECODER (CHARSET_ATTRIBUTES (charset))) | |
313 | #define CHARSET_ENCODER(charset) \ | |
314 | (CHARSET_ATTR_ENCODER (CHARSET_ATTRIBUTES (charset))) | |
ec7dd615 KH |
315 | #define CHARSET_SUBSET(charset) \ |
316 | (CHARSET_ATTR_SUBSET (CHARSET_ATTRIBUTES (charset))) | |
317 | #define CHARSET_SUPERSET(charset) \ | |
318 | (CHARSET_ATTR_SUPERSET (CHARSET_ATTRIBUTES (charset))) | |
3263d5a2 KH |
319 | #define CHARSET_UNIFY_MAP(charset) \ |
320 | (CHARSET_ATTR_UNIFY_MAP (CHARSET_ATTRIBUTES (charset))) | |
321 | #define CHARSET_DEUNIFIER(charset) \ | |
322 | (CHARSET_ATTR_DEUNIFIER (CHARSET_ATTRIBUTES (charset))) | |
323 | ||
324 | ||
325 | /* Nonzero iff OBJ is a valid charset symbol. */ | |
326 | #define CHARSETP(obj) (CHARSET_SYMBOL_HASH_INDEX (obj) >= 0) | |
327 | ||
328 | /* Check if X is a valid charset symbol. If not, signal an error. */ | |
329 | #define CHECK_CHARSET(x) \ | |
384107f2 | 330 | do { \ |
3263d5a2 KH |
331 | if (! SYMBOLP (x) || CHARSET_SYMBOL_HASH_INDEX (x) < 0) \ |
332 | x = wrong_type_argument (Qcharsetp, (x)); \ | |
384107f2 | 333 | } while (0) |
54e15bb9 | 334 | |
4ed46869 | 335 | |
3263d5a2 KH |
336 | /* Check if X is a valid charset symbol. If valid, set ID to the id |
337 | number of the charset, converted to a C integer with XINT. Otherwise, signal an error; in case wrong_type_argument returns a replacement value, it is stored in X and the check is repeated, so the hash index used below always belongs to the final X. X must be an lvalue and is evaluated more than once. */ | |
338 | #define CHECK_CHARSET_GET_ID(x, id) \ | |
339 | do { \ | |
340 | int idx; \ | |
341 | \ | |
342 | while (! SYMBOLP (x) || (idx = CHARSET_SYMBOL_HASH_INDEX (x)) < 0) \ | |
343 | x = wrong_type_argument (Qcharsetp, (x)); \ | |
344 | id = XINT (AREF (HASH_VALUE (XHASH_TABLE (Vcharset_hash_table), idx), \ | |
345 | charset_id)); \ | |
4ed46869 KH |
346 | } while (0) |
347 | ||
6e4dc3e1 | 348 | |
3263d5a2 KH |
349 | /* Check if X is a valid charset symbol. If valid, set ATTR to the |
350 | attr vector of the charset. Otherwise, signal an error; in case wrong_type_argument returns a replacement value, it is stored in X and the lookup is repeated, so ATTR always belongs to the final X. X must be an lvalue and is evaluated more than once. */ | |
351 | #define CHECK_CHARSET_GET_ATTR(x, attr) \ | |
352 | do { \ | |
353 | while (!SYMBOLP (x) || NILP (attr = CHARSET_SYMBOL_ATTRIBUTES (x))) \ | |
354 | x = wrong_type_argument (Qcharsetp, (x)); \ | |
6e4dc3e1 KH |
355 | } while (0) |
356 | ||
6e4dc3e1 | 357 | |
3263d5a2 KH |
358 | #define CHECK_CHARSET_GET_CHARSET(x, charset) \ |
359 | do { \ | |
360 | int id; \ | |
361 | CHECK_CHARSET_GET_ID (x, id); \ | |
362 | charset = CHARSET_FROM_ID (id); \ | |
c399b461 RS |
363 | } while (0) |
364 | ||
c399b461 | 365 | |
3263d5a2 KH |
366 | /* Lookup Vcharset_order_list and return the first charset that |
367 | contains the character C. */ | |
368 | #define CHAR_CHARSET(c) \ | |
369 | char_charset ((c), Qnil, NULL) | |
370 | ||
371 | #if 0 | |
372 | /* Char-table of charset-sets. Each element is a bool vector indexed | |
373 | by a charset ID. */ | |
374 | extern Lisp_Object Vchar_charset_set; | |
375 | ||
376 | /* Charset-bag of character C. */ | |
377 | #define CHAR_CHARSET_SET(c) \ | |
378 | CHAR_TABLE_REF (Vchar_charset_set, c) | |
379 | ||
380 | /* Check if two characters C1 and C2 belong to the same charset. */ | |
381 | #define SAME_CHARSET_P(c1, c2) \ | |
382 | intersection_p (CHAR_CHARSET_SET (c1), CHAR_CHARSET_SET (c2)) | |
383 | ||
384 | #endif | |
385 | ||
386 | ||
387 | /* Return a character corresponding to the code-point CODE of CHARSET. | |
388 | Try some optimization before calling decode_char. */ | |
389 | ||
390 | #define DECODE_CHAR(charset, code) \ | |
391 | ((ASCII_BYTE_P (code) && (charset)->ascii_compatible_p) \ | |
392 | ? (code) \ | |
393 | : ((code) < (charset)->min_code || (code) > (charset)->max_code) \ | |
394 | ? -1 \ | |
395 | : (charset)->unified_p \ | |
396 | ? decode_char ((charset), (code)) \ | |
397 | : (charset)->method == CHARSET_METHOD_OFFSET \ | |
398 | ? ((charset)->code_linear_p \ | |
399 | ? (code) - (charset)->min_code + (charset)->code_offset \ | |
400 | : decode_char ((charset), (code))) \ | |
401 | : (charset)->method == CHARSET_METHOD_MAP \ | |
402 | ? ((charset)->code_linear_p \ | |
403 | ? XINT (AREF (CHARSET_DECODER (charset), \ | |
404 | (code) - (charset)->min_code)) \ | |
405 | : decode_char ((charset), (code))) \ | |
406 | : decode_char ((charset), (code))) | |
407 | ||
408 | ||
ec7dd615 KH |
409 | extern Lisp_Object charset_work; |
410 | ||
3263d5a2 KH |
411 | /* Return a code point of CHAR in CHARSET. |
412 | Try some optimization before calling encode_char. */ | |
413 | ||
ec7dd615 KH |
414 | #define ENCODE_CHAR(charset, c) \ |
415 | ((ASCII_CHAR_P (c) && (charset)->ascii_compatible_p) \ | |
416 | ? (c) \ | |
417 | : (charset)->unified_p \ | |
418 | ? encode_char ((charset), (c)) \ | |
419 | : ((c) < (charset)->min_char || (c) > (charset)->max_char) \ | |
420 | ? (charset)->invalid_code \ | |
421 | : (charset)->method == CHARSET_METHOD_OFFSET \ | |
422 | ? ((charset)->code_linear_p \ | |
423 | ? (c) - (charset)->code_offset + (charset)->min_code \ | |
424 | : encode_char ((charset), (c))) \ | |
425 | : (charset)->method == CHARSET_METHOD_MAP \ | |
426 | ? ((charset)->compact_codes_p \ | |
427 | ? (charset_work = CHAR_TABLE_REF (CHARSET_ENCODER (charset), (c)), \ | |
428 | (NILP (charset_work) \ | |
429 | ? (charset)->invalid_code \ | |
430 | : XFASTINT (charset_work))) \ | |
431 | : encode_char ((charset), (c))) \ | |
3263d5a2 KH |
432 | : encode_char ((charset), (c))) |
433 | ||
434 | ||
435 | /* Set to 1 when a charset map is loaded to warn that a buffer text | |
436 | and a string data may be relocated. */ | |
437 | extern int charset_map_loaded; | |
438 | ||
439 | ||
440 | /* Set CHARSET to the highest-priority charset of C, and CODE to the | |
441 | code-point of C in CHARSET. */ | |
442 | #define SPLIT_CHAR(c, charset, code) \ | |
443 | ((charset) = char_charset ((c), Qnil, &(code))) | |
444 | ||
445 | ||
446 | #define ISO_MAX_DIMENSION 3 /* size of the first (dimension - 1) axis of iso_charset_table */ | |
447 | #define ISO_MAX_CHARS 2 /* size of the second (chars_96) axis of iso_charset_table */ | |
448 | #define ISO_MAX_FINAL 0x80 /* size of the final-byte axis; only 0x30..0x7F are used */ | |
449 | ||
450 | /* Mapping table from ISO2022's charset (specified by DIMENSION, | |
451 | CHARS, and FINAL_CHAR) to Emacs' charset ID. Should be accessed by | |
452 | macro ISO_CHARSET_TABLE (DIMENSION, CHARS, FINAL_CHAR). */ | |
453 | extern int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL]; | |
c399b461 | 454 | |
3263d5a2 KH |
455 | /* The charset ID of the ISO-2022 charset that has DIMENSION, CHARS_96, and FINAL |
456 | (final character). */ | |
457 | #define ISO_CHARSET_TABLE(dimension, chars_96, final) \ | |
458 | iso_charset_table[(dimension) - 1][(chars_96)][(final)] | |
6e4dc3e1 | 459 | |
3263d5a2 KH |
460 | /* Nonzero iff the charset whose fast map is FAST_MAP may contain C. For C below 0x10000 each bit covers a 128-character block; for larger C each bit covers a 4096-character block, stored from byte 64 on. FAST_MAP is parenthesized (as in CHARSET_FAST_MAP_SET) so any pointer expression may be passed. C is evaluated more than once. */ |
461 | #define CHARSET_FAST_MAP_REF(c, fast_map) \ | |
462 | ((c) < 0x10000 \ | |
463 | ? (fast_map)[(c) >> 10] & (1 << (((c) >> 7) & 7)) \ | |
464 | : (fast_map)[((c) >> 15) + 62] & (1 << (((c) >> 12) & 7))) | |
6e4dc3e1 | 465 | |
3263d5a2 | 466 | #define CHARSET_FAST_MAP_SET(c, fast_map) \ |
384107f2 | 467 | do { \ |
3263d5a2 KH |
468 | if ((c) < 0x10000) \ |
469 | (fast_map)[(c) >> 10] |= 1 << (((c) >> 7) & 7); \ | |
384107f2 | 470 | else \ |
3263d5a2 | 471 | (fast_map)[((c) >> 15) + 62] |= 1 << (((c) >> 12) & 7); \ |
384107f2 KH |
472 | } while (0) |
473 | ||
6e4dc3e1 | 474 | |
6e4dc3e1 | 475 | |
3263d5a2 | 476 | /* 1 iff CHARSET may contain the character C. */ |
ec7dd615 KH |
477 | #define CHAR_CHARSET_P(c, charset) \ |
478 | ((ASCII_CHAR_P (c) && (charset)->ascii_compatible_p) \ | |
479 | || (CHARSET_UNIFIED_P (charset) \ | |
480 | ? encode_char ((charset), (c)) != (charset)->invalid_code \ | |
481 | : (CHARSET_FAST_MAP_REF ((c), (charset)->fast_map) \ | |
482 | && ((charset)->method == CHARSET_METHOD_OFFSET \ | |
483 | ? (c) >= (charset)->min_char && (c) <= (charset)->max_char \ | |
484 | : ((charset)->method == CHARSET_METHOD_MAP \ | |
485 | && (charset)->compact_codes_p) \ | |
486 | ? ! NILP (CHAR_TABLE_REF (CHARSET_ENCODER (charset), (c))) \ | |
3263d5a2 | 487 | : encode_char ((charset), (c)) != (charset)->invalid_code)))) |
6e4dc3e1 | 488 | |
4ed46869 | 489 | |
3263d5a2 KH |
490 | extern Lisp_Object Qcharsetp; |
491 | ||
492 | extern Lisp_Object Qascii, Qunicode; | |
493 | extern int charset_ascii, charset_8_bit_control, charset_8_bit_graphic; | |
494 | extern int charset_iso_8859_1; | |
495 | extern int charset_primary; | |
e73576a3 KH |
496 | extern int charset_jisx0201_roman; |
497 | extern int charset_jisx0208_1978; | |
498 | extern int charset_jisx0208; | |
3263d5a2 KH |
499 | |
500 | extern struct charset *char_charset P_ ((int, Lisp_Object, unsigned *)); | |
501 | extern Lisp_Object charset_attributes P_ ((int)); | |
502 | ||
503 | extern int decode_char P_ ((struct charset *, unsigned)); | |
504 | extern unsigned encode_char P_ ((struct charset *, int)); | |
505 | extern int string_xstring_p P_ ((Lisp_Object)); | |
506 | ||
ec7dd615 KH |
507 | extern void map_charset_chars P_ ((void (*) (Lisp_Object, Lisp_Object), |
508 | Lisp_Object, Lisp_Object, | |
509 | struct charset *, unsigned, unsigned)); | |
510 | ||
3263d5a2 | 511 | EXFUN (Funify_charset, 2); |
c1f6608b | 512 | |
aa01a892 | 513 | #endif /* EMACS_CHARSET_H */ |