| 1 | /* Header for charset handler. |
| 2 | Copyright (C) 2001-2011 Free Software Foundation, Inc. |
| 3 | Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
| 4 | 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
| 5 | National Institute of Advanced Industrial Science and Technology (AIST) |
| 6 | Registration Number H14PRO021 |
| 7 | |
| 8 | Copyright (C) 2003 |
| 9 | National Institute of Advanced Industrial Science and Technology (AIST) |
| 10 | Registration Number H13PRO009 |
| 11 | |
| 12 | This file is part of GNU Emacs. |
| 13 | |
| 14 | GNU Emacs is free software: you can redistribute it and/or modify |
| 15 | it under the terms of the GNU General Public License as published by |
| 16 | the Free Software Foundation, either version 3 of the License, or |
| 17 | (at your option) any later version. |
| 18 | |
| 19 | GNU Emacs is distributed in the hope that it will be useful, |
| 20 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 21 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 22 | GNU General Public License for more details. |
| 23 | |
| 24 | You should have received a copy of the GNU General Public License |
| 25 | along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ |
| 26 | |
| 27 | #ifndef EMACS_CHARSET_H |
| 28 | #define EMACS_CHARSET_H |
| 29 | |
/* Positions of the arguments of Fdefine_charset_internal.  The final
   enumerator, charset_arg_max, is not an argument position; it equals
   the number of positions listed before it.  */

enum define_charset_arg_index
  {
    charset_arg_name = 0,
    charset_arg_dimension,
    charset_arg_code_space,
    charset_arg_min_code,
    charset_arg_max_code,
    charset_arg_iso_final,
    charset_arg_iso_revision,
    charset_arg_emacs_mule_id,
    charset_arg_ascii_compatible_p,
    charset_arg_supplementary_p,
    charset_arg_invalid_code,
    charset_arg_code_offset,
    charset_arg_map,
    charset_arg_subset,
    charset_arg_superset,
    charset_arg_unify_map,
    charset_arg_plist,

    /* Count of the argument positions above.  */
    charset_arg_max
  };
| 53 | |
| 54 | |
/* Slots of a charset attribute vector.  */

enum charset_attr_index
  {
    /* The charset's ID number.  */
    charset_id = 0,

    /* The charset's name, a symbol.  */
    charset_name,

    /* The charset's property list.  */
    charset_plist,

    /* For a charset whose method is `MAP': a mapping vector, or the
       name of a file that contains one.  Nil for any other method.  */
    charset_map,

    /* For a charset whose method is `MAP': a vector that translates
       code points of the charset into characters.  The vector is
       indexed by character index, which is computed from a code point
       together with the charset's code-space table.  */
    charset_decoder,

    /* For a charset whose method is `MAP': a char-table that
       translates characters of the charset into code points.  */
    charset_encoder,

    /* For a charset whose method is `SUBSET': a vector of the form

	 [ CHARSET-ID MIN-CODE MAX-CODE OFFSET ]

       where CHARSET-ID is the ID number of the parent charset,
       MIN-CODE and MAX-CODE delimit the range of characters inherited
       from the parent, and OFFSET is the integer added to a code point
       of the parent to obtain the matching code point of this
       charset.  */
    charset_subset,

    /* For a charset whose method is `SUPERSET': a list of elements of
       the form

	 (CHARSET-ID . OFFSET)

       where each CHARSET-ID is the ID number of a parent charset and
       OFFSET is the integer added to a code point of that parent to
       obtain the matching code point of this charset.  */
    charset_superset,

    /* A mapping vector, or the name of a file that contains the
       mapping, describing how characters of the charset are unified
       with Unicode.  The `charset_deunifier' slot is built from this
       information.  */
    charset_unify_map,

    /* If characters of the charset must be unified with Unicode, a
       char-table that maps a unified Unicode character code back to
       the non-unified character code in the charset.  */
    charset_deunifier,

    /* Number of slots in a charset attribute vector.  */
    charset_attr_max
  };
| 121 | |
/* How code points of a charset are converted to characters and
   back.  */

enum charset_method
  {
    /* A character code is obtained from a character index (itself
       computed from a code point) just by adding an offset.  */
    CHARSET_METHOD_OFFSET = 0,

    /* Conversion between code points and character codes goes through
       a decoder vector and an encoder char-table.  */
    CHARSET_METHOD_MAP,

    /* The charset is a subset of another charset.  */
    CHARSET_METHOD_SUBSET,

    /* The charset is a superset of other charsets.  */
    CHARSET_METHOD_SUPERSET
  };
| 142 | |
| 143 | struct charset |
| 144 | { |
| 145 | /* Index to charset_table. */ |
| 146 | int id; |
| 147 | |
| 148 | /* Index to Vcharset_hash_table. */ |
| 149 | EMACS_INT hash_index; |
| 150 | |
| 151 | /* Dimension of the charset: 1, 2, 3, or 4. */ |
| 152 | int dimension; |
| 153 | |
| 154 | /* Byte code range of each dimension. <code_space>[4N] is a mininum |
| 155 | byte code of the (N+1)th dimension, <code_space>[4N+1] is a |
| 156 | maximum byte code of the (N+1)th dimension, <code_space>[4N+2] is |
| 157 | (<code_space>[4N+1] - <code_space>[4N] + 1), <code_space>[4N+3] |
| 158 | is the number of characters contained in the first through (N+1)th |
| 159 | dimensions, except that there is no <code_space>[15]. |
| 160 | We get `char-index' of a `code-point' from this |
| 161 | information. */ |
| 162 | int code_space[15]; |
| 163 | |
| 164 | /* If B is a byte of Nth dimension of a code-point, the (N-1)th bit |
| 165 | of code_space_mask[B] is set. This array is used to quickly |
| 166 | check if a code-point is in a valid range. */ |
| 167 | unsigned char *code_space_mask; |
| 168 | |
| 169 | /* 1 if there's no gap in code-points. */ |
| 170 | int code_linear_p; |
| 171 | |
| 172 | /* If the charset is treated as 94-chars in ISO-2022, the value is 0. |
| 173 | If the charset is treated as 96-chars in ISO-2022, the value is 1. */ |
| 174 | int iso_chars_96; |
| 175 | |
| 176 | /* ISO final byte of the charset: 48..127. It may be -1 if the |
| 177 | charset doesn't conform to ISO-2022. */ |
| 178 | int iso_final; |
| 179 | |
| 180 | /* ISO revision number of the charset. */ |
| 181 | int iso_revision; |
| 182 | |
| 183 | /* If the charset is identical to what supported by Emacs 21 and the |
| 184 | priors, the identification number of the charset used in those |
| 185 | version. Otherwise, -1. */ |
| 186 | int emacs_mule_id; |
| 187 | |
| 188 | /* Nonzero if the charset is compatible with ASCII. */ |
| 189 | int ascii_compatible_p; |
| 190 | |
| 191 | /* Nonzero if the charset is supplementary. */ |
| 192 | int supplementary_p; |
| 193 | |
| 194 | /* Nonzero if all the code points are representable by Lisp_Int. */ |
| 195 | int compact_codes_p; |
| 196 | |
| 197 | /* The method for encoding/decoding characters of the charset. */ |
| 198 | enum charset_method method; |
| 199 | |
| 200 | /* Mininum and Maximum code points of the charset. */ |
| 201 | unsigned min_code, max_code; |
| 202 | |
| 203 | /* Offset value used by macros CODE_POINT_TO_INDEX and |
| 204 | INDEX_TO_CODE_POINT. . */ |
| 205 | unsigned char_index_offset; |
| 206 | |
| 207 | /* Mininum and Maximum character codes of the charset. If the |
| 208 | charset is compatible with ASCII, min_char is a minimum non-ASCII |
| 209 | character of the charset. If the method of charset is |
| 210 | CHARSET_METHOD_OFFSET, even if the charset is unified, min_char |
| 211 | and max_char doesn't change. */ |
| 212 | int min_char, max_char; |
| 213 | |
| 214 | /* The code returned by ENCODE_CHAR if a character is not encodable |
| 215 | by the charset. */ |
| 216 | unsigned invalid_code; |
| 217 | |
| 218 | /* If the method of the charset is CHARSET_METHOD_MAP, this is a |
| 219 | table of bits used to quickly and roughly guess if a character |
| 220 | belongs to the charset. |
| 221 | |
| 222 | The first 64 elements are 512 bits for characters less than |
| 223 | 0x10000. Each bit corresponds to 128-character block. The last |
| 224 | 126 elements are 1008 bits for the greater characters |
| 225 | (0x10000..0x3FFFFF). Each bit corresponds to 4096-character |
| 226 | block. |
| 227 | |
| 228 | If a bit is 1, at least one character in the corresponding block is |
| 229 | in this charset. */ |
| 230 | unsigned char fast_map[190]; |
| 231 | |
| 232 | /* Offset value to calculate a character code from code-point, and |
| 233 | visa versa. */ |
| 234 | int code_offset; |
| 235 | |
| 236 | int unified_p; |
| 237 | }; |
| 238 | |
| 239 | /* Hash table of charset symbols vs. the correponding attribute |
| 240 | vectors. */ |
| 241 | extern Lisp_Object Vcharset_hash_table; |
| 242 | |
| 243 | /* Table of struct charset. */ |
| 244 | extern struct charset *charset_table; |
| 245 | |
| 246 | #define CHARSET_FROM_ID(id) (charset_table + (id)) |
| 247 | |
| 248 | extern Lisp_Object Vcharset_ordered_list; |
| 249 | extern Lisp_Object Vcharset_non_preferred_head; |
| 250 | |
| 251 | /* Incremented everytime we change the priority of charsets. */ |
| 252 | extern unsigned short charset_ordered_list_tick; |
| 253 | |
| 254 | extern Lisp_Object Viso_2022_charset_list; |
| 255 | extern Lisp_Object Vemacs_mule_charset_list; |
| 256 | |
| 257 | extern int emacs_mule_charset[256]; |
| 258 | |
/* Macros to access information about charset.  */

/* Look SYMBOL up in Vcharset_hash_table and return the attribute
   vector stored for it; Qnil is the default handed to Fgethash.  */
#define CHARSET_SYMBOL_ATTRIBUTES(symbol) \
  (Fgethash ((symbol), Vcharset_hash_table, Qnil))

/* Fetch one slot of the attribute vector ATTRS.  The slots are
   described at enum charset_attr_index.  */
#define CHARSET_ATTR_ID(attrs) \
  AREF ((attrs), charset_id)
#define CHARSET_ATTR_NAME(attrs) \
  AREF ((attrs), charset_name)
#define CHARSET_ATTR_PLIST(attrs) \
  AREF ((attrs), charset_plist)
#define CHARSET_ATTR_MAP(attrs) \
  AREF ((attrs), charset_map)
#define CHARSET_ATTR_DECODER(attrs) \
  AREF ((attrs), charset_decoder)
#define CHARSET_ATTR_ENCODER(attrs) \
  AREF ((attrs), charset_encoder)
#define CHARSET_ATTR_SUBSET(attrs) \
  AREF ((attrs), charset_subset)
#define CHARSET_ATTR_SUPERSET(attrs) \
  AREF ((attrs), charset_superset)
#define CHARSET_ATTR_UNIFY_MAP(attrs) \
  AREF ((attrs), charset_unify_map)
#define CHARSET_ATTR_DEUNIFIER(attrs) \
  AREF ((attrs), charset_deunifier)

/* Return the charset_id slot of the attribute vector registered for
   the charset symbol SYMBOL.  */
#define CHARSET_SYMBOL_ID(symbol) \
  CHARSET_ATTR_ID (CHARSET_SYMBOL_ATTRIBUTES (symbol))
| 278 | |
/* Return the index in Vcharset_hash_table of the charset whose symbol
   is SYMBOL, as reported by hash_lookup.  CHARSETP and CHECK_CHARSET
   treat a negative result as "SYMBOL is not a charset".  */
#define CHARSET_SYMBOL_HASH_INDEX(symbol) \
  (hash_lookup (XHASH_TABLE (Vcharset_hash_table), (symbol), NULL))

/* Return the attribute vector of CHARSET (a struct charset pointer),
   i.e. the value stored in Vcharset_hash_table at its hash_index.  */
#define CHARSET_ATTRIBUTES(charset) \
  (HASH_VALUE (XHASH_TABLE (Vcharset_hash_table), \
	       (charset)->hash_index))
| 287 | |
/* Accessors for the members of a struct charset, given a pointer
   CHARSET to it.  Each one expands to the member itself, so it can
   be read or assigned.

   There is deliberately no CHARSET_ISO_PLANE: struct charset has no
   `iso_plane' member, so such an accessor could never be expanded
   without a compile error.  */
#define CHARSET_ID(charset) ((charset)->id)
#define CHARSET_HASH_INDEX(charset) ((charset)->hash_index)
#define CHARSET_DIMENSION(charset) ((charset)->dimension)
#define CHARSET_CODE_SPACE(charset) ((charset)->code_space)
#define CHARSET_CODE_LINEAR_P(charset) ((charset)->code_linear_p)
#define CHARSET_ISO_CHARS_96(charset) ((charset)->iso_chars_96)
#define CHARSET_ISO_FINAL(charset) ((charset)->iso_final)
#define CHARSET_ISO_REVISION(charset) ((charset)->iso_revision)
#define CHARSET_EMACS_MULE_ID(charset) ((charset)->emacs_mule_id)
#define CHARSET_ASCII_COMPATIBLE_P(charset) ((charset)->ascii_compatible_p)
#define CHARSET_SUPPLEMENTARY_P(charset) ((charset)->supplementary_p)
#define CHARSET_COMPACT_CODES_P(charset) ((charset)->compact_codes_p)
#define CHARSET_METHOD(charset) ((charset)->method)
#define CHARSET_MIN_CODE(charset) ((charset)->min_code)
#define CHARSET_MAX_CODE(charset) ((charset)->max_code)
#define CHARSET_INVALID_CODE(charset) ((charset)->invalid_code)
#define CHARSET_MIN_CHAR(charset) ((charset)->min_char)
#define CHARSET_MAX_CHAR(charset) ((charset)->max_char)
#define CHARSET_CODE_OFFSET(charset) ((charset)->code_offset)
#define CHARSET_UNIFIED_P(charset) ((charset)->unified_p)
| 308 | |
/* Fetch one slot of the attribute vector of CHARSET (a struct charset
   pointer).  Each macro obtains the vector with CHARSET_ATTRIBUTES and
   then applies the matching CHARSET_ATTR_* accessor.  */
#define CHARSET_NAME(charset) \
  (CHARSET_ATTR_NAME (CHARSET_ATTRIBUTES (charset)))

#define CHARSET_MAP(charset) \
  (CHARSET_ATTR_MAP (CHARSET_ATTRIBUTES (charset)))

#define CHARSET_DECODER(charset) \
  (CHARSET_ATTR_DECODER (CHARSET_ATTRIBUTES (charset)))

#define CHARSET_ENCODER(charset) \
  (CHARSET_ATTR_ENCODER (CHARSET_ATTRIBUTES (charset)))

#define CHARSET_SUBSET(charset) \
  (CHARSET_ATTR_SUBSET (CHARSET_ATTRIBUTES (charset)))

#define CHARSET_SUPERSET(charset) \
  (CHARSET_ATTR_SUPERSET (CHARSET_ATTRIBUTES (charset)))

#define CHARSET_UNIFY_MAP(charset) \
  (CHARSET_ATTR_UNIFY_MAP (CHARSET_ATTRIBUTES (charset)))

#define CHARSET_DEUNIFIER(charset) \
  (CHARSET_ATTR_DEUNIFIER (CHARSET_ATTRIBUTES (charset)))
| 325 | |
| 326 | |
/* Nonzero if OBJ is a valid charset symbol, i.e. if its index in
   Vcharset_hash_table is not negative.  */
#define CHARSETP(obj) (0 <= CHARSET_SYMBOL_HASH_INDEX (obj))

/* Signal a wrong-type-argument error (predicate Qcharsetp) unless X
   is a valid charset symbol.  X is evaluated more than once.  */
#define CHECK_CHARSET(x) \
  do { \
    if (! (SYMBOLP (x) && CHARSETP (x))) \
      wrong_type_argument (Qcharsetp, (x)); \
  } while (0)
| 336 | |
| 337 | |
/* Check that X is a valid charset symbol.  If it is, store the ID
   number of the charset in ID; otherwise signal a wrong-type-argument
   error with predicate Qcharsetp.

   X is evaluated more than once.  The temporary is an EMACS_INT, the
   same type struct charset uses for an index into
   Vcharset_hash_table, and it has a name callers are unlikely to use,
   so passing a variable called `idx' as ID works.  */
#define CHECK_CHARSET_GET_ID(x, id) \
  do { \
    EMACS_INT charset_hash_idx_; \
 \
    if (! SYMBOLP (x) \
	|| (charset_hash_idx_ = CHARSET_SYMBOL_HASH_INDEX (x)) < 0) \
      wrong_type_argument (Qcharsetp, (x)); \
    (id) = XINT (AREF (HASH_VALUE (XHASH_TABLE (Vcharset_hash_table), \
				   charset_hash_idx_), \
		       charset_id)); \
  } while (0)
| 349 | |
| 350 | |
/* Check that X is a valid charset symbol.  If it is, store the
   attribute vector of the charset in ATTR; otherwise signal a
   wrong-type-argument error with predicate Qcharsetp.  X is evaluated
   more than once.  */
#define CHECK_CHARSET_GET_ATTR(x, attr) \
  do { \
    if (! SYMBOLP (x) \
	|| NILP ((attr) = CHARSET_SYMBOL_ATTRIBUTES (x))) \
      wrong_type_argument (Qcharsetp, (x)); \
  } while (0)


/* Check that X is a valid charset symbol with CHECK_CHARSET_GET_ID,
   then store in CHARSET the pointer that CHARSET_FROM_ID returns for
   the resulting ID.  */
#define CHECK_CHARSET_GET_CHARSET(x, charset) \
  do { \
    int charset_id_tmp_; \
 \
    CHECK_CHARSET_GET_ID (x, charset_id_tmp_); \
    (charset) = CHARSET_FROM_ID (charset_id_tmp_); \
  } while (0)
| 366 | |
| 367 | |
/* Look through Vcharset_ordered_list and return the first charset
   that contains the character C.  Characters below 0x80 are answered
   directly with the charset whose ID is charset_ascii; anything else
   is passed on to char_charset.  C is evaluated more than once.  */
#define CHAR_CHARSET(c) \
  ((c) >= 0x80 \
   ? char_charset ((c), Qnil, NULL) \
   : CHARSET_FROM_ID (charset_ascii))
| 373 | |
#if 0  /* Everything down to the matching #endif is compiled out.  */

/* Char-table of charset-sets.  Each element is a bool vector that is
   indexed by charset ID.  */
extern Lisp_Object Vchar_charset_set;

/* The charset-bag recorded for the character C.  */
#define CHAR_CHARSET_SET(c) \
  CHAR_TABLE_REF (Vchar_charset_set, c)

/* Check whether the characters C1 and C2 belong to the same charset,
   by intersecting their charset-bags.  */
#define SAME_CHARSET_P(c1, c2) \
  intersection_p (CHAR_CHARSET_SET (c1), CHAR_CHARSET_SET (c2))

#endif
| 388 | |
| 389 | |
/* Return the character corresponding to the code-point CODE of
   CHARSET (a struct charset pointer), or -1 when CODE lies outside
   min_code..max_code.

   The common cases are handled inline before falling back to
   decode_char:
     - an ASCII byte in an ASCII-compatible charset is returned as is;
     - a non-unified, code-linear CHARSET_METHOD_OFFSET charset only
       needs an addition;
     - a non-unified, code-linear CHARSET_METHOD_MAP charset whose
       decoder is a vector only needs an AREF.

   Both arguments are evaluated more than once.  */

#define DECODE_CHAR(charset, code) \
  ((ASCII_BYTE_P (code) && CHARSET_ASCII_COMPATIBLE_P (charset)) \
   ? (code) \
   : ((code) < CHARSET_MIN_CODE (charset) \
      || (code) > CHARSET_MAX_CODE (charset)) \
   ? -1 \
   : CHARSET_UNIFIED_P (charset) \
   ? decode_char ((charset), (code)) \
   : CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET \
   ? (CHARSET_CODE_LINEAR_P (charset) \
      ? ((code) - CHARSET_MIN_CODE (charset) \
	 + CHARSET_CODE_OFFSET (charset)) \
      : decode_char ((charset), (code))) \
   : CHARSET_METHOD (charset) == CHARSET_METHOD_MAP \
   ? ((CHARSET_CODE_LINEAR_P (charset) \
       && VECTORP (CHARSET_DECODER (charset))) \
      ? XINT (AREF (CHARSET_DECODER (charset), \
		    (code) - CHARSET_MIN_CODE (charset))) \
      : decode_char ((charset), (code))) \
   : decode_char ((charset), (code)))
| 411 | |
| 412 | |
/* If CHARSET is a simple offset-based charset (method
   CHARSET_METHOD_OFFSET, code-linear and not unified), return its
   offset, code_offset - min_code; otherwise return -1.

   With the struct charset declared in this file, min_code is unsigned,
   so the result has an unsigned type and the -1 reads back as -1 only
   after conversion to int.  */
#define CHARSET_OFFSET(charset) \
  ((CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET \
    && CHARSET_CODE_LINEAR_P (charset) \
    && ! CHARSET_UNIFIED_P (charset)) \
   ? (charset)->code_offset - (charset)->min_code \
   : -1)
| 421 | |
| 422 | extern Lisp_Object charset_work; |
| 423 | |
| 424 | /* Return a code point of CHAR in CHARSET. |
| 425 | Try some optimization before calling encode_char. */ |
| 426 | |
| 427 | #define ENCODE_CHAR(charset, c) \ |
| 428 | ((ASCII_CHAR_P (c) && (charset)->ascii_compatible_p) \ |
| 429 | ? (c) \ |
| 430 | : ((charset)->unified_p \ |
| 431 | || (charset)->method == CHARSET_METHOD_SUBSET \ |
| 432 | || (charset)->method == CHARSET_METHOD_SUPERSET) \ |
| 433 | ? encode_char ((charset), (c)) \ |
| 434 | : ((c) < (charset)->min_char || (c) > (charset)->max_char) \ |
| 435 | ? (charset)->invalid_code \ |
| 436 | : (charset)->method == CHARSET_METHOD_OFFSET \ |
| 437 | ? ((charset)->code_linear_p \ |
| 438 | ? (c) - (charset)->code_offset + (charset)->min_code \ |
| 439 | : encode_char ((charset), (c))) \ |
| 440 | : (charset)->method == CHARSET_METHOD_MAP \ |
| 441 | ? (((charset)->compact_codes_p \ |
| 442 | && CHAR_TABLE_P (CHARSET_ENCODER (charset))) \ |
| 443 | ? (charset_work = CHAR_TABLE_REF (CHARSET_ENCODER (charset), (c)), \ |
| 444 | (NILP (charset_work) \ |
| 445 | ? (charset)->invalid_code \ |
| 446 | : XFASTINT (charset_work))) \ |
| 447 | : encode_char ((charset), (c))) \ |
| 448 | : encode_char ((charset), (c))) |
| 449 | |
| 450 | |
/* Set to 1 whenever a charset map is loaded, as a warning that buffer
   text and string data may have been relocated.  */
extern int charset_map_loaded;


/* Store in CHARSET the highest-priority charset of the character C,
   and in CODE the code-point of C in that charset.  The work is done
   by char_charset, which receives the address of CODE; the value of
   the whole expression is the charset that was stored.  */
#define SPLIT_CHAR(c, charset, code) \
  ((charset) = char_charset ((c), \
			     Qnil, \
			     &(code)))
| 460 | |
| 461 | |
/* Extents of iso_charset_table.  */
#define ISO_MAX_DIMENSION 3
#define ISO_MAX_CHARS 2
#define ISO_MAX_FINAL 0x80 /* only 0x30..0x7F are used */

/* Table that maps an ISO2022 charset, identified by its DIMENSION,
   CHARS and FINAL_CHAR, to the ID of an Emacs charset.  Access it
   through the macro ISO_CHARSET_TABLE (DIMENSION, CHARS, FINAL_CHAR)
   rather than directly.  */
extern int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];

/* The entry of iso_charset_table for the iso2022 charset that has
   DIMENSION, CHARS_96 and FINAL (the final character).  DIMENSION
   counts from 1, so it is reduced by one to form the first index.  */
#define ISO_CHARSET_TABLE(dimension, chars_96, final) \
  (iso_charset_table[(dimension) - 1][(chars_96)][(final)])
| 475 | |
/* Nonzero if the charset that owns FAST_MAP may contain the character
   C.  FAST_MAP is an array of unsigned char laid out like the
   `fast_map' member of struct charset:

     - for C below 0x10000, element C >> 10 is used and bit
       (C >> 7) & 7 stands for the 128-character block of C;
     - for larger C, element (C >> 15) + 62 is used and bit
       (C >> 12) & 7 stands for the 4096-character block of C.

   C is evaluated more than once.  FAST_MAP is parenthesized in the
   expansion, so it may be any pointer-valued expression.  */
#define CHARSET_FAST_MAP_REF(c, fast_map) \
  ((c) < 0x10000 \
   ? (fast_map)[(c) >> 10] & (1 << (((c) >> 7) & 7)) \
   : (fast_map)[((c) >> 15) + 62] & (1 << (((c) >> 12) & 7)))

/* Record in FAST_MAP that the charset contains the character C, by
   setting the bit that CHARSET_FAST_MAP_REF tests for C.  C is
   evaluated more than once.  */
#define CHARSET_FAST_MAP_SET(c, fast_map) \
  do { \
    if ((c) < 0x10000) \
      (fast_map)[(c) >> 10] |= 1 << (((c) >> 7) & 7); \
    else \
      (fast_map)[((c) >> 15) + 62] |= 1 << (((c) >> 12) & 7); \
  } while (0)
| 489 | |
| 490 | |
| 491 | |
/* 1 if CHARSET (a struct charset pointer) may contain the character
   C.

   An ASCII character in an ASCII-compatible charset is accepted at
   once.  For a unified charset, or one of method SUBSET or SUPERSET,
   the answer is whether encode_char yields something other than
   invalid_code.  Otherwise C must first pass the fast_map test, and
   then:
     - for CHARSET_METHOD_OFFSET, lie within min_char..max_char;
     - for CHARSET_METHOD_MAP with compact codes and a char-table
       encoder, have a non-nil entry in that char-table;
     - in every other case, be encodable by encode_char.

   Both arguments are evaluated more than once.  */
#define CHAR_CHARSET_P(c, charset) \
  ((ASCII_CHAR_P (c) && CHARSET_ASCII_COMPATIBLE_P (charset)) \
   || ((CHARSET_UNIFIED_P (charset) \
	|| CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET \
	|| CHARSET_METHOD (charset) == CHARSET_METHOD_SUPERSET) \
       ? (encode_char ((charset), (c)) \
	  != CHARSET_INVALID_CODE (charset)) \
       : (CHARSET_FAST_MAP_REF ((c), (charset)->fast_map) \
	  && (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET \
	      ? ((c) >= CHARSET_MIN_CHAR (charset) \
		 && (c) <= CHARSET_MAX_CHAR (charset)) \
	      : (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP \
		 && CHARSET_COMPACT_CODES_P (charset) \
		 && CHAR_TABLE_P (CHARSET_ENCODER (charset))) \
	      ? ! NILP (CHAR_TABLE_REF (CHARSET_ENCODER (charset), (c))) \
	      : (encode_char ((charset), (c)) \
		 != CHARSET_INVALID_CODE (charset))))))
| 507 | |
| 508 | \f |
/* Special macros for emacs-mule encoding.  */

/* Leading-codes that are followed by an extended leading-code.  The
   trailing note on each line is DIMENSION/COLUMN.  */
#define EMACS_MULE_LEADING_CODE_PRIVATE_11 0x9A	/* 1/1 */
#define EMACS_MULE_LEADING_CODE_PRIVATE_12 0x9B	/* 1/2 */
#define EMACS_MULE_LEADING_CODE_PRIVATE_21 0x9C	/* 2/1 */
#define EMACS_MULE_LEADING_CODE_PRIVATE_22 0x9D	/* 2/2 */
| 516 | |
| 517 | \f |
| 518 | |
| 519 | extern Lisp_Object Qcharsetp; |
| 520 | |
| 521 | extern Lisp_Object Qascii; |
| 522 | extern int charset_ascii, charset_eight_bit; |
| 523 | extern int charset_unicode; |
| 524 | extern int charset_jisx0201_roman; |
| 525 | extern int charset_jisx0208_1978; |
| 526 | extern int charset_jisx0208; |
| 527 | extern int charset_ksc5601; |
| 528 | |
| 529 | extern int charset_unibyte; |
| 530 | |
| 531 | extern struct charset *char_charset (int, Lisp_Object, unsigned *); |
| 532 | extern Lisp_Object charset_attributes (int); |
| 533 | |
| 534 | extern int maybe_unify_char (int, Lisp_Object); |
| 535 | extern int decode_char (struct charset *, unsigned); |
| 536 | extern unsigned encode_char (struct charset *, int); |
| 537 | extern int string_xstring_p (Lisp_Object); |
| 538 | |
| 539 | extern void map_charset_chars (void (*) (Lisp_Object, Lisp_Object), |
| 540 | Lisp_Object, Lisp_Object, |
| 541 | struct charset *, unsigned, unsigned); |
| 542 | |
| 543 | #endif /* EMACS_CHARSET_H */ |