| 1 | /* Header for coding system handler. |
| 2 | Copyright (C) 2001, 2002, 2003, 2004, 2005, |
| 3 | 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. |
| 4 | Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
| 5 | 2005, 2006, 2007, 2008, 2009, 2010 |
| 6 | National Institute of Advanced Industrial Science and Technology (AIST) |
| 7 | Registration Number H14PRO021 |
| 8 | Copyright (C) 2003 |
| 9 | National Institute of Advanced Industrial Science and Technology (AIST) |
| 10 | Registration Number H13PRO009 |
| 11 | |
| 12 | This file is part of GNU Emacs. |
| 13 | |
| 14 | GNU Emacs is free software: you can redistribute it and/or modify |
| 15 | it under the terms of the GNU General Public License as published by |
| 16 | the Free Software Foundation, either version 3 of the License, or |
| 17 | (at your option) any later version. |
| 18 | |
| 19 | GNU Emacs is distributed in the hope that it will be useful, |
| 20 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 21 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 22 | GNU General Public License for more details. |
| 23 | |
| 24 | You should have received a copy of the GNU General Public License |
| 25 | along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ |
| 26 | |
| 27 | #ifndef EMACS_CODING_H |
| 28 | #define EMACS_CODING_H |
| 29 | |
/* Positions of the arguments of Fdefine_coding_system_internal.
   coding_arg_max is the count of the arguments listed here; the
   type-specific argument enums in this header start numbering at
   that value.  */

enum define_coding_system_arg_index
  {
    coding_arg_name = 0,
    coding_arg_mnemonic,
    coding_arg_coding_type,
    coding_arg_charset_list,
    coding_arg_ascii_compatible_p,
    coding_arg_decode_translation_table,
    coding_arg_encode_translation_table,
    coding_arg_post_read_conversion,
    coding_arg_pre_write_conversion,
    coding_arg_default_char,
    coding_arg_for_unibyte,
    coding_arg_plist,
    coding_arg_eol_type,
    coding_arg_max
  };
| 49 | |
| 50 | enum define_coding_iso2022_arg_index |
| 51 | { |
| 52 | coding_arg_iso2022_initial = coding_arg_max, |
| 53 | coding_arg_iso2022_reg_usage, |
| 54 | coding_arg_iso2022_request, |
| 55 | coding_arg_iso2022_flags, |
| 56 | coding_arg_iso2022_max |
| 57 | }; |
| 58 | |
| 59 | enum define_coding_utf8_arg_index |
| 60 | { |
| 61 | coding_arg_utf8_bom = coding_arg_max, |
| 62 | coding_arg_utf8_max |
| 63 | }; |
| 64 | |
| 65 | enum define_coding_utf16_arg_index |
| 66 | { |
| 67 | coding_arg_utf16_bom = coding_arg_max, |
| 68 | coding_arg_utf16_endian, |
| 69 | coding_arg_utf16_max |
| 70 | }; |
| 71 | |
| 72 | enum define_coding_ccl_arg_index |
| 73 | { |
| 74 | coding_arg_ccl_decoder = coding_arg_max, |
| 75 | coding_arg_ccl_encoder, |
| 76 | coding_arg_ccl_valids, |
| 77 | coding_arg_ccl_max |
| 78 | }; |
| 79 | |
/* Hash table for all coding systems.  Keys are coding system symbols
   and values are spec vectors of the corresponding coding system.  A
   spec vector has the form [ ATTRS ALIASES EOL-TYPE ].  ATTRS is a
   vector of attributes of the coding system.  ALIASES is a list of
   aliases (symbols) of the coding system.  EOL-TYPE is `unix', `dos',
   `mac' or a vector of coding systems (symbols).  */

extern Lisp_Object Vcoding_system_hash_table;
| 88 | |
| 89 | |
/* The kinds of coding system.  coding_type_max is the number of
   kinds listed before it.  */

enum coding_system_type
  {
    coding_type_charset = 0,
    coding_type_utf_8,
    coding_type_utf_16,
    coding_type_iso_2022,
    coding_type_emacs_mule,
    coding_type_sjis,
    coding_type_ccl,
    coding_type_raw_text,
    coding_type_undecided,
    coding_type_max
  };
| 105 | |
| 106 | |
/* The end-of-line formats.  */

enum end_of_line_type
  {
    /* Line-feed only, same as Emacs' internal format.  */
    eol_lf = 0,
    /* Sequence of carriage-return and line-feed.  */
    eol_crlf,
    /* Carriage-return only.  */
    eol_cr,
    /* Accept any of the above.  Produce line-feed only.  */
    eol_any,
    /* The eol-type is not yet decided.  */
    eol_undecided,
    eol_type_max
  };
| 122 | |
/* Slot numbers of the attribute vector of a coding system.
   coding_attr_last_index is the number of slots listed before it.  */

enum coding_attr_index
  {
    coding_attr_base_name = 0,
    coding_attr_docstring,
    coding_attr_mnemonic,
    coding_attr_type,
    coding_attr_charset_list,
    coding_attr_ascii_compat,
    coding_attr_decode_tbl,
    coding_attr_encode_tbl,
    coding_attr_trans_tbl,
    coding_attr_post_read,
    coding_attr_pre_write,
    coding_attr_default_char,
    coding_attr_for_unibyte,
    coding_attr_plist,

    coding_attr_category,
    coding_attr_safe_charsets,

    /* The following are extra attributes for each type.  */
    coding_attr_charset_valids,

    coding_attr_ccl_decoder,
    coding_attr_ccl_encoder,
    coding_attr_ccl_valids,

    coding_attr_iso_initial,
    coding_attr_iso_usage,
    coding_attr_iso_request,
    coding_attr_iso_flags,

    coding_attr_utf_bom,
    coding_attr_utf_16_endian,

    coding_attr_emacs_mule_full,

    coding_attr_last_index
  };
| 164 | |
| 165 | |
/* Accessors for the individual slots of an attribute vector ATTRS.
   Each one expands to AREF on the matching coding_attr_* index; they
   are listed in the order of enum coding_attr_index.  */

#define CODING_ATTR_BASE_NAME(attrs) AREF (attrs, coding_attr_base_name)
#define CODING_ATTR_DOCSTRING(attrs) AREF (attrs, coding_attr_docstring)
#define CODING_ATTR_MNEMONIC(attrs) AREF (attrs, coding_attr_mnemonic)
#define CODING_ATTR_TYPE(attrs) AREF (attrs, coding_attr_type)
#define CODING_ATTR_CHARSET_LIST(attrs) AREF (attrs, coding_attr_charset_list)
#define CODING_ATTR_ASCII_COMPAT(attrs) AREF (attrs, coding_attr_ascii_compat)
#define CODING_ATTR_DECODE_TBL(attrs) AREF (attrs, coding_attr_decode_tbl)
#define CODING_ATTR_ENCODE_TBL(attrs) AREF (attrs, coding_attr_encode_tbl)
#define CODING_ATTR_TRANS_TBL(attrs) AREF (attrs, coding_attr_trans_tbl)
#define CODING_ATTR_POST_READ(attrs) AREF (attrs, coding_attr_post_read)
#define CODING_ATTR_PRE_WRITE(attrs) AREF (attrs, coding_attr_pre_write)
#define CODING_ATTR_DEFAULT_CHAR(attrs) AREF (attrs, coding_attr_default_char)
#define CODING_ATTR_FOR_UNIBYTE(attrs) AREF (attrs, coding_attr_for_unibyte)
#define CODING_ATTR_PLIST(attrs) AREF (attrs, coding_attr_plist)
#define CODING_ATTR_CATEGORY(attrs) AREF (attrs, coding_attr_category)
#define CODING_ATTR_SAFE_CHARSETS(attrs) AREF (attrs, coding_attr_safe_charsets)

/* NOTE(review): coding_attr_flushing is not an enumerator of enum
   coding_attr_index as declared in this header, so expanding this
   macro would not compile unless the name is provided elsewhere --
   confirm, then either drop the macro or add the slot.  */
#define CODING_ATTR_FLUSHING(attrs) AREF (attrs, coding_attr_flushing)
| 185 | |
| 186 | |
/* Return the name of a coding system specified by ID, i.e. the key
   stored at index ID of Vcoding_system_hash_table.  */
#define CODING_ID_NAME(id) \
  (HASH_KEY (XHASH_TABLE (Vcoding_system_hash_table), id))
| 190 | |
/* Return the attribute vector of a coding system specified by ID.
   This is element 0 of the spec vector stored as the hash value at
   index ID of Vcoding_system_hash_table.  */

#define CODING_ID_ATTRS(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 0))
| 195 | |
/* Return the list of aliases of a coding system specified by ID.
   This is element 1 of the spec vector stored as the hash value at
   index ID of Vcoding_system_hash_table.  */

#define CODING_ID_ALIASES(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 1))
| 200 | |
/* Return the eol-type of a coding system specified by ID.  This is
   element 2 of the spec vector stored as the hash value at index ID
   of Vcoding_system_hash_table.  */

#define CODING_ID_EOL_TYPE(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 2))
| 205 | |
| 206 | |
/* Return the spec vector of CODING_SYSTEM_SYMBOL, looked up in
   Vcoding_system_hash_table with Fgethash.  Qnil is passed as the
   third argument of Fgethash; callers in this header treat a nil
   result as "not a known coding system".  */

#define CODING_SYSTEM_SPEC(coding_system_symbol) \
  (Fgethash (coding_system_symbol, Vcoding_system_hash_table, Qnil))
| 211 | |
| 212 | |
/* Return the ID of CODING_SYSTEM_SYMBOL, i.e. the result of
   hash_lookup on Vcoding_system_hash_table.  The macros in this
   header treat a negative result as "not found".  */

#define CODING_SYSTEM_ID(coding_system_symbol) \
  hash_lookup (XHASH_TABLE (Vcoding_system_hash_table), \
               coding_system_symbol, NULL)
| 218 | |
/* Return 1 if CODING_SYSTEM_SYMBOL is a coding system, else 0.  The
   hash table is consulted first; only when the symbol has no ID and
   is non-nil is Fcoding_system_p asked.  CODING_SYSTEM_SYMBOL may be
   evaluated more than once.  */

#define CODING_SYSTEM_P(coding_system_symbol) \
  (CODING_SYSTEM_ID (coding_system_symbol) >= 0 \
   || ! (NILP (coding_system_symbol) \
         || NILP (Fcoding_system_p (coding_system_symbol))))
| 225 | |
/* Check that X is a coding system.  If X has no ID in the hash table
   and Fcheck_coding_system (X) returns nil, call wrong_type_argument
   with Qcoding_system_p.  X may be evaluated more than once.  */

#define CHECK_CODING_SYSTEM(x) \
  do { \
    if (CODING_SYSTEM_ID (x) < 0) \
      { \
        if (NILP (Fcheck_coding_system (x))) \
          wrong_type_argument (Qcoding_system_p, (x)); \
      } \
  } while (0)
| 234 | |
| 235 | |
/* Check that X is a coding system and store its spec vector in SPEC.
   When the first lookup yields nil, Fcheck_coding_system (X) is
   called and the lookup is repeated; if SPEC is still nil,
   wrong_type_argument is called with Qcoding_system_p.  X may be
   evaluated more than once.  */

#define CHECK_CODING_SYSTEM_GET_SPEC(x, spec) \
  do { \
    spec = CODING_SYSTEM_SPEC (x); \
    if (NILP (spec)) \
      { \
        Fcheck_coding_system (x); \
        spec = CODING_SYSTEM_SPEC (x); \
        if (NILP (spec)) \
          wrong_type_argument (Qcoding_system_p, (x)); \
      } \
  } while (0)
| 250 | |
| 251 | |
/* Check that X is a coding system and store its ID in ID.  When the
   first lookup yields a negative ID, Fcheck_coding_system (X) is
   called and the lookup is repeated; if ID is still negative,
   wrong_type_argument is called with Qcoding_system_p.  X may be
   evaluated more than once.  */

#define CHECK_CODING_SYSTEM_GET_ID(x, id) \
  do { \
    id = CODING_SYSTEM_ID (x); \
    if (id < 0) \
      { \
        Fcheck_coding_system (x); \
        id = CODING_SYSTEM_ID (x); \
        if (id < 0) \
          wrong_type_argument (Qcoding_system_p, (x)); \
      } \
  } while (0)
| 267 | |
| 268 | |
| 269 | /*** GENERAL section ***/ |
| 270 | |
/* Result codes of a code conversion.  */
enum coding_result_code
  {
    CODING_RESULT_SUCCESS = 0,
    CODING_RESULT_INSUFFICIENT_SRC,
    CODING_RESULT_INSUFFICIENT_DST,
    CODING_RESULT_INCONSISTENT_EOL,
    CODING_RESULT_INVALID_SRC,
    CODING_RESULT_INTERRUPT,
    CODING_RESULT_INSUFFICIENT_MEM
  };
| 282 | |
| 283 | |
/* Bits of the member `mode' of struct coding_system.  Each macro is
   a distinct single bit.  */

/* If set, recover the original CR or LF of the already decoded text
   when the decoding routine encounters an inconsistent eol format.  */
#define CODING_MODE_INHIBIT_INCONSISTENT_EOL (1 << 0)

/* If set, the decoding/encoding routines treat the current data as
   the last block of the whole text to be converted, and do the
   appropriate finishing job.  */
#define CODING_MODE_LAST_BLOCK (1 << 1)

/* If set, it means that the current source text is in a buffer which
   enables selective display.  */
#define CODING_MODE_SELECTIVE_DISPLAY (1 << 2)

/* This flag is used by the decoding/encoding routines on the fly.  If
   set, it means that right-to-left text is being processed.  */
#define CODING_MODE_DIRECTION (1 << 3)

#define CODING_MODE_FIXED_DESTINATION (1 << 4)

/* If set, it means that the encoding routines produce some safe
   ASCII characters (usually '?') for unsupported characters.  */
#define CODING_MODE_SAFE_ENCODING (1 << 5)
| 308 | |
| 309 | /* For handling composition sequence. */ |
| 310 | #include "composite.h" |
| 311 | |
/* Values of the member `state' of struct composition_status.  */
enum composition_state
  {
    COMPOSING_NO = 0,
    COMPOSING_CHAR,
    COMPOSING_RULE,
    COMPOSING_COMPONENT_CHAR,
    COMPOSING_COMPONENT_RULE
  };
| 320 | |
/* Structure for the current composition status.  It is embedded as
   the member `cmp_status' in struct iso_2022_spec and struct
   emacs_mule_spec.  */
struct composition_status
{
  enum composition_state state;
  enum composition_method method;
  int old_form;		/* 0:pre-21 form, 1:post-21 form */
  int length;		/* number of elements produced in charbuf */
  int nchars;		/* number of characters composed */
  int ncomps;		/* number of composition components */
  /* Maximum carryover is for the case of COMPOSITION_WITH_RULE_ALTCHARS.
     See the comment in coding.c.  The dimension below is the sum of
     the parts named in the per-line comments.  */
  int carryover[4				/* annotation header */
		+ MAX_COMPOSITION_COMPONENTS * 3 - 2 /* ALTs and RULEs */
		+ 2				/* intermediate -1 -1 */
		+ MAX_COMPOSITION_COMPONENTS	/* CHARs */
		];
};
| 338 | |
| 339 | |
| 340 | /* Structure of the field `spec.iso_2022' in the structure |
| 341 | `coding_system'. */ |
| 342 | struct iso_2022_spec |
| 343 | { |
| 344 | /* Bit-wise-or of CODING_ISO_FLAG_XXX. */ |
| 345 | unsigned flags; |
| 346 | |
| 347 | /* The current graphic register invoked to each graphic plane. */ |
| 348 | int current_invocation[2]; |
| 349 | |
| 350 | /* The current charset designated to each graphic register. The |
| 351 | value -1 means that not charset is designated, -2 means that |
| 352 | there was an invalid designation previously. */ |
| 353 | int current_designation[4]; |
| 354 | |
| 355 | /* Set to 1 temporarily only when graphic register 2 or 3 is invoked |
| 356 | by single-shift while encoding. */ |
| 357 | int single_shifting; |
| 358 | |
| 359 | /* Set to 1 temporarily only when processing at beginning of line. */ |
| 360 | int bol; |
| 361 | |
| 362 | /* If positive, we are now scanning CTEXT extended segment. */ |
| 363 | int ctext_extended_segment_len; |
| 364 | |
| 365 | /* If nonzero, we are now scanning embedded UTF-8 sequence. */ |
| 366 | int embedded_utf_8; |
| 367 | |
| 368 | /* The current composition. */ |
| 369 | struct composition_status cmp_status; |
| 370 | }; |
| 371 | |
/* Type of the member `spec.emacs_mule' of struct coding_system.  */
struct emacs_mule_spec
{
  /* NOTE(review): presumably mirrors the coding_attr_emacs_mule_full
     attribute -- confirm against the code that fills it in.  */
  int full_support;
  /* The current composition.  */
  struct composition_status cmp_status;
};
| 377 | |
/* Incomplete type; this header only uses pointers to it (the member
   `spec.ccl' of struct coding_system, which notes that the type is
   defined in ccl.h).  */
struct ccl_spec;
| 379 | |
/* Byte-order-mark handling modes of the UTF coding systems.  */
enum utf_bom_type
  {
    utf_detect_bom = 0,
    utf_without_bom,
    utf_with_bom
  };
| 386 | |
/* Byte orders of UTF-16.  */
enum utf_16_endian_type
  {
    utf_16_big_endian = 0,
    utf_16_little_endian
  };
| 392 | |
/* Type of the member `spec.utf_16' of struct coding_system.  */
struct utf_16_spec
{
  enum utf_bom_type bom;
  enum utf_16_endian_type endian;
  /* NOTE(review): presumably state for surrogate-pair handling;
     its exact meaning is not visible in this header -- confirm.  */
  int surrogate;
};
| 399 | |
/* Detection state passed to the `detector' function of struct
   coding_system.  */
struct coding_detection_info
{
  /* Values of these members are bitwise-OR of CATEGORY_MASK_XXXs.  */
  /* Which categories are already checked.  */
  int checked;
  /* Which categories are strongly found.  */
  int found;
  /* Which categories are rejected.  */
  int rejected;
};
| 410 | |
| 411 | |
| 412 | struct coding_system |
| 413 | { |
| 414 | /* ID number of the coding system. This is an index to |
| 415 | Vcoding_system_hash_table. This value is set by |
| 416 | setup_coding_system. At the early stage of building time, this |
| 417 | value is -1 in the array coding_categories to indicate that no |
| 418 | coding-system of that category is yet defined. */ |
| 419 | int id; |
| 420 | |
| 421 | /* Flag bits of the coding system. The meaning of each bit is common |
| 422 | to all types of coding systems. */ |
| 423 | int common_flags; |
| 424 | |
| 425 | /* Mode bits of the coding system. See the comments of the macros |
| 426 | CODING_MODE_XXX. */ |
| 427 | unsigned int mode; |
| 428 | |
| 429 | /* Detailed information specific to each type of coding system. */ |
| 430 | union |
| 431 | { |
| 432 | struct iso_2022_spec iso_2022; |
| 433 | struct ccl_spec *ccl; /* Defined in ccl.h. */ |
| 434 | struct utf_16_spec utf_16; |
| 435 | enum utf_bom_type utf_8_bom; |
| 436 | struct emacs_mule_spec emacs_mule; |
| 437 | } spec; |
| 438 | |
| 439 | int max_charset_id; |
| 440 | unsigned char *safe_charsets; |
| 441 | |
| 442 | /* The following two members specify how binary 8-bit code 128..255 |
| 443 | are represented in source and destination text respectively. 1 |
| 444 | means they are represented by 2-byte sequence, 0 means they are |
| 445 | represented by 1-byte as is (see the comment in character.h). */ |
| 446 | unsigned src_multibyte : 1; |
| 447 | unsigned dst_multibyte : 1; |
| 448 | |
| 449 | /* How may heading bytes we can skip for decoding. This is set to |
| 450 | -1 in setup_coding_system, and updated by detect_coding. So, |
| 451 | when this is equal to the byte length of the text being |
| 452 | converted, we can skip the actual conversion process. */ |
| 453 | int head_ascii; |
| 454 | |
| 455 | /* The following members are set by encoding/decoding routine. */ |
| 456 | EMACS_INT produced, produced_char, consumed, consumed_char; |
| 457 | |
| 458 | /* Number of error source data found in a decoding routine. */ |
| 459 | int errors; |
| 460 | |
| 461 | /* Store the positions of error source data. */ |
| 462 | EMACS_INT *error_positions; |
| 463 | |
| 464 | /* Finish status of code conversion. */ |
| 465 | enum coding_result_code result; |
| 466 | |
| 467 | EMACS_INT src_pos, src_pos_byte, src_chars, src_bytes; |
| 468 | Lisp_Object src_object; |
| 469 | const unsigned char *source; |
| 470 | |
| 471 | EMACS_INT dst_pos, dst_pos_byte, dst_bytes; |
| 472 | Lisp_Object dst_object; |
| 473 | unsigned char *destination; |
| 474 | |
| 475 | /* Set to 1 if the source of conversion is not in the member |
| 476 | `charbuf', but at `src_object'. */ |
| 477 | int chars_at_source; |
| 478 | |
| 479 | /* If an element is non-negative, it is a character code. |
| 480 | |
| 481 | If it is in the range -128..-1, it is a 8-bit character code |
| 482 | minus 256. |
| 483 | |
| 484 | If it is less than -128, it specifies the start of an annotation |
| 485 | chunk. The length of the chunk is -128 minus the value of the |
| 486 | element. The following elements are OFFSET, ANNOTATION-TYPE, and |
| 487 | a sequence of actual data for the annotation. OFFSET is a |
| 488 | character position offset from dst_pos or src_pos, |
| 489 | ANNOTATION-TYPE specfies the meaning of the annotation and how to |
| 490 | handle the following data.. */ |
| 491 | int *charbuf; |
| 492 | int charbuf_size, charbuf_used; |
| 493 | |
| 494 | /* Set to 1 if charbuf contains an annotation. */ |
| 495 | int annotated; |
| 496 | |
| 497 | unsigned char carryover[64]; |
| 498 | int carryover_bytes; |
| 499 | |
| 500 | int default_char; |
| 501 | |
| 502 | int (*detector) P_ ((struct coding_system *, |
| 503 | struct coding_detection_info *)); |
| 504 | void (*decoder) P_ ((struct coding_system *)); |
| 505 | int (*encoder) P_ ((struct coding_system *)); |
| 506 | }; |
| 507 | |
/* Meanings of bits in the member `common_flags' of the structure
   coding_system.  The lowest 8 bits are reserved for various kinds of
   annotations.  */
#define CODING_ANNOTATION_MASK 0x00FF
#define CODING_ANNOTATE_COMPOSITION_MASK (1 << 0)
#define CODING_ANNOTATE_DIRECTION_MASK (1 << 1)
/* NOTE(review): this value is not a single bit; it equals
   CODING_ANNOTATE_COMPOSITION_MASK | CODING_ANNOTATE_DIRECTION_MASK,
   so setting or testing it also touches those two bits.  Confirm
   whether that is intended before relying on it as a separate flag.  */
#define CODING_ANNOTATE_CHARSET_MASK 0x0003
#define CODING_FOR_UNIBYTE_MASK (1 << 8)
#define CODING_REQUIRE_FLUSHING_MASK (1 << 9)
#define CODING_REQUIRE_DECODING_MASK (1 << 10)
#define CODING_REQUIRE_ENCODING_MASK (1 << 11)
#define CODING_REQUIRE_DETECTION_MASK (1 << 12)
#define CODING_RESET_AT_BOL_MASK (1 << 13)
| 521 | |
/* Return 1 if the coding context CODING requires annotation
   handling, i.e. any bit of CODING_ANNOTATION_MASK is set in its
   `common_flags'; otherwise return 0.  The result is normalized to
   0 or 1 so that it survives being stored in a narrow integer or a
   bit-field.  */
#define CODING_REQUIRE_ANNOTATION(coding) \
  (((coding)->common_flags & CODING_ANNOTATION_MASK) != 0)
| 526 | |
/* Return 1 if the coding context CODING prefers decoding into
   unibyte, i.e. CODING_FOR_UNIBYTE_MASK is set in its
   `common_flags'; otherwise return 0.  The result is normalized to
   0 or 1 so that it survives being stored in a narrow integer or a
   bit-field.  */
#define CODING_FOR_UNIBYTE(coding) \
  (((coding)->common_flags & CODING_FOR_UNIBYTE_MASK) != 0)
| 530 | |
/* Return 1 if the coding context CODING requires specific code to be
   attached at the tail of converted text, i.e.
   CODING_REQUIRE_FLUSHING_MASK is set in its `common_flags';
   otherwise return 0.  The result is normalized to 0 or 1 so that it
   survives being stored in a narrow integer or a bit-field.  */
#define CODING_REQUIRE_FLUSHING(coding) \
  (((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK) != 0)
| 535 | |
/* Return 1 if the coding context CODING requires code conversion on
   decoding: either its destination is multibyte or
   CODING_REQUIRE_DECODING_MASK is set in `common_flags'.  Otherwise
   return 0.  */
#define CODING_REQUIRE_DECODING(coding) \
  ((coding)->dst_multibyte != 0 \
   || ((coding)->common_flags & CODING_REQUIRE_DECODING_MASK) != 0)
| 541 | |
| 542 | |
/* Return 1 if the coding context CODING requires code conversion on
   encoding: its source is multibyte, or CODING_REQUIRE_ENCODING_MASK
   is set in `common_flags', or CODING_MODE_SELECTIVE_DISPLAY is set
   in `mode'.  Otherwise return 0.
   The non-multibyte part of the condition is to support encoding of
   unibyte strings/buffers generated by string-as-unibyte or
   (set-buffer-multibyte nil) from multibyte strings/buffers.  */
#define CODING_REQUIRE_ENCODING(coding) \
  ((coding)->src_multibyte != 0 \
   || ((coding)->common_flags & CODING_REQUIRE_ENCODING_MASK) != 0 \
   || ((coding)->mode & CODING_MODE_SELECTIVE_DISPLAY) != 0)
| 552 | |
| 553 | |
/* Return 1 if the coding context CODING requires some kind of code
   detection, i.e. CODING_REQUIRE_DETECTION_MASK is set in its
   `common_flags'; otherwise return 0.  The result is normalized to
   0 or 1 so that it survives being stored in a narrow integer or a
   bit-field.  */
#define CODING_REQUIRE_DETECTION(coding) \
  (((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK) != 0)
| 558 | |
/* Return 1 if the coding context CODING requires code conversion on
   decoding or some kind of code detection, else 0.  CODING is
   expanded more than once.  */
#define CODING_MAY_REQUIRE_DECODING(coding) \
  (CODING_REQUIRE_DECODING (coding) \
   || CODING_REQUIRE_DETECTION (coding))
| 564 | |
/* Macros to decode or encode a character of JISX0208 in SJIS.

   SJIS_TO_JIS converts CODE in place from a two-byte SJIS code to the
   corresponding JIS code.  The first byte is taken from bits 8 and up
   of CODE and the second byte from its low 8 bits; the result is
   stored back with the same layout.  CODE must be a modifiable
   lvalue and is evaluated more than once.

   The temporaries carry a trailing underscore so that they cannot
   capture an argument that happens to be named s1, s2, j1 or j2.  */

#define SJIS_TO_JIS(code) \
  do { \
    int sjis_hi_ = (code) >> 8; \
    int sjis_lo_ = (code) & 0xFF; \
    int jis_hi_, jis_lo_; \
    if (sjis_lo_ >= 0x9F) \
      { \
        jis_hi_ = sjis_hi_ * 2 - (sjis_hi_ >= 0xE0 ? 0x160 : 0xE0); \
        jis_lo_ = sjis_lo_ - 0x7E; \
      } \
    else \
      { \
        jis_hi_ = sjis_hi_ * 2 - (sjis_hi_ >= 0xE0 ? 0x161 : 0xE1); \
        jis_lo_ = sjis_lo_ - (sjis_lo_ >= 0x7F ? 0x20 : 0x1F); \
      } \
    (code) = (jis_hi_ << 8) | jis_lo_; \
  } while (0)
| 584 | |
/* Convert CODE in place from a two-byte SJIS code to a JIS code,
   using a table-like mapping for first bytes 0xF0 and up.
   NOTE(review): presumably this is the mapping for the second plane
   of JIS X 0213 -- confirm against the callers.
   The first byte is taken from bits 8 and up of CODE and the second
   byte from its low 8 bits.  CODE must be a modifiable lvalue and is
   evaluated more than once.  The temporaries carry a trailing
   underscore so that they cannot capture an argument named s1, s2,
   j1 or j2.  */
#define SJIS_TO_JIS2(code) \
  do { \
    int sjis_hi_ = (code) >> 8; \
    int sjis_lo_ = (code) & 0xFF; \
    int jis_hi_, jis_lo_; \
    if (sjis_lo_ >= 0x9F) \
      { \
        jis_hi_ = (sjis_hi_ == 0xF0 ? 0x28 \
                   : sjis_hi_ == 0xF1 ? 0x24 \
                   : sjis_hi_ == 0xF2 ? 0x2C \
                   : sjis_hi_ == 0xF3 ? 0x2E \
                   : 0x6E + (sjis_hi_ - 0xF4) * 2); \
        jis_lo_ = sjis_lo_ - 0x7E; \
      } \
    else \
      { \
        jis_hi_ = (sjis_hi_ <= 0xF2 ? 0x21 + (sjis_hi_ - 0xF0) * 2 \
                   : sjis_hi_ <= 0xF4 ? 0x2D + (sjis_hi_ - 0xF3) * 2 \
                   : 0x6F + (sjis_hi_ - 0xF5) * 2); \
        jis_lo_ = sjis_lo_ - (sjis_lo_ >= 0x7F ? 0x20 : 0x1F); \
      } \
    (code) = (jis_hi_ << 8) | jis_lo_; \
  } while (0)
| 609 | |
| 610 | |
/* Convert CODE in place from a two-byte JIS code to the corresponding
   SJIS code; the inverse of SJIS_TO_JIS.  The first byte is taken
   from bits 8 and up of CODE and the second byte from its low 8
   bits.  CODE must be a modifiable lvalue and is evaluated more than
   once.  The temporaries carry a trailing underscore so that they
   cannot capture an argument named s1, s2, j1 or j2.  */
#define JIS_TO_SJIS(code) \
  do { \
    int jis_hi_ = (code) >> 8; \
    int jis_lo_ = (code) & 0xFF; \
    int sjis_hi_, sjis_lo_; \
    if (jis_hi_ & 1) \
      { \
        sjis_hi_ = jis_hi_ / 2 + (jis_hi_ < 0x5F ? 0x71 : 0xB1); \
        sjis_lo_ = jis_lo_ + (jis_lo_ >= 0x60 ? 0x20 : 0x1F); \
      } \
    else \
      { \
        sjis_hi_ = jis_hi_ / 2 + (jis_hi_ < 0x5F ? 0x70 : 0xB0); \
        sjis_lo_ = jis_lo_ + 0x7E; \
      } \
    (code) = (sjis_hi_ << 8) | sjis_lo_; \
  } while (0)
| 624 | |
/* Convert CODE in place from a two-byte JIS code to an SJIS code
   whose first byte is 0xF0 or above; the inverse of SJIS_TO_JIS2.
   NOTE(review): presumably this is the mapping for the second plane
   of JIS X 0213 -- confirm against the callers.
   The first byte is taken from bits 8 and up of CODE and the second
   byte from its low 8 bits.  CODE must be a modifiable lvalue and is
   evaluated more than once.  The temporaries carry a trailing
   underscore so that they cannot capture an argument named s1, s2,
   j1 or j2.  */
#define JIS_TO_SJIS2(code) \
  do { \
    int jis_hi_ = (code) >> 8; \
    int jis_lo_ = (code) & 0xFF; \
    int sjis_hi_, sjis_lo_; \
    if (jis_hi_ & 1) \
      { \
        sjis_hi_ = (jis_hi_ <= 0x25 ? 0xF0 + (jis_hi_ - 0x21) / 2 \
                    : jis_hi_ <= 0x2F ? 0xF3 + (jis_hi_ - 0x2D) / 2 \
                    : 0xF5 + (jis_hi_ - 0x6F) / 2); \
        sjis_lo_ = jis_lo_ + (jis_lo_ >= 0x60 ? 0x20 : 0x1F); \
      } \
    else \
      { \
        sjis_hi_ = (jis_hi_ == 0x28 ? 0xF0 \
                    : jis_hi_ == 0x24 ? 0xF1 \
                    : jis_hi_ == 0x2C ? 0xF2 \
                    : jis_hi_ == 0x2E ? 0xF3 \
                    : 0xF4 + (jis_hi_ - 0x6E) / 2); \
        sjis_lo_ = jis_lo_ + 0x7E; \
      } \
    (code) = (sjis_hi_ << 8) | sjis_lo_; \
  } while (0)
| 648 | |
/* Encode the file name NAME using the specified coding system for
   file names, if any.  Vfile_name_coding_system is used when it is
   neither nil nor the integer 0; otherwise
   Vdefault_file_name_coding_system is used under the same test; when
   neither applies, NAME is returned unchanged.  */
#define ENCODE_FILE(name) \
  ((NILP (Vfile_name_coding_system) \
    || EQ (Vfile_name_coding_system, make_number (0))) \
   ? ((NILP (Vdefault_file_name_coding_system) \
       || EQ (Vdefault_file_name_coding_system, make_number (0))) \
      ? name \
      : code_convert_string_norecord (name, \
                                      Vdefault_file_name_coding_system, 1)) \
   : code_convert_string_norecord (name, Vfile_name_coding_system, 1))
| 659 | |
| 660 | |
/* Decode the file name NAME using the specified coding system for
   file names, if any.  Vfile_name_coding_system is used when it is
   neither nil nor the integer 0; otherwise
   Vdefault_file_name_coding_system is used under the same test; when
   neither applies, NAME is returned unchanged.  */
#define DECODE_FILE(name) \
  ((NILP (Vfile_name_coding_system) \
    || EQ (Vfile_name_coding_system, make_number (0))) \
   ? ((NILP (Vdefault_file_name_coding_system) \
       || EQ (Vdefault_file_name_coding_system, make_number (0))) \
      ? name \
      : code_convert_string_norecord (name, \
                                      Vdefault_file_name_coding_system, 0)) \
   : code_convert_string_norecord (name, Vfile_name_coding_system, 0))
| 671 | |
| 672 | |
/* Encode the string STR using the specified coding system for system
   functions, if any.  STR is returned unchanged when
   Vlocale_coding_system is nil or the integer 0.  */
#define ENCODE_SYSTEM(str) \
  ((NILP (Vlocale_coding_system) \
    || EQ (Vlocale_coding_system, make_number (0))) \
   ? str \
   : code_convert_string_norecord (str, Vlocale_coding_system, 1))
| 680 | |
/* Decode the string STR using the specified coding system for system
   functions, if any.  STR is returned unchanged when
   Vlocale_coding_system is nil or the integer 0.  */
#define DECODE_SYSTEM(str) \
  ((NILP (Vlocale_coding_system) \
    || EQ (Vlocale_coding_system, make_number (0))) \
   ? str \
   : code_convert_string_norecord (str, Vlocale_coding_system, 0))
| 688 | |
/* Encode the string STR with the coding system Qutf_8.  Used by the
   gtk menu code.  Note that this encodes utf-8, not utf-8-emacs, so
   it's not a no-op.  */
#define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, 1)
| 692 | |
/* Extern declarations.  The parameter lists are wrapped in P_, and
   the definitions are not in this header.  */
extern Lisp_Object code_conversion_save P_ ((int, int));
extern int decoding_buffer_size P_ ((struct coding_system *, int));
extern int encoding_buffer_size P_ ((struct coding_system *, int));
extern void setup_coding_system P_ ((Lisp_Object, struct coding_system *));
extern Lisp_Object coding_charset_list P_ ((struct coding_system *));
extern Lisp_Object coding_system_charset_list P_ ((Lisp_Object));
extern void detect_coding P_ ((struct coding_system *));
extern Lisp_Object code_convert_region P_ ((Lisp_Object, Lisp_Object,
					    Lisp_Object, Lisp_Object,
					    int, int));
extern Lisp_Object code_convert_string P_ ((Lisp_Object, Lisp_Object,
					    Lisp_Object, int, int, int));
/* The ENCODE_xxx macros of this header pass 1 as the last argument
   and the DECODE_xxx macros pass 0.  */
extern Lisp_Object code_convert_string_norecord P_ ((Lisp_Object, Lisp_Object,
						     int));
extern Lisp_Object raw_text_coding_system P_ ((Lisp_Object));
extern Lisp_Object coding_inherit_eol_type P_ ((Lisp_Object, Lisp_Object));
extern Lisp_Object complement_process_encoding_system P_ ((Lisp_Object));

extern int decode_coding_gap P_ ((struct coding_system *,
				  EMACS_INT, EMACS_INT));
extern int encode_coding_gap P_ ((struct coding_system *,
				  EMACS_INT, EMACS_INT));
/* The backward-compatibility macros of this header call the two
   functions below as (CODING, source object, FROM, byte position of
   FROM, TO, byte position of TO, destination object).  */
extern void decode_coding_object P_ ((struct coding_system *,
				      Lisp_Object, EMACS_INT, EMACS_INT,
				      EMACS_INT, EMACS_INT, Lisp_Object));
extern void encode_coding_object P_ ((struct coding_system *,
				      Lisp_Object, EMACS_INT, EMACS_INT,
				      EMACS_INT, EMACS_INT, Lisp_Object));
| 722 | |
/* Macros for backward compatibility.  */

/* Decode the text between character positions FROM and TO with
   CODING by calling decode_coding_object; the current buffer is
   passed as both the source and the destination object.  FROM and TO
   are each expanded twice.  */
#define decode_coding_region(coding, from, to) \
  decode_coding_object (coding, Fcurrent_buffer (), \
			from, CHAR_TO_BYTE (from), \
			to, CHAR_TO_BYTE (to), Fcurrent_buffer ())
| 729 | |
| 730 | |
/* Encode the text between character positions FROM and TO with
   CODING by calling encode_coding_object; the current buffer is
   passed as both the source and the destination object.  FROM and TO
   are each expanded twice.  */
#define encode_coding_region(coding, from, to) \
  encode_coding_object (coding, Fcurrent_buffer (), \
			from, CHAR_TO_BYTE (from), \
			to, CHAR_TO_BYTE (to), Fcurrent_buffer ())
| 735 | |
| 736 | |
/* Decode the whole of STRING with CODING by calling
   decode_coding_object with Qt as the destination object.  NOCOPY is
   accepted but not used by the expansion.  STRING is expanded more
   than once.  */
#define decode_coding_string(coding, string, nocopy) \
  decode_coding_object (coding, string, 0, 0, SCHARS (string), \
			SBYTES (string), Qt)
| 740 | |
/* Encode the whole of STRING with CODING.  A string that is not
   multibyte is returned as is; otherwise encode_coding_object is
   called with Qt as the destination object and the value is
   CODING's `dst_object'.  NOCOPY is accepted but not used by the
   expansion.  STRING and CODING are expanded more than once.  */
#define encode_coding_string(coding, string, nocopy) \
  (! STRING_MULTIBYTE(string) \
   ? (string) \
   : (encode_coding_object (coding, string, 0, 0, SCHARS (string), \
			    SBYTES (string), Qt), \
      (coding)->dst_object))
| 746 | |
| 747 | |
/* Decode BYTES bytes at the C pointer SRC with CODING into
   DST_OBJECT.  The `source', `src_chars' and `src_bytes' members of
   CODING are set first, then decode_coding_object is called with Qnil
   as the source object.  BYTES and CODING are expanded more than
   once.  */
#define decode_coding_c_string(coding, src, bytes, dst_object) \
  do { \
    (coding)->source = (src); \
    (coding)->src_chars = (coding)->src_bytes = (bytes); \
    decode_coding_object ((coding), Qnil, 0, 0, (bytes), (bytes), \
			  (dst_object)); \
  } while (0)
| 755 | |
| 756 | |
| 757 | extern Lisp_Object preferred_coding_system P_ (()); |
| 758 | |
| 759 | |
| 760 | extern Lisp_Object Qutf_8, Qutf_8_emacs; |
| 761 | |
| 762 | extern Lisp_Object Qcoding_system, Qeol_type, Qcoding_category_index; |
| 763 | extern Lisp_Object Qcoding_system_p; |
| 764 | extern Lisp_Object Qraw_text, Qemacs_mule, Qno_conversion, Qundecided; |
| 765 | extern Lisp_Object Qiso_2022; |
| 766 | extern Lisp_Object Qbuffer_file_coding_system; |
| 767 | |
| 768 | extern Lisp_Object Qunix, Qdos, Qmac; |
| 769 | |
| 770 | extern Lisp_Object Qtranslation_table; |
| 771 | extern Lisp_Object Qtranslation_table_id; |
| 772 | |
| 773 | /* Mnemonic strings to indicate each type of end-of-line. */ |
| 774 | extern Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac; |
| 775 | /* Mnemonic string to indicate type of end-of-line is not yet decided. */ |
| 776 | extern Lisp_Object eol_mnemonic_undecided; |
| 777 | |
#ifdef emacs
/* Declarations that are visible only when `emacs' is defined.  */
extern Lisp_Object Qfile_coding_system;
extern Lisp_Object Qcall_process;
extern Lisp_Object Qcall_process_region;
extern Lisp_Object Qstart_process;
extern Lisp_Object Qopen_network_stream;
extern Lisp_Object Qwrite_region;

extern char *emacs_strerror P_ ((int));

/* Coding-system for reading files and receiving data from process.  */
extern Lisp_Object Vcoding_system_for_read;
/* Coding-system for writing files and sending data to process.  */
extern Lisp_Object Vcoding_system_for_write;
/* Coding-system actually used in the latest I/O.  */
extern Lisp_Object Vlast_coding_system_used;
/* Coding-system to use with system messages (e.g. strerror).  */
extern Lisp_Object Vlocale_coding_system;

/* If non-zero, process buffer inherits the coding system used to decode
   the subprocess output.  */
extern int inherit_process_coding_system;

/* Coding system to be used to encode text for terminal display when
   terminal coding system is nil.  */
extern struct coding_system safe_terminal_coding;

/* Default coding systems used for process I/O.  */
extern Lisp_Object Vdefault_process_coding_system;

/* Function to call to force a user to select a proper coding
   system.  */
extern Lisp_Object Vselect_safe_coding_system_function;

/* If nonzero, on writing a file, Vselect_safe_coding_system_function
   is called even if Vcoding_system_for_write is non-nil.  */
extern int coding_system_require_warning;

/* Coding system for file names, or nil if none.  */
extern Lisp_Object Vfile_name_coding_system;

/* Coding system for file names used only when
   Vfile_name_coding_system is nil.  */
extern Lisp_Object Vdefault_file_name_coding_system;

#endif
| 822 | |
/* Error signaled when there's a problem with detecting coding system.  */
extern Lisp_Object Qcoding_system_error;

/* NOTE(review): presumably a table indexed by a leading byte giving
   the byte length of a character in the emacs-mule representation --
   confirm against the definition.  */
extern char emacs_mule_bytes[256];
extern int emacs_mule_string_char P_ ((unsigned char *));
| 828 | |
| 829 | #endif /* EMACS_CODING_H */ |
| 830 | |
| 831 | /* arch-tag: 2bc3b4fa-6870-4f64-8135-b962b2d290e4 |
| 832 | (do not change this comment) */ |