Commit | Line | Data |
---|---|---|
4ed46869 | 1 | /* Header for coding system handler. |
429ab54e | 2 | Copyright (C) 2001, 2002, 2003, 2004, 2005, |
8cabe764 | 3 | 2006, 2007, 2008 Free Software Foundation, Inc. |
7976eda0 | 4 | Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
8cabe764 | 5 | 2005, 2006, 2007, 2008 |
ce03bf76 KH |
6 | National Institute of Advanced Industrial Science and Technology (AIST) |
7 | Registration Number H14PRO021 | |
8f924df7 | 8 | Copyright (C) 2003 |
df7492f9 KH |
9 | National Institute of Advanced Industrial Science and Technology (AIST) |
10 | Registration Number H13PRO009 | |
4ed46869 | 11 | |
369314dc KH |
12 | This file is part of GNU Emacs. |
13 | ||
14 | GNU Emacs is free software; you can redistribute it and/or modify | |
15 | it under the terms of the GNU General Public License as published by | |
1427aa65 | 16 | the Free Software Foundation; either version 3, or (at your option) |
369314dc | 17 | any later version. |
4ed46869 | 18 | |
369314dc KH |
19 | GNU Emacs is distributed in the hope that it will be useful, |
20 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
21 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
22 | GNU General Public License for more details. | |
4ed46869 | 23 | |
369314dc KH |
24 | You should have received a copy of the GNU General Public License |
25 | along with GNU Emacs; see the file COPYING. If not, write to | |
4fc5845f LK |
26 | the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
27 | Boston, MA 02110-1301, USA. */ | |
4ed46869 | 28 | |
6f776e81 KH |
29 | #ifndef EMACS_CODING_H |
30 | #define EMACS_CODING_H | |
4ed46869 | 31 | |
df7492f9 | 32 | /* Index to arguments of Fdefine_coding_system_internal. */ |
4ed46869 | 33 | |
df7492f9 KH |
34 | enum define_coding_system_arg_index |
35 | { | |
36 | coding_arg_name, | |
37 | coding_arg_mnemonic, | |
38 | coding_arg_coding_type, | |
39 | coding_arg_charset_list, | |
40 | coding_arg_ascii_compatible_p, | |
41 | coding_arg_decode_translation_table, | |
42 | coding_arg_encode_translation_table, | |
43 | coding_arg_post_read_conversion, | |
44 | coding_arg_pre_write_conversion, | |
45 | coding_arg_default_char, | |
8f924df7 | 46 | coding_arg_for_unibyte, |
df7492f9 KH |
47 | coding_arg_plist, |
48 | coding_arg_eol_type, | |
49 | coding_arg_max | |
50 | }; | |
4ed46869 | 51 | |
df7492f9 | 52 | enum define_coding_iso2022_arg_index |
4ed46869 | 53 | { |
df7492f9 KH |
54 | coding_arg_iso2022_initial = coding_arg_max, |
55 | coding_arg_iso2022_reg_usage, | |
56 | coding_arg_iso2022_request, | |
57 | coding_arg_iso2022_flags, | |
58 | coding_arg_iso2022_max | |
4ed46869 KH |
59 | }; |
60 | ||
df7492f9 | 61 | enum define_coding_utf16_arg_index |
4ed46869 | 62 | { |
df7492f9 KH |
63 | coding_arg_utf16_bom = coding_arg_max, |
64 | coding_arg_utf16_endian, | |
65 | coding_arg_utf16_max | |
4ed46869 KH |
66 | }; |
67 | ||
df7492f9 KH |
68 | enum define_coding_ccl_arg_index |
69 | { | |
35d47d18 | 70 | coding_arg_ccl_decoder = coding_arg_max, |
df7492f9 KH |
71 | coding_arg_ccl_encoder, |
72 | coding_arg_ccl_valids, | |
73 | coding_arg_ccl_max | |
74 | }; | |
4ed46869 | 75 | |
933373ed KH |
76 | /* Hash table for all coding systems. Keys are coding system symbols |
77 | and values are spec vectors of the corresponding coding system. A | |
78 | spec vector has the form [ ATTRS ALIASES EOL-TYPE ]. ATTRS is a | |
79 | vector of attributes of the coding system. ALIASES is a list of | |
80 | aliases (symbols) of the coding system. EOL-TYPE is `unix', `dos', | |
81 | `mac' or a vector of coding systems (symbols). */ | |
82 | ||
df7492f9 | 83 | extern Lisp_Object Vcoding_system_hash_table; |
4ed46869 | 84 | |
933373ed | 85 | |
df7492f9 | 86 | /* Enumeration of coding system type. */ |
4ed46869 | 87 | |
df7492f9 KH |
88 | enum coding_system_type |
89 | { | |
90 | coding_type_charset, | |
91 | coding_type_utf_8, | |
92 | coding_type_utf_16, | |
93 | coding_type_iso_2022, | |
94 | coding_type_emacs_mule, | |
95 | coding_type_sjis, | |
96 | coding_type_ccl, | |
97 | coding_type_raw_text, | |
98 | coding_type_undecided, | |
99 | coding_type_max | |
100 | }; | |
4ed46869 | 101 | |
4ed46869 | 102 | |
df7492f9 | 103 | /* Enumeration of end-of-line format type. */ |
4ed46869 | 104 | |
df7492f9 KH |
105 | enum end_of_line_type |
106 | { | |
107 | eol_lf, /* Line-feed only, same as Emacs' internal | |
108 | format. */ | |
109 | eol_crlf, /* Sequence of carriage-return and | |
110 | line-feed. */ | |
111 | eol_cr, /* Carriage-return only. */ | |
112 | eol_any, /* Accept any of above. Produce line-feed | |
113 | only. */ | |
114 | eol_undecided, /* This value is used to denote that the | |
115 | eol-type is not yet decided. */ | |
116 | eol_type_max | |
117 | }; | |
4ed46869 | 118 | |
df7492f9 | 119 | /* Enumeration of index to an attribute vector of a coding system. */ |
4ed46869 | 120 | |
df7492f9 KH |
121 | enum coding_attr_index |
122 | { | |
123 | coding_attr_base_name, | |
124 | coding_attr_docstring, | |
125 | coding_attr_mnemonic, | |
126 | coding_attr_type, | |
127 | coding_attr_charset_list, | |
128 | coding_attr_ascii_compat, | |
129 | coding_attr_decode_tbl, | |
130 | coding_attr_encode_tbl, | |
404202e7 | 131 | coding_attr_trans_tbl, |
df7492f9 KH |
132 | coding_attr_post_read, |
133 | coding_attr_pre_write, | |
134 | coding_attr_default_char, | |
8f924df7 | 135 | coding_attr_for_unibyte, |
df7492f9 KH |
136 | coding_attr_plist, |
137 | ||
138 | coding_attr_category, | |
139 | coding_attr_safe_charsets, | |
140 | ||
141 | /* The following are extra attributes for each type. */ | |
142 | coding_attr_charset_valids, | |
143 | ||
144 | coding_attr_ccl_decoder, | |
145 | coding_attr_ccl_encoder, | |
146 | coding_attr_ccl_valids, | |
147 | ||
148 | coding_attr_iso_initial, | |
149 | coding_attr_iso_usage, | |
150 | coding_attr_iso_request, | |
151 | coding_attr_iso_flags, | |
152 | ||
153 | coding_attr_utf_16_bom, | |
154 | coding_attr_utf_16_endian, | |
155 | ||
156 | coding_attr_emacs_mule_full, | |
157 | ||
158 | coding_attr_last_index | |
159 | }; | |
4ed46869 | 160 | |
4ed46869 | 161 | |
933373ed KH |
162 | /* Macros to access an element of an attribute vector. */ |
163 | ||
df7492f9 KH |
164 | #define CODING_ATTR_BASE_NAME(attrs) AREF (attrs, coding_attr_base_name) |
165 | #define CODING_ATTR_TYPE(attrs) AREF (attrs, coding_attr_type) | |
166 | #define CODING_ATTR_CHARSET_LIST(attrs) AREF (attrs, coding_attr_charset_list) | |
167 | #define CODING_ATTR_MNEMONIC(attrs) AREF (attrs, coding_attr_mnemonic) | |
168 | #define CODING_ATTR_DOCSTRING(attrs) AREF (attrs, coding_attr_docstring) | |
169 | #define CODING_ATTR_ASCII_COMPAT(attrs) AREF (attrs, coding_attr_ascii_compat) | |
170 | #define CODING_ATTR_DECODE_TBL(attrs) AREF (attrs, coding_attr_decode_tbl) | |
171 | #define CODING_ATTR_ENCODE_TBL(attrs) AREF (attrs, coding_attr_encode_tbl) | |
404202e7 | 172 | #define CODING_ATTR_TRANS_TBL(attrs) AREF (attrs, coding_attr_trans_tbl) |
df7492f9 KH |
173 | #define CODING_ATTR_POST_READ(attrs) AREF (attrs, coding_attr_post_read) |
174 | #define CODING_ATTR_PRE_WRITE(attrs) AREF (attrs, coding_attr_pre_write) | |
175 | #define CODING_ATTR_DEFAULT_CHAR(attrs) AREF (attrs, coding_attr_default_char) | |
8f924df7 | 176 | #define CODING_ATTR_FOR_UNIBYTE(attrs) AREF (attrs, coding_attr_for_unibyte) |
df7492f9 KH |
177 | #define CODING_ATTR_FLUSHING(attrs) AREF (attrs, coding_attr_flushing) |
178 | #define CODING_ATTR_PLIST(attrs) AREF (attrs, coding_attr_plist) | |
179 | #define CODING_ATTR_CATEGORY(attrs) AREF (attrs, coding_attr_category) | |
180 | #define CODING_ATTR_SAFE_CHARSETS(attrs)AREF (attrs, coding_attr_safe_charsets) | |
8ddb35b2 | 181 | |
8ddb35b2 | 182 | |
933373ed KH |
183 | /* Return the name of a coding system specified by ID. */ |
184 | #define CODING_ID_NAME(id) \ | |
185 | (HASH_KEY (XHASH_TABLE (Vcoding_system_hash_table), id)) | |
186 | ||
187 | /* Return the attribute vector of a coding system specified by ID. */ | |
188 | ||
df7492f9 KH |
189 | #define CODING_ID_ATTRS(id) \ |
190 | (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 0)) | |
fbaa2ed9 | 191 | |
933373ed KH |
192 | /* Return the list of aliases of a coding system specified by ID. */ |
193 | ||
df7492f9 KH |
194 | #define CODING_ID_ALIASES(id) \ |
195 | (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 1)) | |
c0c69d45 | 196 | |
933373ed KH |
197 | /* Return the eol-type of a coding system specified by ID. */ |
198 | ||
df7492f9 KH |
199 | #define CODING_ID_EOL_TYPE(id) \ |
200 | (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 2)) | |
658cc252 | 201 | |
933373ed KH |
202 | |
203 | /* Return the spec vector of CODING_SYSTEM_SYMBOL. */ | |
fbaa2ed9 | 204 | |
df7492f9 KH |
205 | #define CODING_SYSTEM_SPEC(coding_system_symbol) \ |
206 | (Fgethash (coding_system_symbol, Vcoding_system_hash_table, Qnil)) | |
4ed46869 | 207 | |
933373ed KH |
208 | |
209 | /* Return the ID of CODING_SYSTEM_SYMBOL. */ | |
210 | ||
df7492f9 KH |
211 | #define CODING_SYSTEM_ID(coding_system_symbol) \ |
212 | hash_lookup (XHASH_TABLE (Vcoding_system_hash_table), \ | |
213 | coding_system_symbol, NULL) | |
4ed46869 | 214 | |
b2e6b10f | 215 | /* Return 1 if CODING_SYSTEM_SYMBOL is a coding system. */ |
933373ed | 216 | |
7c00e33d KH |
217 | #define CODING_SYSTEM_P(coding_system_symbol) \ |
218 | (CODING_SYSTEM_ID (coding_system_symbol) >= 0 \ | |
219 | || (! NILP (coding_system_symbol) \ | |
220 | && ! NILP (Fcoding_system_p (coding_system_symbol)))) | |
4ed46869 | 221 | |
933373ed KH |
222 | /* Check if X is a coding system or not. */ |
223 | ||
8f924df7 | 224 | #define CHECK_CODING_SYSTEM(x) \ |
df7492f9 | 225 | do { \ |
7c00e33d KH |
226 | if (CODING_SYSTEM_ID (x) < 0 \ |
227 | && NILP (Fcheck_coding_system (x))) \ | |
8f924df7 | 228 | wrong_type_argument (Qcoding_system_p, (x)); \ |
df7492f9 | 229 | } while (0) |
658cc252 | 230 | |
4ed46869 | 231 | |
933373ed KH |
232 | /* Check if X is a coding system or not. If it is, set SPEC to the |
233 | spec vector of the coding system. */ | |
234 | ||
df7492f9 KH |
235 | #define CHECK_CODING_SYSTEM_GET_SPEC(x, spec) \ |
236 | do { \ | |
237 | spec = CODING_SYSTEM_SPEC (x); \ | |
7c00e33d KH |
238 | if (NILP (spec)) \ |
239 | { \ | |
240 | Fcheck_coding_system (x); \ | |
241 | spec = CODING_SYSTEM_SPEC (x); \ | |
242 | } \ | |
df7492f9 KH |
243 | if (NILP (spec)) \ |
244 | x = wrong_type_argument (Qcoding_system_p, (x)); \ | |
245 | } while (0) | |
e6de76f8 | 246 | |
8ddb35b2 | 247 | |
933373ed KH |
248 | /* Check if X is a coding system or not. If it is, set ID to the |
249 | ID of the coding system. */ | |
250 | ||
df7492f9 KH |
251 | #define CHECK_CODING_SYSTEM_GET_ID(x, id) \ |
252 | do \ | |
253 | { \ | |
254 | id = CODING_SYSTEM_ID (x); \ | |
7c00e33d KH |
255 | if (id < 0) \ |
256 | { \ | |
257 | Fcheck_coding_system (x); \ | |
258 | id = CODING_SYSTEM_ID (x); \ | |
259 | } \ | |
df7492f9 KH |
260 | if (id < 0) \ |
261 | x = wrong_type_argument (Qcoding_system_p, (x)); \ | |
262 | } while (0) | |
4ed46869 | 263 | |
4ed46869 KH |
264 | |
265 | /*** GENERAL section ***/ | |
266 | ||
df7492f9 KH |
267 | /* Enumeration of result code of code conversion. */ |
268 | enum coding_result_code | |
4ed46869 | 269 | { |
df7492f9 KH |
270 | CODING_RESULT_SUCCESS, |
271 | CODING_RESULT_INSUFFICIENT_SRC, | |
272 | CODING_RESULT_INSUFFICIENT_DST, | |
273 | CODING_RESULT_INCONSISTENT_EOL, | |
63e11478 | 274 | CODING_RESULT_INVALID_SRC, |
df7492f9 KH |
275 | CODING_RESULT_INTERRUPT, |
276 | CODING_RESULT_INSUFFICIENT_MEM | |
4ed46869 KH |
277 | }; |
278 | ||
658cc252 | 279 | |
3b2d77fe | 280 | /* Macros used for the member `mode' of the struct coding_system. */ |
658cc252 KH |
281 | |
282 | /* If set, recover the original CR or LF of the already decoded text | |
283 | when the decoding routine encounters an inconsistent eol format. */ | |
284 | #define CODING_MODE_INHIBIT_INCONSISTENT_EOL 0x01 | |
285 | ||
286 | /* If set, the decoding/encoding routines treat the current data as | |
5998373a | 287 | the last block of the whole text to be converted, and do the |
55496054 | 288 | appropriate finishing job. */ |
658cc252 KH |
289 | #define CODING_MODE_LAST_BLOCK 0x02 |
290 | ||
291 | /* If set, it means that the current source text is in a buffer which | |
292 | enables selective display. */ | |
293 | #define CODING_MODE_SELECTIVE_DISPLAY 0x04 | |
294 | ||
295 | /* This flag is used by the decoding/encoding routines on the fly. If | |
296 | set, it means that right-to-left text is being processed. */ | |
297 | #define CODING_MODE_DIRECTION 0x08 | |
298 | ||
df7492f9 KH |
299 | #define CODING_MODE_FIXED_DESTINATION 0x10 |
300 | ||
933373ed KH |
301 | /* If set, it means that the encoding routines produce some safe |
302 | ASCII characters (usually '?') for unsupported characters. */ | |
df7492f9 KH |
303 | #define CODING_MODE_SAFE_ENCODING 0x20 |
304 | ||
305 | /* Structure of the field `spec.iso_2022' in the structure | |
306 | `coding_system'. */ | |
307 | struct iso_2022_spec | |
4ed46869 | 308 | { |
2ec49574 | 309 | /* Bit-wise-or of CODING_ISO_FLAG_XXX. */ |
df7492f9 | 310 | unsigned flags; |
4ed46869 | 311 | |
df7492f9 KH |
312 | /* The current graphic register invoked to each graphic plane. */ |
313 | int current_invocation[2]; | |
658cc252 | 314 | |
df7492f9 KH |
315 | /* The current charset designated to each graphic register. The |
316 | value -1 means that no charset is designated, -2 means that | |
317 | there was an invalid designation previously. */ | |
318 | int current_designation[4]; | |
4ed46869 | 319 | |
df7492f9 KH |
320 | /* Set to 1 temporarily only when graphic register 2 or 3 is invoked |
321 | by single-shift while encoding. */ | |
322 | int single_shifting; | |
4ed46869 | 323 | |
df7492f9 KH |
324 | /* Set to 1 temporarily only when processing at beginning of line. */ |
325 | int bol; | |
326 | }; | |
4ed46869 | 327 | |
df7492f9 | 328 | struct ccl_spec; |
4ed46869 | 329 | |
df7492f9 KH |
330 | enum utf_16_bom_type |
331 | { | |
332 | utf_16_detect_bom, | |
333 | utf_16_without_bom, | |
334 | utf_16_with_bom | |
335 | }; | |
279d9f7b | 336 | |
df7492f9 KH |
337 | enum utf_16_endian_type |
338 | { | |
339 | utf_16_big_endian, | |
340 | utf_16_little_endian | |
341 | }; | |
279d9f7b | 342 | |
df7492f9 KH |
343 | struct utf_16_spec |
344 | { | |
345 | enum utf_16_bom_type bom; | |
346 | enum utf_16_endian_type endian; | |
347 | int surrogate; | |
348 | }; | |
279d9f7b | 349 | |
4fecac5c KH |
350 | struct coding_detection_info |
351 | { | |
352 | /* Values of these members are bitwise-OR of CATEGORY_MASK_XXXs. */ | |
353 | /* Which categories are already checked. */ | |
354 | int checked; | |
355 | /* Which categories are strongly found. */ | |
356 | int found; | |
357 | /* Which categories are rejected. */ | |
358 | int rejected; | |
359 | }; | |
279d9f7b | 360 | |
279d9f7b | 361 | |
df7492f9 KH |
362 | struct coding_system |
363 | { | |
364 | /* ID number of the coding system. This is an index to | |
365 | Vcoding_system_hash_table. This value is set by | |
366 | setup_coding_system. At the early stage of building time, this | |
367 | value is -1 in the array coding_categories to indicate that no | |
368 | coding-system of that category is yet defined. */ | |
369 | int id; | |
370 | ||
371 | /* Flag bits of the coding system. The meaning of each bit is common | |
372 | to all types of coding systems. */ | |
373 | int common_flags; | |
374 | ||
375 | /* Mode bits of the coding system. See the comments of the macros | |
376 | CODING_MODE_XXX. */ | |
377 | unsigned int mode; | |
450c60a5 | 378 | |
4ed46869 | 379 | /* Detailed information specific to each type of coding system. */ |
df7492f9 | 380 | union |
4ed46869 | 381 | { |
df7492f9 KH |
382 | struct iso_2022_spec iso_2022; |
383 | struct ccl_spec *ccl; /* Defined in ccl.h. */ | |
384 | struct utf_16_spec utf_16; | |
385 | int emacs_mule_full_support; | |
4ed46869 KH |
386 | } spec; |
387 | ||
df7492f9 KH |
388 | int max_charset_id; |
389 | char *safe_charsets; | |
658cc252 | 390 | |
df7492f9 KH |
391 | /* The following two members specify how binary 8-bit code 128..255 |
392 | are represented in source and destination text respectively. 1 | |
393 | means they are represented by 2-byte sequence, 0 means they are | |
394 | represented by 1-byte as is (see the comment in character.h). */ | |
811ea086 KH |
395 | unsigned src_multibyte : 1; |
396 | unsigned dst_multibyte : 1; | |
397 | ||
a137bb00 KH |
398 | /* How many heading bytes we can skip for decoding. This is set to |
399 | -1 in setup_coding_system, and updated by detect_coding. So, | |
400 | when this is equal to the byte length of the text being | |
401 | converted, we can skip the actual conversion process. */ | |
df7492f9 | 402 | int head_ascii; |
658cc252 KH |
403 | |
404 | /* The following members are set by encoding/decoding routine. */ | |
df7492f9 | 405 | EMACS_INT produced, produced_char, consumed, consumed_char; |
658cc252 | 406 | |
811ea086 KH |
407 | /* Number of error source data found in a decoding routine. */ |
408 | int errors; | |
409 | ||
df7492f9 KH |
410 | /* Store the positions of error source data. */ |
411 | EMACS_INT *error_positions; | |
e6a9a0bc | 412 | |
df7492f9 KH |
413 | /* Finish status of code conversion. */ |
414 | enum coding_result_code result; | |
6041c9ce | 415 | |
df7492f9 KH |
416 | EMACS_INT src_pos, src_pos_byte, src_chars, src_bytes; |
417 | Lisp_Object src_object; | |
8f924df7 | 418 | const unsigned char *source; |
4ed46869 | 419 | |
df7492f9 KH |
420 | EMACS_INT dst_pos, dst_pos_byte, dst_bytes; |
421 | Lisp_Object dst_object; | |
422 | unsigned char *destination; | |
4ed46869 | 423 | |
b2e6b10f | 424 | /* Set to 1 if the source of conversion is not in the member |
933373ed | 425 | `charbuf', but at `src_object'. */ |
df7492f9 | 426 | int chars_at_source; |
4ed46869 | 427 | |
df7492f9 KH |
428 | /* If an element is non-negative, it is a character code. |
429 | ||
430 | If it is in the range -128..-1, it is a 8-bit character code | |
431 | minus 256. | |
432 | ||
433 | If it is less than -128, it specifies the start of an annotation | |
434 | chunk. The length of the chunk is -128 minus the value of the | |
435 | element. The following elements are OFFSET, ANNOTATION-TYPE, and | |
436 | a sequence of actual data for the annotation. OFFSET is a | |
437 | character position offset from dst_pos or src_pos, | |
438 | ANNOTATION-TYPE specifies the meaning of the annotation and how to | |
439 | handle the following data. */ | |
440 | int *charbuf; | |
441 | int charbuf_size, charbuf_used; | |
442 | ||
443 | /* Set to 1 if charbuf contains an annotation. */ | |
444 | int annotated; | |
4ed46869 | 445 | |
df7492f9 KH |
446 | unsigned char carryover[64]; |
447 | int carryover_bytes; | |
a5ee738b | 448 | |
df7492f9 KH |
449 | int default_char; |
450 | ||
4fecac5c KH |
451 | int (*detector) P_ ((struct coding_system *, |
452 | struct coding_detection_info *)); | |
df7492f9 KH |
453 | void (*decoder) P_ ((struct coding_system *)); |
454 | int (*encoder) P_ ((struct coding_system *)); | |
455 | }; | |
456 | ||
457 | /* Meanings of bits in the member `common_flags' of the structure | |
458 | coding_system. The lowest 8 bits are reserved for various kinds of | |
459 | annotations (currently two of them are used). */ | |
460 | #define CODING_ANNOTATION_MASK 0x00FF | |
461 | #define CODING_ANNOTATE_COMPOSITION_MASK 0x0001 | |
462 | #define CODING_ANNOTATE_DIRECTION_MASK 0x0002 | |
4fecac5c | 463 | #define CODING_ANNOTATE_CHARSET_MASK 0x0003 |
df7492f9 KH |
464 | #define CODING_FOR_UNIBYTE_MASK 0x0100 |
465 | #define CODING_REQUIRE_FLUSHING_MASK 0x0200 | |
466 | #define CODING_REQUIRE_DECODING_MASK 0x0400 | |
467 | #define CODING_REQUIRE_ENCODING_MASK 0x0800 | |
468 | #define CODING_REQUIRE_DETECTION_MASK 0x1000 | |
469 | #define CODING_RESET_AT_BOL_MASK 0x2000 | |
470 | ||
471 | /* Return 1 if the coding context CODING requires annotation | |
472 | handling. */ | |
473 | #define CODING_REQUIRE_ANNOTATION(coding) \ | |
474 | ((coding)->common_flags & CODING_ANNOTATION_MASK) | |
475 | ||
476 | /* Return 1 if the coding context CODING prefers decoding into unibyte. */ | |
477 | #define CODING_FOR_UNIBYTE(coding) \ | |
478 | ((coding)->common_flags & CODING_FOR_UNIBYTE_MASK) | |
479 | ||
480 | /* Return 1 if the coding context CODING requires specific code to be | |
a5ee738b KH |
481 | attached at the tail of converted text. */ |
482 | #define CODING_REQUIRE_FLUSHING(coding) \ | |
483 | ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK) | |
484 | ||
df7492f9 | 485 | /* Return 1 if the coding context CODING requires code conversion on |
a5ee738b KH |
486 | decoding. */ |
487 | #define CODING_REQUIRE_DECODING(coding) \ | |
811ea086 KH |
488 | ((coding)->dst_multibyte \ |
489 | || (coding)->common_flags & CODING_REQUIRE_DECODING_MASK) | |
a5ee738b | 490 | |
df7492f9 KH |
491 | |
492 | /* Return 1 if the coding context CODING requires code conversion on | |
c198294f KH |
493 | encoding. |
494 | The non-multibyte part of the condition is to support encoding of | |
495 | unibyte strings/buffers generated by string-as-unibyte or | |
496 | (set-buffer-multibyte nil) from multibyte strings/buffers. */ | |
df7492f9 KH |
497 | #define CODING_REQUIRE_ENCODING(coding) \ |
498 | ((coding)->src_multibyte \ | |
499 | || (coding)->common_flags & CODING_REQUIRE_ENCODING_MASK \ | |
500 | || (coding)->mode & CODING_MODE_SELECTIVE_DISPLAY) | |
501 | ||
a5ee738b | 502 | |
df7492f9 | 503 | /* Return 1 if the coding context CODING requires some kind of code |
a5ee738b KH |
504 | detection. */ |
505 | #define CODING_REQUIRE_DETECTION(coding) \ | |
506 | ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK) | |
507 | ||
df7492f9 | 508 | /* Return 1 if the coding context CODING requires code conversion on |
811ea086 | 509 | decoding or some kind of code detection. */ |
658cc252 | 510 | #define CODING_MAY_REQUIRE_DECODING(coding) \ |
811ea086 KH |
511 | (CODING_REQUIRE_DECODING (coding) \ |
512 | || CODING_REQUIRE_DETECTION (coding)) | |
4ed46869 | 513 | |
4ed46869 KH |
514 | /* Macros to decode or encode a character of JISX0208 in SJIS. S1 and |
515 | S2 are the 1st and 2nd position-codes of JISX0208 in SJIS coding | |
516 | system. C1 and C2 are the 1st and 2nd position codes of Emacs' | |
517 | internal format. */ | |
518 | ||
df7492f9 KH |
519 | #define SJIS_TO_JIS(code) \ |
520 | do { \ | |
521 | int s1, s2, j1, j2; \ | |
522 | \ | |
523 | s1 = (code) >> 8, s2 = (code) & 0xFF; \ | |
524 | \ | |
525 | if (s2 >= 0x9F) \ | |
526 | (j1 = s1 * 2 - (s1 >= 0xE0 ? 0x160 : 0xE0), \ | |
527 | j2 = s2 - 0x7E); \ | |
528 | else \ | |
529 | (j1 = s1 * 2 - ((s1 >= 0xE0) ? 0x161 : 0xE1), \ | |
530 | j2 = s2 - ((s2 >= 0x7F) ? 0x20 : 0x1F)); \ | |
531 | (code) = (j1 << 8) | j2; \ | |
4ed46869 KH |
532 | } while (0) |
533 | ||
6e58724e KH |
534 | #define SJIS_TO_JIS2(code) \ |
535 | do { \ | |
536 | int s1, s2, j1, j2; \ | |
537 | \ | |
538 | s1 = (code) >> 8, s2 = (code) & 0xFF; \ | |
539 | \ | |
540 | if (s2 >= 0x9F) \ | |
541 | { \ | |
542 | j1 = (s1 == 0xF0 ? 0x28 \ | |
543 | : s1 == 0xF1 ? 0x24 \ | |
544 | : s1 == 0xF2 ? 0x2C \ | |
545 | : s1 == 0xF3 ? 0x2E \ | |
546 | : 0x6E + (s1 - 0xF4) * 2); \ | |
547 | j2 = s2 - 0x7E; \ | |
548 | } \ | |
549 | else \ | |
550 | { \ | |
551 | j1 = (s1 <= 0xF2 ? 0x21 + (s1 - 0xF0) * 2 \ | |
552 | : s1 <= 0xF4 ? 0x2D + (s1 - 0xF3) * 2 \ | |
553 | : 0x6F + (s1 - 0xF5) * 2); \ | |
554 | j2 = s2 - ((s2 >= 0x7F ? 0x20 : 0x1F)); \ | |
555 | } \ | |
556 | (code) = (j1 << 8) | j2; \ | |
557 | } while (0) | |
558 | ||
df7492f9 KH |
559 | |
560 | #define JIS_TO_SJIS(code) \ | |
4ed46869 | 561 | do { \ |
df7492f9 KH |
562 | int s1, s2, j1, j2; \ |
563 | \ | |
564 | j1 = (code) >> 8, j2 = (code) & 0xFF; \ | |
565 | if (j1 & 1) \ | |
566 | (s1 = j1 / 2 + ((j1 < 0x5F) ? 0x71 : 0xB1), \ | |
567 | s2 = j2 + ((j2 >= 0x60) ? 0x20 : 0x1F)); \ | |
4ed46869 | 568 | else \ |
df7492f9 KH |
569 | (s1 = j1 / 2 + ((j1 < 0x5F) ? 0x70 : 0xB0), \ |
570 | s2 = j2 + 0x7E); \ | |
5afaefc1 | 571 | (code) = (s1 << 8) | s2; \ |
4ed46869 KH |
572 | } while (0) |
573 | ||
6e58724e KH |
574 | #define JIS_TO_SJIS2(code) \ |
575 | do { \ | |
576 | int s1, s2, j1, j2; \ | |
577 | \ | |
578 | j1 = (code) >> 8, j2 = (code) & 0xFF; \ | |
579 | if (j1 & 1) \ | |
580 | { \ | |
581 | s1 = (j1 <= 0x25 ? 0xF0 + (j1 - 0x21) / 2 \ | |
582 | : j1 <= 0x27 ? 0xF3 + (j1 - 0x2D) / 2 \ | |
583 | : 0xF5 + (j1 - 0x6F) / 2); \ | |
584 | s2 = j2 + ((j2 >= 0x60) ? 0x20 : 0x1F); \ | |
585 | } \ | |
586 | else \ | |
587 | { \ | |
588 | s1 = (j1 == 0x28 ? 0xF0 \ | |
589 | : j1 == 0x24 ? 0xF1 \ | |
590 | : j1 == 0x2C ? 0xF2 \ | |
591 | : j1 == 0x2E ? 0xF3 \ | |
592 | : 0xF4 + (j1 - 0x6E) / 2); \ | |
593 | s2 = j2 + 0x7E; \ | |
594 | } \ | |
595 | (code) = (s1 << 8) | s2; \ | |
596 | } while (0) | |
df7492f9 | 597 | |
290591c8 KH |
598 | /* Encode the file name NAME using the specified coding system |
599 | for file names, if any. */ | |
600 | #define ENCODE_FILE(name) \ | |
601 | (! NILP (Vfile_name_coding_system) \ | |
5e92b1ca | 602 | && !EQ (Vfile_name_coding_system, make_number (0)) \ |
afee9150 | 603 | ? code_convert_string_norecord (name, Vfile_name_coding_system, 1) \ |
290591c8 | 604 | : (! NILP (Vdefault_file_name_coding_system) \ |
5e92b1ca | 605 | && !EQ (Vdefault_file_name_coding_system, make_number (0)) \ |
afee9150 | 606 | ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \ |
290591c8 KH |
607 | : name)) |
608 | ||
df7492f9 | 609 | |
290591c8 KH |
610 | /* Decode the file name NAME using the specified coding system |
611 | for file names, if any. */ | |
612 | #define DECODE_FILE(name) \ | |
613 | (! NILP (Vfile_name_coding_system) \ | |
5e92b1ca | 614 | && !EQ (Vfile_name_coding_system, make_number (0)) \ |
afee9150 | 615 | ? code_convert_string_norecord (name, Vfile_name_coding_system, 0) \ |
290591c8 | 616 | : (! NILP (Vdefault_file_name_coding_system) \ |
5e92b1ca | 617 | && !EQ (Vdefault_file_name_coding_system, make_number (0)) \ |
afee9150 | 618 | ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 0) \ |
290591c8 KH |
619 | : name)) |
620 | ||
df7492f9 | 621 | |
2dfda962 | 622 | /* Encode the string STR using the specified coding system |
53eda481 | 623 | for system functions, if any. */ |
2dfda962 | 624 | #define ENCODE_SYSTEM(str) \ |
9b58c683 | 625 | (! NILP (Vlocale_coding_system) \ |
5e92b1ca | 626 | && !EQ (Vlocale_coding_system, make_number (0)) \ |
9b58c683 | 627 | ? code_convert_string_norecord (str, Vlocale_coding_system, 1) \ |
2dfda962 JR |
628 | : str) |
629 | ||
630 | /* Decode the string STR using the specified coding system | |
53eda481 | 631 | for system functions, if any. */ |
581e7427 | 632 | #define DECODE_SYSTEM(str) \ |
9b58c683 | 633 | (! NILP (Vlocale_coding_system) \ |
5e92b1ca | 634 | && !EQ (Vlocale_coding_system, make_number (0)) \ |
9b58c683 | 635 | ? code_convert_string_norecord (str, Vlocale_coding_system, 0) \ |
2dfda962 | 636 | : str) |
cf29bf99 | 637 | |
b3a208b0 DL |
638 | /* Used by the gtk menu code. Note that this encodes utf-8, not |
639 | utf-8-emacs, so it's not a no-op. */ | |
640 | #define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, 1) | |
641 | ||
4ed46869 | 642 | /* Extern declarations. */ |
16869d7e | 643 | extern Lisp_Object code_conversion_save P_ ((int, int)); |
c04809fb AS |
644 | extern int decoding_buffer_size P_ ((struct coding_system *, int)); |
645 | extern int encoding_buffer_size P_ ((struct coding_system *, int)); | |
df7492f9 | 646 | extern void setup_coding_system P_ ((Lisp_Object, struct coding_system *)); |
76c26397 | 647 | extern Lisp_Object coding_charset_list P_ ((struct coding_system *)); |
df7492f9 | 648 | extern void detect_coding P_ ((struct coding_system *)); |
511dd8e7 | 649 | extern Lisp_Object code_convert_region P_ ((Lisp_Object, Lisp_Object, |
df7492f9 KH |
650 | Lisp_Object, Lisp_Object, |
651 | int, int)); | |
652 | extern Lisp_Object code_convert_string P_ ((Lisp_Object, Lisp_Object, | |
653 | Lisp_Object, int, int, int)); | |
eb545596 DL |
654 | extern Lisp_Object code_convert_string_norecord P_ ((Lisp_Object, Lisp_Object, |
655 | int)); | |
df7492f9 KH |
656 | extern Lisp_Object raw_text_coding_system P_ ((Lisp_Object)); |
657 | extern Lisp_Object coding_inherit_eol_type P_ ((Lisp_Object, Lisp_Object)); | |
658 | ||
659 | extern int decode_coding_gap P_ ((struct coding_system *, | |
660 | EMACS_INT, EMACS_INT)); | |
661 | extern int encode_coding_gap P_ ((struct coding_system *, | |
662 | EMACS_INT, EMACS_INT)); | |
663 | extern void decode_coding_object P_ ((struct coding_system *, | |
664 | Lisp_Object, EMACS_INT, EMACS_INT, | |
665 | EMACS_INT, EMACS_INT, Lisp_Object)); | |
666 | extern void encode_coding_object P_ ((struct coding_system *, | |
667 | Lisp_Object, EMACS_INT, EMACS_INT, | |
668 | EMACS_INT, EMACS_INT, Lisp_Object)); | |
669 | ||
933373ed KH |
670 | /* Macros for backward compatibility. */ |
671 | ||
df7492f9 KH |
672 | #define decode_coding_region(coding, from, to) \ |
673 | decode_coding_object (coding, Fcurrent_buffer (), \ | |
674 | from, CHAR_TO_BYTE (from), \ | |
675 | to, CHAR_TO_BYTE (to), Fcurrent_buffer ()) | |
676 | ||
677 | ||
678 | #define encode_coding_region(coding, from, to) \ | |
679 | encode_coding_object (coding, Fcurrent_buffer (), \ | |
680 | from, CHAR_TO_BYTE (from), \ | |
681 | to, CHAR_TO_BYTE (to), Fcurrent_buffer ()) | |
682 | ||
683 | ||
684 | #define decode_coding_string(coding, string, nocopy) \ | |
685 | decode_coding_object (coding, string, 0, 0, XSTRING (string)->size, \ | |
686 | STRING_BYTES (XSTRING (string)), Qt) | |
687 | ||
688 | #define encode_coding_string(coding, string, nocopy) \ | |
689 | (encode_coding_object (coding, string, 0, 0, XSTRING (string)->size, \ | |
690 | STRING_BYTES (XSTRING (string)), Qt), \ | |
691 | (coding)->dst_object) | |
692 | ||
693 | ||
694 | #define decode_coding_c_string(coding, src, bytes, dst_object) \ | |
695 | do { \ | |
696 | (coding)->source = (src); \ | |
697 | (coding)->src_chars = (coding)->src_bytes = (bytes); \ | |
698 | decode_coding_object ((coding), Qnil, 0, 0, (bytes), (bytes), \ | |
699 | (dst_object)); \ | |
700 | } while (0) | |
701 | ||
702 | ||
703 | extern Lisp_Object preferred_coding_system P_ (()); | |
704 | ||
705 | ||
8f924df7 KH |
706 | extern Lisp_Object Qutf_8, Qutf_8_emacs; |
707 | ||
4ed46869 | 708 | extern Lisp_Object Qcoding_system, Qeol_type, Qcoding_category_index; |
df7492f9 KH |
709 | extern Lisp_Object Qcoding_system_p; |
710 | extern Lisp_Object Qraw_text, Qemacs_mule, Qno_conversion, Qundecided; | |
711 | extern Lisp_Object Qiso_2022; | |
4ed46869 | 712 | extern Lisp_Object Qbuffer_file_coding_system; |
df7492f9 KH |
713 | |
714 | extern Lisp_Object Qunix, Qdos, Qmac; | |
4ed46869 | 715 | |
f967223b KH |
716 | extern Lisp_Object Qtranslation_table; |
717 | extern Lisp_Object Qtranslation_table_id; | |
ab45712c | 718 | |
20ee919e EZ |
719 | /* Mnemonic strings to indicate each type of end-of-line. */ |
720 | extern Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac; | |
721 | /* Mnemonic string to indicate type of end-of-line is not yet decided. */ | |
722 | extern Lisp_Object eol_mnemonic_undecided; | |
4ed46869 | 723 | |
4ed46869 KH |
724 | #ifdef emacs |
725 | extern Lisp_Object Qfile_coding_system; | |
387f6ba5 | 726 | extern Lisp_Object Qcall_process, Qcall_process_region; |
4ed46869 | 727 | extern Lisp_Object Qstart_process, Qopen_network_stream; |
d008a7cc | 728 | extern Lisp_Object Qwrite_region; |
4ed46869 | 729 | |
68c45bf0 PE |
730 | extern char *emacs_strerror P_ ((int)); |
731 | ||
4ed46869 KH |
732 | /* Coding-system for reading files and receiving data from process. */ |
733 | extern Lisp_Object Vcoding_system_for_read; | |
734 | /* Coding-system for writing files and sending data to process. */ | |
735 | extern Lisp_Object Vcoding_system_for_write; | |
736 | /* Coding-system actually used in the latest I/O. */ | |
737 | extern Lisp_Object Vlast_coding_system_used; | |
68c45bf0 PE |
738 | /* Coding-system to use with system messages (e.g. strerror). */ |
739 | extern Lisp_Object Vlocale_coding_system; | |
4ed46869 | 740 | |
77a9bc9a EZ |
741 | /* If non-zero, process buffer inherits the coding system used to decode |
742 | the subprocess output. */ | |
743 | extern int inherit_process_coding_system; | |
744 | ||
fbaa2ed9 KH |
745 | /* Coding system to be used to encode text for terminal display when |
746 | terminal coding system is nil. */ | |
747 | extern struct coding_system safe_terminal_coding; | |
748 | ||
a5825a24 KH |
749 | /* Default coding systems used for process I/O. */ |
750 | extern Lisp_Object Vdefault_process_coding_system; | |
4ed46869 | 751 | |
df7492f9 | 752 | /* Function to call to force a user to select a proper coding |
658cc252 KH |
753 | system. */ |
754 | extern Lisp_Object Vselect_safe_coding_system_function; | |
755 | ||
6926d591 KH |
756 | /* If nonzero, on writing a file, Vselect_safe_coding_system_function |
757 | is called even if Vcoding_system_for_write is non-nil. */ | |
758 | extern int coding_system_require_warning; | |
759 | ||
31406df1 RS |
760 | /* Coding system for file names, or nil if none. */ |
761 | extern Lisp_Object Vfile_name_coding_system; | |
762 | ||
763 | /* Coding system for file names used only when | |
764 | Vfile_name_coding_system is nil. */ | |
765 | extern Lisp_Object Vdefault_file_name_coding_system; | |
2dfda962 | 766 | |
4ed46869 KH |
767 | #endif |
768 | ||
d008a7cc GM |
769 | /* Error signaled when there's a problem with detecting coding system */ |
770 | extern Lisp_Object Qcoding_system_error; | |
771 | ||
df7492f9 KH |
772 | extern char emacs_mule_bytes[256]; |
773 | extern int emacs_mule_string_char P_ ((unsigned char *)); | |
774 | ||
6f776e81 | 775 | #endif /* EMACS_CODING_H */ |
cefd8c4f KH |
776 | |
777 | /* arch-tag: 2bc3b4fa-6870-4f64-8135-b962b2d290e4 | |
778 | (do not change this comment) */ |