/* Header for coding system handler.
   Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN.
   Licensed to the Free Software Foundation.
   Copyright (C) 2003
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H13PRO009

This file is part of GNU Emacs.

GNU Emacs is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Emacs; see the file COPYING.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
4ed46869 24
6f776e81
KH
25#ifndef EMACS_CODING_H
26#define EMACS_CODING_H
4ed46869 27
/* Index to arguments of Fdefine_coding_system_internal.

   The first enumeration lists the arguments common to every coding
   system; coding_arg_max is the number of those common arguments.  */

enum define_coding_system_arg_index
  {
    coding_arg_name,
    coding_arg_mnemonic,
    coding_arg_coding_type,
    coding_arg_charset_list,
    coding_arg_ascii_compatible_p,
    coding_arg_decode_translation_table,
    coding_arg_encode_translation_table,
    coding_arg_post_read_conversion,
    coding_arg_pre_write_conversion,
    coding_arg_default_char,
    coding_arg_for_unibyte,
    coding_arg_plist,
    coding_arg_eol_type,
    coding_arg_max
  };

/* Extra arguments for an ISO-2022 coding system.  Numbering continues
   from coding_arg_max, so these indices follow the common ones.  */

enum define_coding_iso2022_arg_index
  {
    coding_arg_iso2022_initial = coding_arg_max,
    coding_arg_iso2022_reg_usage,
    coding_arg_iso2022_request,
    coding_arg_iso2022_flags,
    coding_arg_iso2022_max
  };

/* Extra arguments for a UTF-16 coding system, likewise numbered from
   coding_arg_max.  */

enum define_coding_utf16_arg_index
  {
    coding_arg_utf16_bom = coding_arg_max,
    coding_arg_utf16_endian,
    coding_arg_utf16_max
  };

/* Extra arguments for a CCL coding system, likewise numbered from
   coding_arg_max.  */

enum define_coding_ccl_arg_index
  {
    coding_arg_ccl_decoder = coding_arg_max,
    coding_arg_ccl_encoder,
    coding_arg_ccl_valids,
    coding_arg_ccl_max
  };
4ed46869 71
933373ed
KH
72/* Hash table for all coding systems. Keys are coding system symbols
73 and values are spec vectors of the corresponding coding system. A
74 spec vector has the form [ ATTRS ALIASES EOL-TYPE ]. ATTRS is a
75 vector of attribute of the coding system. ALIASES is a list of
76 aliases (symbols) of the coding system. EOL-TYPE is `unix', `dos',
77 `mac' or a vector of coding systems (symbols). */
78
df7492f9 79extern Lisp_Object Vcoding_system_hash_table;
4ed46869 80
933373ed 81
/* Enumeration of coding system type.  coding_type_max is the number
   of types.  */

enum coding_system_type
  {
    coding_type_charset,
    coding_type_utf_8,
    coding_type_utf_16,
    coding_type_iso_2022,
    coding_type_emacs_mule,
    coding_type_sjis,
    coding_type_ccl,
    coding_type_raw_text,
    coding_type_undecided,
    coding_type_max
  };
4ed46869 97
4ed46869 98
/* Enumeration of end-of-line format type.  eol_type_max is the number
   of formats.  */

enum end_of_line_type
  {
    eol_lf,		/* Line-feed only, same as Emacs' internal
			   format.  */
    eol_crlf,		/* Sequence of carriage-return and
			   line-feed.  */
    eol_cr,		/* Carriage-return only.  */
    eol_any,		/* Accept any of above.  Produce line-feed
			   only.  */
    eol_undecided,	/* This value is used to denote that the
			   eol-type is not yet decided.  */
    eol_type_max
  };
4ed46869 114
/* Enumeration of index to an attribute vector of a coding system.
   coding_attr_last_index is the number of attribute slots.  */

enum coding_attr_index
  {
    coding_attr_base_name,
    coding_attr_docstring,
    coding_attr_mnemonic,
    coding_attr_type,
    coding_attr_charset_list,
    coding_attr_ascii_compat,
    coding_attr_decode_tbl,
    coding_attr_encode_tbl,
    coding_attr_trans_tbl,
    coding_attr_post_read,
    coding_attr_pre_write,
    coding_attr_default_char,
    coding_attr_for_unibyte,
    coding_attr_plist,

    coding_attr_category,
    coding_attr_safe_charsets,

    /* The followings are extra attributes for each type.  */
    coding_attr_charset_valids,

    coding_attr_ccl_decoder,
    coding_attr_ccl_encoder,
    coding_attr_ccl_valids,

    coding_attr_iso_initial,
    coding_attr_iso_usage,
    coding_attr_iso_request,
    coding_attr_iso_flags,

    coding_attr_utf_16_bom,
    coding_attr_utf_16_endian,

    coding_attr_emacs_mule_full,

    coding_attr_last_index
  };
4ed46869 156
4ed46869 157
933373ed
KH
/* Macros to access an element of an attribute vector.  ATTRS is the
   attribute vector; each macro expands to an AREF on the slot named
   by the corresponding coding_attr_XXX index.  */

#define CODING_ATTR_BASE_NAME(attrs)	AREF (attrs, coding_attr_base_name)
#define CODING_ATTR_TYPE(attrs)		AREF (attrs, coding_attr_type)
#define CODING_ATTR_CHARSET_LIST(attrs)	AREF (attrs, coding_attr_charset_list)
#define CODING_ATTR_MNEMONIC(attrs)	AREF (attrs, coding_attr_mnemonic)
#define CODING_ATTR_DOCSTRING(attrs)	AREF (attrs, coding_attr_docstring)
#define CODING_ATTR_ASCII_COMPAT(attrs)	AREF (attrs, coding_attr_ascii_compat)
#define CODING_ATTR_DECODE_TBL(attrs)	AREF (attrs, coding_attr_decode_tbl)
#define CODING_ATTR_ENCODE_TBL(attrs)	AREF (attrs, coding_attr_encode_tbl)
#define CODING_ATTR_TRANS_TBL(attrs)	AREF (attrs, coding_attr_trans_tbl)
#define CODING_ATTR_POST_READ(attrs)	AREF (attrs, coding_attr_post_read)
#define CODING_ATTR_PRE_WRITE(attrs)	AREF (attrs, coding_attr_pre_write)
#define CODING_ATTR_DEFAULT_CHAR(attrs)	AREF (attrs, coding_attr_default_char)
#define CODING_ATTR_FOR_UNIBYTE(attrs)	AREF (attrs, coding_attr_for_unibyte)
/* NOTE(review): enum coding_attr_index in this header declares no
   `coding_attr_flushing' member, so any use of this macro fails to
   compile unless that name is provided elsewhere.  Kept only so the
   set of macro names is unchanged -- confirm and remove if unused.  */
#define CODING_ATTR_FLUSHING(attrs)	AREF (attrs, coding_attr_flushing)
#define CODING_ATTR_PLIST(attrs)	AREF (attrs, coding_attr_plist)
#define CODING_ATTR_CATEGORY(attrs)	AREF (attrs, coding_attr_category)
#define CODING_ATTR_SAFE_CHARSETS(attrs) AREF (attrs, coding_attr_safe_charsets)
8ddb35b2 177
8ddb35b2 178
933373ed
KH
/* Accessors keyed by a coding system ID.  ID is used as an entry index
   into Vcoding_system_hash_table; the value stored there is the spec
   vector [ ATTRS ALIASES EOL-TYPE ].  */

/* Return the name of a coding system specified by ID.  */

#define CODING_ID_NAME(id) \
  (HASH_KEY (XHASH_TABLE (Vcoding_system_hash_table), (id)))

/* Return the attribute vector of a coding system specified by ID.  */

#define CODING_ID_ATTRS(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), (id)), 0))

/* Return the list of aliases of a coding system specified by ID.  */

#define CODING_ID_ALIASES(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), (id)), 1))

/* Return the eol-type of a coding system specified by ID.  */

#define CODING_ID_EOL_TYPE(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), (id)), 2))
658cc252 197
933373ed
KH
198
/* Return the spec vector of CODING_SYSTEM_SYMBOL, or Qnil if the
   symbol has no entry in Vcoding_system_hash_table.  */

#define CODING_SYSTEM_SPEC(coding_system_symbol) \
  (Fgethash ((coding_system_symbol), Vcoding_system_hash_table, Qnil))


/* Return the ID of CODING_SYSTEM_SYMBOL, i.e. the result of
   hash_lookup on Vcoding_system_hash_table.  The expansion is fully
   parenthesized so it can be used inside larger expressions.  */

#define CODING_SYSTEM_ID(coding_system_symbol) \
  (hash_lookup (XHASH_TABLE (Vcoding_system_hash_table), \
		(coding_system_symbol), NULL))

/* Return 1 iff CODING_SYSTEM_SYMBOL is a coding system, i.e. it has a
   non-nil spec vector.  */

#define CODING_SYSTEM_P(coding_system_symbol) \
  (! NILP (CODING_SYSTEM_SPEC (coding_system_symbol)))
4ed46869 215
933373ed
KH
/* Check if X is a coding system or not.  If it is not, call
   wrong_type_argument with Qcoding_system_p.  */

#define CHECK_CODING_SYSTEM(x) \
  do { \
    if (! CODING_SYSTEM_P (x)) \
      wrong_type_argument (Qcoding_system_p, (x)); \
  } while (0)


/* Check if X is a coding system or not.  If it is, set SPEC to the
   spec vector of the coding system.

   X and SPEC must be lvalues: SPEC is assigned, and if the check
   fails X is assigned whatever wrong_type_argument returns.  SPEC is
   not looked up again after that assignment.  */

#define CHECK_CODING_SYSTEM_GET_SPEC(x, spec) \
  do { \
    (spec) = CODING_SYSTEM_SPEC (x); \
    if (NILP (spec)) \
      (x) = wrong_type_argument (Qcoding_system_p, (x)); \
  } while (0)


/* Check if X is a coding system or not.  If it is, set ID to the
   ID of the coding system.  A negative ID means X is not a coding
   system.

   X and ID must be lvalues: ID is assigned, and if the check fails X
   is assigned whatever wrong_type_argument returns.  ID is not looked
   up again after that assignment.  */

#define CHECK_CODING_SYSTEM_GET_ID(x, id) \
  do \
    { \
      (id) = CODING_SYSTEM_ID (x); \
      if ((id) < 0) \
	(x) = wrong_type_argument (Qcoding_system_p, (x)); \
    } while (0)
4ed46869 246
4ed46869
KH
247
/*** GENERAL section ***/

/* Enumeration of result code of code conversion.  Stored in the
   member `result' of struct coding_system.  */
enum coding_result_code
  {
    CODING_RESULT_SUCCESS,
    CODING_RESULT_INSUFFICIENT_SRC,
    CODING_RESULT_INSUFFICIENT_DST,
    CODING_RESULT_INCONSISTENT_EOL,
    CODING_RESULT_INVALID_SRC,
    CODING_RESULT_INTERRUPT,
    CODING_RESULT_INSUFFICIENT_MEM
  };
261
658cc252 262
/* Macros used for the member `mode' of the struct coding_system.
   Each macro is a distinct bit, so they can be OR-ed together.  */

/* If set, recover the original CR or LF of the already decoded text
   when the decoding routine encounters an inconsistent eol format.  */
#define CODING_MODE_INHIBIT_INCONSISTENT_EOL	0x01

/* If set, the decoding/encoding routines treat the current data as
   the last block of the whole text to be converted, and do the
   appropriate finishing job.  */
#define CODING_MODE_LAST_BLOCK			0x02

/* If set, it means that the current source text is in a buffer which
   enables selective display.  */
#define CODING_MODE_SELECTIVE_DISPLAY		0x04

/* This flag is used by the decoding/encoding routines on the fly.  If
   set, it means that right-to-left text is being processed.  */
#define CODING_MODE_DIRECTION			0x08

/* NOTE(review): this bit is not documented in this header; presumably
   it marks the destination buffer as caller-supplied and not to be
   reallocated -- confirm against its users in coding.c.  */
#define CODING_MODE_FIXED_DESTINATION		0x10

/* If set, it means that the encoding routines produce some safe
   ASCII characters (usually '?') for unsupported characters.  */
#define CODING_MODE_SAFE_ENCODING		0x20
287
/* Structure of the field `spec.iso_2022' in the structure
   `coding_system'.  */
struct iso_2022_spec
{
  /* Bit-wise-or of CODING_ISO_FLAG_XXX.  */
  unsigned flags;

  /* The current graphic register invoked to each graphic plane.  */
  int current_invocation[2];

  /* The current charset designated to each graphic register.  The
     value -1 means that no charset is designated, -2 means that
     there was an invalid designation previously.  */
  int current_designation[4];

  /* Set to 1 temporarily only when graphic register 2 or 3 is invoked
     by single-shift while encoding.  */
  int single_shifting;

  /* Set to 1 temporarily only when processing at beginning of line.  */
  int bol;
};
4ed46869 310
/* Forward declaration only; the structure is defined in ccl.h.
   struct coding_system refers to it through a pointer.  */
struct ccl_spec;
4ed46869 312
df7492f9
KH
/* Possible values of the member `bom' of struct utf_16_spec.  */
enum utf_16_bom_type
  {
    utf_16_detect_bom,
    utf_16_without_bom,
    utf_16_with_bom
  };

/* Possible values of the member `endian' of struct utf_16_spec.  */
enum utf_16_endian_type
  {
    utf_16_big_endian,
    utf_16_little_endian
  };

/* Structure of the field `spec.utf_16' in the structure
   `coding_system'.  */
struct utf_16_spec
{
  enum utf_16_bom_type bom;
  enum utf_16_endian_type endian;
  /* NOTE(review): not documented in this header; presumably holds
     pending surrogate state during conversion -- confirm in
     coding.c.  */
  int surrogate;
};
279d9f7b 332
4fecac5c
KH
/* Bookkeeping passed to the `detector' function of a coding system
   (see struct coding_system).  */
struct coding_detection_info
{
  /* Values of these members are bitwise-OR of CATEGORY_MASK_XXXs.  */
  /* Which categories are already checked.  */
  int checked;
  /* Which categories are strongly found.  */
  int found;
  /* Which categories are rejected.  */
  int rejected;
};
279d9f7b 343
279d9f7b 344
df7492f9
KH
345struct coding_system
346{
347 /* ID number of the coding system. This is an index to
348 Vcoding_system_hash_table. This value is set by
349 setup_coding_system. At the early stage of building time, this
350 value is -1 in the array coding_categories to indicate that no
351 coding-system of that category is yet defined. */
352 int id;
353
354 /* Flag bits of the coding system. The meaning of each bit is common
355 to all types of coding systems. */
356 int common_flags;
357
358 /* Mode bits of the coding system. See the comments of the macros
359 CODING_MODE_XXX. */
360 unsigned int mode;
450c60a5 361
4ed46869 362 /* Detailed information specific to each type of coding system. */
df7492f9 363 union
4ed46869 364 {
df7492f9
KH
365 struct iso_2022_spec iso_2022;
366 struct ccl_spec *ccl; /* Defined in ccl.h. */
367 struct utf_16_spec utf_16;
368 int emacs_mule_full_support;
4ed46869
KH
369 } spec;
370
df7492f9
KH
371 int max_charset_id;
372 char *safe_charsets;
658cc252 373
df7492f9
KH
374 /* The following two members specify how binary 8-bit code 128..255
375 are represented in source and destination text respectively. 1
376 means they are represented by 2-byte sequence, 0 means they are
377 represented by 1-byte as is (see the comment in character.h). */
811ea086
KH
378 unsigned src_multibyte : 1;
379 unsigned dst_multibyte : 1;
380
a137bb00
KH
381 /* How may heading bytes we can skip for decoding. This is set to
382 -1 in setup_coding_system, and updated by detect_coding. So,
383 when this is equal to the byte length of the text being
384 converted, we can skip the actual conversion process. */
df7492f9 385 int head_ascii;
658cc252
KH
386
387 /* The following members are set by encoding/decoding routine. */
df7492f9 388 EMACS_INT produced, produced_char, consumed, consumed_char;
658cc252 389
811ea086
KH
390 /* Number of error source data found in a decoding routine. */
391 int errors;
392
df7492f9
KH
393 /* Store the positions of error source data. */
394 EMACS_INT *error_positions;
e6a9a0bc 395
df7492f9
KH
396 /* Finish status of code conversion. */
397 enum coding_result_code result;
6041c9ce 398
658cc252
KH
399 /* The following members are all Lisp symbols. We don't have to
400 protect them from GC because the current garbage collection
401 doesn't relocate Lisp symbols. But, when it is changed, we must
402 find a way to protect them. */
403
df7492f9
KH
404 EMACS_INT src_pos, src_pos_byte, src_chars, src_bytes;
405 Lisp_Object src_object;
8f924df7 406 const unsigned char *source;
4ed46869 407
df7492f9
KH
408 EMACS_INT dst_pos, dst_pos_byte, dst_bytes;
409 Lisp_Object dst_object;
410 unsigned char *destination;
4ed46869 411
933373ed
KH
412 /* Set to 1 iff the source of conversion is not in the member
413 `charbuf', but at `src_object'. */
df7492f9 414 int chars_at_source;
4ed46869 415
df7492f9
KH
416 /* If an element is non-negative, it is a character code.
417
418 If it is in the range -128..-1, it is a 8-bit character code
419 minus 256.
420
421 If it is less than -128, it specifies the start of an annotation
422 chunk. The length of the chunk is -128 minus the value of the
423 element. The following elements are OFFSET, ANNOTATION-TYPE, and
424 a sequence of actual data for the annotation. OFFSET is a
425 character position offset from dst_pos or src_pos,
426 ANNOTATION-TYPE specfies the meaning of the annotation and how to
427 handle the following data.. */
428 int *charbuf;
429 int charbuf_size, charbuf_used;
430
431 /* Set to 1 if charbuf contains an annotation. */
432 int annotated;
4ed46869 433
df7492f9
KH
434 unsigned char carryover[64];
435 int carryover_bytes;
a5ee738b 436
df7492f9
KH
437 int default_char;
438
4fecac5c
KH
439 int (*detector) P_ ((struct coding_system *,
440 struct coding_detection_info *));
df7492f9
KH
441 void (*decoder) P_ ((struct coding_system *));
442 int (*encoder) P_ ((struct coding_system *));
443};
444
/* Meanings of bits in the member `common_flags' of the structure
   coding_system.  The lowest 8 bits are reserved for various kind of
   annotations (currently three names are defined).

   NOTE(review): CODING_ANNOTATE_CHARSET_MASK (0x0003) is not a
   separate bit; it equals CODING_ANNOTATE_COMPOSITION_MASK
   | CODING_ANNOTATE_DIRECTION_MASK, so testing it is also true when
   only one of those two is set.  The value is left unchanged here --
   confirm whether 0x0004 was intended.  */
#define CODING_ANNOTATION_MASK			0x00FF
#define CODING_ANNOTATE_COMPOSITION_MASK	0x0001
#define CODING_ANNOTATE_DIRECTION_MASK		0x0002
#define CODING_ANNOTATE_CHARSET_MASK		0x0003
#define CODING_FOR_UNIBYTE_MASK			0x0100
#define CODING_REQUIRE_FLUSHING_MASK		0x0200
#define CODING_REQUIRE_DECODING_MASK		0x0400
#define CODING_REQUIRE_ENCODING_MASK		0x0800
#define CODING_REQUIRE_DETECTION_MASK		0x1000
#define CODING_RESET_AT_BOL_MASK		0x2000

/* In all macros below CODING is a pointer to a struct coding_system.
   The macros that are a bare `&' yield the masked bits (nonzero, not
   necessarily 1) when the condition holds; those built with `||'
   yield 0 or 1.  */

/* Return nonzero if the coding context CODING requires annotation
   handling.  */
#define CODING_REQUIRE_ANNOTATION(coding) \
  ((coding)->common_flags & CODING_ANNOTATION_MASK)

/* Return nonzero if the coding context CODING prefers decoding into
   unibyte.  */
#define CODING_FOR_UNIBYTE(coding) \
  ((coding)->common_flags & CODING_FOR_UNIBYTE_MASK)

/* Return nonzero if the coding context CODING requires specific code
   to be attached at the tail of converted text.  */
#define CODING_REQUIRE_FLUSHING(coding) \
  ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK)

/* Return 1 if the coding context CODING requires code conversion on
   decoding, i.e. the destination is multibyte or the decoding flag is
   set.  */
#define CODING_REQUIRE_DECODING(coding) \
  ((coding)->dst_multibyte \
   || ((coding)->common_flags & CODING_REQUIRE_DECODING_MASK))


/* Return 1 if the coding context CODING requires code conversion on
   encoding, i.e. the source is multibyte, the encoding flag is set,
   or selective display is in effect.  */
#define CODING_REQUIRE_ENCODING(coding) \
  ((coding)->src_multibyte \
   || ((coding)->common_flags & CODING_REQUIRE_ENCODING_MASK) \
   || ((coding)->mode & CODING_MODE_SELECTIVE_DISPLAY))


/* Return nonzero if the coding context CODING requires some kind of
   code detection.  */
#define CODING_REQUIRE_DETECTION(coding) \
  ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK)

/* Return 1 if the coding context CODING requires code conversion on
   decoding or some kind of code detection.  */
#define CODING_MAY_REQUIRE_DECODING(coding) \
  (CODING_REQUIRE_DECODING (coding) \
   || CODING_REQUIRE_DETECTION (coding))
4ed46869 498
4ed46869
KH
/* Convert CODE, a two-byte SJIS code (lead byte in bits 8 and up,
   trail byte in the low 8 bits), to the corresponding two-byte JIS
   code, in place.  CODE must be a modifiable integer lvalue; it is
   evaluated more than once, so it must not have side effects.

   The temporaries carry a trailing underscore so that an argument
   named s1, s2, j1 or j2 is not captured by the macro body.  */

#define SJIS_TO_JIS(code) \
  do { \
    int s1_ = (code) >> 8, s2_ = (code) & 0xFF; \
    int j1_, j2_; \
    \
    if (s2_ >= 0x9F) \
      { \
	j1_ = s1_ * 2 - (s1_ >= 0xE0 ? 0x160 : 0xE0); \
	j2_ = s2_ - 0x7E; \
      } \
    else \
      { \
	j1_ = s1_ * 2 - (s1_ >= 0xE0 ? 0x161 : 0xE1); \
	j2_ = s2_ - (s2_ >= 0x7F ? 0x20 : 0x1F); \
      } \
    (code) = (j1_ << 8) | j2_; \
  } while (0)
518
6e58724e
KH
/* Convert CODE, a two-byte SJIS code whose lead byte is 0xF0 or
   above, to a two-byte JIS-style code, in place.  The lead bytes
   0xF0..0xF4 map to the rows 0x21, 0x23, 0x25, 0x2D, 0x2F (trail
   byte below 0x9F) or 0x28, 0x24, 0x2C, 0x2E, 0x6E (trail byte 0x9F
   and up); later lead bytes continue from rows 0x6F / 0x70.
   JIS_TO_SJIS2 is the inverse.

   CODE must be a modifiable integer lvalue; it is evaluated more than
   once, so it must not have side effects.  The temporaries carry a
   trailing underscore so that an argument named s1, s2, j1 or j2 is
   not captured by the macro body.  */

#define SJIS_TO_JIS2(code) \
  do { \
    int s1_ = (code) >> 8, s2_ = (code) & 0xFF; \
    int j1_, j2_; \
    \
    if (s2_ >= 0x9F) \
      { \
	j1_ = (s1_ == 0xF0 ? 0x28 \
	       : s1_ == 0xF1 ? 0x24 \
	       : s1_ == 0xF2 ? 0x2C \
	       : s1_ == 0xF3 ? 0x2E \
	       : 0x6E + (s1_ - 0xF4) * 2); \
	j2_ = s2_ - 0x7E; \
      } \
    else \
      { \
	j1_ = (s1_ <= 0xF2 ? 0x21 + (s1_ - 0xF0) * 2 \
	       : s1_ <= 0xF4 ? 0x2D + (s1_ - 0xF3) * 2 \
	       : 0x6F + (s1_ - 0xF5) * 2); \
	j2_ = s2_ - (s2_ >= 0x7F ? 0x20 : 0x1F); \
      } \
    (code) = (j1_ << 8) | j2_; \
  } while (0)
543
df7492f9
KH
544
/* Convert CODE, a two-byte JIS code (row in bits 8 and up, column in
   the low 8 bits), to the corresponding two-byte SJIS code, in place.
   Odd rows go to the lower half of an SJIS lead byte's trail range,
   even rows to the upper half (trail byte = column + 0x7E).

   CODE must be a modifiable integer lvalue; it is evaluated more than
   once, so it must not have side effects.  The temporaries carry a
   trailing underscore so that an argument named s1, s2, j1 or j2 is
   not captured by the macro body.  */

#define JIS_TO_SJIS(code) \
  do { \
    int j1_ = (code) >> 8, j2_ = (code) & 0xFF; \
    int s1_, s2_; \
    \
    if (j1_ & 1) \
      { \
	s1_ = j1_ / 2 + (j1_ < 0x5F ? 0x71 : 0xB1); \
	s2_ = j2_ + (j2_ >= 0x60 ? 0x20 : 0x1F); \
      } \
    else \
      { \
	s1_ = j1_ / 2 + (j1_ < 0x5F ? 0x70 : 0xB0); \
	s2_ = j2_ + 0x7E; \
      } \
    (code) = (s1_ << 8) | s2_; \
  } while (0)
558
6e58724e
KH
/* Convert CODE, a two-byte JIS-style code, to the two-byte SJIS code
   with a lead byte of 0xF0 or above, in place.  This is the inverse
   of SJIS_TO_JIS2:

     odd rows   0x21 0x23 0x25 -> lead 0xF0 0xF1 0xF2
		0x2D 0x2F      -> lead 0xF3 0xF4
		0x6F and up    -> lead 0xF5 and up
     even rows  0x28 0x24 0x2C 0x2E -> lead 0xF0 0xF1 0xF2 0xF3
		0x6E and up	    -> lead 0xF4 and up

   The second odd-row test must be `<= 0x2F': with the former bound
   of 0x27 the rows 0x2D and 0x2F fell through to the 0x6F formula and
   produced a lead byte outside the 0xF0.. range, so the result did
   not round-trip through SJIS_TO_JIS2.

   CODE must be a modifiable integer lvalue; it is evaluated more than
   once, so it must not have side effects.  The temporaries carry a
   trailing underscore so that an argument named s1, s2, j1 or j2 is
   not captured by the macro body.  */

#define JIS_TO_SJIS2(code) \
  do { \
    int j1_ = (code) >> 8, j2_ = (code) & 0xFF; \
    int s1_, s2_; \
    \
    if (j1_ & 1) \
      { \
	s1_ = (j1_ <= 0x25 ? 0xF0 + (j1_ - 0x21) / 2 \
	       : j1_ <= 0x2F ? 0xF3 + (j1_ - 0x2D) / 2 \
	       : 0xF5 + (j1_ - 0x6F) / 2); \
	s2_ = j2_ + (j2_ >= 0x60 ? 0x20 : 0x1F); \
      } \
    else \
      { \
	s1_ = (j1_ == 0x28 ? 0xF0 \
	       : j1_ == 0x24 ? 0xF1 \
	       : j1_ == 0x2C ? 0xF2 \
	       : j1_ == 0x2E ? 0xF3 \
	       : 0xF4 + (j1_ - 0x6E) / 2); \
	s2_ = j2_ + 0x7E; \
      } \
    (code) = (s1_ << 8) | s2_; \
  } while (0)
df7492f9 582
290591c8
KH
/* Encode the file name NAME using the specified coding system
   for file names, if any.  Vfile_name_coding_system is tried first,
   then Vdefault_file_name_coding_system; if neither qualifies NAME is
   returned unchanged.  NAME appears in several branches but only one
   of them is evaluated.  */
#define ENCODE_FILE(name) \
  (! NILP (Vfile_name_coding_system) \
   && XFASTINT (Vfile_name_coding_system) != 0 \
   ? code_convert_string_norecord (name, Vfile_name_coding_system, 1) \
   : (! NILP (Vdefault_file_name_coding_system) \
      && XFASTINT (Vdefault_file_name_coding_system) != 0 \
      ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \
      : name))
593
df7492f9 594
290591c8
KH
/* Decode the file name NAME using the specified coding system
   for file names, if any.  Vfile_name_coding_system is tried first,
   then Vdefault_file_name_coding_system; if neither qualifies NAME is
   returned unchanged.  NAME appears in several branches but only one
   of them is evaluated.  */
#define DECODE_FILE(name) \
  (! NILP (Vfile_name_coding_system) \
   && XFASTINT (Vfile_name_coding_system) != 0 \
   ? code_convert_string_norecord (name, Vfile_name_coding_system, 0) \
   : (! NILP (Vdefault_file_name_coding_system) \
      && XFASTINT (Vdefault_file_name_coding_system) != 0 \
      ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 0) \
      : name))
605
df7492f9 606
2dfda962
JR
#ifdef WINDOWSNT
/* Encode the string STR using the specified coding system
   for w32 system functions, if any.  */
#define ENCODE_SYSTEM(str) \
  (! NILP (Vlocale_coding_system) \
   && XFASTINT (Vlocale_coding_system) != 0 \
   ? code_convert_string_norecord (str, Vlocale_coding_system, 1) \
   : str)

/* Decode the string STR using the specified coding system
   for w32 system functions, if any.  The parameter is named STR to
   match the body; it used to be called NAME while the expansion
   referred to `str', so the macro ignored its argument and picked up
   whatever `str' was in scope at the call site.  */
#define DECODE_SYSTEM(str) \
  (! NILP (Vlocale_coding_system) \
   && XFASTINT (Vlocale_coding_system) != 0 \
   ? code_convert_string_norecord (str, Vlocale_coding_system, 0) \
   : str)

#else /* WINDOWSNT */

/* Without WINDOWSNT, encoding makes the string unibyte and decoding
   yields the argument unchanged.  */
#define ENCODE_SYSTEM(str) string_make_unibyte(str)
#define DECODE_SYSTEM(str) (str)

#endif /* !WINDOWSNT */
2dfda962 630
b3a208b0
DL
/* Used by the gtk menu code.  Note that this encodes utf-8, not
   utf-8-emacs, so it's not a no-op.  */
#define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, 1)
634
4ed46869 635/* Extern declarations. */
16869d7e 636extern Lisp_Object code_conversion_save P_ ((int, int));
c04809fb
AS
637extern int decoding_buffer_size P_ ((struct coding_system *, int));
638extern int encoding_buffer_size P_ ((struct coding_system *, int));
df7492f9
KH
639extern void setup_coding_system P_ ((Lisp_Object, struct coding_system *));
640extern void detect_coding P_ ((struct coding_system *));
511dd8e7 641extern Lisp_Object code_convert_region P_ ((Lisp_Object, Lisp_Object,
df7492f9
KH
642 Lisp_Object, Lisp_Object,
643 int, int));
644extern Lisp_Object code_convert_string P_ ((Lisp_Object, Lisp_Object,
645 Lisp_Object, int, int, int));
eb545596
DL
646extern Lisp_Object code_convert_string_norecord P_ ((Lisp_Object, Lisp_Object,
647 int));
df7492f9
KH
648extern Lisp_Object raw_text_coding_system P_ ((Lisp_Object));
649extern Lisp_Object coding_inherit_eol_type P_ ((Lisp_Object, Lisp_Object));
650
651extern int decode_coding_gap P_ ((struct coding_system *,
652 EMACS_INT, EMACS_INT));
653extern int encode_coding_gap P_ ((struct coding_system *,
654 EMACS_INT, EMACS_INT));
655extern void decode_coding_object P_ ((struct coding_system *,
656 Lisp_Object, EMACS_INT, EMACS_INT,
657 EMACS_INT, EMACS_INT, Lisp_Object));
658extern void encode_coding_object P_ ((struct coding_system *,
659 Lisp_Object, EMACS_INT, EMACS_INT,
660 EMACS_INT, EMACS_INT, Lisp_Object));
661
933373ed
KH
/* Macros for backward compatibility.  They express the older
   region/string entry points in terms of decode_coding_object and
   encode_coding_object.  Arguments may be evaluated more than once,
   so they must not have side effects.  */

/* Decode the text between character positions FROM and TO of the
   current buffer, with the current buffer as destination.  */
#define decode_coding_region(coding, from, to) \
  decode_coding_object ((coding), Fcurrent_buffer (), \
			(from), CHAR_TO_BYTE (from), \
			(to), CHAR_TO_BYTE (to), Fcurrent_buffer ())


/* Encode the text between character positions FROM and TO of the
   current buffer, with the current buffer as destination.  */
#define encode_coding_region(coding, from, to) \
  encode_coding_object ((coding), Fcurrent_buffer (), \
			(from), CHAR_TO_BYTE (from), \
			(to), CHAR_TO_BYTE (to), Fcurrent_buffer ())


/* Decode the whole of STRING, passing Qt as the destination object.
   NOCOPY is accepted for compatibility and ignored.  */
#define decode_coding_string(coding, string, nocopy) \
  decode_coding_object ((coding), (string), 0, 0, XSTRING (string)->size, \
			STRING_BYTES (XSTRING (string)), Qt)

/* Encode the whole of STRING, passing Qt as the destination object,
   and yield CODING->dst_object.  NOCOPY is accepted for compatibility
   and ignored.  */
#define encode_coding_string(coding, string, nocopy) \
  (encode_coding_object ((coding), (string), 0, 0, XSTRING (string)->size, \
			 STRING_BYTES (XSTRING (string)), Qt), \
   (coding)->dst_object)


/* Decode BYTES bytes at the C address SRC into DST_OBJECT.  The
   source pointer and lengths are stored in CODING first, and Qnil is
   passed as the source object.  */
#define decode_coding_c_string(coding, src, bytes, dst_object) \
  do { \
    (coding)->source = (src); \
    (coding)->src_chars = (coding)->src_bytes = (bytes); \
    decode_coding_object ((coding), Qnil, 0, 0, (bytes), (bytes), \
			  (dst_object)); \
  } while (0)
693
694
695extern Lisp_Object preferred_coding_system P_ (());
696
697
8f924df7
KH
698extern Lisp_Object Qutf_8, Qutf_8_emacs;
699
4ed46869 700extern Lisp_Object Qcoding_system, Qeol_type, Qcoding_category_index;
df7492f9
KH
701extern Lisp_Object Qcoding_system_p;
702extern Lisp_Object Qraw_text, Qemacs_mule, Qno_conversion, Qundecided;
703extern Lisp_Object Qiso_2022;
4ed46869 704extern Lisp_Object Qbuffer_file_coding_system;
df7492f9
KH
705
706extern Lisp_Object Qunix, Qdos, Qmac;
4ed46869 707
f967223b
KH
708extern Lisp_Object Qtranslation_table;
709extern Lisp_Object Qtranslation_table_id;
ab45712c 710
20ee919e
EZ
711/* Mnemonic strings to indicate each type of end-of-line. */
712extern Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
713/* Mnemonic string to indicate type of end-of-line is not yet decided. */
714extern Lisp_Object eol_mnemonic_undecided;
4ed46869 715
4ed46869
KH
#ifdef emacs
extern Lisp_Object Qfile_coding_system;
extern Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
extern Lisp_Object Qstart_process, Qopen_network_stream;
extern Lisp_Object Qwrite_region;

extern char *emacs_strerror P_ ((int));

/* Coding-system for reading files and receiving data from process.  */
extern Lisp_Object Vcoding_system_for_read;
/* Coding-system for writing files and sending data to process.  */
extern Lisp_Object Vcoding_system_for_write;
/* Coding-system actually used in the latest I/O.  */
extern Lisp_Object Vlast_coding_system_used;
/* Coding-system to use with system messages (e.g. strerror).  */
extern Lisp_Object Vlocale_coding_system;

/* If non-zero, process buffer inherits the coding system used to decode
   the subprocess output.  */
extern int inherit_process_coding_system;

/* Coding-system to be used for encoding terminal output.  This
   structure contains information of a coding-system specified by the
   function `set-terminal-coding-system'.  */
extern struct coding_system terminal_coding;

/* Coding system to be used to encode text for terminal display when
   terminal coding system is nil.  */
extern struct coding_system safe_terminal_coding;

/* Coding-system of what is sent from terminal keyboard.  This
   structure contains information of a coding-system specified by the
   function `set-keyboard-coding-system'.  */
extern struct coding_system keyboard_coding;

/* Default coding systems used for process I/O.  */
extern Lisp_Object Vdefault_process_coding_system;

/* Function to call to force a user to select a proper coding
   system.  */
extern Lisp_Object Vselect_safe_coding_system_function;

/* If nonzero, on writing a file, Vselect_safe_coding_system_function
   is called even if Vcoding_system_for_write is non-nil.  */
extern int coding_system_require_warning;

/* Coding system for file names, or nil if none.  */
extern Lisp_Object Vfile_name_coding_system;

/* Coding system for file names used only when
   Vfile_name_coding_system is nil.  */
extern Lisp_Object Vdefault_file_name_coding_system;

#endif
770
d008a7cc
GM
771/* Error signaled when there's a problem with detecting coding system */
772extern Lisp_Object Qcoding_system_error;
773
df7492f9
KH
774extern char emacs_mule_bytes[256];
775extern int emacs_mule_string_char P_ ((unsigned char *));
776
6f776e81 777#endif /* EMACS_CODING_H */