* config.nt (__P): Remove.
[bpt/emacs.git] / src / coding.h
CommitLineData
/* Header for coding system handler.
   Copyright (C) 2001, 2002, 2003, 2004, 2005,
                 2006, 2007, 2008, 2009, 2010  Free Software Foundation, Inc.
   Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
     2005, 2006, 2007, 2008, 2009, 2010
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H14PRO021
   Copyright (C) 2003
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H13PRO009

This file is part of GNU Emacs.

GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */

6f776e81
KH
27#ifndef EMACS_CODING_H
28#define EMACS_CODING_H
4ed46869 29
/* Indices of the arguments of Fdefine_coding_system_internal.
   The enumerators are numbered consecutively from 0.  coding_arg_max
   is the count of arguments common to all coding system types; the
   type-specific argument enumerations in this file start numbering
   from it.  */

enum define_coding_system_arg_index
  {
    coding_arg_name,
    coding_arg_mnemonic,
    coding_arg_coding_type,
    coding_arg_charset_list,
    coding_arg_ascii_compatible_p,
    coding_arg_decode_translation_table,
    coding_arg_encode_translation_table,
    coding_arg_post_read_conversion,
    coding_arg_pre_write_conversion,
    coding_arg_default_char,
    coding_arg_for_unibyte,
    coding_arg_plist,
    coding_arg_eol_type,
    coding_arg_max
  };

df7492f9 50enum define_coding_iso2022_arg_index
4ed46869 51 {
df7492f9
KH
52 coding_arg_iso2022_initial = coding_arg_max,
53 coding_arg_iso2022_reg_usage,
54 coding_arg_iso2022_request,
55 coding_arg_iso2022_flags,
56 coding_arg_iso2022_max
4ed46869
KH
57 };
58
e4215ddd
KH
59enum define_coding_utf8_arg_index
60 {
61 coding_arg_utf8_bom = coding_arg_max,
62 coding_arg_utf8_max
63 };
64
df7492f9 65enum define_coding_utf16_arg_index
4ed46869 66 {
df7492f9
KH
67 coding_arg_utf16_bom = coding_arg_max,
68 coding_arg_utf16_endian,
69 coding_arg_utf16_max
4ed46869
KH
70 };
71
df7492f9
KH
72enum define_coding_ccl_arg_index
73 {
35d47d18 74 coding_arg_ccl_decoder = coding_arg_max,
df7492f9
KH
75 coding_arg_ccl_encoder,
76 coding_arg_ccl_valids,
77 coding_arg_ccl_max
78 };
4ed46869 79
933373ed
KH
80/* Hash table for all coding systems. Keys are coding system symbols
81 and values are spec vectors of the corresponding coding system. A
82 spec vector has the form [ ATTRS ALIASES EOL-TYPE ]. ATTRS is a
83 vector of attribute of the coding system. ALIASES is a list of
84 aliases (symbols) of the coding system. EOL-TYPE is `unix', `dos',
85 `mac' or a vector of coding systems (symbols). */
86
df7492f9 87extern Lisp_Object Vcoding_system_hash_table;
4ed46869 88
933373ed 89
/* Enumeration of coding system types.  coding_type_max is not a
   type; it is the number of types.  */

enum coding_system_type
  {
    coding_type_charset,
    coding_type_utf_8,
    coding_type_utf_16,
    coding_type_iso_2022,
    coding_type_emacs_mule,
    coding_type_sjis,
    coding_type_ccl,
    coding_type_raw_text,
    coding_type_undecided,
    coding_type_max
  };


/* Enumeration of end-of-line format types.  eol_type_max is not a
   format; it is the number of formats.  */

enum end_of_line_type
  {
    eol_lf,		/* Line-feed only, same as Emacs' internal
			   format.  */
    eol_crlf,		/* Sequence of carriage-return and
			   line-feed.  */
    eol_cr,		/* Carriage-return only.  */
    eol_any,		/* Accept any of the above.  Produce line-feed
			   only.  */
    eol_undecided,	/* This value is used to denote that the
			   eol-type is not yet decided.  */
    eol_type_max
  };

/* Enumeration of indices into the attribute vector of a coding
   system (the ATTRS element of a spec vector).  The CODING_ATTR_XXX
   macros in this file use these as the index argument of AREF.
   coding_attr_last_index is the number of slots.  */

enum coding_attr_index
  {
    /* Attributes common to every coding system type.  */
    coding_attr_base_name,
    coding_attr_docstring,
    coding_attr_mnemonic,
    coding_attr_type,
    coding_attr_charset_list,
    coding_attr_ascii_compat,
    coding_attr_decode_tbl,
    coding_attr_encode_tbl,
    coding_attr_trans_tbl,
    coding_attr_post_read,
    coding_attr_pre_write,
    coding_attr_default_char,
    coding_attr_for_unibyte,
    coding_attr_plist,

    coding_attr_category,
    coding_attr_safe_charsets,

    /* The following are extra attributes for each type.  */
    coding_attr_charset_valids,

    coding_attr_ccl_decoder,
    coding_attr_ccl_encoder,
    coding_attr_ccl_valids,

    coding_attr_iso_initial,
    coding_attr_iso_usage,
    coding_attr_iso_request,
    coding_attr_iso_flags,

    coding_attr_utf_bom,
    coding_attr_utf_16_endian,

    coding_attr_emacs_mule_full,

    coding_attr_last_index
  };


/* Macros to access an element of an attribute vector.  ATTRS is the
   attribute vector of a coding system; each macro expands to an AREF
   on the slot named by the matching enum coding_attr_index value.  */

#define CODING_ATTR_BASE_NAME(attrs)	AREF (attrs, coding_attr_base_name)
#define CODING_ATTR_TYPE(attrs)		AREF (attrs, coding_attr_type)
#define CODING_ATTR_CHARSET_LIST(attrs)	AREF (attrs, coding_attr_charset_list)
#define CODING_ATTR_MNEMONIC(attrs)	AREF (attrs, coding_attr_mnemonic)
#define CODING_ATTR_DOCSTRING(attrs)	AREF (attrs, coding_attr_docstring)
#define CODING_ATTR_ASCII_COMPAT(attrs)	AREF (attrs, coding_attr_ascii_compat)
#define CODING_ATTR_DECODE_TBL(attrs)	AREF (attrs, coding_attr_decode_tbl)
#define CODING_ATTR_ENCODE_TBL(attrs)	AREF (attrs, coding_attr_encode_tbl)
#define CODING_ATTR_TRANS_TBL(attrs)	AREF (attrs, coding_attr_trans_tbl)
#define CODING_ATTR_POST_READ(attrs)	AREF (attrs, coding_attr_post_read)
#define CODING_ATTR_PRE_WRITE(attrs)	AREF (attrs, coding_attr_pre_write)
#define CODING_ATTR_DEFAULT_CHAR(attrs)	AREF (attrs, coding_attr_default_char)
#define CODING_ATTR_FOR_UNIBYTE(attrs)	AREF (attrs, coding_attr_for_unibyte)
/* NOTE(review): enum coding_attr_index in this header has no
   `coding_attr_flushing' enumerator, so any expansion of the
   following macro fails to compile unless that name is supplied
   elsewhere.  It looks stale; confirm there are no users and remove.  */
#define CODING_ATTR_FLUSHING(attrs)	AREF (attrs, coding_attr_flushing)
#define CODING_ATTR_PLIST(attrs)	AREF (attrs, coding_attr_plist)
#define CODING_ATTR_CATEGORY(attrs)	AREF (attrs, coding_attr_category)
#define CODING_ATTR_SAFE_CHARSETS(attrs) AREF (attrs, coding_attr_safe_charsets)


/* Accessors keyed by coding system ID.  ID is an index into
   Vcoding_system_hash_table (see CODING_SYSTEM_ID); HASH_KEY yields
   the coding system symbol and HASH_VALUE the spec vector
   [ ATTRS ALIASES EOL-TYPE ].  */

/* Return the name of a coding system specified by ID.  */
#define CODING_ID_NAME(id) \
  (HASH_KEY (XHASH_TABLE (Vcoding_system_hash_table), id))

/* Return the attribute vector of a coding system specified by ID
   (element 0 of the spec vector).  */

#define CODING_ID_ATTRS(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 0))

/* Return the list of aliases of a coding system specified by ID
   (element 1 of the spec vector).  */

#define CODING_ID_ALIASES(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 1))

/* Return the eol-type of a coding system specified by ID
   (element 2 of the spec vector).  */

#define CODING_ID_EOL_TYPE(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 2))


/* Return the spec vector of CODING_SYSTEM_SYMBOL, looked up in
   Vcoding_system_hash_table with Fgethash.  The value is Qnil if the
   symbol is not a key of the table.  */

#define CODING_SYSTEM_SPEC(coding_system_symbol) \
  (Fgethash (coding_system_symbol, Vcoding_system_hash_table, Qnil))


/* Return the ID of CODING_SYSTEM_SYMBOL, i.e. the result of
   hash_lookup on Vcoding_system_hash_table.  The macros in this
   header treat a negative result as "not a known coding system".  */

#define CODING_SYSTEM_ID(coding_system_symbol) \
  hash_lookup (XHASH_TABLE (Vcoding_system_hash_table), \
	       coding_system_symbol, NULL)

/* Return 1 if CODING_SYSTEM_SYMBOL is a coding system, else 0.
   The hash table is consulted first; only when the symbol has no ID
   there and is non-nil is Fcoding_system_p called.
   CODING_SYSTEM_SYMBOL may be evaluated up to three times, so it
   must not have side effects.  */

#define CODING_SYSTEM_P(coding_system_symbol) \
  (CODING_SYSTEM_ID (coding_system_symbol) >= 0 \
   || (! NILP (coding_system_symbol) \
       && ! NILP (Fcoding_system_p (coding_system_symbol))))

/* Check if X is a coding system or not.  If X has no ID in
   Vcoding_system_hash_table and Fcheck_coding_system (X) returns
   nil, call wrong_type_argument with Qcoding_system_p.
   X may be evaluated more than once.  */

#define CHECK_CODING_SYSTEM(x) \
  do { \
    if (CODING_SYSTEM_ID (x) < 0 \
	&& NILP (Fcheck_coding_system (x))) \
      wrong_type_argument (Qcoding_system_p, (x)); \
  } while (0)


/* Check if X is a coding system or not.  If it is, set SPEC to the
   spec vector of the coding system.

   If the first lookup yields nil, Fcheck_coding_system (X) is called
   and the lookup is retried once (presumably so that the call can
   make the coding system known -- confirm against coding.c).  If
   SPEC is still nil, wrong_type_argument is called with
   Qcoding_system_p.

   SPEC must be an lvalue.  X and SPEC are evaluated more than
   once.  */

#define CHECK_CODING_SYSTEM_GET_SPEC(x, spec) \
  do { \
    (spec) = CODING_SYSTEM_SPEC (x); \
    if (NILP (spec)) \
      { \
	Fcheck_coding_system (x); \
	(spec) = CODING_SYSTEM_SPEC (x); \
      } \
    if (NILP (spec)) \
      wrong_type_argument (Qcoding_system_p, (x)); \
  } while (0)


/* Check if X is a coding system or not.  If it is, set ID to the
   ID of the coding system.

   If the first lookup yields a negative ID, Fcheck_coding_system (X)
   is called and the lookup is retried once.  If ID is still
   negative, wrong_type_argument is called with Qcoding_system_p.

   ID must be an lvalue.  X and ID are evaluated more than once.  */

#define CHECK_CODING_SYSTEM_GET_ID(x, id) \
  do \
    { \
      (id) = CODING_SYSTEM_ID (x); \
      if ((id) < 0) \
	{ \
	  Fcheck_coding_system (x); \
	  (id) = CODING_SYSTEM_ID (x); \
	} \
      if ((id) < 0) \
	wrong_type_argument (Qcoding_system_p, (x)); \
    } while (0)


/*** GENERAL section ***/

/* Enumeration of result codes of code conversion.  A value of this
   type is stored in the member `result' of struct coding_system.  */
enum coding_result_code
  {
    CODING_RESULT_SUCCESS,
    CODING_RESULT_INSUFFICIENT_SRC,
    CODING_RESULT_INSUFFICIENT_DST,
    CODING_RESULT_INCONSISTENT_EOL,
    CODING_RESULT_INVALID_SRC,
    CODING_RESULT_INTERRUPT,
    CODING_RESULT_INSUFFICIENT_MEM
  };


/* Macros used for the member `mode' of the struct coding_system.
   Each value is a distinct bit, so they can be OR'ed together.  */

/* If set, recover the original CR or LF of the already decoded text
   when the decoding routine encounters an inconsistent eol format.  */
#define CODING_MODE_INHIBIT_INCONSISTENT_EOL	0x01

/* If set, the decoding/encoding routines treat the current data as
   the last block of the whole text to be converted, and do the
   appropriate finishing job.  */
#define CODING_MODE_LAST_BLOCK			0x02

/* If set, it means that the current source text is in a buffer which
   enables selective display.  */
#define CODING_MODE_SELECTIVE_DISPLAY		0x04

/* This flag is used by the decoding/encoding routines on the fly.  If
   set, it means that right-to-left text is being processed.  */
#define CODING_MODE_DIRECTION			0x08

/* NOTE(review): not documented in this header.  Judging by the name
   it marks the destination as fixed (not to be relocated or
   resized); confirm against the users in coding.c.  */
#define CODING_MODE_FIXED_DESTINATION		0x10

/* If set, it means that the encoding routines produce some safe
   ASCII characters (usually '?') for unsupported characters.  */
#define CODING_MODE_SAFE_ENCODING		0x20

825d0875
KH
309 /* For handling composition sequence. */
310#include "composite.h"
311
/* States of composition handling; stored in the member `state' of
   struct composition_status.  */
enum composition_state
  {
    COMPOSING_NO,
    COMPOSING_CHAR,
    COMPOSING_RULE,
    COMPOSING_COMPONENT_CHAR,
    COMPOSING_COMPONENT_RULE
  };

321/* Structure for the current composition status. */
322struct composition_status
323{
324 enum composition_state state;
325 enum composition_method method;
326 int old_form; /* 0:pre-21 form, 1:post-21 form */
327 int length; /* number of elements produced in charbuf */
328 int nchars; /* number of characters composed */
329 int ncomps; /* number of composition components */
330 /* Maximum carryover is for the case of COMPOSITION_WITH_RULE_ALTCHARS.
331 See the comment in coding.c. */
332 int carryover[4 /* annotation header */
333 + MAX_COMPOSITION_COMPONENTS * 3 - 2 /* ALTs and RULEs */
334 + 2 /* intermediate -1 -1 */
335 + MAX_COMPOSITION_COMPONENTS /* CHARs */
336 ];
337};
338
339
df7492f9
KH
340/* Structure of the field `spec.iso_2022' in the structure
341 `coding_system'. */
342struct iso_2022_spec
4ed46869 343{
2ec49574 344 /* Bit-wise-or of CODING_ISO_FLAG_XXX. */
df7492f9 345 unsigned flags;
4ed46869 346
df7492f9
KH
347 /* The current graphic register invoked to each graphic plane. */
348 int current_invocation[2];
658cc252 349
df7492f9
KH
350 /* The current charset designated to each graphic register. The
351 value -1 means that not charset is designated, -2 means that
352 there was an invalid designation previously. */
353 int current_designation[4];
4ed46869 354
df7492f9
KH
355 /* Set to 1 temporarily only when graphic register 2 or 3 is invoked
356 by single-shift while encoding. */
357 int single_shifting;
4ed46869 358
df7492f9
KH
359 /* Set to 1 temporarily only when processing at beginning of line. */
360 int bol;
825d0875
KH
361
362 /* If positive, we are now scanning CTEXT extended segment. */
363 int ctext_extended_segment_len;
364
365 /* If nonzero, we are now scanning embedded UTF-8 sequence. */
366 int embedded_utf_8;
367
368 /* The current composition. */
369 struct composition_status cmp_status;
370};
371
372struct emacs_mule_spec
373{
374 int full_support;
375 struct composition_status cmp_status;
df7492f9 376};
4ed46869 377
/* Forward declaration only; struct coding_system holds a pointer to
   this type.  The full definition is in ccl.h.  */
struct ccl_spec;

/* How a byte order mark (BOM) is treated by the UTF-8 and UTF-16
   coding systems.  */
enum utf_bom_type
  {
    utf_detect_bom,
    utf_without_bom,
    utf_with_bom
  };

/* Byte order of a UTF-16 coding system.  */
enum utf_16_endian_type
  {
    utf_16_big_endian,
    utf_16_little_endian
  };

df7492f9
KH
393struct utf_16_spec
394{
e4215ddd 395 enum utf_bom_type bom;
df7492f9
KH
396 enum utf_16_endian_type endian;
397 int surrogate;
398};
279d9f7b 399
/* Bookkeeping for coding system detection; a pointer to this is
   passed to the `detector' function of struct coding_system.
   Values of all members are bitwise-OR of CATEGORY_MASK_XXXs.  */
struct coding_detection_info
{
  /* Which categories are already checked.  */
  int checked;
  /* Which categories are strongly found.  */
  int found;
  /* Which categories are rejected.  */
  int rejected;
};


df7492f9
KH
412struct coding_system
413{
414 /* ID number of the coding system. This is an index to
415 Vcoding_system_hash_table. This value is set by
416 setup_coding_system. At the early stage of building time, this
417 value is -1 in the array coding_categories to indicate that no
418 coding-system of that category is yet defined. */
419 int id;
420
421 /* Flag bits of the coding system. The meaning of each bit is common
422 to all types of coding systems. */
423 int common_flags;
424
425 /* Mode bits of the coding system. See the comments of the macros
426 CODING_MODE_XXX. */
427 unsigned int mode;
450c60a5 428
4ed46869 429 /* Detailed information specific to each type of coding system. */
df7492f9 430 union
4ed46869 431 {
df7492f9
KH
432 struct iso_2022_spec iso_2022;
433 struct ccl_spec *ccl; /* Defined in ccl.h. */
434 struct utf_16_spec utf_16;
e4215ddd 435 enum utf_bom_type utf_8_bom;
825d0875 436 struct emacs_mule_spec emacs_mule;
4ed46869
KH
437 } spec;
438
df7492f9 439 int max_charset_id;
1b3b981b 440 unsigned char *safe_charsets;
658cc252 441
df7492f9
KH
442 /* The following two members specify how binary 8-bit code 128..255
443 are represented in source and destination text respectively. 1
444 means they are represented by 2-byte sequence, 0 means they are
445 represented by 1-byte as is (see the comment in character.h). */
811ea086
KH
446 unsigned src_multibyte : 1;
447 unsigned dst_multibyte : 1;
448
a137bb00
KH
449 /* How may heading bytes we can skip for decoding. This is set to
450 -1 in setup_coding_system, and updated by detect_coding. So,
451 when this is equal to the byte length of the text being
452 converted, we can skip the actual conversion process. */
df7492f9 453 int head_ascii;
658cc252
KH
454
455 /* The following members are set by encoding/decoding routine. */
df7492f9 456 EMACS_INT produced, produced_char, consumed, consumed_char;
658cc252 457
811ea086
KH
458 /* Number of error source data found in a decoding routine. */
459 int errors;
460
df7492f9
KH
461 /* Store the positions of error source data. */
462 EMACS_INT *error_positions;
e6a9a0bc 463
df7492f9
KH
464 /* Finish status of code conversion. */
465 enum coding_result_code result;
6041c9ce 466
df7492f9
KH
467 EMACS_INT src_pos, src_pos_byte, src_chars, src_bytes;
468 Lisp_Object src_object;
8f924df7 469 const unsigned char *source;
4ed46869 470
df7492f9
KH
471 EMACS_INT dst_pos, dst_pos_byte, dst_bytes;
472 Lisp_Object dst_object;
473 unsigned char *destination;
4ed46869 474
b2e6b10f 475 /* Set to 1 if the source of conversion is not in the member
933373ed 476 `charbuf', but at `src_object'. */
df7492f9 477 int chars_at_source;
4ed46869 478
df7492f9
KH
479 /* If an element is non-negative, it is a character code.
480
481 If it is in the range -128..-1, it is a 8-bit character code
482 minus 256.
483
484 If it is less than -128, it specifies the start of an annotation
485 chunk. The length of the chunk is -128 minus the value of the
486 element. The following elements are OFFSET, ANNOTATION-TYPE, and
487 a sequence of actual data for the annotation. OFFSET is a
488 character position offset from dst_pos or src_pos,
489 ANNOTATION-TYPE specfies the meaning of the annotation and how to
490 handle the following data.. */
491 int *charbuf;
492 int charbuf_size, charbuf_used;
493
494 /* Set to 1 if charbuf contains an annotation. */
495 int annotated;
4ed46869 496
df7492f9
KH
497 unsigned char carryover[64];
498 int carryover_bytes;
a5ee738b 499
df7492f9
KH
500 int default_char;
501
383e0970
J
502 int (*detector) (struct coding_system *,
503 struct coding_detection_info *);
504 void (*decoder) (struct coding_system *);
505 int (*encoder) (struct coding_system *);
df7492f9
KH
506};
507
/* Meanings of bits in the member `common_flags' of the structure
   coding_system.  The lowest 8 bits are reserved for various kinds
   of annotations.

   NOTE(review): CODING_ANNOTATE_CHARSET_MASK is 0x0003, which is not
   a single bit: it equals CODING_ANNOTATE_COMPOSITION_MASK |
   CODING_ANNOTATE_DIRECTION_MASK.  Before changing it, confirm in
   coding.c whether these three values are tested as bit flags or
   only used as distinct annotation codes.  */
#define CODING_ANNOTATION_MASK			0x00FF
#define CODING_ANNOTATE_COMPOSITION_MASK	0x0001
#define CODING_ANNOTATE_DIRECTION_MASK		0x0002
#define CODING_ANNOTATE_CHARSET_MASK		0x0003
#define CODING_FOR_UNIBYTE_MASK			0x0100
#define CODING_REQUIRE_FLUSHING_MASK		0x0200
#define CODING_REQUIRE_DECODING_MASK		0x0400
#define CODING_REQUIRE_ENCODING_MASK		0x0800
#define CODING_REQUIRE_DETECTION_MASK		0x1000
#define CODING_RESET_AT_BOL_MASK		0x2000

/* Predicates on a coding context.  CODING is a pointer to a struct
   coding_system.  Unless noted otherwise the value is the masked
   flag bits, i.e. nonzero (not necessarily 1) when the condition
   holds and 0 otherwise.  */

/* Return nonzero if the coding context CODING requires annotation
   handling.  */
#define CODING_REQUIRE_ANNOTATION(coding) \
  ((coding)->common_flags & CODING_ANNOTATION_MASK)

/* Return nonzero if the coding context CODING prefers decoding into
   unibyte.  */
#define CODING_FOR_UNIBYTE(coding) \
  ((coding)->common_flags & CODING_FOR_UNIBYTE_MASK)

/* Return nonzero if the coding context CODING requires specific code
   to be attached at the tail of converted text.  */
#define CODING_REQUIRE_FLUSHING(coding) \
  ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK)

/* Return 1 if the coding context CODING requires code conversion on
   decoding, else 0.  A multibyte destination always requires it.  */
#define CODING_REQUIRE_DECODING(coding)	\
  ((coding)->dst_multibyte		\
   || ((coding)->common_flags & CODING_REQUIRE_DECODING_MASK))


/* Return 1 if the coding context CODING requires code conversion on
   encoding, else 0.
   The non-multibyte part of the condition is to support encoding of
   unibyte strings/buffers generated by string-as-unibyte or
   (set-buffer-multibyte nil) from multibyte strings/buffers.  */
#define CODING_REQUIRE_ENCODING(coding)				\
  ((coding)->src_multibyte					\
   || ((coding)->common_flags & CODING_REQUIRE_ENCODING_MASK)	\
   || ((coding)->mode & CODING_MODE_SELECTIVE_DISPLAY))


/* Return nonzero if the coding context CODING requires some kind of
   code detection.  */
#define CODING_REQUIRE_DETECTION(coding) \
  ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK)

/* Return 1 if the coding context CODING requires code conversion on
   decoding or some kind of code detection, else 0.  */
#define CODING_MAY_REQUIRE_DECODING(coding)	\
  (CODING_REQUIRE_DECODING (coding)		\
   || CODING_REQUIRE_DETECTION (coding))

/* Macros to convert a two-byte code between SJIS and JISX0208.
   CODE must be a side-effect-free integer lvalue holding the first
   byte in bits 8 and up and the second byte in the low 8 bits; it is
   converted in place.  */

/* Convert CODE from SJIS to JISX0208.  The temporaries carry a
   trailing underscore so that they cannot capture identifiers used
   in the caller's CODE expression.  */
#define SJIS_TO_JIS(code)						\
  do {									\
    int sjis_code_ = (code);						\
    int sjis_s1_ = sjis_code_ >> 8;					\
    int sjis_s2_ = sjis_code_ & 0xFF;					\
    int sjis_j1_, sjis_j2_;						\
									\
    if (sjis_s2_ >= 0x9F)						\
      {									\
	sjis_j1_ = sjis_s1_ * 2 - (sjis_s1_ >= 0xE0 ? 0x160 : 0xE0);	\
	sjis_j2_ = sjis_s2_ - 0x7E;					\
      }									\
    else								\
      {									\
	sjis_j1_ = sjis_s1_ * 2 - (sjis_s1_ >= 0xE0 ? 0x161 : 0xE1);	\
	sjis_j2_ = sjis_s2_ - (sjis_s2_ >= 0x7F ? 0x20 : 0x1F);		\
      }									\
    (code) = (sjis_j1_ << 8) | sjis_j2_;				\
  } while (0)

/* Convert CODE in place from SJIS to JIS, using the row mapping for
   SJIS first bytes 0xF0 and above.  CODE must be a side-effect-free
   integer lvalue holding the first byte in bits 8 and up and the
   second byte in the low 8 bits.  The temporaries carry a trailing
   underscore so that they cannot capture identifiers used in the
   caller's CODE expression.  */
#define SJIS_TO_JIS2(code)						\
  do {									\
    int sjis2_code_ = (code);						\
    int sjis2_s1_ = sjis2_code_ >> 8;					\
    int sjis2_s2_ = sjis2_code_ & 0xFF;					\
    int sjis2_j1_, sjis2_j2_;						\
									\
    if (sjis2_s2_ >= 0x9F)						\
      {									\
	sjis2_j1_ = (sjis2_s1_ == 0xF0 ? 0x28				\
		     : sjis2_s1_ == 0xF1 ? 0x24				\
		     : sjis2_s1_ == 0xF2 ? 0x2C				\
		     : sjis2_s1_ == 0xF3 ? 0x2E				\
		     : 0x6E + (sjis2_s1_ - 0xF4) * 2);			\
	sjis2_j2_ = sjis2_s2_ - 0x7E;					\
      }									\
    else								\
      {									\
	sjis2_j1_ = (sjis2_s1_ <= 0xF2 ? 0x21 + (sjis2_s1_ - 0xF0) * 2	\
		     : sjis2_s1_ <= 0xF4 ? 0x2D + (sjis2_s1_ - 0xF3) * 2 \
		     : 0x6F + (sjis2_s1_ - 0xF5) * 2);			\
	sjis2_j2_ = sjis2_s2_ - (sjis2_s2_ >= 0x7F ? 0x20 : 0x1F);	\
      }									\
    (code) = (sjis2_j1_ << 8) | sjis2_j2_;				\
  } while (0)


/* Convert CODE in place from JISX0208 to SJIS (the inverse of
   SJIS_TO_JIS).  CODE must be a side-effect-free integer lvalue
   holding the first byte in bits 8 and up and the second byte in the
   low 8 bits.  The temporaries carry a trailing underscore so that
   they cannot capture identifiers used in the caller's CODE
   expression.  */
#define JIS_TO_SJIS(code)						\
  do {									\
    int jis_code_ = (code);						\
    int jis_j1_ = jis_code_ >> 8;					\
    int jis_j2_ = jis_code_ & 0xFF;					\
    int jis_s1_, jis_s2_;						\
									\
    if (jis_j1_ & 1)							\
      {									\
	jis_s1_ = jis_j1_ / 2 + (jis_j1_ < 0x5F ? 0x71 : 0xB1);		\
	jis_s2_ = jis_j2_ + (jis_j2_ >= 0x60 ? 0x20 : 0x1F);		\
      }									\
    else								\
      {									\
	jis_s1_ = jis_j1_ / 2 + (jis_j1_ < 0x5F ? 0x70 : 0xB0);		\
	jis_s2_ = jis_j2_ + 0x7E;					\
      }									\
    (code) = (jis_s1_ << 8) | jis_s2_;					\
  } while (0)

/* Convert CODE in place from JIS to SJIS, producing SJIS first bytes
   0xF0 and above (the inverse of SJIS_TO_JIS2).  CODE must be a
   side-effect-free integer lvalue holding the first byte in bits 8
   and up and the second byte in the low 8 bits.  The temporaries
   carry a trailing underscore so that they cannot capture
   identifiers used in the caller's CODE expression.  */
#define JIS_TO_SJIS2(code)						\
  do {									\
    int jis2_code_ = (code);						\
    int jis2_j1_ = jis2_code_ >> 8;					\
    int jis2_j2_ = jis2_code_ & 0xFF;					\
    int jis2_s1_, jis2_s2_;						\
									\
    if (jis2_j1_ & 1)							\
      {									\
	jis2_s1_ = (jis2_j1_ <= 0x25 ? 0xF0 + (jis2_j1_ - 0x21) / 2	\
		    : jis2_j1_ <= 0x2F ? 0xF3 + (jis2_j1_ - 0x2D) / 2	\
		    : 0xF5 + (jis2_j1_ - 0x6F) / 2);			\
	jis2_s2_ = jis2_j2_ + (jis2_j2_ >= 0x60 ? 0x20 : 0x1F);		\
      }									\
    else								\
      {									\
	jis2_s1_ = (jis2_j1_ == 0x28 ? 0xF0				\
		    : jis2_j1_ == 0x24 ? 0xF1				\
		    : jis2_j1_ == 0x2C ? 0xF2				\
		    : jis2_j1_ == 0x2E ? 0xF3				\
		    : 0xF4 + (jis2_j1_ - 0x6E) / 2);			\
	jis2_s2_ = jis2_j2_ + 0x7E;					\
      }									\
    (code) = (jis2_s1_ << 8) | jis2_s2_;				\
  } while (0)

/* Encode the file name NAME using the specified coding system
   for file names, if any.

   Vfile_name_coding_system is used when it is neither nil nor the
   integer 0; otherwise Vdefault_file_name_coding_system is tried
   under the same condition; otherwise NAME itself is the value.
   NAME is evaluated exactly once.  */
#define ENCODE_FILE(name)						   \
  (! NILP (Vfile_name_coding_system)					   \
   && !EQ (Vfile_name_coding_system, make_number (0))			   \
   ? code_convert_string_norecord (name, Vfile_name_coding_system, 1)	   \
   : (! NILP (Vdefault_file_name_coding_system)				   \
      && !EQ (Vdefault_file_name_coding_system, make_number (0))	   \
      ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \
      : (name)))


/* Decode the file name NAME using the specified coding system
   for file names, if any.

   Vfile_name_coding_system is used when it is neither nil nor the
   integer 0; otherwise Vdefault_file_name_coding_system is tried
   under the same condition; otherwise NAME itself is the value.
   NAME is evaluated exactly once.  */
#define DECODE_FILE(name)						   \
  (! NILP (Vfile_name_coding_system)					   \
   && !EQ (Vfile_name_coding_system, make_number (0))			   \
   ? code_convert_string_norecord (name, Vfile_name_coding_system, 0)	   \
   : (! NILP (Vdefault_file_name_coding_system)				   \
      && !EQ (Vdefault_file_name_coding_system, make_number (0))	   \
      ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 0) \
      : (name)))


/* Encode the string STR using the specified coding system
   for system functions, if any.  Vlocale_coding_system is used when
   it is neither nil nor the integer 0; otherwise STR itself is the
   value.  STR is evaluated exactly once.  */
#define ENCODE_SYSTEM(str)						   \
  (! NILP (Vlocale_coding_system)					   \
   && !EQ (Vlocale_coding_system, make_number (0))			   \
   ? code_convert_string_norecord (str, Vlocale_coding_system, 1)	   \
   : (str))

/* Decode the string STR using the specified coding system
   for system functions, if any.  Vlocale_coding_system is used when
   it is neither nil nor the integer 0; otherwise STR itself is the
   value.  STR is evaluated exactly once.  */
#define DECODE_SYSTEM(str)						   \
  (! NILP (Vlocale_coding_system)					   \
   && !EQ (Vlocale_coding_system, make_number (0))			   \
   ? code_convert_string_norecord (str, Vlocale_coding_system, 0)	   \
   : (str))

/* Used by the gtk menu code.  Note that this encodes utf-8, not
   utf-8-emacs, so it's not a no-op.  */
#define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, 1)

4ed46869 693/* Extern declarations. */
383e0970
J
694extern Lisp_Object code_conversion_save (int, int);
695extern int decoding_buffer_size (struct coding_system *, int);
696extern int encoding_buffer_size (struct coding_system *, int);
697extern void setup_coding_system (Lisp_Object, struct coding_system *);
698extern Lisp_Object coding_charset_list (struct coding_system *);
699extern Lisp_Object coding_system_charset_list (Lisp_Object);
700extern void detect_coding (struct coding_system *);
701extern Lisp_Object code_convert_region (Lisp_Object, Lisp_Object,
702 Lisp_Object, Lisp_Object,
703 int, int);
704extern Lisp_Object code_convert_string (Lisp_Object, Lisp_Object,
705 Lisp_Object, int, int, int);
706extern Lisp_Object code_convert_string_norecord (Lisp_Object, Lisp_Object,
707 int);
708extern Lisp_Object raw_text_coding_system (Lisp_Object);
709extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object);
710
711extern int decode_coding_gap (struct coding_system *,
712 EMACS_INT, EMACS_INT);
713extern int encode_coding_gap (struct coding_system *,
714 EMACS_INT, EMACS_INT);
715extern void decode_coding_object (struct coding_system *,
716 Lisp_Object, EMACS_INT, EMACS_INT,
717 EMACS_INT, EMACS_INT, Lisp_Object);
718extern void encode_coding_object (struct coding_system *,
719 Lisp_Object, EMACS_INT, EMACS_INT,
720 EMACS_INT, EMACS_INT, Lisp_Object);
df7492f9 721
/* Macros for backward compatibility.  */

/* Decode the text between FROM and TO of the current buffer,
   putting the result in the current buffer as well.  FROM and TO
   are passed both as they are and converted with CHAR_TO_BYTE, so
   each is evaluated twice; Fcurrent_buffer is called twice.  */
#define decode_coding_region(coding, from, to)		\
  decode_coding_object (coding, Fcurrent_buffer (),	\
			from, CHAR_TO_BYTE (from),	\
			to, CHAR_TO_BYTE (to), Fcurrent_buffer ())


/* Likewise, but encode.  */
#define encode_coding_region(coding, from, to)		\
  encode_coding_object (coding, Fcurrent_buffer (),	\
			from, CHAR_TO_BYTE (from),	\
			to, CHAR_TO_BYTE (to), Fcurrent_buffer ())


/* Decode the whole of STRING with CODING, passing Qt as the
   destination object.  NOCOPY is accepted for compatibility and
   ignored.  STRING is evaluated more than once.  */
#define decode_coding_string(coding, string, nocopy)			\
  decode_coding_object (coding, string, 0, 0, SCHARS (string),		\
			SBYTES (string), Qt)

/* Encode the whole of STRING with CODING.  If STRING is multibyte,
   encode_coding_object is called with Qt as the destination object
   and the value is (CODING)->dst_object; otherwise nothing is
   converted and the value is STRING itself.  NOCOPY is accepted for
   compatibility and ignored.  CODING and STRING are evaluated more
   than once.  */
#define encode_coding_string(coding, string, nocopy)			\
  (STRING_MULTIBYTE (string) ?						\
    (encode_coding_object (coding, string, 0, 0, SCHARS (string),	\
			   SBYTES (string), Qt),			\
     (coding)->dst_object) : (string))


/* Decode BYTES bytes at the C string SRC with CODING into
   DST_OBJECT.  The `source', `src_chars' and `src_bytes' members of
   CODING are set first, then decode_coding_object is called with
   Qnil as the source object.  CODING and BYTES are evaluated more
   than once.  */
#define decode_coding_c_string(coding, src, bytes, dst_object)		\
  do {									\
    (coding)->source = (src);						\
    (coding)->src_chars = (coding)->src_bytes = (bytes);		\
    decode_coding_object ((coding), Qnil, 0, 0, (bytes), (bytes),	\
			  (dst_object));				\
  } while (0)


383e0970 756extern Lisp_Object preferred_coding_system ();
df7492f9
KH
757
758
8f924df7
KH
759extern Lisp_Object Qutf_8, Qutf_8_emacs;
760
4ed46869 761extern Lisp_Object Qcoding_system, Qeol_type, Qcoding_category_index;
df7492f9
KH
762extern Lisp_Object Qcoding_system_p;
763extern Lisp_Object Qraw_text, Qemacs_mule, Qno_conversion, Qundecided;
764extern Lisp_Object Qiso_2022;
4ed46869 765extern Lisp_Object Qbuffer_file_coding_system;
df7492f9
KH
766
767extern Lisp_Object Qunix, Qdos, Qmac;
4ed46869 768
f967223b
KH
769extern Lisp_Object Qtranslation_table;
770extern Lisp_Object Qtranslation_table_id;
ab45712c 771
20ee919e
EZ
772/* Mnemonic strings to indicate each type of end-of-line. */
773extern Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
774/* Mnemonic string to indicate type of end-of-line is not yet decided. */
775extern Lisp_Object eol_mnemonic_undecided;
4ed46869 776
/* Declarations that are visible only when `emacs' is defined.  */
#ifdef emacs
extern Lisp_Object Qfile_coding_system;
extern Lisp_Object Qcall_process, Qcall_process_region;
extern Lisp_Object Qstart_process, Qopen_network_stream;
extern Lisp_Object Qwrite_region;

extern char *emacs_strerror (int);

/* Coding-system for reading files and receiving data from process.  */
extern Lisp_Object Vcoding_system_for_read;
/* Coding-system for writing files and sending data to process.  */
extern Lisp_Object Vcoding_system_for_write;
/* Coding-system actually used in the latest I/O.  */
extern Lisp_Object Vlast_coding_system_used;
/* Coding-system to use with system messages (e.g. strerror).  */
extern Lisp_Object Vlocale_coding_system;

/* If non-zero, process buffer inherits the coding system used to decode
   the subprocess output.  */
extern int inherit_process_coding_system;

/* Coding system to be used to encode text for terminal display when
   terminal coding system is nil.  */
extern struct coding_system safe_terminal_coding;

/* Default coding systems used for process I/O.  */
extern Lisp_Object Vdefault_process_coding_system;

/* Function to call to force a user to select a proper coding
   system.  */
extern Lisp_Object Vselect_safe_coding_system_function;

/* If nonzero, on writing a file, Vselect_safe_coding_system_function
   is called even if Vcoding_system_for_write is non-nil.  */
extern int coding_system_require_warning;

/* Coding system for file names, or nil if none.  */
extern Lisp_Object Vfile_name_coding_system;

/* Coding system for file names used only when
   Vfile_name_coding_system is nil.  */
extern Lisp_Object Vdefault_file_name_coding_system;

#endif

d008a7cc
GM
822/* Error signaled when there's a problem with detecting coding system */
823extern Lisp_Object Qcoding_system_error;
824
df7492f9 825extern char emacs_mule_bytes[256];
383e0970 826extern int emacs_mule_string_char (unsigned char *);
df7492f9 827
6f776e81 828#endif /* EMACS_CODING_H */
cefd8c4f
KH
829
830/* arch-tag: 2bc3b4fa-6870-4f64-8135-b962b2d290e4
831 (do not change this comment) */