Update years in copyright notice; nfc.
[bpt/emacs.git] / src / coding.h
CommitLineData
4ed46869 1/* Header for coding system handler.
aaef169d
TTN
2 Copyright (C) 2002, 2003, 2004, 2005,
3 2006 Free Software Foundation, Inc.
ce03bf76
KH
4 Copyright (C) 1995, 1997, 1998, 2000
5 National Institute of Advanced Industrial Science and Technology (AIST)
6 Registration Number H14PRO021
4ed46869 7
369314dc
KH
8This file is part of GNU Emacs.
9
10GNU Emacs is free software; you can redistribute it and/or modify
11it under the terms of the GNU General Public License as published by
12the Free Software Foundation; either version 2, or (at your option)
13any later version.
4ed46869 14
369314dc
KH
15GNU Emacs is distributed in the hope that it will be useful,
16but WITHOUT ANY WARRANTY; without even the implied warranty of
17MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18GNU General Public License for more details.
4ed46869 19
369314dc
KH
20You should have received a copy of the GNU General Public License
21along with GNU Emacs; see the file COPYING. If not, write to
4fc5845f
LK
22the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23Boston, MA 02110-1301, USA. */
4ed46869 24
6f776e81
KH
25#ifndef EMACS_CODING_H
26#define EMACS_CODING_H
4ed46869 27
2ea6666c 28#include "ccl.h"
4ed46869 29
0ef69138 30/*** EMACS' INTERNAL FORMAT (emacs-mule) section ***/
4ed46869
KH
31
32/* Every 1-byte code of Emacs' internal format is classified into one
33 of the following classes. See also `charset.h'. */
34enum emacs_code_class_type
35 {
36 EMACS_control_code, /* Control codes in the range
37 0x00..0x1F and 0x7F except for the
38 following two codes. */
39 EMACS_linefeed_code, /* 0x0A (linefeed) to denote
40 end-of-line. */
41 EMACS_carriage_return_code, /* 0x0D (carriage-return) to be used
42 in selective display mode. */
43 EMACS_ascii_code, /* ASCII characters. */
4ed46869
KH
44 EMACS_leading_code_2, /* Base leading code of official
45 TYPE9N character. */
46 EMACS_leading_code_3, /* Base leading code of private TYPE9N
47 or official TYPE9Nx9N character. */
48 EMACS_leading_code_4, /* Base leading code of private
49 TYPE9Nx9N character. */
50 EMACS_invalid_code /* Invalid code, i.e. a base leading
51 code not yet assigned to any
52 charset, or a code of the range
53 0xA0..0xFF. */
54 };
55
56extern enum emacs_code_class_type emacs_code_class[256];
57
58/*** ISO2022 section ***/
59
60/* Macros to define code of control characters for ISO2022's functions. */
61 /* code */ /* function */
62#define ISO_CODE_LF 0x0A /* line-feed */
63#define ISO_CODE_CR 0x0D /* carriage-return */
64#define ISO_CODE_SO 0x0E /* shift-out */
65#define ISO_CODE_SI 0x0F /* shift-in */
66#define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
67#define ISO_CODE_ESC 0x1B /* escape */
68#define ISO_CODE_SS2 0x8E /* single-shift-2 */
69#define ISO_CODE_SS3 0x8F /* single-shift-3 */
70#define ISO_CODE_CSI 0x9B /* control-sequence-introduce */
71
72/* Every 1-byte code of ISO2022 is classified into one of the
73 following classes. */
74enum iso_code_class_type
75 {
811ea086
KH
76 ISO_control_0, /* Control codes in the range
77 0x00..0x1F and 0x7F, except for the
78 following 5 codes.
   NOTE(review): 0x7F is also claimed by ISO_0x20_or_0x7F
   below; confirm which class the table really assigns. */
4ed46869
KH
79 ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
80 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
81 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
82 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
83 ISO_escape, /* ISO_CODE_ESC (0x1B) */
811ea086
KH
84 ISO_control_1, /* Control codes in the range
85 0x80..0x9F, except for the
86 following 3 codes. */
4ed46869
KH
87 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
88 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
89 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
90 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
91 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
92 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
93 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
94 };
95
96/** Each macro CODING_FLAG_ISO_XXX defines a flag bit of the `flags'
97 member of the structure `coding_system'. This information is used
98 while encoding text to ISO2022. **/
99
100/* If set, produce short-form designation sequence (e.g. ESC $ A)
101 instead of long-form sequence (e.g. ESC $ ( A). */
102#define CODING_FLAG_ISO_SHORT_FORM 0x0001
103
104/* If set, reset graphic planes and registers at end-of-line to the
105 initial state. */
106#define CODING_FLAG_ISO_RESET_AT_EOL 0x0002
107
108/* If set, reset graphic planes and registers before any control
109 characters to the initial state. */
110#define CODING_FLAG_ISO_RESET_AT_CNTL 0x0004
111
112/* If set, encode by 7-bit environment. */
113#define CODING_FLAG_ISO_SEVEN_BITS 0x0008
114
115/* If set, use locking-shift function. */
116#define CODING_FLAG_ISO_LOCKING_SHIFT 0x0010
117
118/* If set, use single-shift function. Overrides
119 CODING_FLAG_ISO_LOCKING_SHIFT. */
120#define CODING_FLAG_ISO_SINGLE_SHIFT 0x0020
121
122/* If set, designate JISX0201-Roman instead of ASCII. */
123#define CODING_FLAG_ISO_USE_ROMAN 0x0040
124
125/* If set, designate JISX0208-1978 instead of JISX0208-1983. */
126#define CODING_FLAG_ISO_USE_OLDJIS 0x0080
127
128/* If set, do not produce ISO6429's direction specifying sequence. */
129#define CODING_FLAG_ISO_NO_DIRECTION 0x0100
130
8ddb35b2
KH
131/* If set, assume designation states are reset at beginning of line on
132 output. */
133#define CODING_FLAG_ISO_INIT_AT_BOL 0x0200
134
135/* If set, designation sequence should be placed at beginning of line
136 on output. */
137#define CODING_FLAG_ISO_DESIGNATE_AT_BOL 0x0400
138
55496054 139/* If set, do not encode unsafe characters on output. */
fbaa2ed9
KH
140#define CODING_FLAG_ISO_SAFE 0x0800
141
c0c69d45
KH
142/* If set, extra latin codes (128..159) are accepted as a valid code
143 on input. */
144#define CODING_FLAG_ISO_LATIN_EXTRA 0x1000
145
658cc252
KH
146/* If set, use designation escape sequence. */
147#define CODING_FLAG_ISO_DESIGNATION 0x10000
148
fbaa2ed9 149/* A character to be produced on output if encoding of the original
cbb76662
KH
150 character is inhibited by CODING_MODE_INHIBIT_UNENCODABLE_CHAR.
151 It must be an ASCII character. */
152#define CODING_REPLACEMENT_CHARACTER '?'
fbaa2ed9 153
4ed46869
KH
154/* Structure of the field `spec.iso2022' in the structure `coding_system'. */
155struct iso2022_spec
156{
157 /* The current graphic register invoked to each graphic plane. */
158 int current_invocation[2];
159
160 /* The current charset designated to each graphic register. */
161 int current_designation[4];
162
163 /* A charset initially designated to each graphic register. */
164 int initial_designation[4];
165
658cc252
KH
166 /* If not -1, it is a graphic register specified in an invalid
167 designation sequence. */
168 int last_invalid_designation_register;
169
4ed46869 170 /* A graphic register to which each charset should be designated. */
da30d5b1 171 unsigned char requested_designation[MAX_CHARSET + 1];
4ed46869 172
1db9ba06
KH
173 /* A revision number to be specified for each charset on encoding.
174 The value 255 means no revision number for the corresponding
175 charset. */
176 unsigned char charset_revision_number[MAX_CHARSET + 1];
e6de76f8 177
4ed46869
KH
178 /* Set to 1 temporarily only when graphic register 2 or 3 is invoked
179 by single-shift while encoding. */
180 int single_shifting;
8ddb35b2
KH
181
182 /* Set to 1 temporarily only when processing at beginning of line. */
183 int bol;
4ed46869
KH
184};
185
186/* Macros to access each field in the structure `spec.iso2022'. */
187#define CODING_SPEC_ISO_INVOCATION(coding, plane) \
658cc252 188 (coding)->spec.iso2022.current_invocation[plane]
4ed46869 189#define CODING_SPEC_ISO_DESIGNATION(coding, reg) \
658cc252 190 (coding)->spec.iso2022.current_designation[reg]
4ed46869 191#define CODING_SPEC_ISO_INITIAL_DESIGNATION(coding, reg) \
658cc252 192 (coding)->spec.iso2022.initial_designation[reg]
4ed46869 193#define CODING_SPEC_ISO_REQUESTED_DESIGNATION(coding, charset) \
658cc252 194 (coding)->spec.iso2022.requested_designation[charset]
1db9ba06 195#define CODING_SPEC_ISO_REVISION_NUMBER(coding, charset) \
658cc252 196 (coding)->spec.iso2022.charset_revision_number[charset]
4ed46869 197#define CODING_SPEC_ISO_SINGLE_SHIFTING(coding) \
658cc252 198 (coding)->spec.iso2022.single_shifting
8ddb35b2 199#define CODING_SPEC_ISO_BOL(coding) \
658cc252 200 (coding)->spec.iso2022.bol
4ed46869 201
da30d5b1
KH
202/* A value which may appear in
203 coding->spec.iso2022.requested_designation indicating that the
204 corresponding charset does not request any graphic register to be
205 designated. */
206#define CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION 4
207
4ed46869
KH
208/* Return a charset which is currently designated to the graphic plane
209 PLANE in the coding-system CODING. */
ceb58510
KH
210#define CODING_SPEC_ISO_PLANE_CHARSET(coding, plane) \
211 ((CODING_SPEC_ISO_INVOCATION (coding, plane) < 0) \
212 ? -1 \
213 : CODING_SPEC_ISO_DESIGNATION (coding, \
214 CODING_SPEC_ISO_INVOCATION (coding, plane)))
4ed46869
KH
215
216/*** BIG5 section ***/
217
218/* Macros to denote each type of BIG5 coding system. */
219#define CODING_FLAG_BIG5_HKU 0x00 /* BIG5-HKU is one of variants of
220 BIG5 developed by Hong Kong
221 University. */
222#define CODING_FLAG_BIG5_ETEN 0x01 /* BIG5_ETen is one of variants
223 of BIG5 developed by the
224 company ETen in Taiwan. */
225
226/*** GENERAL section ***/
227
228/* Types of coding system. */
229enum coding_type
230 {
231 coding_type_no_conversion, /* A coding system which requires no
232 conversion for reading and writing
233 including end-of-line format. */
0ef69138 234 coding_type_emacs_mule, /* A coding system used in Emacs'
4ed46869
KH
235 buffer and string. Requires no
236 conversion for reading and writing
237 except for end-of-line format. */
0ef69138 238 coding_type_undecided, /* A coding system which requires
4ed46869
KH
239 automatic detection of a real
240 coding system. */
241 coding_type_sjis, /* SJIS coding system for Japanese. */
242 coding_type_iso2022, /* Any coding system of ISO2022
243 variants. */
244 coding_type_big5, /* BIG5 coding system for Chinese. */
e80de6b1 245 coding_type_ccl, /* The coding system of which decoder
4ed46869 246 and encoder are written in CCL. */
e80de6b1 247 coding_type_raw_text /* A coding system for a text
55496054 248 containing random 8-bit code which
e80de6b1
KH
249 does not require code conversion
250 except for end-of-line format. */
4ed46869
KH
251 };
252
253/* Formats of end-of-line. */
254#define CODING_EOL_LF 0 /* Line-feed only, same as Emacs'
255 internal format. */
256#define CODING_EOL_CRLF 1 /* Sequence of carriage-return and
257 line-feed. */
258#define CODING_EOL_CR 2 /* Carriage-return only. */
0ef69138 259#define CODING_EOL_UNDECIDED 3 /* This value is used to denote the
4ed46869 260 eol-type is not yet decided. */
e80de6b1
KH
261#define CODING_EOL_INCONSISTENT 4 /* This value is used to denote the
262 eol-type is not consistent
263 through the file. */
4ed46869 264
/* Nonzero iff a composition is in progress, i.e. the `composing'
   member of CODING is greater than COMPOSITION_NO.  CODING is a
   pointer to an object with a `composing' member (see struct
   coding_system); it may be any pointer-valued expression and is
   evaluated exactly once.  */
#define COMPOSING_P(coding) \
  ((int) (coding)->composing > (int) COMPOSITION_NO)
267
268#define COMPOSITION_DATA_SIZE 4080
269#define COMPOSITION_DATA_MAX_BUNCH_LENGTH (4 + MAX_COMPOSITION_COMPONENTS*2)
270
271/* Data structure to hold information about compositions of text that
272 is being decoded or encoded. ISO 2022 base code conversion routines
273 handle special ESC sequences for composition specification. But,
274 they can't get/put such information directly from/to a buffer in
275 the deepest place. So, they store or retrieve the information
276 through this structure.
277
278 The decoder stores the information in this structure when it meets
279 ESC sequences for composition while decoding codes, then, after all
280 text codes are decoded, puts `composition' properties on the text
55496054 281 by referring to the structure.
279d9f7b
KH
282
283 The encoder at first stores the information of a text to be
284 encoded, then, while encoding codes, generates ESC sequences for
55496054 285 composition at proper places by referring to the structure. */
279d9f7b
KH
286
287struct composition_data
288{
289 /* The character position of the first character to be encoded or
290 decoded. START and END (see below) are relative to this
291 position. */
292 int char_offset;
293
294 /* The composition data. These elements are repeated for each
295 composition:
296 LENGTH START END METHOD [ COMPONENT ... ]
297 where,
298 LENGTH is the number of elements for this composition.
299
300 START and END are starting and ending character positions of
301 the composition relative to `char_offset'.
302
55496054 303 METHOD is one of `enum composing_status' specifying the way of
279d9f7b
KH
304 composition.
305
306 COMPONENT is a character or an encoded composition rule. */
307 int data[COMPOSITION_DATA_SIZE];
308
309 /* The number of elements in `data' currently used. */
310 int used;
311
312 /* Pointers to the previous and next structures. When `data' is
313 filled up, another structure is allocated and linked in `next'.
55496054
DL
314 The new structure has backward link to this structure in `prev'.
315 The number of chained structures depends on how many compositions
279d9f7b
KH
316 the text being encoded or decoded contains. */
317 struct composition_data *prev, *next;
318};
4ed46869 319
0749a608 320/* Macros used for the member `result' of the struct
658cc252
KH
321 coding_system. */
322#define CODING_FINISH_NORMAL 0
323#define CODING_FINISH_INSUFFICIENT_SRC 1
324#define CODING_FINISH_INSUFFICIENT_DST 2
325#define CODING_FINISH_INCONSISTENT_EOL 3
279d9f7b
KH
326#define CODING_FINISH_INSUFFICIENT_CMP 4
327#define CODING_FINISH_INTERRUPT 5
658cc252 328
3b2d77fe 329/* Macros used for the member `mode' of the struct coding_system. */
658cc252
KH
330
331/* If set, recover the original CR or LF of the already decoded text
332 when the decoding routine encounters an inconsistent eol format. */
333#define CODING_MODE_INHIBIT_INCONSISTENT_EOL 0x01
334
335/* If set, the decoding/encoding routines treat the current data as
336 the last block of the whole text to be converted, and do
55496054 337 appropriate finishing job. */
658cc252
KH
338#define CODING_MODE_LAST_BLOCK 0x02
339
340/* If set, it means that the current source text is in a buffer which
341 enables selective display. */
342#define CODING_MODE_SELECTIVE_DISPLAY 0x04
343
cbb76662
KH
344/* If set, replace unencodable characters by `?' on encoding. */
345#define CODING_MODE_INHIBIT_UNENCODABLE_CHAR 0x08
346
658cc252
KH
347/* This flag is used by the decoding/encoding routines on the fly. If
348 set, it means that right-to-left text is being processed. */
cbb76662 349#define CODING_MODE_DIRECTION 0x10
658cc252 350
4ed46869
KH
351struct coding_system
352{
353 /* Type of the coding system. */
354 enum coding_type type;
355
658cc252
KH
356 /* Type of end-of-line format (LF, CRLF, or CR) of the coding system. */
357 int eol_type;
358
a5ee738b 359 /* Flag bits of the coding system. The meaning of each bit is common
658cc252 360 to all types of coding systems. */
a5ee738b 361 unsigned int common_flags;
4ed46869
KH
362
363 /* Flag bits of the coding system. The meaning of each bit depends
364 on the type of the coding system. */
365 unsigned int flags;
366
658cc252
KH
367 /* Mode bits of the coding system. See the comments of the macros
368 CODING_MODE_XXX. */
369 unsigned int mode;
4ed46869 370
279d9f7b 371 /* The current status of composition handling. */
4ed46869
KH
372 int composing;
373
279d9f7b
KH
374 /* 1 iff the next character is a composition rule. */
375 int composition_rule_follows;
376
377 /* Information about compositions is stored here on decoding and set
378 in advance on encoding. */
379 struct composition_data *cmp_data;
380
381 /* Index to cmp_data->data for the first element for the current
382 composition. */
383 int cmp_data_start;
384
385 /* Index to cmp_data->data for the current element for the current
386 composition. */
387 int cmp_data_index;
450c60a5 388
4ed46869
KH
389 /* Detailed information specific to each type of coding system. */
390 union spec
391 {
392 struct iso2022_spec iso2022;
393 struct ccl_spec ccl; /* Defined in ccl.h. */
394 } spec;
395
658cc252
KH
396 /* Index number of coding category of the coding system. */
397 int category_idx;
398
d23ee514
KH
399 /* The following two members specify how characters 128..159 are
400 represented in source and destination text respectively. 1 means
401 they are represented by 2-byte sequence, 0 means they are
402 represented by 1-byte as is (see the comment in charset.h). */
811ea086
KH
403 unsigned src_multibyte : 1;
404 unsigned dst_multibyte : 1;
405
a137bb00
KH
406 /* How many heading bytes we can skip for decoding. This is set to
407 -1 in setup_coding_system, and updated by detect_coding. So,
408 when this is equal to the byte length of the text being
409 converted, we can skip the actual conversion process. */
658cc252
KH
410 int heading_ascii;
411
412 /* The following members are set by encoding/decoding routine. */
413 int produced, produced_char, consumed, consumed_char;
414
811ea086
KH
415 /* Number of error source data found in a decoding routine. */
416 int errors;
417
418 /* Finish status of code conversion. It should be one of macros
419 CODING_FINISH_XXXX. */
420 int result;
e6a9a0bc 421
6041c9ce
KH
422 /* If nonzero, suppress error notification. */
423 int suppress_error;
424
658cc252
KH
425 /* The following members are all Lisp symbols. We don't have to
426 protect them from GC because the current garbage collection
427 doesn't relocate Lisp symbols. But, when it is changed, we must
428 find a way to protect them. */
429
4ed46869
KH
430 /* Backward pointer to the Lisp symbol of the coding system. */
431 Lisp_Object symbol;
432
433 /* Lisp function (symbol) to be called after decoding to do
658cc252 434 additional conversion, or nil. */
4ed46869
KH
435 Lisp_Object post_read_conversion;
436
437 /* Lisp function (symbol) to be called before encoding to do
658cc252 438 additional conversion, or nil. */
4ed46869
KH
439 Lisp_Object pre_write_conversion;
440
ab45712c 441 /* Character translation tables to look up, or nil. */
f967223b
KH
442 Lisp_Object translation_table_for_decode;
443 Lisp_Object translation_table_for_encode;
4ed46869
KH
444};
445
a5ee738b
KH
446#define CODING_REQUIRE_FLUSHING_MASK 1
447#define CODING_REQUIRE_DECODING_MASK 2
448#define CODING_REQUIRE_ENCODING_MASK 4
449#define CODING_REQUIRE_DETECTION_MASK 8
450
451/* Return 1 if the coding system CODING requires specific code to be
452 attached at the tail of converted text. */
453#define CODING_REQUIRE_FLUSHING(coding) \
454 ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK)
455
456/* Return 1 if the coding system CODING requires code conversion on
457 decoding. */
458#define CODING_REQUIRE_DECODING(coding) \
811ea086
KH
459 ((coding)->dst_multibyte \
460 || (coding)->common_flags & CODING_REQUIRE_DECODING_MASK)
a5ee738b
KH
461
462/* Return 1 if the coding system CODING requires code conversion on
463 encoding. */
464#define CODING_REQUIRE_ENCODING(coding) \
811ea086
KH
465 ((coding)->src_multibyte \
466 || (coding)->common_flags & CODING_REQUIRE_ENCODING_MASK)
a5ee738b
KH
467
468/* Return nonzero if the coding system CODING requires some kind of code
469 detection. */
470#define CODING_REQUIRE_DETECTION(coding) \
471 ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK)
472
811ea086
KH
473/* Return 1 if the coding system CODING requires code conversion on
474 decoding or some kind of code detection. */
658cc252 475#define CODING_MAY_REQUIRE_DECODING(coding) \
811ea086
KH
476 (CODING_REQUIRE_DECODING (coding) \
477 || CODING_REQUIRE_DETECTION (coding))
4ed46869
KH
478
479/* Index for each coding category in `coding_category_table' */
0ef69138 480#define CODING_CATEGORY_IDX_EMACS_MULE 0
4ed46869
KH
481#define CODING_CATEGORY_IDX_SJIS 1
482#define CODING_CATEGORY_IDX_ISO_7 2
658cc252
KH
483#define CODING_CATEGORY_IDX_ISO_7_TIGHT 3
484#define CODING_CATEGORY_IDX_ISO_8_1 4
485#define CODING_CATEGORY_IDX_ISO_8_2 5
486#define CODING_CATEGORY_IDX_ISO_7_ELSE 6
487#define CODING_CATEGORY_IDX_ISO_8_ELSE 7
8469bb88
KH
488#define CODING_CATEGORY_IDX_CCL 8
489#define CODING_CATEGORY_IDX_BIG5 9
62537270
KH
490#define CODING_CATEGORY_IDX_UTF_8 10
491#define CODING_CATEGORY_IDX_UTF_16_BE 11
492#define CODING_CATEGORY_IDX_UTF_16_LE 12
493#define CODING_CATEGORY_IDX_RAW_TEXT 13
494#define CODING_CATEGORY_IDX_BINARY 14
495#define CODING_CATEGORY_IDX_MAX 15
4ed46869
KH
496
497/* Definitions of flag bits returned by the function
498 detect_coding_mask (). */
0ef69138 499#define CODING_CATEGORY_MASK_EMACS_MULE (1 << CODING_CATEGORY_IDX_EMACS_MULE)
4ed46869
KH
500#define CODING_CATEGORY_MASK_SJIS (1 << CODING_CATEGORY_IDX_SJIS)
501#define CODING_CATEGORY_MASK_ISO_7 (1 << CODING_CATEGORY_IDX_ISO_7)
658cc252 502#define CODING_CATEGORY_MASK_ISO_7_TIGHT (1 << CODING_CATEGORY_IDX_ISO_7_TIGHT)
4ed46869
KH
503#define CODING_CATEGORY_MASK_ISO_8_1 (1 << CODING_CATEGORY_IDX_ISO_8_1)
504#define CODING_CATEGORY_MASK_ISO_8_2 (1 << CODING_CATEGORY_IDX_ISO_8_2)
a38ede41
KH
505#define CODING_CATEGORY_MASK_ISO_7_ELSE (1 << CODING_CATEGORY_IDX_ISO_7_ELSE)
506#define CODING_CATEGORY_MASK_ISO_8_ELSE (1 << CODING_CATEGORY_IDX_ISO_8_ELSE)
8469bb88 507#define CODING_CATEGORY_MASK_CCL (1 << CODING_CATEGORY_IDX_CCL)
4ed46869 508#define CODING_CATEGORY_MASK_BIG5 (1 << CODING_CATEGORY_IDX_BIG5)
62537270
KH
509#define CODING_CATEGORY_MASK_UTF_8 (1 << CODING_CATEGORY_IDX_UTF_8)
510#define CODING_CATEGORY_MASK_UTF_16_BE (1 << CODING_CATEGORY_IDX_UTF_16_BE)
511#define CODING_CATEGORY_MASK_UTF_16_LE (1 << CODING_CATEGORY_IDX_UTF_16_LE)
f1651811 512#define CODING_CATEGORY_MASK_RAW_TEXT (1 << CODING_CATEGORY_IDX_RAW_TEXT)
1f312d8a 513#define CODING_CATEGORY_MASK_BINARY (1 << CODING_CATEGORY_IDX_BINARY)
4ed46869
KH
514
515/* This value is returned if detect_coding_mask () finds nothing other
516 than ASCII characters. */
517#define CODING_CATEGORY_MASK_ANY \
0ef69138 518 ( CODING_CATEGORY_MASK_EMACS_MULE \
4ed46869
KH
519 | CODING_CATEGORY_MASK_SJIS \
520 | CODING_CATEGORY_MASK_ISO_7 \
658cc252 521 | CODING_CATEGORY_MASK_ISO_7_TIGHT \
4ed46869
KH
522 | CODING_CATEGORY_MASK_ISO_8_1 \
523 | CODING_CATEGORY_MASK_ISO_8_2 \
a38ede41
KH
524 | CODING_CATEGORY_MASK_ISO_7_ELSE \
525 | CODING_CATEGORY_MASK_ISO_8_ELSE \
8469bb88 526 | CODING_CATEGORY_MASK_CCL \
62537270
KH
527 | CODING_CATEGORY_MASK_BIG5 \
528 | CODING_CATEGORY_MASK_UTF_8 \
529 | CODING_CATEGORY_MASK_UTF_16_BE \
530 | CODING_CATEGORY_MASK_UTF_16_LE)
4ed46869 531
658cc252
KH
532#define CODING_CATEGORY_MASK_ISO_7BIT \
533 (CODING_CATEGORY_MASK_ISO_7 | CODING_CATEGORY_MASK_ISO_7_TIGHT)
534
535#define CODING_CATEGORY_MASK_ISO_8BIT \
536 (CODING_CATEGORY_MASK_ISO_8_1 | CODING_CATEGORY_MASK_ISO_8_2)
537
538#define CODING_CATEGORY_MASK_ISO_SHIFT \
539 (CODING_CATEGORY_MASK_ISO_7_ELSE | CODING_CATEGORY_MASK_ISO_8_ELSE)
540
541#define CODING_CATEGORY_MASK_ISO \
542 ( CODING_CATEGORY_MASK_ISO_7BIT \
543 | CODING_CATEGORY_MASK_ISO_SHIFT \
544 | CODING_CATEGORY_MASK_ISO_8BIT)
545
62537270
KH
546#define CODING_CATEGORY_MASK_UTF_16_BE_LE \
547 (CODING_CATEGORY_MASK_UTF_16_BE | CODING_CATEGORY_MASK_UTF_16_LE)
548
4ed46869
KH
549/* Macros to decode or encode a character of JISX0208 in SJIS. S1 and
550 S2 are the 1st and 2nd position-codes of JISX0208 in SJIS coding
551 system. C1 and C2 are the 1st and 2nd position codes of Emacs'
552 internal format. */
553
/* Decode the SJIS byte pair S1, S2 into the position codes C1, C2.
   S1 and S2 may be arbitrary integer expressions; each is evaluated
   exactly once, before C1 or C2 is written, so the outputs may alias
   the inputs.  C1 and C2 must be assignable lvalues.

   When S2 >= 0x9F the pair maps to the even row; otherwise to the odd
   row, with the 0x7F gap in the second byte skipped.  */
#define DECODE_SJIS(s1, s2, c1, c2) \
  do { \
    int sjis_hi_ = (s1), sjis_lo_ = (s2); \
    if (sjis_lo_ >= 0x9F) \
      { \
        (c1) = sjis_hi_ * 2 - (sjis_hi_ >= 0xE0 ? 0x160 : 0xE0); \
        (c2) = sjis_lo_ - 0x7E; \
      } \
    else \
      { \
        (c1) = sjis_hi_ * 2 - (sjis_hi_ >= 0xE0 ? 0x161 : 0xE1); \
        (c2) = sjis_lo_ - (sjis_lo_ >= 0x7F ? 0x20 : 0x1F); \
      } \
  } while (0)
563
/* Encode the position codes C1, C2 into the SJIS byte pair S1, S2.
   C1 and C2 may be arbitrary integer expressions; each is evaluated
   exactly once, before S1 or S2 is written, so the outputs may alias
   the inputs.  S1 and S2 must be assignable lvalues.

   An odd C1 selects the lower half of the second-byte range (skipping
   0x7F); an even C1 selects the upper half starting at 0x9F.  */
#define ENCODE_SJIS(c1, c2, s1, s2) \
  do { \
    int sjis_row_ = (c1), sjis_col_ = (c2); \
    if (sjis_row_ & 1) \
      { \
        (s1) = sjis_row_ / 2 + (sjis_row_ < 0x5F ? 0x71 : 0xB1); \
        (s2) = sjis_col_ + (sjis_col_ >= 0x60 ? 0x20 : 0x1F); \
      } \
    else \
      { \
        (s1) = sjis_row_ / 2 + (sjis_row_ < 0x5F ? 0x70 : 0xB0); \
        (s2) = sjis_col_ + 0x7E; \
      } \
  } while (0)
573
290591c8
KH
/* Encode the file name NAME using the coding system for file names,
   if any.  Vfile_name_coding_system is used when it is neither nil
   nor the integer 0; otherwise Vdefault_file_name_coding_system is
   tried under the same test; otherwise NAME is yielded unchanged.
   The conversion is done by code_convert_string_norecord with 1 as
   its last argument.  NAME is evaluated exactly once.  */
#define ENCODE_FILE(name) \
  (! NILP (Vfile_name_coding_system) \
   && !EQ (Vfile_name_coding_system, make_number (0)) \
   ? code_convert_string_norecord ((name), Vfile_name_coding_system, 1) \
   : (! NILP (Vdefault_file_name_coding_system) \
      && !EQ (Vdefault_file_name_coding_system, make_number (0)) \
      ? code_convert_string_norecord ((name), \
                                      Vdefault_file_name_coding_system, 1) \
      : (name)))
584
/* Decode the file name NAME using the coding system for file names,
   if any.  Vfile_name_coding_system is used when it is neither nil
   nor the integer 0; otherwise Vdefault_file_name_coding_system is
   tried under the same test; otherwise NAME is yielded unchanged.
   The conversion is done by code_convert_string_norecord with 0 as
   its last argument.  NAME is evaluated exactly once.  */
#define DECODE_FILE(name) \
  (! NILP (Vfile_name_coding_system) \
   && !EQ (Vfile_name_coding_system, make_number (0)) \
   ? code_convert_string_norecord ((name), Vfile_name_coding_system, 0) \
   : (! NILP (Vdefault_file_name_coding_system) \
      && !EQ (Vdefault_file_name_coding_system, make_number (0)) \
      ? code_convert_string_norecord ((name), \
                                      Vdefault_file_name_coding_system, 0) \
      : (name)))
595
/* Encode the string STR using the coding system for system functions,
   if any.  Vlocale_coding_system is used when it is neither nil nor
   the integer 0; otherwise STR is yielded unchanged.  The conversion
   is done by code_convert_string_norecord with 1 as its last
   argument.  STR is evaluated exactly once.  */
#define ENCODE_SYSTEM(str) \
  (! NILP (Vlocale_coding_system) \
   && !EQ (Vlocale_coding_system, make_number (0)) \
   ? code_convert_string_norecord ((str), Vlocale_coding_system, 1) \
   : (str))
603
/* Decode the string STR using the coding system for system functions,
   if any.  Vlocale_coding_system is used when it is neither nil nor
   the integer 0; otherwise STR is yielded unchanged.  The conversion
   is done by code_convert_string_norecord with 0 as its last
   argument.  STR is evaluated exactly once.  */
#define DECODE_SYSTEM(str) \
  (! NILP (Vlocale_coding_system) \
   && !EQ (Vlocale_coding_system, make_number (0)) \
   ? code_convert_string_norecord ((str), Vlocale_coding_system, 0) \
   : (str))
2dfda962 611
c14e5a3a
DL
612#define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, 1)
613
4ed46869 614/* Extern declarations. */
f14049eb 615extern int decode_coding P_ ((struct coding_system *, const unsigned char *,
658cc252 616 unsigned char *, int, int));
f14049eb 617extern int encode_coding P_ ((struct coding_system *, const unsigned char *,
658cc252 618 unsigned char *, int, int));
279d9f7b
KH
619extern void coding_save_composition P_ ((struct coding_system *, int, int,
620 Lisp_Object));
621extern void coding_free_composition_data P_ ((struct coding_system *));
622extern void coding_adjust_composition_offset P_ ((struct coding_system *,
623 int));
55fff3f9
KH
624extern void coding_allocate_composition_data P_ ((struct coding_system *,
625 int));
626extern void coding_restore_composition P_ ((struct coding_system *,
627 Lisp_Object));
ec5d8db7
AS
628extern int code_convert_region P_ ((int, int, int, int, struct coding_system *,
629 int, int));
f227fded
EZ
630extern Lisp_Object run_pre_post_conversion_on_str P_ ((Lisp_Object,
631 struct coding_system *,
632 int));
387f6ba5 633extern void run_pre_write_conversin_on_c_str P_ ((unsigned char **, int *,
f9439896
KH
634 int, int,
635 struct coding_system *));
636
c04809fb
AS
637extern int decoding_buffer_size P_ ((struct coding_system *, int));
638extern int encoding_buffer_size P_ ((struct coding_system *, int));
f14049eb
KR
639extern void detect_coding P_ ((struct coding_system *, const unsigned char *,
640 int));
641extern void detect_eol P_ ((struct coding_system *, const unsigned char *,
642 int));
c04809fb 643extern int setup_coding_system P_ ((Lisp_Object, struct coding_system *));
8000e212
KR
644extern Lisp_Object code_convert_string P_ ((Lisp_Object,
645 struct coding_system *, int, int));
6dd24186
KR
646extern Lisp_Object code_convert_string1 P_ ((Lisp_Object, Lisp_Object,
647 Lisp_Object, int));
eb545596
DL
648extern Lisp_Object code_convert_string_norecord P_ ((Lisp_Object, Lisp_Object,
649 int));
e5ff4bc2 650extern void setup_raw_text_coding_system P_ ((struct coding_system *));
52e386c2
KR
651extern Lisp_Object encode_coding_string P_ ((Lisp_Object,
652 struct coding_system *, int));
8a6d111e
KR
653extern Lisp_Object decode_coding_string P_ ((Lisp_Object,
654 struct coding_system *, int));
4ed46869 655extern Lisp_Object Qcoding_system, Qeol_type, Qcoding_category_index;
64abe701 656extern Lisp_Object Qraw_text, Qemacs_mule;
4ed46869
KH
657extern Lisp_Object Qbuffer_file_coding_system;
658extern Lisp_Object Vcoding_category_list;
c14e5a3a 659extern Lisp_Object Qutf_8;
4ed46869 660
f967223b
KH
661extern Lisp_Object Qtranslation_table;
662extern Lisp_Object Qtranslation_table_id;
ab45712c 663
20ee919e
EZ
664/* Mnemonic strings to indicate each type of end-of-line. */
665extern Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
666/* Mnemonic string to indicate type of end-of-line is not yet decided. */
667extern Lisp_Object eol_mnemonic_undecided;
4ed46869 668
4ed46869
KH
669#ifdef emacs
670extern Lisp_Object Qfile_coding_system;
387f6ba5 671extern Lisp_Object Qcall_process, Qcall_process_region;
4ed46869 672extern Lisp_Object Qstart_process, Qopen_network_stream;
d008a7cc 673extern Lisp_Object Qwrite_region;
4ed46869 674
68c45bf0
PE
675extern char *emacs_strerror P_ ((int));
676
4ed46869
KH
677/* Coding-system for reading files and receiving data from process. */
678extern Lisp_Object Vcoding_system_for_read;
679/* Coding-system for writing files and sending data to process. */
680extern Lisp_Object Vcoding_system_for_write;
681/* Coding-system actually used in the latest I/O. */
682extern Lisp_Object Vlast_coding_system_used;
68c45bf0
PE
683/* Coding-system to use with system messages (e.g. strerror). */
684extern Lisp_Object Vlocale_coding_system;
4ed46869 685
77a9bc9a
EZ
686/* If non-zero, process buffer inherits the coding system used to decode
687 the subprocess output. */
688extern int inherit_process_coding_system;
689
4ed46869
KH
690/* Coding-system to be used for encoding terminal output. This
691 structure contains information of a coding-system specified by the
692 function `set-terminal-coding-system'. */
693extern struct coding_system terminal_coding;
694
fbaa2ed9
KH
695/* Coding system to be used to encode text for terminal display when
696 terminal coding system is nil. */
697extern struct coding_system safe_terminal_coding;
698
4ed46869
KH
699/* Coding-system of what is sent from terminal keyboard. This
700 structure contains information of a coding-system specified by the
701 function `set-keyboard-coding-system'. */
702extern struct coding_system keyboard_coding;
703
0827f88d
KH
704/* Default coding system to be used to write a file. */
705extern struct coding_system default_buffer_file_coding;
706
a5825a24
KH
707/* Default coding systems used for process I/O. */
708extern Lisp_Object Vdefault_process_coding_system;
4ed46869 709
55496054 710/* Function to call to force a user to select a proper coding
658cc252
KH
711 system. */
712extern Lisp_Object Vselect_safe_coding_system_function;
713
6926d591
KH
714/* If nonzero, on writing a file, Vselect_safe_coding_system_function
715 is called even if Vcoding_system_for_write is non-nil. */
716extern int coding_system_require_warning;
717
31406df1
RS
718/* Coding system for file names, or nil if none. */
719extern Lisp_Object Vfile_name_coding_system;
720
721/* Coding system for file names used only when
722 Vfile_name_coding_system is nil. */
723extern Lisp_Object Vdefault_file_name_coding_system;
2dfda962 724
4ed46869
KH
725#endif
726
d008a7cc
GM
727/* Error signaled when there's a problem with detecting a coding system. */
728extern Lisp_Object Qcoding_system_error;
729
6f776e81 730#endif /* EMACS_CODING_H */
ab5796a9
MB
731
732/* arch-tag: 2bc3b4fa-6870-4f64-8135-b962b2d290e4
733 (do not change this comment) */