Add 2012 to FSF copyright years for Emacs files
[bpt/emacs.git] / src / coding.h
CommitLineData
/* Header for coding system handler.
   Copyright (C) 2001-2012  Free Software Foundation, Inc.
   Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
     2005, 2006, 2007, 2008, 2009, 2010, 2011
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H14PRO021
   Copyright (C) 2003
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H13PRO009

This file is part of GNU Emacs.

GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
4ed46869 25
6f776e81
KH
26#ifndef EMACS_CODING_H
27#define EMACS_CODING_H
4ed46869 28
/* Index to arguments of Fdefine_coding_system_internal.  The
   enumerators count up from zero; coding_arg_max is therefore the
   number of arguments listed here, and the type-specific argument
   enums in this file start numbering from it.  */

enum define_coding_system_arg_index
  {
    coding_arg_name,
    coding_arg_mnemonic,
    coding_arg_coding_type,
    coding_arg_charset_list,
    coding_arg_ascii_compatible_p,
    coding_arg_decode_translation_table,
    coding_arg_encode_translation_table,
    coding_arg_post_read_conversion,
    coding_arg_pre_write_conversion,
    coding_arg_default_char,
    coding_arg_for_unibyte,
    coding_arg_plist,
    coding_arg_eol_type,
    coding_arg_max
  };
4ed46869 48
df7492f9 49enum define_coding_iso2022_arg_index
4ed46869 50 {
df7492f9
KH
51 coding_arg_iso2022_initial = coding_arg_max,
52 coding_arg_iso2022_reg_usage,
53 coding_arg_iso2022_request,
54 coding_arg_iso2022_flags,
55 coding_arg_iso2022_max
4ed46869
KH
56 };
57
e4215ddd
KH
58enum define_coding_utf8_arg_index
59 {
60 coding_arg_utf8_bom = coding_arg_max,
61 coding_arg_utf8_max
62 };
63
df7492f9 64enum define_coding_utf16_arg_index
4ed46869 65 {
df7492f9
KH
66 coding_arg_utf16_bom = coding_arg_max,
67 coding_arg_utf16_endian,
68 coding_arg_utf16_max
4ed46869
KH
69 };
70
df7492f9
KH
71enum define_coding_ccl_arg_index
72 {
35d47d18 73 coding_arg_ccl_decoder = coding_arg_max,
df7492f9
KH
74 coding_arg_ccl_encoder,
75 coding_arg_ccl_valids,
76 coding_arg_ccl_max
77 };
4ed46869 78
933373ed
KH
79/* Hash table for all coding systems. Keys are coding system symbols
80 and values are spec vectors of the corresponding coding system. A
81 spec vector has the form [ ATTRS ALIASES EOL-TYPE ]. ATTRS is a
82 vector of attribute of the coding system. ALIASES is a list of
83 aliases (symbols) of the coding system. EOL-TYPE is `unix', `dos',
84 `mac' or a vector of coding systems (symbols). */
85
df7492f9 86extern Lisp_Object Vcoding_system_hash_table;
4ed46869 87
933373ed 88
/* Enumeration of coding system type.  coding_type_max is the number
   of types listed before it.  */

enum coding_system_type
  {
    coding_type_charset,
    coding_type_utf_8,
    coding_type_utf_16,
    coding_type_iso_2022,
    coding_type_emacs_mule,
    coding_type_sjis,
    coding_type_ccl,
    coding_type_raw_text,
    coding_type_undecided,
    coding_type_max
  };
4ed46869 104
4ed46869 105
/* Enumeration of end-of-line format type.  */

enum end_of_line_type
  {
    eol_lf,		/* Line-feed only, same as Emacs' internal
			   format.  */
    eol_crlf,		/* Sequence of carriage-return and
			   line-feed.  */
    eol_cr,		/* Carriage-return only.  */
    eol_any,		/* Accept any of above.  Produce line-feed
			   only.  */
    eol_undecided,	/* This value is used to denote that the
			   eol-type is not yet decided.  */
    eol_type_max
  };
4ed46869 121
/* Enumeration of index to an attribute vector of a coding system.
   coding_attr_last_index is one past the last valid index.  */

enum coding_attr_index
  {
    coding_attr_base_name,
    coding_attr_docstring,
    coding_attr_mnemonic,
    coding_attr_type,
    coding_attr_charset_list,
    coding_attr_ascii_compat,
    coding_attr_decode_tbl,
    coding_attr_encode_tbl,
    coding_attr_trans_tbl,
    coding_attr_post_read,
    coding_attr_pre_write,
    coding_attr_default_char,
    coding_attr_for_unibyte,
    coding_attr_plist,

    coding_attr_category,
    coding_attr_safe_charsets,

    /* The following are extra attributes for each type.  */
    coding_attr_charset_valids,

    coding_attr_ccl_decoder,
    coding_attr_ccl_encoder,
    coding_attr_ccl_valids,

    coding_attr_iso_initial,
    coding_attr_iso_usage,
    coding_attr_iso_request,
    coding_attr_iso_flags,

    coding_attr_utf_bom,
    coding_attr_utf_16_endian,

    coding_attr_emacs_mule_full,

    coding_attr_last_index
  };
4ed46869 163
4ed46869 164
/* Macros to access an element of an attribute vector.  Each one is
   AREF on ATTRS with the matching enum coding_attr_index value.  */

#define CODING_ATTR_BASE_NAME(attrs)	AREF (attrs, coding_attr_base_name)
#define CODING_ATTR_TYPE(attrs)		AREF (attrs, coding_attr_type)
#define CODING_ATTR_CHARSET_LIST(attrs)	AREF (attrs, coding_attr_charset_list)
#define CODING_ATTR_MNEMONIC(attrs)	AREF (attrs, coding_attr_mnemonic)
#define CODING_ATTR_DOCSTRING(attrs)	AREF (attrs, coding_attr_docstring)
#define CODING_ATTR_ASCII_COMPAT(attrs)	AREF (attrs, coding_attr_ascii_compat)
#define CODING_ATTR_DECODE_TBL(attrs)	AREF (attrs, coding_attr_decode_tbl)
#define CODING_ATTR_ENCODE_TBL(attrs)	AREF (attrs, coding_attr_encode_tbl)
#define CODING_ATTR_TRANS_TBL(attrs)	AREF (attrs, coding_attr_trans_tbl)
#define CODING_ATTR_POST_READ(attrs)	AREF (attrs, coding_attr_post_read)
#define CODING_ATTR_PRE_WRITE(attrs)	AREF (attrs, coding_attr_pre_write)
#define CODING_ATTR_DEFAULT_CHAR(attrs)	AREF (attrs, coding_attr_default_char)
#define CODING_ATTR_FOR_UNIBYTE(attrs)	AREF (attrs, coding_attr_for_unibyte)
/* NOTE(review): enum coding_attr_index in this header has no
   coding_attr_flushing member, so any use of this macro fails to
   compile.  Kept only so the set of macro names is unchanged.  */
#define CODING_ATTR_FLUSHING(attrs)	AREF (attrs, coding_attr_flushing)
#define CODING_ATTR_PLIST(attrs)	AREF (attrs, coding_attr_plist)
#define CODING_ATTR_CATEGORY(attrs)	AREF (attrs, coding_attr_category)
#define CODING_ATTR_SAFE_CHARSETS(attrs) AREF (attrs, coding_attr_safe_charsets)
8ddb35b2 184
8ddb35b2 185
/* Accessors taking a coding system ID, i.e. an index into
   Vcoding_system_hash_table.  The value stored under each key is a
   spec vector [ ATTRS ALIASES EOL-TYPE ], hence the AREF indices 0,
   1 and 2 below.  */

/* Return the name of a coding system specified by ID.  */

#define CODING_ID_NAME(id) \
  (HASH_KEY (XHASH_TABLE (Vcoding_system_hash_table), id))

/* Return the attribute vector of a coding system specified by ID.  */

#define CODING_ID_ATTRS(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 0))

/* Return the list of aliases of a coding system specified by ID.  */

#define CODING_ID_ALIASES(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 1))

/* Return the eol-type of a coding system specified by ID.  */

#define CODING_ID_EOL_TYPE(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 2))
658cc252 204
/* Return the spec vector of CODING_SYSTEM_SYMBOL, or Qnil if the
   symbol is not a key of Vcoding_system_hash_table.  */

#define CODING_SYSTEM_SPEC(coding_system_symbol) \
  (Fgethash ((coding_system_symbol), Vcoding_system_hash_table, Qnil))


/* Return the ID of CODING_SYSTEM_SYMBOL.  The macros in this file
   treat a negative result as "not found".  */

#define CODING_SYSTEM_ID(coding_system_symbol) \
  (hash_lookup (XHASH_TABLE (Vcoding_system_hash_table), \
                (coding_system_symbol), NULL))

/* Return 1 if CODING_SYSTEM_SYMBOL is a coding system, else 0.  The
   hash table is consulted first; only if that fails, and the symbol
   is non-nil, is Fcoding_system_p called.  CODING_SYSTEM_SYMBOL may
   be evaluated up to three times, so it must have no side effects.  */

#define CODING_SYSTEM_P(coding_system_symbol) \
  (CODING_SYSTEM_ID (coding_system_symbol) >= 0 \
   || (! NILP (coding_system_symbol) \
       && ! NILP (Fcoding_system_p (coding_system_symbol))))
4ed46869 224
/* Check if X is a coding system or not.  If X has no ID in the hash
   table and Fcheck_coding_system (X) returns nil, call
   wrong_type_argument with Qcoding_system_p.  X may be evaluated
   more than once.  */

#define CHECK_CODING_SYSTEM(x) \
  do { \
    if (CODING_SYSTEM_ID (x) < 0 \
        && NILP (Fcheck_coding_system (x))) \
      wrong_type_argument (Qcoding_system_p, (x)); \
  } while (0)
658cc252 233
4ed46869 234
/* Check if X is a coding system or not.  If it is, set SPEC to the
   spec vector of the coding system.  When the first lookup yields
   nil, Fcheck_coding_system (X) is called and the lookup is retried;
   if SPEC is still nil, wrong_type_argument is called with
   Qcoding_system_p.  SPEC must be an lvalue; X may be evaluated more
   than once.  */

#define CHECK_CODING_SYSTEM_GET_SPEC(x, spec) \
  do { \
    (spec) = CODING_SYSTEM_SPEC (x); \
    if (NILP (spec)) \
      { \
        Fcheck_coding_system (x); \
        (spec) = CODING_SYSTEM_SPEC (x); \
      } \
    if (NILP (spec)) \
      wrong_type_argument (Qcoding_system_p, (x)); \
  } while (0)
e6de76f8 249
8ddb35b2 250
/* Check if X is a coding system or not.  If it is, set ID to the
   ID of the coding system.  When the first lookup is negative,
   Fcheck_coding_system (X) is called and the lookup is retried; if
   ID is still negative, wrong_type_argument is called with
   Qcoding_system_p.  ID must be an lvalue; X may be evaluated more
   than once.  */

#define CHECK_CODING_SYSTEM_GET_ID(x, id) \
  do { \
    (id) = CODING_SYSTEM_ID (x); \
    if ((id) < 0) \
      { \
        Fcheck_coding_system (x); \
        (id) = CODING_SYSTEM_ID (x); \
      } \
    if ((id) < 0) \
      wrong_type_argument (Qcoding_system_p, (x)); \
  } while (0)
4ed46869 266
4ed46869
KH
267
268/*** GENERAL section ***/
269
/* Enumeration of result code of code conversion.  Stored in the
   `result' member of struct coding_system.  */
enum coding_result_code
  {
    CODING_RESULT_SUCCESS,
    CODING_RESULT_INSUFFICIENT_SRC,
    CODING_RESULT_INSUFFICIENT_DST,
    CODING_RESULT_INCONSISTENT_EOL,
    CODING_RESULT_INVALID_SRC,
    CODING_RESULT_INTERRUPT,
    CODING_RESULT_INSUFFICIENT_MEM
  };
281
658cc252 282
/* Macros used for the member `mode' of the struct coding_system.
   Each is a distinct single bit.  */

/* If set, recover the original CR or LF of the already decoded text
   when the decoding routine encounters an inconsistent eol format.  */
#define CODING_MODE_INHIBIT_INCONSISTENT_EOL	0x01

/* If set, the decoding/encoding routines treat the current data as
   the last block of the whole text to be converted, and do the
   appropriate finishing job.  */
#define CODING_MODE_LAST_BLOCK			0x02

/* If set, it means that the current source text is in a buffer which
   enables selective display.  */
#define CODING_MODE_SELECTIVE_DISPLAY		0x04

/* This flag is used by the decoding/encoding routines on the fly.  If
   set, it means that right-to-left text is being processed.  */
#define CODING_MODE_DIRECTION			0x08

/* NOTE(review): undocumented in this header; the name suggests the
   destination must stay where it is -- confirm against coding.c.  */
#define CODING_MODE_FIXED_DESTINATION		0x10

/* If set, it means that the encoding routines produce some safe
   ASCII characters (usually '?') for unsupported characters.  */
#define CODING_MODE_SAFE_ENCODING		0x20
307
825d0875
KH
308 /* For handling composition sequence. */
309#include "composite.h"
310
/* Values for the `state' member of struct composition_status.  */
enum composition_state
  {
    COMPOSING_NO,
    COMPOSING_CHAR,
    COMPOSING_RULE,
    COMPOSING_COMPONENT_CHAR,
    COMPOSING_COMPONENT_RULE
  };
319
320/* Structure for the current composition status. */
321struct composition_status
322{
323 enum composition_state state;
324 enum composition_method method;
325 int old_form; /* 0:pre-21 form, 1:post-21 form */
326 int length; /* number of elements produced in charbuf */
327 int nchars; /* number of characters composed */
328 int ncomps; /* number of composition components */
329 /* Maximum carryover is for the case of COMPOSITION_WITH_RULE_ALTCHARS.
330 See the comment in coding.c. */
331 int carryover[4 /* annotation header */
332 + MAX_COMPOSITION_COMPONENTS * 3 - 2 /* ALTs and RULEs */
333 + 2 /* intermediate -1 -1 */
334 + MAX_COMPOSITION_COMPONENTS /* CHARs */
335 ];
336};
337
338
df7492f9
KH
339/* Structure of the field `spec.iso_2022' in the structure
340 `coding_system'. */
341struct iso_2022_spec
4ed46869 342{
2ec49574 343 /* Bit-wise-or of CODING_ISO_FLAG_XXX. */
df7492f9 344 unsigned flags;
4ed46869 345
df7492f9
KH
346 /* The current graphic register invoked to each graphic plane. */
347 int current_invocation[2];
658cc252 348
df7492f9
KH
349 /* The current charset designated to each graphic register. The
350 value -1 means that not charset is designated, -2 means that
351 there was an invalid designation previously. */
352 int current_designation[4];
4ed46869 353
df7492f9
KH
354 /* Set to 1 temporarily only when graphic register 2 or 3 is invoked
355 by single-shift while encoding. */
356 int single_shifting;
4ed46869 357
df7492f9
KH
358 /* Set to 1 temporarily only when processing at beginning of line. */
359 int bol;
825d0875
KH
360
361 /* If positive, we are now scanning CTEXT extended segment. */
362 int ctext_extended_segment_len;
363
364 /* If nonzero, we are now scanning embedded UTF-8 sequence. */
365 int embedded_utf_8;
366
367 /* The current composition. */
368 struct composition_status cmp_status;
369};
370
371struct emacs_mule_spec
372{
373 int full_support;
374 struct composition_status cmp_status;
df7492f9 375};
4ed46869 376
df7492f9 377struct ccl_spec;
4ed46869 378
/* Byte-order-mark handling; used for `spec.utf_8_bom' and for the
   `bom' member of struct utf_16_spec.  */
enum utf_bom_type
  {
    utf_detect_bom,
    utf_without_bom,
    utf_with_bom
  };
279d9f7b 385
/* Byte order for the `endian' member of struct utf_16_spec.  */
enum utf_16_endian_type
  {
    utf_16_big_endian,
    utf_16_little_endian
  };
279d9f7b 391
df7492f9
KH
392struct utf_16_spec
393{
e4215ddd 394 enum utf_bom_type bom;
df7492f9
KH
395 enum utf_16_endian_type endian;
396 int surrogate;
397};
279d9f7b 398
/* Bookkeeping passed to the `detector' function of a coding system.  */
struct coding_detection_info
{
  /* Values of these members are bitwise-OR of CATEGORY_MASK_XXXs.  */
  /* Which categories are already checked.  */
  int checked;
  /* Which categories are strongly found.  */
  int found;
  /* Which categories are rejected.  */
  int rejected;
};
279d9f7b 409
279d9f7b 410
df7492f9
KH
411struct coding_system
412{
413 /* ID number of the coding system. This is an index to
414 Vcoding_system_hash_table. This value is set by
415 setup_coding_system. At the early stage of building time, this
416 value is -1 in the array coding_categories to indicate that no
417 coding-system of that category is yet defined. */
d3411f89 418 ptrdiff_t id;
df7492f9
KH
419
420 /* Flag bits of the coding system. The meaning of each bit is common
421 to all types of coding systems. */
422 int common_flags;
423
424 /* Mode bits of the coding system. See the comments of the macros
425 CODING_MODE_XXX. */
426 unsigned int mode;
450c60a5 427
4ed46869 428 /* Detailed information specific to each type of coding system. */
df7492f9 429 union
4ed46869 430 {
df7492f9
KH
431 struct iso_2022_spec iso_2022;
432 struct ccl_spec *ccl; /* Defined in ccl.h. */
433 struct utf_16_spec utf_16;
e4215ddd 434 enum utf_bom_type utf_8_bom;
825d0875 435 struct emacs_mule_spec emacs_mule;
4ed46869
KH
436 } spec;
437
df7492f9 438 int max_charset_id;
1b3b981b 439 unsigned char *safe_charsets;
658cc252 440
df7492f9
KH
441 /* The following two members specify how binary 8-bit code 128..255
442 are represented in source and destination text respectively. 1
443 means they are represented by 2-byte sequence, 0 means they are
444 represented by 1-byte as is (see the comment in character.h). */
811ea086
KH
445 unsigned src_multibyte : 1;
446 unsigned dst_multibyte : 1;
447
a137bb00
KH
448 /* How may heading bytes we can skip for decoding. This is set to
449 -1 in setup_coding_system, and updated by detect_coding. So,
450 when this is equal to the byte length of the text being
451 converted, we can skip the actual conversion process. */
a53e2e89 452 EMACS_INT head_ascii;
658cc252
KH
453
454 /* The following members are set by encoding/decoding routine. */
df7492f9 455 EMACS_INT produced, produced_char, consumed, consumed_char;
658cc252 456
811ea086
KH
457 /* Number of error source data found in a decoding routine. */
458 int errors;
459
6d5eb5b0 460 /* Store the positions of error source data. */
df7492f9 461 EMACS_INT *error_positions;
e6a9a0bc 462
df7492f9
KH
463 /* Finish status of code conversion. */
464 enum coding_result_code result;
6041c9ce 465
df7492f9
KH
466 EMACS_INT src_pos, src_pos_byte, src_chars, src_bytes;
467 Lisp_Object src_object;
8f924df7 468 const unsigned char *source;
4ed46869 469
df7492f9
KH
470 EMACS_INT dst_pos, dst_pos_byte, dst_bytes;
471 Lisp_Object dst_object;
472 unsigned char *destination;
4ed46869 473
b2e6b10f 474 /* Set to 1 if the source of conversion is not in the member
933373ed 475 `charbuf', but at `src_object'. */
df7492f9 476 int chars_at_source;
4ed46869 477
df7492f9
KH
478 /* If an element is non-negative, it is a character code.
479
480 If it is in the range -128..-1, it is a 8-bit character code
481 minus 256.
482
483 If it is less than -128, it specifies the start of an annotation
484 chunk. The length of the chunk is -128 minus the value of the
485 element. The following elements are OFFSET, ANNOTATION-TYPE, and
486 a sequence of actual data for the annotation. OFFSET is a
487 character position offset from dst_pos or src_pos,
22bcf204 488 ANNOTATION-TYPE specifies the meaning of the annotation and how to
df7492f9
KH
489 handle the following data.. */
490 int *charbuf;
491 int charbuf_size, charbuf_used;
492
493 /* Set to 1 if charbuf contains an annotation. */
494 int annotated;
4ed46869 495
df7492f9
KH
496 unsigned char carryover[64];
497 int carryover_bytes;
a5ee738b 498
df7492f9
KH
499 int default_char;
500
383e0970
J
501 int (*detector) (struct coding_system *,
502 struct coding_detection_info *);
503 void (*decoder) (struct coding_system *);
504 int (*encoder) (struct coding_system *);
df7492f9
KH
505};
506
/* Meanings of bits in the member `common_flags' of the structure
   coding_system.  The lowest 8 bits are reserved for various kinds of
   annotations.

   NOTE(review): CODING_ANNOTATE_CHARSET_MASK is 0x0003, which is
   exactly CODING_ANNOTATE_COMPOSITION_MASK | CODING_ANNOTATE_DIRECTION_MASK
   rather than a bit of its own.  Confirm that is intended before
   treating it as an independent flag; the value is kept as is.  */
#define CODING_ANNOTATION_MASK			0x00FF
#define CODING_ANNOTATE_COMPOSITION_MASK	0x0001
#define CODING_ANNOTATE_DIRECTION_MASK		0x0002
#define CODING_ANNOTATE_CHARSET_MASK		0x0003
#define CODING_FOR_UNIBYTE_MASK			0x0100
#define CODING_REQUIRE_FLUSHING_MASK		0x0200
#define CODING_REQUIRE_DECODING_MASK		0x0400
#define CODING_REQUIRE_ENCODING_MASK		0x0800
#define CODING_REQUIRE_DETECTION_MASK		0x1000
#define CODING_RESET_AT_BOL_MASK		0x2000
520
/* Predicates on a coding context CODING (a pointer to struct
   coding_system).  Those that are a bare `&' yield the masked flag
   bits, so the result is nonzero rather than exactly 1; those built
   with `||' yield 0 or 1.  CODING may be evaluated more than once.  */

/* Return nonzero if the coding context CODING requires annotation
   handling.  */
#define CODING_REQUIRE_ANNOTATION(coding) \
  ((coding)->common_flags & CODING_ANNOTATION_MASK)

/* Return nonzero if the coding context CODING prefers decoding into
   unibyte.  */
#define CODING_FOR_UNIBYTE(coding) \
  ((coding)->common_flags & CODING_FOR_UNIBYTE_MASK)

/* Return nonzero if the coding context CODING requires specific code
   to be attached at the tail of converted text.  */
#define CODING_REQUIRE_FLUSHING(coding) \
  ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK)

/* Return 1 if the coding context CODING requires code conversion on
   decoding, else 0.  */
#define CODING_REQUIRE_DECODING(coding) \
  ((coding)->dst_multibyte \
   || ((coding)->common_flags & CODING_REQUIRE_DECODING_MASK))


/* Return 1 if the coding context CODING requires code conversion on
   encoding, else 0.
   The non-multibyte part of the condition is to support encoding of
   unibyte strings/buffers generated by string-as-unibyte or
   (set-buffer-multibyte nil) from multibyte strings/buffers.  */
#define CODING_REQUIRE_ENCODING(coding) \
  ((coding)->src_multibyte \
   || ((coding)->common_flags & CODING_REQUIRE_ENCODING_MASK) \
   || ((coding)->mode & CODING_MODE_SELECTIVE_DISPLAY))


/* Return nonzero if the coding context CODING requires some kind of
   code detection.  */
#define CODING_REQUIRE_DETECTION(coding) \
  ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK)

/* Return 1 if the coding context CODING requires code conversion on
   decoding or some kind of code detection, else 0.  */
#define CODING_MAY_REQUIRE_DECODING(coding) \
  (CODING_REQUIRE_DECODING (coding) \
   || CODING_REQUIRE_DETECTION (coding))
4ed46869 563
/* Macros to decode or encode a character of JISX0208 in SJIS.  Each
   takes a single argument CODE, an int lvalue holding a two-byte code
   as (first byte << 8) | second byte, and overwrites it in place with
   the converted two-byte code in the same layout.  */

/* Convert CODE from SJIS to JIS.  CODE is evaluated more than once.
   The temporaries carry a trailing underscore so that a caller
   variable named s1, s2, j1 or j2 is not captured.  */
#define SJIS_TO_JIS(code) \
  do { \
    int s1_ = (code) >> 8, s2_ = (code) & 0xFF; \
    int j1_, j2_; \
 \
    if (s2_ >= 0x9F) \
      { \
        j1_ = s1_ * 2 - (s1_ >= 0xE0 ? 0x160 : 0xE0); \
        j2_ = s2_ - 0x7E; \
      } \
    else \
      { \
        j1_ = s1_ * 2 - (s1_ >= 0xE0 ? 0x161 : 0xE1); \
        j2_ = s2_ - (s2_ >= 0x7F ? 0x20 : 0x1F); \
      } \
    (code) = (j1_ << 8) | j2_; \
  } while (0)
583
/* Like SJIS_TO_JIS, but using a different first-byte mapping for SJIS
   first bytes 0xF0 and above.  NOTE(review): presumably this is the
   second plane of JISX0213 -- confirm against the callers.  CODE is an
   int lvalue, evaluated more than once and overwritten in place.  */
#define SJIS_TO_JIS2(code) \
  do { \
    int s1_ = (code) >> 8, s2_ = (code) & 0xFF; \
    int j1_, j2_; \
 \
    if (s2_ >= 0x9F) \
      { \
        j1_ = (s1_ == 0xF0 ? 0x28 \
               : s1_ == 0xF1 ? 0x24 \
               : s1_ == 0xF2 ? 0x2C \
               : s1_ == 0xF3 ? 0x2E \
               : 0x6E + (s1_ - 0xF4) * 2); \
        j2_ = s2_ - 0x7E; \
      } \
    else \
      { \
        j1_ = (s1_ <= 0xF2 ? 0x21 + (s1_ - 0xF0) * 2 \
               : s1_ <= 0xF4 ? 0x2D + (s1_ - 0xF3) * 2 \
               : 0x6F + (s1_ - 0xF5) * 2); \
        j2_ = s2_ - (s2_ >= 0x7F ? 0x20 : 0x1F); \
      } \
    (code) = (j1_ << 8) | j2_; \
  } while (0)
608
/* Convert CODE from JIS to SJIS; the inverse of SJIS_TO_JIS.  CODE is
   an int lvalue holding (first byte << 8) | second byte; it is
   evaluated more than once and overwritten in place.  The temporaries
   carry a trailing underscore so that a caller variable named s1, s2,
   j1 or j2 is not captured.  */
#define JIS_TO_SJIS(code) \
  do { \
    int j1_ = (code) >> 8, j2_ = (code) & 0xFF; \
    int s1_, s2_; \
 \
    if (j1_ & 1) \
      { \
        s1_ = j1_ / 2 + (j1_ < 0x5F ? 0x71 : 0xB1); \
        s2_ = j2_ + (j2_ >= 0x60 ? 0x20 : 0x1F); \
      } \
    else \
      { \
        s1_ = j1_ / 2 + (j1_ < 0x5F ? 0x70 : 0xB0); \
        s2_ = j2_ + 0x7E; \
      } \
    (code) = (s1_ << 8) | s2_; \
  } while (0)
623
/* The inverse of SJIS_TO_JIS2: convert CODE to an SJIS code whose
   first byte is 0xF0 or above.  CODE is an int lvalue holding
   (first byte << 8) | second byte; it is evaluated more than once and
   overwritten in place.  */
#define JIS_TO_SJIS2(code) \
  do { \
    int j1_ = (code) >> 8, j2_ = (code) & 0xFF; \
    int s1_, s2_; \
 \
    if (j1_ & 1) \
      { \
        s1_ = (j1_ <= 0x25 ? 0xF0 + (j1_ - 0x21) / 2 \
               : j1_ <= 0x2F ? 0xF3 + (j1_ - 0x2D) / 2 \
               : 0xF5 + (j1_ - 0x6F) / 2); \
        s2_ = j2_ + (j2_ >= 0x60 ? 0x20 : 0x1F); \
      } \
    else \
      { \
        s1_ = (j1_ == 0x28 ? 0xF0 \
               : j1_ == 0x24 ? 0xF1 \
               : j1_ == 0x2C ? 0xF2 \
               : j1_ == 0x2E ? 0xF3 \
               : 0xF4 + (j1_ - 0x6E) / 2); \
        s2_ = j2_ + 0x7E; \
      } \
    (code) = (s1_ << 8) | s2_; \
  } while (0)
df7492f9 647
/* Encode the file name NAME using the specified coding system
   for file names, if any.  Vfile_name_coding_system is used when it
   is neither nil nor the integer 0; otherwise
   Vdefault_file_name_coding_system is tried under the same test;
   otherwise NAME is returned unchanged.  */
#define ENCODE_FILE(name) \
  (! NILP (Vfile_name_coding_system) \
   && !EQ (Vfile_name_coding_system, make_number (0)) \
   ? code_convert_string_norecord (name, Vfile_name_coding_system, 1) \
   : (! NILP (Vdefault_file_name_coding_system) \
      && !EQ (Vdefault_file_name_coding_system, make_number (0)) \
      ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \
      : (name)))


/* Decode the file name NAME using the specified coding system
   for file names, if any.  The coding system is chosen exactly as in
   ENCODE_FILE; the only difference is the final argument 0 passed to
   code_convert_string_norecord.  */
#define DECODE_FILE(name) \
  (! NILP (Vfile_name_coding_system) \
   && !EQ (Vfile_name_coding_system, make_number (0)) \
   ? code_convert_string_norecord (name, Vfile_name_coding_system, 0) \
   : (! NILP (Vdefault_file_name_coding_system) \
      && !EQ (Vdefault_file_name_coding_system, make_number (0)) \
      ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 0) \
      : (name)))
670
df7492f9 671
/* Encode the string STR using the specified coding system
   for system functions, if any.  Vlocale_coding_system is used when
   it is neither nil nor the integer 0; otherwise STR is returned
   unchanged.  */
#define ENCODE_SYSTEM(str) \
  (! NILP (Vlocale_coding_system) \
   && !EQ (Vlocale_coding_system, make_number (0)) \
   ? code_convert_string_norecord (str, Vlocale_coding_system, 1) \
   : (str))

/* Decode the string STR using the specified coding system
   for system functions, if any.  Same selection rule as
   ENCODE_SYSTEM, with final argument 0.  */
#define DECODE_SYSTEM(str) \
  (! NILP (Vlocale_coding_system) \
   && !EQ (Vlocale_coding_system, make_number (0)) \
   ? code_convert_string_norecord (str, Vlocale_coding_system, 0) \
   : (str))

/* Note that this encodes utf-8, not utf-8-emacs, so it's not a no-op.  */
#define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, 1)
690
4ed46869 691/* Extern declarations. */
383e0970
J
692extern Lisp_Object code_conversion_save (int, int);
693extern int decoding_buffer_size (struct coding_system *, int);
694extern int encoding_buffer_size (struct coding_system *, int);
695extern void setup_coding_system (Lisp_Object, struct coding_system *);
696extern Lisp_Object coding_charset_list (struct coding_system *);
697extern Lisp_Object coding_system_charset_list (Lisp_Object);
383e0970
J
698extern Lisp_Object code_convert_string (Lisp_Object, Lisp_Object,
699 Lisp_Object, int, int, int);
700extern Lisp_Object code_convert_string_norecord (Lisp_Object, Lisp_Object,
701 int);
702extern Lisp_Object raw_text_coding_system (Lisp_Object);
703extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object);
4628bef1 704extern Lisp_Object complement_process_encoding_system (Lisp_Object);
383e0970
J
705
706extern int decode_coding_gap (struct coding_system *,
707 EMACS_INT, EMACS_INT);
383e0970
J
708extern void decode_coding_object (struct coding_system *,
709 Lisp_Object, EMACS_INT, EMACS_INT,
710 EMACS_INT, EMACS_INT, Lisp_Object);
711extern void encode_coding_object (struct coding_system *,
712 Lisp_Object, EMACS_INT, EMACS_INT,
713 EMACS_INT, EMACS_INT, Lisp_Object);
df7492f9 714
/* Macros for backward compatibility.  All of them are thin wrappers
   around decode_coding_object / encode_coding_object.  */

/* Decode the region FROM..TO (character positions) of the current
   buffer; the current buffer is also the destination.  FROM and TO
   are each evaluated twice.  */
#define decode_coding_region(coding, from, to) \
  decode_coding_object (coding, Fcurrent_buffer (), \
                        from, CHAR_TO_BYTE (from), \
                        to, CHAR_TO_BYTE (to), Fcurrent_buffer ())


/* Encode the region FROM..TO (character positions) of the current
   buffer; the current buffer is also the destination.  FROM and TO
   are each evaluated twice.  */
#define encode_coding_region(coding, from, to) \
  encode_coding_object (coding, Fcurrent_buffer (), \
                        from, CHAR_TO_BYTE (from), \
                        to, CHAR_TO_BYTE (to), Fcurrent_buffer ())


/* Decode all of STRING with destination Qt.  NOCOPY is accepted for
   compatibility but is not used by the expansion.  STRING is
   evaluated three times.  */
#define decode_coding_string(coding, string, nocopy) \
  decode_coding_object (coding, string, 0, 0, SCHARS (string), \
                        SBYTES (string), Qt)

/* If STRING is multibyte, encode all of it with destination Qt and
   yield (CODING)->dst_object; otherwise yield STRING itself without
   calling the encoder.  NOCOPY is accepted for compatibility but is
   not used by the expansion.  STRING is evaluated several times.  */
#define encode_coding_string(coding, string, nocopy) \
  (STRING_MULTIBYTE (string) \
   ? (encode_coding_object (coding, string, 0, 0, SCHARS (string), \
                            SBYTES (string), Qt), \
      (coding)->dst_object) \
   : (string))


/* Decode BYTES bytes starting at the C pointer SRC into DST_OBJECT.
   The source pointer and both src_chars and src_bytes of CODING are
   set before decode_coding_object is called with Qnil as the source
   object.  CODING and BYTES are evaluated more than once.  */
#define decode_coding_c_string(coding, src, bytes, dst_object) \
  do { \
    (coding)->source = (src); \
    (coding)->src_chars = (coding)->src_bytes = (bytes); \
    decode_coding_object ((coding), Qnil, 0, 0, (bytes), (bytes), \
                          (dst_object)); \
  } while (0)
747
748
c532d349 749extern Lisp_Object preferred_coding_system (void);
df7492f9
KH
750
751
8f924df7
KH
752extern Lisp_Object Qutf_8, Qutf_8_emacs;
753
955cbe7b 754extern Lisp_Object Qcoding_category_index;
df7492f9
KH
755extern Lisp_Object Qcoding_system_p;
756extern Lisp_Object Qraw_text, Qemacs_mule, Qno_conversion, Qundecided;
4ed46869 757extern Lisp_Object Qbuffer_file_coding_system;
df7492f9
KH
758
759extern Lisp_Object Qunix, Qdos, Qmac;
4ed46869 760
f967223b
KH
761extern Lisp_Object Qtranslation_table;
762extern Lisp_Object Qtranslation_table_id;
ab45712c 763
4ed46869
KH
764#ifdef emacs
765extern Lisp_Object Qfile_coding_system;
387f6ba5 766extern Lisp_Object Qcall_process, Qcall_process_region;
4ed46869 767extern Lisp_Object Qstart_process, Qopen_network_stream;
d008a7cc 768extern Lisp_Object Qwrite_region;
4ed46869 769
383e0970 770extern char *emacs_strerror (int);
68c45bf0 771
fbaa2ed9
KH
772/* Coding system to be used to encode text for terminal display when
773 terminal coding system is nil. */
774extern struct coding_system safe_terminal_coding;
775
4ed46869
KH
776#endif
777
d008a7cc
GM
778/* Error signaled when there's a problem with detecting coding system */
779extern Lisp_Object Qcoding_system_error;
780
df7492f9 781extern char emacs_mule_bytes[256];
383e0970 782extern int emacs_mule_string_char (unsigned char *);
df7492f9 783
6f776e81 784#endif /* EMACS_CODING_H */