use guile subrs
[bpt/emacs.git] / src / coding.h
CommitLineData
/* Header for coding system handler.
   Copyright (C) 2001-2014 Free Software Foundation, Inc.
   Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
     2005, 2006, 2007, 2008, 2009, 2010, 2011
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H14PRO021
   Copyright (C) 2003
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H13PRO009

This file is part of GNU Emacs.

GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
4ed46869 25
6f776e81
KH
26#ifndef EMACS_CODING_H
27#define EMACS_CODING_H
4ed46869 28
/* Index to arguments of Fdefine_coding_system_internal.  The last
   enumerator, coding_arg_max, is the number of arguments listed
   here; each of the type-specific enumerations below starts its
   numbering at coding_arg_max.  */

enum define_coding_system_arg_index
  {
    coding_arg_name,
    coding_arg_mnemonic,
    coding_arg_coding_type,
    coding_arg_charset_list,
    coding_arg_ascii_compatible_p,
    coding_arg_decode_translation_table,
    coding_arg_encode_translation_table,
    coding_arg_post_read_conversion,
    coding_arg_pre_write_conversion,
    coding_arg_default_char,
    coding_arg_for_unibyte,
    coding_arg_plist,
    coding_arg_eol_type,
    coding_arg_max
  };

/* Additional argument indices for ISO-2022 coding systems.  */

enum define_coding_iso2022_arg_index
  {
    coding_arg_iso2022_initial = coding_arg_max,
    coding_arg_iso2022_reg_usage,
    coding_arg_iso2022_request,
    coding_arg_iso2022_flags,
    coding_arg_iso2022_max
  };

/* Additional argument indices for UTF-8 coding systems.  */

enum define_coding_utf8_arg_index
  {
    coding_arg_utf8_bom = coding_arg_max,
    coding_arg_utf8_max
  };

/* Additional argument indices for UTF-16 coding systems.  */

enum define_coding_utf16_arg_index
  {
    coding_arg_utf16_bom = coding_arg_max,
    coding_arg_utf16_endian,
    coding_arg_utf16_max
  };

/* Additional argument indices for CCL coding systems.  */

enum define_coding_ccl_arg_index
  {
    coding_arg_ccl_decoder = coding_arg_max,
    coding_arg_ccl_encoder,
    coding_arg_ccl_valids,
    coding_arg_ccl_max
  };

/* Additional argument indices for the `undecided' coding system.  */

enum define_coding_undecided_arg_index
  {
    coding_arg_undecided_inhibit_null_byte_detection = coding_arg_max,
    coding_arg_undecided_inhibit_iso_escape_detection,
    coding_arg_undecided_prefer_utf_8,
    coding_arg_undecided_max
  };
86
933373ed
KH
87/* Hash table for all coding systems. Keys are coding system symbols
88 and values are spec vectors of the corresponding coding system. A
89 spec vector has the form [ ATTRS ALIASES EOL-TYPE ]. ATTRS is a
90 vector of attribute of the coding system. ALIASES is a list of
91 aliases (symbols) of the coding system. EOL-TYPE is `unix', `dos',
92 `mac' or a vector of coding systems (symbols). */
93
df7492f9 94extern Lisp_Object Vcoding_system_hash_table;
4ed46869 95
933373ed 96
/* Enumeration of coding system type.  coding_type_max is not a type
   itself; it is the number of types listed before it.  */

enum coding_system_type
  {
    coding_type_charset,
    coding_type_utf_8,
    coding_type_utf_16,
    coding_type_iso_2022,
    coding_type_emacs_mule,
    coding_type_sjis,
    coding_type_ccl,
    coding_type_raw_text,
    coding_type_undecided,
    coding_type_max
  };
4ed46869 112
4ed46869 113
/* Enumeration of end-of-line format type.  */

enum end_of_line_type
  {
    eol_lf,		/* Line-feed only, same as Emacs' internal
			   format.  */
    eol_crlf,		/* Sequence of carriage-return and
			   line-feed.  */
    eol_cr,		/* Carriage-return only.  */
    eol_any,		/* Accept any of above.  Produce line-feed
			   only.  */
    eol_undecided,	/* This value is used to denote that the
			   eol-type is not yet decided.  */
    eol_type_max
  };
4ed46869 129
/* Enumeration of index to an attribute vector of a coding system.
   coding_attr_last_index is the number of slots listed before it.  */

enum coding_attr_index
  {
    coding_attr_base_name,
    coding_attr_docstring,
    coding_attr_mnemonic,
    coding_attr_type,
    coding_attr_charset_list,
    coding_attr_ascii_compat,
    coding_attr_decode_tbl,
    coding_attr_encode_tbl,
    coding_attr_trans_tbl,
    coding_attr_post_read,
    coding_attr_pre_write,
    coding_attr_default_char,
    coding_attr_for_unibyte,
    coding_attr_plist,

    coding_attr_category,
    coding_attr_safe_charsets,

    /* The following are extra attributes for each type.  */
    coding_attr_charset_valids,

    coding_attr_ccl_decoder,
    coding_attr_ccl_encoder,
    coding_attr_ccl_valids,

    coding_attr_iso_initial,
    coding_attr_iso_usage,
    coding_attr_iso_request,
    coding_attr_iso_flags,

    coding_attr_utf_bom,
    coding_attr_utf_16_endian,

    coding_attr_emacs_mule_full,

    coding_attr_undecided_inhibit_null_byte_detection,
    coding_attr_undecided_inhibit_iso_escape_detection,
    coding_attr_undecided_prefer_utf_8,

    coding_attr_last_index
  };
4ed46869 175
4ed46869 176
933373ed
KH
/* Macros to access an element of an attribute vector.  Each one
   expands to AREF on ATTRS with the matching `enum coding_attr_index'
   slot.  */

#define CODING_ATTR_BASE_NAME(attrs)	AREF (attrs, coding_attr_base_name)
#define CODING_ATTR_TYPE(attrs)		AREF (attrs, coding_attr_type)
#define CODING_ATTR_CHARSET_LIST(attrs)	AREF (attrs, coding_attr_charset_list)
#define CODING_ATTR_MNEMONIC(attrs)	AREF (attrs, coding_attr_mnemonic)
#define CODING_ATTR_DOCSTRING(attrs)	AREF (attrs, coding_attr_docstring)
#define CODING_ATTR_ASCII_COMPAT(attrs)	AREF (attrs, coding_attr_ascii_compat)
#define CODING_ATTR_DECODE_TBL(attrs)	AREF (attrs, coding_attr_decode_tbl)
#define CODING_ATTR_ENCODE_TBL(attrs)	AREF (attrs, coding_attr_encode_tbl)
#define CODING_ATTR_TRANS_TBL(attrs)	AREF (attrs, coding_attr_trans_tbl)
#define CODING_ATTR_POST_READ(attrs)	AREF (attrs, coding_attr_post_read)
#define CODING_ATTR_PRE_WRITE(attrs)	AREF (attrs, coding_attr_pre_write)
#define CODING_ATTR_DEFAULT_CHAR(attrs)	AREF (attrs, coding_attr_default_char)
#define CODING_ATTR_FOR_UNIBYTE(attrs)	AREF (attrs, coding_attr_for_unibyte)
#define CODING_ATTR_PLIST(attrs)	AREF (attrs, coding_attr_plist)
#define CODING_ATTR_CATEGORY(attrs)	AREF (attrs, coding_attr_category)
#define CODING_ATTR_SAFE_CHARSETS(attrs) AREF (attrs, coding_attr_safe_charsets)
8ddb35b2 195
8ddb35b2 196
933373ed
KH
/* Return the name of a coding system specified by ID, i.e. the key
   stored at index ID of Vcoding_system_hash_table.  */

#define CODING_ID_NAME(id) \
  (HASH_KEY (XHASH_TABLE (Vcoding_system_hash_table), id))

/* Return the attribute vector of a coding system specified by ID
   (element 0 of its spec vector).  */

#define CODING_ID_ATTRS(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 0))

/* Return the list of aliases of a coding system specified by ID
   (element 1 of its spec vector).  */

#define CODING_ID_ALIASES(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 1))

/* Return the eol-type of a coding system specified by ID
   (element 2 of its spec vector).  */

#define CODING_ID_EOL_TYPE(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 2))
658cc252 215
933373ed
KH
216
/* Return the spec vector of CODING_SYSTEM_SYMBOL, or Qnil if the
   symbol has no entry in Vcoding_system_hash_table.  */

#define CODING_SYSTEM_SPEC(coding_system_symbol) \
  (Fgethash ((coding_system_symbol), Vcoding_system_hash_table, Qnil))


/* Return the ID of CODING_SYSTEM_SYMBOL, i.e. the result of
   hash_lookup on Vcoding_system_hash_table.  The macros below treat
   a negative result as "not found".  */

#define CODING_SYSTEM_ID(coding_system_symbol)			\
  (hash_lookup (XHASH_TABLE (Vcoding_system_hash_table),	\
		(coding_system_symbol), NULL))

/* Return true if CODING_SYSTEM_SYMBOL is a coding system.  The hash
   table is tried first; Fcoding_system_p is consulted only when the
   lookup fails and the symbol is not nil.  The argument may be
   evaluated up to three times, so it must have no side effects.  */

#define CODING_SYSTEM_P(coding_system_symbol)			\
  (CODING_SYSTEM_ID (coding_system_symbol) >= 0			\
   || (! NILP (coding_system_symbol)				\
       && ! NILP (Fcoding_system_p (coding_system_symbol))))
4ed46869 235
933373ed
KH
/* Check if X is a coding system or not.  If X is not found in the
   hash table and Fcheck_coding_system returns nil for it, signal a
   wrong-type-argument error with the predicate Qcoding_system_p.
   X may be evaluated more than once.  */

#define CHECK_CODING_SYSTEM(x)				\
  do {							\
    if (CODING_SYSTEM_ID (x) < 0			\
	&& NILP (Fcheck_coding_system (x)))		\
      wrong_type_argument (Qcoding_system_p, (x));	\
  } while (false)
658cc252 244
4ed46869 245
933373ed
KH
/* Check if X is a coding system or not.  If it is, set SPEC to the
   spec vector of the coding system.  When the first lookup yields
   nil, Fcheck_coding_system is called on X and the lookup is retried
   once; if SPEC is still nil, a wrong-type-argument error is
   signaled.  X may be evaluated more than once, and SPEC must be an
   lvalue.  */

#define CHECK_CODING_SYSTEM_GET_SPEC(x, spec)		\
  do {							\
    (spec) = CODING_SYSTEM_SPEC (x);			\
    if (NILP (spec))					\
      {							\
	Fcheck_coding_system (x);			\
	(spec) = CODING_SYSTEM_SPEC (x);		\
      }							\
    if (NILP (spec))					\
      wrong_type_argument (Qcoding_system_p, (x));	\
  } while (false)
e6de76f8 260
8ddb35b2 261
933373ed
KH
/* Check if X is a coding system or not.  If it is, set ID to the
   ID of the coding system.  When the first lookup yields a negative
   ID, Fcheck_coding_system is called on X and the lookup is retried
   once; if ID is still negative, a wrong-type-argument error is
   signaled.  X may be evaluated more than once, and ID must be an
   lvalue.  */

#define CHECK_CODING_SYSTEM_GET_ID(x, id)		\
  do							\
    {							\
      (id) = CODING_SYSTEM_ID (x);			\
      if ((id) < 0)					\
	{						\
	  Fcheck_coding_system (x);			\
	  (id) = CODING_SYSTEM_ID (x);			\
	}						\
      if ((id) < 0)					\
	wrong_type_argument (Qcoding_system_p, (x));	\
    } while (false)
4ed46869 277
4ed46869
KH
278
279/*** GENERAL section ***/
280
df7492f9
KH
/* Enumeration of result code of code conversion.  */
enum coding_result_code
  {
    CODING_RESULT_SUCCESS,
    CODING_RESULT_INSUFFICIENT_SRC,
    CODING_RESULT_INSUFFICIENT_DST,
    CODING_RESULT_INVALID_SRC,
    CODING_RESULT_INTERRUPT
  };
290
658cc252 291
/* Macros used for the member `mode' of the struct coding_system.
   Each value is a distinct single bit.  */

/* If set, the decoding/encoding routines treat the current data as
   the last block of the whole text to be converted, and do the
   appropriate finishing job.  */
#define CODING_MODE_LAST_BLOCK			0x01

/* If set, it means that the current source text is in a buffer which
   enables selective display.  */
#define CODING_MODE_SELECTIVE_DISPLAY		0x02

/* This flag is used by the decoding/encoding routines on the fly.  If
   set, it means that right-to-left text is being processed.  */
#define CODING_MODE_DIRECTION			0x04

/* NOTE(review): this bit is not described anywhere in this header;
   check its users before relying on a particular meaning.  */
#define CODING_MODE_FIXED_DESTINATION		0x08

/* If set, it means that the encoding routines produce some safe
   ASCII characters (usually '?') for unsupported characters.  */
#define CODING_MODE_SAFE_ENCODING		0x10
df7492f9 312
825d0875
KH
313 /* For handling composition sequence. */
314#include "composite.h"
315
/* State used while handling a composition sequence; stored in the
   `state' member of struct composition_status.  */
enum composition_state
  {
    COMPOSING_NO,
    COMPOSING_CHAR,
    COMPOSING_RULE,
    COMPOSING_COMPONENT_CHAR,
    COMPOSING_COMPONENT_RULE
  };
324
325/* Structure for the current composition status. */
326struct composition_status
327{
328 enum composition_state state;
329 enum composition_method method;
f10fe38f 330 bool old_form; /* true if pre-21 form */
825d0875
KH
331 int length; /* number of elements produced in charbuf */
332 int nchars; /* number of characters composed */
333 int ncomps; /* number of composition components */
334 /* Maximum carryover is for the case of COMPOSITION_WITH_RULE_ALTCHARS.
335 See the comment in coding.c. */
336 int carryover[4 /* annotation header */
337 + MAX_COMPOSITION_COMPONENTS * 3 - 2 /* ALTs and RULEs */
338 + 2 /* intermediate -1 -1 */
339 + MAX_COMPOSITION_COMPONENTS /* CHARs */
340 ];
341};
342
343
df7492f9
KH
344/* Structure of the field `spec.iso_2022' in the structure
345 `coding_system'. */
346struct iso_2022_spec
4ed46869 347{
2ec49574 348 /* Bit-wise-or of CODING_ISO_FLAG_XXX. */
df7492f9 349 unsigned flags;
4ed46869 350
df7492f9
KH
351 /* The current graphic register invoked to each graphic plane. */
352 int current_invocation[2];
658cc252 353
df7492f9
KH
354 /* The current charset designated to each graphic register. The
355 value -1 means that not charset is designated, -2 means that
356 there was an invalid designation previously. */
357 int current_designation[4];
4ed46869 358
825d0875
KH
359 /* If positive, we are now scanning CTEXT extended segment. */
360 int ctext_extended_segment_len;
361
f10fe38f
PE
362 /* True temporarily only when graphic register 2 or 3 is invoked by
363 single-shift while encoding. */
96c06863 364 bool_bf single_shifting : 1;
f10fe38f
PE
365
366 /* True temporarily only when processing at beginning of line. */
96c06863 367 bool_bf bol : 1;
f10fe38f
PE
368
369 /* If true, we are now scanning embedded UTF-8 sequence. */
96c06863 370 bool_bf embedded_utf_8 : 1;
825d0875
KH
371
372 /* The current composition. */
373 struct composition_status cmp_status;
374};
375
376struct emacs_mule_spec
377{
825d0875 378 struct composition_status cmp_status;
df7492f9 379};
4ed46869 380
270afa77
KH
/* Structure of the field `spec.undecided' in the structure
   `coding_system'.  */
struct undecided_spec
{
  /* Inhibit null byte detection.  1 means always inhibit,
     -1 means do not inhibit, 0 means rely on user variable.  */
  int inhibit_nbd;

  /* Inhibit ISO escape detection.  -1, 0, 1 as above.  */
  int inhibit_ied;

  /* Prefer UTF-8 when the input could be other encodings.  */
  bool prefer_utf_8;
};
393
/* BOM handling for the UTF coding systems; used both by
   `struct utf_16_spec' and by the `utf_8_bom' member of
   `coding_system.spec'.  */
enum utf_bom_type
  {
    utf_detect_bom,
    utf_without_bom,
    utf_with_bom
  };

/* Byte order of a UTF-16 coding system.  */
enum utf_16_endian_type
  {
    utf_16_big_endian,
    utf_16_little_endian
  };

/* Structure of the field `spec.utf_16' in the structure
   `coding_system'.  */
struct utf_16_spec
{
  enum utf_bom_type bom;
  enum utf_16_endian_type endian;
  int surrogate;
};
279d9f7b 413
4fecac5c
KH
/* Bookkeeping for coding system detection.  Values of the members
   are bitwise-OR of CATEGORY_MASK_XXXs.  */
struct coding_detection_info
{
  /* Which categories are already checked.  */
  int checked;
  /* Which categories are strongly found.  */
  int found;
  /* Which categories are rejected.  */
  int rejected;
};
279d9f7b 424
279d9f7b 425
df7492f9
KH
426struct coding_system
427{
428 /* ID number of the coding system. This is an index to
429 Vcoding_system_hash_table. This value is set by
430 setup_coding_system. At the early stage of building time, this
431 value is -1 in the array coding_categories to indicate that no
432 coding-system of that category is yet defined. */
d3411f89 433 ptrdiff_t id;
df7492f9
KH
434
435 /* Flag bits of the coding system. The meaning of each bit is common
436 to all types of coding systems. */
437 int common_flags;
438
439 /* Mode bits of the coding system. See the comments of the macros
440 CODING_MODE_XXX. */
441 unsigned int mode;
450c60a5 442
4ed46869 443 /* Detailed information specific to each type of coding system. */
df7492f9 444 union
4ed46869 445 {
df7492f9
KH
446 struct iso_2022_spec iso_2022;
447 struct ccl_spec *ccl; /* Defined in ccl.h. */
448 struct utf_16_spec utf_16;
e4215ddd 449 enum utf_bom_type utf_8_bom;
825d0875 450 struct emacs_mule_spec emacs_mule;
270afa77 451 struct undecided_spec undecided;
4ed46869
KH
452 } spec;
453
df7492f9 454 int max_charset_id;
1b3b981b 455 unsigned char *safe_charsets;
658cc252 456
df7492f9 457 /* The following two members specify how binary 8-bit code 128..255
96c06863
PE
458 are represented in source and destination text respectively. True
459 means they are represented by 2-byte sequence, false means they are
df7492f9 460 represented by 1-byte as is (see the comment in character.h). */
96c06863
PE
461 bool_bf src_multibyte : 1;
462 bool_bf dst_multibyte : 1;
811ea086 463
a137bb00
KH
464 /* How may heading bytes we can skip for decoding. This is set to
465 -1 in setup_coding_system, and updated by detect_coding. So,
466 when this is equal to the byte length of the text being
8a44e6d1
KH
467 converted, we can skip the actual conversion process except for
468 the eol format. */
d311d28c 469 ptrdiff_t head_ascii;
658cc252 470
52840a9c
KH
471 /* How many bytes/chars at the source are detected as valid utf-8
472 sequence. Set by detect_coding_utf_8. */
473 ptrdiff_t detected_utf8_bytes, detected_utf8_chars;
e6d2f155 474
8a44e6d1
KH
475 /* Used internally in coding.c. See the comment of detect_ascii. */
476 int eol_seen;
477
658cc252 478 /* The following members are set by encoding/decoding routine. */
d311d28c 479 ptrdiff_t produced, produced_char, consumed, consumed_char;
658cc252 480
811ea086 481 /* Number of error source data found in a decoding routine. */
e6f29a68 482 ptrdiff_t errors;
811ea086 483
6d5eb5b0 484 /* Store the positions of error source data. */
d311d28c 485 ptrdiff_t *error_positions;
e6a9a0bc 486
df7492f9
KH
487 /* Finish status of code conversion. */
488 enum coding_result_code result;
6041c9ce 489
d311d28c 490 ptrdiff_t src_pos, src_pos_byte, src_chars, src_bytes;
df7492f9 491 Lisp_Object src_object;
8f924df7 492 const unsigned char *source;
4ed46869 493
d311d28c 494 ptrdiff_t dst_pos, dst_pos_byte, dst_bytes;
df7492f9
KH
495 Lisp_Object dst_object;
496 unsigned char *destination;
4ed46869 497
df7492f9
KH
498 /* If an element is non-negative, it is a character code.
499
500 If it is in the range -128..-1, it is a 8-bit character code
501 minus 256.
502
503 If it is less than -128, it specifies the start of an annotation
504 chunk. The length of the chunk is -128 minus the value of the
505 element. The following elements are OFFSET, ANNOTATION-TYPE, and
506 a sequence of actual data for the annotation. OFFSET is a
507 character position offset from dst_pos or src_pos,
22bcf204 508 ANNOTATION-TYPE specifies the meaning of the annotation and how to
df7492f9
KH
509 handle the following data.. */
510 int *charbuf;
511 int charbuf_size, charbuf_used;
512
f10fe38f
PE
513 /* True if the source of conversion is not in the member
514 `charbuf', but at `src_object'. */
96c06863 515 bool_bf chars_at_source : 1;
f10fe38f 516
f8498081
DA
517 /* Nonzero if the result of conversion is in `destination'
518 buffer rather than in `dst_object'. */
96c06863 519 bool_bf raw_destination : 1;
f8498081 520
96c06863
PE
521 /* Set to true if charbuf contains an annotation. */
522 bool_bf annotated : 1;
4ed46869 523
df7492f9
KH
524 unsigned char carryover[64];
525 int carryover_bytes;
a5ee738b 526
df7492f9
KH
527 int default_char;
528
f10fe38f 529 bool (*detector) (struct coding_system *, struct coding_detection_info *);
383e0970 530 void (*decoder) (struct coding_system *);
f10fe38f 531 bool (*encoder) (struct coding_system *);
df7492f9
KH
532};
533
/* Meanings of bits in the member `common_flags' of the structure
   coding_system.  The lowest 8 bits are reserved for various kinds of
   annotations.

   NOTE(review): CODING_ANNOTATE_CHARSET_MASK is 0x0003, which is the
   union of the composition and direction bits rather than a bit of
   its own; 0x0004 may have been intended.  The value is kept as is
   because callers depend on it -- confirm against the users in
   coding.c before changing it.  */
#define CODING_ANNOTATION_MASK			0x00FF
#define CODING_ANNOTATE_COMPOSITION_MASK	0x0001
#define CODING_ANNOTATE_DIRECTION_MASK		0x0002
#define CODING_ANNOTATE_CHARSET_MASK		0x0003
#define CODING_FOR_UNIBYTE_MASK			0x0100
#define CODING_REQUIRE_FLUSHING_MASK		0x0200
#define CODING_REQUIRE_DECODING_MASK		0x0400
#define CODING_REQUIRE_ENCODING_MASK		0x0800
#define CODING_REQUIRE_DETECTION_MASK		0x1000
#define CODING_RESET_AT_BOL_MASK		0x2000
547
/* Return nonzero if the coding context CODING requires annotation
   handling.  */
#define CODING_REQUIRE_ANNOTATION(coding) \
  ((coding)->common_flags & CODING_ANNOTATION_MASK)

/* Return nonzero if the coding context CODING prefers decoding into
   unibyte.  */
#define CODING_FOR_UNIBYTE(coding) \
  ((coding)->common_flags & CODING_FOR_UNIBYTE_MASK)

/* Return nonzero if the coding context CODING requires specific code to be
   attached at the tail of converted text.  */
#define CODING_REQUIRE_FLUSHING(coding) \
  ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK)

/* Return nonzero if the coding context CODING requires code conversion on
   decoding: either the destination is multibyte or the
   CODING_REQUIRE_DECODING_MASK flag is set.  */
#define CODING_REQUIRE_DECODING(coding)					\
  ((coding)->dst_multibyte						\
   || ((coding)->common_flags & CODING_REQUIRE_DECODING_MASK))


/* Return nonzero if the coding context CODING requires code conversion on
   encoding.
   The non-multibyte part of the condition is to support encoding of
   unibyte strings/buffers generated by string-as-unibyte or
   (set-buffer-multibyte nil) from multibyte strings/buffers.  */
#define CODING_REQUIRE_ENCODING(coding)					\
  ((coding)->src_multibyte						\
   || ((coding)->common_flags & CODING_REQUIRE_ENCODING_MASK)		\
   || ((coding)->mode & CODING_MODE_SELECTIVE_DISPLAY))


/* Return nonzero if the coding context CODING requires some kind of code
   detection.  */
#define CODING_REQUIRE_DETECTION(coding) \
  ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK)

/* Return nonzero if the coding context CODING requires code conversion on
   decoding or some kind of code detection.  */
#define CODING_MAY_REQUIRE_DECODING(coding)	\
  (CODING_REQUIRE_DECODING (coding)		\
   || CODING_REQUIRE_DETECTION (coding))
4ed46869 591
4ed46869
KH
/* Convert CODE, a two-byte JISX0208 code in SJIS form (first byte in
   bits 8..15, second byte in bits 0..7), to the corresponding JIS
   position code, packed the same way, and store the result back
   into CODE.

   CODE must be a modifiable lvalue without side effects; it is
   evaluated three times.  The temporaries carry a macro-specific
   suffix so that a caller variable named s1, s2, j1 or j2 can be
   passed as CODE without being shadowed.  */

#define SJIS_TO_JIS(code)						\
  do {									\
    int sjis_s1_, sjis_s2_, sjis_j1_, sjis_j2_;				\
									\
    sjis_s1_ = (code) >> 8, sjis_s2_ = (code) & 0xFF;			\
									\
    if (sjis_s2_ >= 0x9F)						\
      (sjis_j1_ = sjis_s1_ * 2 - (sjis_s1_ >= 0xE0 ? 0x160 : 0xE0),	\
       sjis_j2_ = sjis_s2_ - 0x7E);					\
    else								\
      (sjis_j1_ = sjis_s1_ * 2 - ((sjis_s1_ >= 0xE0) ? 0x161 : 0xE1),	\
       sjis_j2_ = sjis_s2_ - ((sjis_s2_ >= 0x7F) ? 0x20 : 0x1F));	\
    (code) = (sjis_j1_ << 8) | sjis_j2_;				\
  } while (false)
4ed46869 611
6e58724e
KH
/* Like SJIS_TO_JIS, but using the mapping for first bytes 0xF0 and
   above.  NOTE(review): presumably this is the second plane of
   JIS X 0213 -- confirm against the callers.

   CODE must be a modifiable lvalue without side effects; it is
   evaluated three times.  The temporaries carry a macro-specific
   suffix so that a caller variable named s1, s2, j1 or j2 can be
   passed as CODE without being shadowed.  */

#define SJIS_TO_JIS2(code)						\
  do {									\
    int sjis2_s1_, sjis2_s2_, sjis2_j1_, sjis2_j2_;			\
									\
    sjis2_s1_ = (code) >> 8, sjis2_s2_ = (code) & 0xFF;			\
									\
    if (sjis2_s2_ >= 0x9F)						\
      {									\
	sjis2_j1_ = (sjis2_s1_ == 0xF0 ? 0x28				\
		     : sjis2_s1_ == 0xF1 ? 0x24				\
		     : sjis2_s1_ == 0xF2 ? 0x2C				\
		     : sjis2_s1_ == 0xF3 ? 0x2E				\
		     : 0x6E + (sjis2_s1_ - 0xF4) * 2);			\
	sjis2_j2_ = sjis2_s2_ - 0x7E;					\
      }									\
    else								\
      {									\
	sjis2_j1_ = (sjis2_s1_ <= 0xF2 ? 0x21 + (sjis2_s1_ - 0xF0) * 2	\
		     : sjis2_s1_ <= 0xF4 ? 0x2D + (sjis2_s1_ - 0xF3) * 2 \
		     : 0x6F + (sjis2_s1_ - 0xF5) * 2);			\
	sjis2_j2_ = sjis2_s2_ - ((sjis2_s2_ >= 0x7F ? 0x20 : 0x1F));	\
      }									\
    (code) = (sjis2_j1_ << 8) | sjis2_j2_;				\
  } while (false)
6e58724e 636
df7492f9
KH
637
/* Convert CODE, a two-byte JISX0208 position code (first byte in
   bits 8..15, second byte in bits 0..7), to the corresponding SJIS
   code, packed the same way, and store the result back into CODE.
   This is the inverse of SJIS_TO_JIS.

   CODE must be a modifiable lvalue without side effects; it is
   evaluated three times.  The temporaries carry a macro-specific
   suffix so that a caller variable named s1, s2, j1 or j2 can be
   passed as CODE without being shadowed.  */

#define JIS_TO_SJIS(code)						\
  do {									\
    int jis_s1_, jis_s2_, jis_j1_, jis_j2_;				\
									\
    jis_j1_ = (code) >> 8, jis_j2_ = (code) & 0xFF;			\
    if (jis_j1_ & 1)							\
      (jis_s1_ = jis_j1_ / 2 + ((jis_j1_ < 0x5F) ? 0x71 : 0xB1),	\
       jis_s2_ = jis_j2_ + ((jis_j2_ >= 0x60) ? 0x20 : 0x1F));		\
    else								\
      (jis_s1_ = jis_j1_ / 2 + ((jis_j1_ < 0x5F) ? 0x70 : 0xB0),	\
       jis_s2_ = jis_j2_ + 0x7E);					\
    (code) = (jis_s1_ << 8) | jis_s2_;					\
  } while (false)
4ed46869 651
6e58724e
KH
/* Like JIS_TO_SJIS, but producing first bytes 0xF0 and above; this
   is the inverse of SJIS_TO_JIS2.  NOTE(review): presumably this is
   the second plane of JIS X 0213 -- confirm against the callers.

   CODE must be a modifiable lvalue without side effects; it is
   evaluated three times.  The temporaries carry a macro-specific
   suffix so that a caller variable named s1, s2, j1 or j2 can be
   passed as CODE without being shadowed.  */

#define JIS_TO_SJIS2(code)						\
  do {									\
    int jis2_s1_, jis2_s2_, jis2_j1_, jis2_j2_;				\
									\
    jis2_j1_ = (code) >> 8, jis2_j2_ = (code) & 0xFF;			\
    if (jis2_j1_ & 1)							\
      {									\
	jis2_s1_ = (jis2_j1_ <= 0x25 ? 0xF0 + (jis2_j1_ - 0x21) / 2	\
		    : jis2_j1_ <= 0x2F ? 0xF3 + (jis2_j1_ - 0x2D) / 2	\
		    : 0xF5 + (jis2_j1_ - 0x6F) / 2);			\
	jis2_s2_ = jis2_j2_ + ((jis2_j2_ >= 0x60) ? 0x20 : 0x1F);	\
      }									\
    else								\
      {									\
	jis2_s1_ = (jis2_j1_ == 0x28 ? 0xF0				\
		    : jis2_j1_ == 0x24 ? 0xF1				\
		    : jis2_j1_ == 0x2C ? 0xF2				\
		    : jis2_j1_ == 0x2E ? 0xF3				\
		    : 0xF4 + (jis2_j1_ - 0x6E) / 2);			\
	jis2_s2_ = jis2_j2_ + 0x7E;					\
      }									\
    (code) = (jis2_s1_ << 8) | jis2_s2_;				\
  } while (false)
df7492f9 675
290591c8
KH
/* Encode the file name NAME using the specified coding system
   for file names, if any.  Expands to a call to encode_file_name.  */
#define ENCODE_FILE(NAME)  encode_file_name (NAME)

/* Decode the file name NAME using the specified coding system
   for file names, if any.  Expands to a call to decode_file_name.  */
#define DECODE_FILE(NAME)  decode_file_name (NAME)
df7492f9 683
/* Encode the string STR using the specified coding system
   for system functions, if any.  When Vlocale_coding_system is nil,
   STR itself is the result.  */
#define ENCODE_SYSTEM(str)						   \
  (! NILP (Vlocale_coding_system)					   \
   ? code_convert_string_norecord ((str), Vlocale_coding_system, true)	   \
   : (str))

/* Decode the string STR using the specified coding system
   for system functions, if any.  When Vlocale_coding_system is nil,
   STR itself is the result.  */
#define DECODE_SYSTEM(str)						   \
  (! NILP (Vlocale_coding_system)					   \
   ? code_convert_string_norecord ((str), Vlocale_coding_system, false)   \
   : (str))

/* Note that this encodes utf-8, not utf-8-emacs, so it's not a no-op.  */
#define ENCODE_UTF_8(str) code_convert_string_norecord ((str), Qutf_8, true)
b3a208b0 700
4ed46869 701/* Extern declarations. */
f10fe38f 702extern Lisp_Object code_conversion_save (bool, bool);
383e0970
J
703extern void setup_coding_system (Lisp_Object, struct coding_system *);
704extern Lisp_Object coding_charset_list (struct coding_system *);
705extern Lisp_Object coding_system_charset_list (Lisp_Object);
383e0970 706extern Lisp_Object code_convert_string (Lisp_Object, Lisp_Object,
f10fe38f 707 Lisp_Object, bool, bool, bool);
383e0970 708extern Lisp_Object code_convert_string_norecord (Lisp_Object, Lisp_Object,
f10fe38f 709 bool);
c3e9160b
EZ
710extern Lisp_Object encode_file_name (Lisp_Object);
711extern Lisp_Object decode_file_name (Lisp_Object);
383e0970
J
712extern Lisp_Object raw_text_coding_system (Lisp_Object);
713extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object);
4628bef1 714extern Lisp_Object complement_process_encoding_system (Lisp_Object);
383e0970 715
f10fe38f
PE
716extern void decode_coding_gap (struct coding_system *,
717 ptrdiff_t, ptrdiff_t);
383e0970 718extern void decode_coding_object (struct coding_system *,
d311d28c
PE
719 Lisp_Object, ptrdiff_t, ptrdiff_t,
720 ptrdiff_t, ptrdiff_t, Lisp_Object);
383e0970 721extern void encode_coding_object (struct coding_system *,
d311d28c
PE
722 Lisp_Object, ptrdiff_t, ptrdiff_t,
723 ptrdiff_t, ptrdiff_t, Lisp_Object);
df7492f9 724
#if defined (WINDOWSNT) || defined (CYGWIN)

/* These functions use Lisp string objects to store the UTF-16LE
   strings that modern versions of Windows expect.  These strings are
   not particularly useful to Lisp, and all Lisp strings should be
   native Emacs multibyte.  */

/* Access the wide-character string stored in a Lisp string object.  */
#define WCSDATA(x) ((wchar_t *) SDATA (x))

/* Convert the multi-byte string in STR to UTF-16LE encoded unibyte
   string, and store it in *BUF.  BUF may safely point to STR on entry.  */
extern wchar_t *to_unicode (Lisp_Object str, Lisp_Object *buf);

/* Convert STR, a UTF-16LE encoded string embedded in a unibyte string
   object, to a multi-byte Emacs string and return it.  This function
   calls code_convert_string_norecord internally and has all its
   failure modes.  STR itself is not modified.  */
extern Lisp_Object from_unicode (Lisp_Object str);

/* Convert WSTR to an Emacs string.  */
extern Lisp_Object from_unicode_buffer (const wchar_t *wstr);

#endif /* WINDOWSNT || CYGWIN */
ba116008 749
933373ed
KH
/* Macros for backward compatibility.  */

/* Encode STRING with CODING and yield the result.  A multibyte
   STRING is passed whole to encode_coding_object with Qt as the
   destination, and the value is CODING->dst_object; a unibyte STRING
   is yielded unchanged.  NOCOPY is accepted but ignored.  STRING is
   evaluated several times, so it must have no side effects.  */
#define encode_coding_string(coding, string, nocopy)			\
  (STRING_MULTIBYTE (string)						\
   ? (encode_coding_object (coding, string, 0, 0, SCHARS (string),	\
			    SBYTES (string), Qt),			\
      (coding)->dst_object)						\
   : (string))
df7492f9
KH
757
758
/* Decode the BYTES bytes starting at SRC with CODING into
   DST_OBJECT.  CODING->source is pointed at SRC, CODING->src_chars
   and CODING->src_bytes are both set to BYTES, and
   decode_coding_object is then called with Qnil as the source
   object.  CODING and BYTES are evaluated more than once.  */
#define decode_coding_c_string(coding, src, bytes, dst_object)		\
  do {									\
    (coding)->source = (src);						\
    (coding)->src_chars = (coding)->src_bytes = (bytes);		\
    decode_coding_object ((coding), Qnil, 0, 0, (bytes), (bytes),	\
			  (dst_object));				\
  } while (false)
df7492f9
KH
766
767
c532d349 768extern Lisp_Object preferred_coding_system (void);
df7492f9
KH
769
770
8f924df7
KH
771extern Lisp_Object Qutf_8, Qutf_8_emacs;
772
955cbe7b 773extern Lisp_Object Qcoding_category_index;
df7492f9
KH
774extern Lisp_Object Qcoding_system_p;
775extern Lisp_Object Qraw_text, Qemacs_mule, Qno_conversion, Qundecided;
4ed46869 776extern Lisp_Object Qbuffer_file_coding_system;
df7492f9 777
84cc1ab6 778extern Lisp_Object Qunix, Qdos;
4ed46869 779
f967223b
KH
780extern Lisp_Object Qtranslation_table;
781extern Lisp_Object Qtranslation_table_id;
ab45712c 782
4ed46869
KH
783#ifdef emacs
784extern Lisp_Object Qfile_coding_system;
387f6ba5 785extern Lisp_Object Qcall_process, Qcall_process_region;
4ed46869 786extern Lisp_Object Qstart_process, Qopen_network_stream;
d008a7cc 787extern Lisp_Object Qwrite_region;
4ed46869 788
383e0970 789extern char *emacs_strerror (int);
68c45bf0 790
fbaa2ed9
KH
791/* Coding system to be used to encode text for terminal display when
792 terminal coding system is nil. */
793extern struct coding_system safe_terminal_coding;
794
4ed46869
KH
795#endif
796
d008a7cc
GM
797/* Error signaled when there's a problem with detecting coding system */
798extern Lisp_Object Qcoding_system_error;
799
df7492f9 800extern char emacs_mule_bytes[256];
df7492f9 801
6f776e81 802#endif /* EMACS_CODING_H */