src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3      Licensed to the Free Software Foundation.
   4    Copyright (C) 2001, 2002 Free Software Foundation, Inc.
   5    Copyright (C) 2003
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H13PRO009
   8
   9 This file is part of GNU Emacs.
  10
  11 GNU Emacs is free software; you can redistribute it and/or modify
  12 it under the terms of the GNU General Public License as published by
  13 the Free Software Foundation; either version 2, or (at your option)
  14 any later version.
  15
  16 GNU Emacs is distributed in the hope that it will be useful,
  17 but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 GNU General Public License for more details.
  20
  21 You should have received a copy of the GNU General Public License
  22 along with GNU Emacs; see the file COPYING.  If not, write to
  23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  24 Boston, MA 02111-1307, USA.  */
  25
  26 /*** TABLE OF CONTENTS ***
  27
  28   0. General comments
  29   1. Preamble
  30   2. Emacs' internal format (emacs-utf-8) handlers
  31   3. UTF-8 handlers
  32   4. UTF-16 handlers
  33   5. Charset-base coding systems handlers
  34   6. emacs-mule (old Emacs' internal format) handlers
  35   7. ISO2022 handlers
  36   8. Shift-JIS and BIG5 handlers
  37   9. CCL handlers
  38   10. C library functions
  39   11. Emacs Lisp library functions
  40   12. Postamble
  41
  42 */
  43
  44 /*** 0. General comments ***
  45
  46
  47 CODING SYSTEM
  48
  49   A coding system is an object for an encoding mechanism that contains
  50   information about how to convert byte sequences to character
  51   sequences and vice versa.  When we say "decode", it means converting
  52   a byte sequence of a specific coding system into a character
  53   sequence that is represented by Emacs' internal coding system
  54   `emacs-utf-8', and when we say "encode", it means converting a
  55   character sequence of emacs-utf-8 to a byte sequence of a specific
  56   coding system.
  57
  58   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  59   C level, a coding system is represented by a vector of attributes
  60   stored in the hash table Vcharset_hash_table.  The conversion from
  61   coding system symbol to attributes vector is done by looking up
  62   Vcharset_hash_table by the symbol.
  63
  64   Coding systems are classified into the following types depending on
  65   the encoding mechanism.  Here's a brief description of the types.
  66
  67   o UTF-8
  68
  69   o UTF-16
  70
  71   o Charset-base coding system
  72
  73   A coding system defined by one or more (coded) character sets.
  74   Decoding and encoding are done by a code converter defined for each
  75   character set.
  76
  77   o Old Emacs internal format (emacs-mule)
  78
  79   The coding system adopted by old versions of Emacs (20 and 21).
  80
  81   o ISO2022-base coding system
  82
  83   The most famous coding system for multiple character sets.  X's
  84   Compound Text, various EUCs (Extended Unix Code), and coding systems
  85   used in the Internet communication such as ISO-2022-JP are all
  86   variants of ISO2022.
  87
  88   o SJIS (or Shift-JIS or MS-Kanji-Code)
  89
  90   A coding system to encode character sets: ASCII, JISX0201, and
  91   JISX0208.  Widely used for PC's in Japan.  Details are described in
  92   section 8.
  93
  94   o BIG5
  95
  96   A coding system to encode character sets: ASCII and Big5.  Widely
  97   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  98   described in section 8.  In this file, when we write "big5" (all
  99   lowercase), we mean the coding system, and when we write "Big5"
 100   (capitalized), we mean the character set.
 101
 102   o CCL
 103
 104   If a user wants to decode/encode text encoded in a coding system
 105   not listed above, he can supply a decoder and an encoder for it in
 106   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 107   program while decoding/encoding.
 108
 109   o Raw-text
 110
 111   A coding system for text containing raw eight-bit data.  Emacs
 112   treats each byte of source text as a character (except for
 113   end-of-line conversion).
 114
 115   o No-conversion
 116
 117   Like raw text, but don't do end-of-line conversion.
 118
 119
 120 END-OF-LINE FORMAT
 121
 122   How text end-of-line is encoded depends on operating system.  For
 123   instance, Unix's format is just one byte of LF (line-feed) code,
 124   whereas DOS's format is two-byte sequence of `carriage-return' and
 125   `line-feed' codes.  MacOS's format is usually one byte of
 126   `carriage-return'.
 127
 128   Since text character encoding and end-of-line encoding are
 129   independent, any coding system described above can take any format
 130   of end-of-line (except for no-conversion).
 131
 132 STRUCT CODING_SYSTEM
 133
 134   Before using a coding system for code conversion (i.e. decoding and
 135   encoding), we setup a structure of type `struct coding_system'.
 136   This structure keeps various information about a specific code
 137   conversion (e.g. the location of source and destination data).
 138
 139 */
 140
 141 /* COMMON MACROS */
 142
 143
 144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 145
 146   These functions check if a byte sequence specified as a source in
 147   CODING conforms to the format of XXX, and update the members of
 148   DETECT_INFO.
 149
 150   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 151
 152   Below is the template of these functions.  */
 153
 154 #if 0
 155 static int
 156 detect_coding_XXX (coding, detect_info)
 157      struct coding_system *coding;
 158      struct coding_detection_info *detect_info;
 159 {
 160   unsigned char *src = coding->source;
 161   unsigned char *src_end = coding->source + coding->src_bytes;
 162   int multibytep = coding->src_multibyte;
 163   int consumed_chars = 0;
 164   int found = 0;
 165   ...;
 166
 167   while (1)
 168     {
 169       /* Get one byte from the source.  If the souce is exausted, jump
 170          to no_more_source:.  */
 171       ONE_MORE_BYTE (c);
 172
 173       if (! __C_conforms_to_XXX___ (c))
 174         break;
 175       if (! __C_strongly_suggests_XXX__ (c))
 176         found = CATEGORY_MASK_XXX;
 177     }
 178   /* The byte sequence is invalid for XXX.  */
 179   detect_info->rejected |= CATEGORY_MASK_XXX;
 180   return 0;
 181
 182  no_more_source:
 183   /* The source exausted successfully.  */
 184   detect_info->found |= found;
 185   return 1;
 186 }
 187 #endif
 188
 189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 190
 191   These functions decode a byte sequence specified as a source by
 192   CODING.  The resulting multibyte text goes to a place pointed to by
 193   CODING->charbuf, the length of which should not exceed
 194   CODING->charbuf_size;
 195
 196   These functions set the information of original and decoded texts in
 197   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 198   They also set CODING->result to one of CODING_RESULT_XXX indicating
 199   how the decoding is finished.
 200
 201   Below is the template of these functions.  */
 202
 203 #if 0
 204 static void
 205 decode_coding_XXXX (coding)
 206      struct coding_system *coding;
 207 {
 208   unsigned char *src = coding->source + coding->consumed;
 209   unsigned char *src_end = coding->source + coding->src_bytes;
 210   /* SRC_BASE remembers the start position in source in each loop.
 211      The loop will be exited when there's not enough source code, or
 212      when there's no room in CHARBUF for a decoded character.  */
 213   unsigned char *src_base;
 214   /* A buffer to produce decoded characters.  */
 215   int *charbuf = coding->charbuf;
 216   int *charbuf_end = charbuf + coding->charbuf_size;
 217   int multibytep = coding->src_multibyte;
 218
 219   while (1)
 220     {
 221       src_base = src;
 222       if (charbuf < charbuf_end)
 223         /* No more room to produce a decoded character.  */
 224         break;
 225       ONE_MORE_BYTE (c);
 226       /* Decode it. */
 227     }
 228
 229  no_more_source:
 230   if (src_base < src_end
 231       && coding->mode & CODING_MODE_LAST_BLOCK)
 232     /* If the source ends by partial bytes to construct a character,
 233        treat them as eight-bit raw data.  */
 234     while (src_base < src_end && charbuf < charbuf_end)
 235       *charbuf++ = *src_base++;
 236   /* Remember how many bytes and characters we consumed.  If the
 237      source is multibyte, the bytes and chars are not identical.  */
 238   coding->consumed = coding->consumed_char = src_base - coding->source;
 239   /* Remember how many characters we produced.  */
 240   coding->charbuf_used = charbuf - coding->charbuf;
 241 }
 242 #endif
 243
 244 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 245
 246   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 247   internal multibyte format by CODING.  The resulting byte sequence
 248   goes to a place pointed to by DESTINATION, the length of which
 249   should not exceed DST_BYTES.
 250
 251   These functions set the information of original and encoded texts in
 252   the members produced, produced_char, consumed, and consumed_char of
 253   the structure *CODING.  They also set the member result to one of
 254   CODING_RESULT_XXX indicating how the encoding finished.
 255
 256   DST_BYTES zero means that source area and destination area are
 257   overlapped, which means that we can produce a encoded text until it
 258   reaches at the head of not-yet-encoded source text.
 259
 260   Below is a template of these functions.  */
 261 #if 0
 262 static void
 263 encode_coding_XXX (coding)
 264      struct coding_system *coding;
 265 {
 266   int multibytep = coding->dst_multibyte;
 267   int *charbuf = coding->charbuf;
 268   int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
 269   unsigned char *dst = coding->destination + coding->produced;
 270   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 271   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 272   int produced_chars = 0;
 273
 274   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 275     {
 276       int c = *charbuf;
 277       /* Encode C into DST, and increment DST.  */
 278     }
 279  label_no_more_destination:
 280   /* How many chars and bytes we produced.  */
 281   coding->produced_char += produced_chars;
 282   coding->produced = dst - coding->destination;
 283 }
 284 #endif
 285
 286 \f
 287 /*** 1. Preamble ***/
 288
 289 #include <config.h>
 290 #include <stdio.h>
 291
 292 #include "lisp.h"
 293 #include "buffer.h"
 294 #include "character.h"
 295 #include "charset.h"
 296 #include "ccl.h"
 297 #include "composite.h"
 298 #include "coding.h"
 299 #include "window.h"
 300
 301 Lisp_Object Vcoding_system_hash_table;
 302
 303 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 304 Lisp_Object Qunix, Qdos;
 305 extern Lisp_Object Qmac;        /* frame.c */
 306 Lisp_Object Qbuffer_file_coding_system;
 307 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 308 Lisp_Object Qdefault_char;
 309 Lisp_Object Qno_conversion, Qundecided;
 310 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 311 Lisp_Object Qbig, Qlittle;
 312 Lisp_Object Qcoding_system_history;
 313 Lisp_Object Qvalid_codes;
 314 Lisp_Object QCcategory;
 315
 316 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 317 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 318 Lisp_Object Qstart_process, Qopen_network_stream;
 319 Lisp_Object Qtarget_idx;
 320
 321 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 322 Lisp_Object Qinterrupted, Qinsufficient_memory;
 323
 324 int coding_system_require_warning;
 325
 326 Lisp_Object Vselect_safe_coding_system_function;
 327
 328 /* Mnemonic string for each format of end-of-line.  */
 329 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 330 /* Mnemonic string to indicate format of end-of-line is not yet
 331    decided.  */
 332 Lisp_Object eol_mnemonic_undecided;
 333
 334 #ifdef emacs
 335
 336 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 337
 338 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 339
 340 /* Coding system emacs-mule and raw-text are for converting only
 341    end-of-line format.  */
 342 Lisp_Object Qemacs_mule, Qraw_text;
 343 Lisp_Object Qutf_8_emacs;
 344
 345 /* Coding-systems are handed between Emacs Lisp programs and C internal
 346    routines by the following three variables.  */
 347 /* Coding-system for reading files and receiving data from process.  */
 348 Lisp_Object Vcoding_system_for_read;
 349 /* Coding-system for writing files and sending data to process.  */
 350 Lisp_Object Vcoding_system_for_write;
 351 /* Coding-system actually used in the latest I/O.  */
 352 Lisp_Object Vlast_coding_system_used;
 353 /* Set to non-nil when an error is detected while code conversion.  */
 354 Lisp_Object Vlast_code_conversion_error;
 355 /* A vector of length 256 which contains information about special
 356    Latin codes (especially for dealing with Microsoft codes).  */
 357 Lisp_Object Vlatin_extra_code_table;
 358
 359 /* Flag to inhibit code conversion of end-of-line format.  */
 360 int inhibit_eol_conversion;
 361
 362 /* Flag to inhibit ISO2022 escape sequence detection.  */
 363 int inhibit_iso_escape_detection;
 364
 365 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 366 int inherit_process_coding_system;
 367
 368 /* Coding system to be used to encode text for terminal display.  */
 369 struct coding_system terminal_coding;
 370
 371 /* Coding system to be used to encode text for terminal display when
 372    terminal coding system is nil.  */
 373 struct coding_system safe_terminal_coding;
 374
 375 /* Coding system of what is sent from terminal keyboard.  */
 376 struct coding_system keyboard_coding;
 377
 378 Lisp_Object Vfile_coding_system_alist;
 379 Lisp_Object Vprocess_coding_system_alist;
 380 Lisp_Object Vnetwork_coding_system_alist;
 381
 382 Lisp_Object Vlocale_coding_system;
 383
 384 #endif /* emacs */
 385
 386 /* Flag to tell if we look up translation table on character code
 387    conversion.  */
 388 Lisp_Object Venable_character_translation;
 389 /* Standard translation table to look up on decoding (reading).  */
 390 Lisp_Object Vstandard_translation_table_for_decode;
 391 /* Standard translation table to look up on encoding (writing).  */
 392 Lisp_Object Vstandard_translation_table_for_encode;
 393
 394 Lisp_Object Qtranslation_table;
 395 Lisp_Object Qtranslation_table_id;
 396 Lisp_Object Qtranslation_table_for_decode;
 397 Lisp_Object Qtranslation_table_for_encode;
 398
 399 /* Alist of charsets vs revision number.  */
 400 static Lisp_Object Vcharset_revision_table;
 401
 402 /* Default coding systems used for process I/O.  */
 403 Lisp_Object Vdefault_process_coding_system;
 404
 405 /* Char table for translating Quail and self-inserting input.  */
 406 Lisp_Object Vtranslation_table_for_input;
 407
 408 /* Two special coding systems.  */
 409 Lisp_Object Vsjis_coding_system;
 410 Lisp_Object Vbig5_coding_system;
 411
 412
 413 static void record_conversion_result (struct coding_system *coding,
 414                                       enum coding_result_code result);
 415 static int detect_coding_utf_8 P_ ((struct coding_system *,
 416                                     struct coding_detection_info *info));
 417 static void decode_coding_utf_8 P_ ((struct coding_system *));
 418 static int encode_coding_utf_8 P_ ((struct coding_system *));
 419
 420 static int detect_coding_utf_16 P_ ((struct coding_system *,
 421                                      struct coding_detection_info *info));
 422 static void decode_coding_utf_16 P_ ((struct coding_system *));
 423 static int encode_coding_utf_16 P_ ((struct coding_system *));
 424
 425 static int detect_coding_iso_2022 P_ ((struct coding_system *,
 426                                        struct coding_detection_info *info));
 427 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 428 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 429
 430 static int detect_coding_emacs_mule P_ ((struct coding_system *,
 431                                          struct coding_detection_info *info));
 432 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 433 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 434
 435 static int detect_coding_sjis P_ ((struct coding_system *,
 436                                    struct coding_detection_info *info));
 437 static void decode_coding_sjis P_ ((struct coding_system *));
 438 static int encode_coding_sjis P_ ((struct coding_system *));
 439
 440 static int detect_coding_big5 P_ ((struct coding_system *,
 441                                    struct coding_detection_info *info));
 442 static void decode_coding_big5 P_ ((struct coding_system *));
 443 static int encode_coding_big5 P_ ((struct coding_system *));
 444
 445 static int detect_coding_ccl P_ ((struct coding_system *,
 446                                   struct coding_detection_info *info));
 447 static void decode_coding_ccl P_ ((struct coding_system *));
 448 static int encode_coding_ccl P_ ((struct coding_system *));
 449
 450 static void decode_coding_raw_text P_ ((struct coding_system *));
 451 static int encode_coding_raw_text P_ ((struct coding_system *));
 452
 453
 454 /* ISO2022 section */
 455
 456 #define CODING_ISO_INITIAL(coding, reg)                 \
 457   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 458                      coding_attr_iso_initial),          \
 459                reg)))
 460
 461
 462 #define CODING_ISO_REQUEST(coding, charset_id)  \
 463   ((charset_id <= (coding)->max_charset_id      \
 464     ? (coding)->safe_charsets[charset_id]       \
 465     : -1))
 466
 467
 468 #define CODING_ISO_FLAGS(coding)        \
 469   ((coding)->spec.iso_2022.flags)
 470 #define CODING_ISO_DESIGNATION(coding, reg)     \
 471   ((coding)->spec.iso_2022.current_designation[reg])
 472 #define CODING_ISO_INVOCATION(coding, plane)    \
 473   ((coding)->spec.iso_2022.current_invocation[plane])
 474 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 475   ((coding)->spec.iso_2022.single_shifting)
 476 #define CODING_ISO_BOL(coding)  \
 477   ((coding)->spec.iso_2022.bol)
 478 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 479   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 480
 481 /* Control characters of ISO2022.  */
 482                         /* code */      /* function */
 483 #define ISO_CODE_LF     0x0A            /* line-feed */
 484 #define ISO_CODE_CR     0x0D            /* carriage-return */
 485 #define ISO_CODE_SO     0x0E            /* shift-out */
 486 #define ISO_CODE_SI     0x0F            /* shift-in */
 487 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 488 #define ISO_CODE_ESC    0x1B            /* escape */
 489 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 490 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 491 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 492
 493 /* All code (1-byte) of ISO2022 is classified into one of the
 494    followings.  */
 495 enum iso_code_class_type
 496   {
 497     ISO_control_0,              /* Control codes in the range
 498                                    0x00..0x1F and 0x7F, except for the
 499                                    following 5 codes.  */
 500     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 501     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 502     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 503     ISO_escape,                 /* ISO_CODE_SO (0x1B) */
 504     ISO_control_1,              /* Control codes in the range
 505                                    0x80..0x9F, except for the
 506                                    following 3 codes.  */
 507     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 508     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 509     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 510     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 511     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 512     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 513     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 514   };
 515
 516 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
 517     `iso-flags' attribute of an iso2022 coding system.  */
 518
 519 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 520    instead of the correct short-form sequence (e.g. ESC $ A).  */
 521 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 522
 523 /* If set, reset graphic planes and registers at end-of-line to the
 524    initial state.  */
 525 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 526
 527 /* If set, reset graphic planes and registers before any control
 528    characters to the initial state.  */
 529 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 530
 531 /* If set, encode by 7-bit environment.  */
 532 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 533
 534 /* If set, use locking-shift function.  */
 535 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 536
 537 /* If set, use single-shift function.  Overwrite
 538    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 539 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 540
 541 /* If set, use designation escape sequence.  */
 542 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 543
 544 /* If set, produce revision number sequence.  */
 545 #define CODING_ISO_FLAG_REVISION        0x0080
 546
 547 /* If set, produce ISO6429's direction specifying sequence.  */
 548 #define CODING_ISO_FLAG_DIRECTION       0x0100
 549
 550 /* If set, assume designation states are reset at beginning of line on
 551    output.  */
 552 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 553
 554 /* If set, designation sequence should be placed at beginning of line
 555    on output.  */
 556 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 557
 558 /* If set, do not encode unsafe charactes on output.  */
 559 #define CODING_ISO_FLAG_SAFE            0x0800
 560
 561 /* If set, extra latin codes (128..159) are accepted as a valid code
 562    on input.  */
 563 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 564
 565 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 566
 567 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 568
 569 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 570
 571 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 572
 573 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 574
 575 /* A character to be produced on output if encoding of the original
 576    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 577 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 578
 579
 580 /* UTF-16 section */
 581 #define CODING_UTF_16_BOM(coding)       \
 582   ((coding)->spec.utf_16.bom)
 583
 584 #define CODING_UTF_16_ENDIAN(coding)    \
 585   ((coding)->spec.utf_16.endian)
 586
 587 #define CODING_UTF_16_SURROGATE(coding) \
 588   ((coding)->spec.utf_16.surrogate)
 589
 590
 591 /* CCL section */
 592 #define CODING_CCL_DECODER(coding)      \
 593   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 594 #define CODING_CCL_ENCODER(coding)      \
 595   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 596 #define CODING_CCL_VALIDS(coding)                                          \
 597   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 598
 599 /* Index for each coding category in `coding_categories' */
 600
 601 enum coding_category
 602   {
 603     coding_category_iso_7,
 604     coding_category_iso_7_tight,
 605     coding_category_iso_8_1,
 606     coding_category_iso_8_2,
 607     coding_category_iso_7_else,
 608     coding_category_iso_8_else,
 609     coding_category_utf_8,
 610     coding_category_utf_16_auto,
 611     coding_category_utf_16_be,
 612     coding_category_utf_16_le,
 613     coding_category_utf_16_be_nosig,
 614     coding_category_utf_16_le_nosig,
 615     coding_category_charset,
 616     coding_category_sjis,
 617     coding_category_big5,
 618     coding_category_ccl,
 619     coding_category_emacs_mule,
 620     /* All above are targets of code detection.  */
 621     coding_category_raw_text,
 622     coding_category_undecided,
 623     coding_category_max
 624   };
 625
 626 /* Definitions of flag bits used in detect_coding_XXXX.  */
 627 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 628 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 629 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 630 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 631 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 632 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 633 #define CATEGORY_MASK_UTF_8             (1 << coding_category_utf_8)
 634 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 635 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 636 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 637 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 638 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 639 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 640 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 641 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 642 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 643 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 644 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 645
 646 /* This value is returned if detect_coding_mask () find nothing other
 647    than ASCII characters.  */
 648 #define CATEGORY_MASK_ANY               \
 649   (CATEGORY_MASK_ISO_7                  \
 650    | CATEGORY_MASK_ISO_7_TIGHT          \
 651    | CATEGORY_MASK_ISO_8_1              \
 652    | CATEGORY_MASK_ISO_8_2              \
 653    | CATEGORY_MASK_ISO_7_ELSE           \
 654    | CATEGORY_MASK_ISO_8_ELSE           \
 655    | CATEGORY_MASK_UTF_8                \
 656    | CATEGORY_MASK_UTF_16_BE            \
 657    | CATEGORY_MASK_UTF_16_LE            \
 658    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 659    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 660    | CATEGORY_MASK_CHARSET              \
 661    | CATEGORY_MASK_SJIS                 \
 662    | CATEGORY_MASK_BIG5                 \
 663    | CATEGORY_MASK_CCL                  \
 664    | CATEGORY_MASK_EMACS_MULE)
 665
 666
 667 #define CATEGORY_MASK_ISO_7BIT \
 668   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 669
 670 #define CATEGORY_MASK_ISO_8BIT \
 671   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 672
 673 #define CATEGORY_MASK_ISO_ELSE \
 674   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 675
 676 #define CATEGORY_MASK_ISO_ESCAPE        \
 677   (CATEGORY_MASK_ISO_7                  \
 678    | CATEGORY_MASK_ISO_7_TIGHT          \
 679    | CATEGORY_MASK_ISO_7_ELSE           \
 680    | CATEGORY_MASK_ISO_8_ELSE)
 681
 682 #define CATEGORY_MASK_ISO       \
 683   (  CATEGORY_MASK_ISO_7BIT     \
 684      | CATEGORY_MASK_ISO_8BIT   \
 685      | CATEGORY_MASK_ISO_ELSE)
 686
 687 #define CATEGORY_MASK_UTF_16            \
 688   (CATEGORY_MASK_UTF_16_BE              \
 689    | CATEGORY_MASK_UTF_16_LE            \
 690    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 691    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 692
 693
 694 /* List of symbols `coding-category-xxx' ordered by priority.  This
 695    variable is exposed to Emacs Lisp.  */
 696 static Lisp_Object Vcoding_category_list;
 697
 698 /* Table of coding categories (Lisp symbols).  This variable is for
 699    internal use oly.  */
 700 static Lisp_Object Vcoding_category_table;
 701
 702 /* Table of coding-categories ordered by priority.  */
 703 static enum coding_category coding_priorities[coding_category_max];
 704
 705 /* Nth element is a coding context for the coding system bound to the
 706    Nth coding category.  */
 707 static struct coding_system coding_categories[coding_category_max];
 708
 709 /*** Commonly used macros and functions ***/
 710
 711 #ifndef min
 712 #define min(a, b) ((a) < (b) ? (a) : (b))
 713 #endif
 714 #ifndef max
 715 #define max(a, b) ((a) > (b) ? (a) : (b))
 716 #endif
 717
 718 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 719   do {                                                  \
 720     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 721     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 722   } while (0)
 723
 724
 725 /* Safely get one byte from the source text pointed by SRC which ends
 726    at SRC_END, and set C to that byte.  If there are not enough bytes
 727    in the source, it jumps to `no_more_source'.  If multibytep is
 728    nonzero, and a multibyte character is found at SRC, set C to the
 729    negative value of the character code.  The caller should declare
 730    and set these variables appropriately in advance:
 731         src, src_end, multibytep */
 732
 733 #define ONE_MORE_BYTE(c)                                \
 734   do {                                                  \
 735     if (src == src_end)                                 \
 736       {                                                 \
 737         if (src_base < src)                             \
 738           record_conversion_result                      \
 739             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 740         goto no_more_source;                            \
 741       }                                                 \
 742     c = *src++;                                         \
 743     if (multibytep && (c & 0x80))                       \
 744       {                                                 \
 745         if ((c & 0xFE) == 0xC0)                         \
 746           c = ((c & 1) << 6) | *src++;                  \
 747         else                                            \
 748           {                                             \
 749             c = - string_char (--src, &src, NULL);      \
 750             record_conversion_result                    \
 751               (coding, CODING_RESULT_INVALID_SRC);      \
 752           }                                             \
 753       }                                                 \
 754     consumed_chars++;                                   \
 755   } while (0)
 756
 757
 758 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 759   do {                                                  \
 760     c = *src++;                                         \
 761     if (multibytep && (c & 0x80))                       \
 762       {                                                 \
 763         if ((c & 0xFE) == 0xC0)                         \
 764           c = ((c & 1) << 6) | *src++;                  \
 765         else                                            \
 766           {                                             \
 767             c = - string_char (--src, &src, NULL);      \
 768             record_conversion_result                    \
 769               (coding, CODING_RESULT_INVALID_SRC);      \
 770           }                                             \
 771       }                                                 \
 772     consumed_chars++;                                   \
 773   } while (0)
 774
 775
 776 /* Store a byte C in the place pointed by DST and increment DST to the
 777    next free point, and increment PRODUCED_CHARS.  The caller should
 778    assure that C is 0..127, and declare and set the variable `dst'
 779    appropriately in advance.
 780 */
 781
 782
 783 #define EMIT_ONE_ASCII_BYTE(c)  \
 784   do {                          \
 785     produced_chars++;           \
 786     *dst++ = (c);               \
 787   } while (0)
 788
 789
 790 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 791
 792 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 793   do {                                  \
 794     produced_chars += 2;                \
 795     *dst++ = (c1), *dst++ = (c2);       \
 796   } while (0)
 797
 798
 799 /* Store a byte C in the place pointed by DST and increment DST to the
 800    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 801    nonzero, store in an appropriate multibyte from.  The caller should
 802    declare and set the variables `dst' and `multibytep' appropriately
 803    in advance.  */
 804
 805 #define EMIT_ONE_BYTE(c)                \
 806   do {                                  \
 807     produced_chars++;                   \
 808     if (multibytep)                     \
 809       {                                 \
 810         int ch = (c);                   \
 811         if (ch >= 0x80)                 \
 812           ch = BYTE8_TO_CHAR (ch);      \
 813         CHAR_STRING_ADVANCE (ch, dst);  \
 814       }                                 \
 815     else                                \
 816       *dst++ = (c);                     \
 817   } while (0)
 818
 819
 820 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 821
 822 #define EMIT_TWO_BYTES(c1, c2)          \
 823   do {                                  \
 824     produced_chars += 2;                \
 825     if (multibytep)                     \
 826       {                                 \
 827         int ch;                         \
 828                                         \
 829         ch = (c1);                      \
 830         if (ch >= 0x80)                 \
 831           ch = BYTE8_TO_CHAR (ch);      \
 832         CHAR_STRING_ADVANCE (ch, dst);  \
 833         ch = (c2);                      \
 834         if (ch >= 0x80)                 \
 835           ch = BYTE8_TO_CHAR (ch);      \
 836         CHAR_STRING_ADVANCE (ch, dst);  \
 837       }                                 \
 838     else                                \
 839       {                                 \
 840         *dst++ = (c1);                  \
 841         *dst++ = (c2);                  \
 842       }                                 \
 843   } while (0)
 844
 845
 846 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 847   do {                                  \
 848     EMIT_ONE_BYTE (c1);                 \
 849     EMIT_TWO_BYTES (c2, c3);            \
 850   } while (0)
 851
 852
 853 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 854   do {                                          \
 855     EMIT_TWO_BYTES (c1, c2);                    \
 856     EMIT_TWO_BYTES (c3, c4);                    \
 857   } while (0)
 858
 859
 860 static void
 861 record_conversion_result (struct coding_system *coding,
 862                           enum coding_result_code result)
 863 {
 864   coding->result = result;
 865   switch (result)
 866     {
 867     case CODING_RESULT_INSUFFICIENT_SRC:
 868       Vlast_code_conversion_error = Qinsufficient_source;
 869       break;
 870     case CODING_RESULT_INCONSISTENT_EOL:
 871       Vlast_code_conversion_error = Qinconsistent_eol;
 872       break;
 873     case CODING_RESULT_INVALID_SRC:
 874       Vlast_code_conversion_error = Qinvalid_source;
 875       break;
 876     case CODING_RESULT_INTERRUPT:
 877       Vlast_code_conversion_error = Qinterrupted;
 878       break;
 879     case CODING_RESULT_INSUFFICIENT_MEM:
 880       Vlast_code_conversion_error = Qinsufficient_memory;
 881       break;
 882     }
 883 }
 884
 885 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 886   do {                                                                       \
 887     charset_map_loaded = 0;                                                  \
 888     c = DECODE_CHAR (charset, code);                                         \
 889     if (charset_map_loaded)                                                  \
 890       {                                                                      \
 891         const unsigned char *orig = coding->source;                          \
 892         EMACS_INT offset;                                                    \
 893                                                                              \
 894         coding_set_source (coding);                                          \
 895         offset = coding->source - orig;                                      \
 896         src += offset;                                                       \
 897         src_base += offset;                                                  \
 898         src_end += offset;                                                   \
 899       }                                                                      \
 900   } while (0)
 901
 902
 903 #define ASSURE_DESTINATION(bytes)                               \
 904   do {                                                          \
 905     if (dst + (bytes) >= dst_end)                               \
 906       {                                                         \
 907         int more_bytes = charbuf_end - charbuf + (bytes);       \
 908                                                                 \
 909         dst = alloc_destination (coding, more_bytes, dst);      \
 910         dst_end = coding->destination + coding->dst_bytes;      \
 911       }                                                         \
 912   } while (0)
 913
 914
 915
 916 static void
 917 coding_set_source (coding)
 918      struct coding_system *coding;
 919 {
 920   if (BUFFERP (coding->src_object))
 921     {
 922       struct buffer *buf = XBUFFER (coding->src_object);
 923
 924       if (coding->src_pos < 0)
 925         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
 926       else
 927         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
 928     }
 929   else if (STRINGP (coding->src_object))
 930     {
 931       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
 932     }
 933   else
 934     /* Otherwise, the source is C string and is never relocated
 935        automatically.  Thus we don't have to update anything.  */
 936     ;
 937 }
 938
 939 static void
 940 coding_set_destination (coding)
 941      struct coding_system *coding;
 942 {
 943   if (BUFFERP (coding->dst_object))
 944     {
 945       if (coding->src_pos < 0)
 946         {
 947           coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
 948           coding->dst_bytes = (GAP_END_ADDR
 949                                - (coding->src_bytes - coding->consumed)
 950                                - coding->destination);
 951         }
 952       else
 953         {
 954           /* We are sure that coding->dst_pos_byte is before the gap
 955              of the buffer. */
 956           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
 957                                  + coding->dst_pos_byte - 1);
 958           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
 959                                - coding->destination);
 960         }
 961     }
 962   else
 963     /* Otherwise, the destination is C string and is never relocated
 964        automatically.  Thus we don't have to update anything.  */
 965     ;
 966 }
 967
 968
 969 static void
 970 coding_alloc_by_realloc (coding, bytes)
 971      struct coding_system *coding;
 972      EMACS_INT bytes;
 973 {
 974   coding->destination = (unsigned char *) xrealloc (coding->destination,
 975                                                     coding->dst_bytes + bytes);
 976   coding->dst_bytes += bytes;
 977 }
 978
 979 static void
 980 coding_alloc_by_making_gap (coding, bytes)
 981      struct coding_system *coding;
 982      EMACS_INT bytes;
 983 {
 984   if (BUFFERP (coding->dst_object)
 985       && EQ (coding->src_object, coding->dst_object))
 986     {
 987       EMACS_INT add = coding->src_bytes - coding->consumed;
 988
 989       GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
 990       make_gap (bytes);
 991       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
 992     }
 993   else
 994     {
 995       Lisp_Object this_buffer;
 996
 997       this_buffer = Fcurrent_buffer ();
 998       set_buffer_internal (XBUFFER (coding->dst_object));
 999       make_gap (bytes);
1000       set_buffer_internal (XBUFFER (this_buffer));
1001     }
1002 }
1003
1004
1005 static unsigned char *
1006 alloc_destination (coding, nbytes, dst)
1007      struct coding_system *coding;
1008      EMACS_INT nbytes;
1009      unsigned char *dst;
1010 {
1011   EMACS_INT offset = dst - coding->destination;
1012
1013   if (BUFFERP (coding->dst_object))
1014     coding_alloc_by_making_gap (coding, nbytes);
1015   else
1016     coding_alloc_by_realloc (coding, nbytes);
1017   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1018   coding_set_destination (coding);
1019   dst = coding->destination + offset;
1020   return dst;
1021 }
1022
1023 /** Macros for annotations.  */
1024
1025 /* Maximum length of annotation data (sum of annotations for
1026    composition and charset).  */
1027 #define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5)
1028
1029 /* An annotation data is stored in the array coding->charbuf in this
1030    format:
1031      [ -LENGTH ANNOTATION_MASK FROM TO ... ]
1032    LENGTH is the number of elements in the annotation.
1033    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1034    FROM and TO specify the range of text annotated.  They are relative
1035    to coding->src_pos (on encoding) or coding->dst_pos (on decoding).
1036
1037    The format of the following elements depend on ANNOTATION_MASK.
1038
1039    In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1040    follows:
1041      ... METHOD [ COMPOSITION-COMPONENTS ... ]
1042    METHOD is one of enum composition_method.
1043    Optionnal COMPOSITION-COMPONENTS are characters and composition
1044    rules.
1045
1046    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1047    follows.  */
1048
1049 #define ADD_ANNOTATION_DATA(buf, len, mask, from, to)   \
1050   do {                                                  \
1051     *(buf)++ = -(len);                                  \
1052     *(buf)++ = (mask);                                  \
1053     *(buf)++ = (from);                                  \
1054     *(buf)++ = (to);                                    \
1055     coding->annotated = 1;                              \
1056   } while (0);
1057
1058 #define ADD_COMPOSITION_DATA(buf, from, to, method)                           \
1059   do {                                                                        \
1060     ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
1061     *buf++ = method;                                                          \
1062   } while (0)
1063
1064
1065 #define ADD_CHARSET_DATA(buf, from, to, id)                               \
1066   do {                                                                    \
1067     ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \
1068     *buf++ = id;                                                          \
1069   } while (0)
1070
1071 \f
1072 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1073
1074
1075
1076 \f
1077 /*** 3. UTF-8 ***/
1078
1079 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1080    Check if a text is encoded in UTF-8.  If it is, return 1, else
1081    return 0.  */
1082
1083 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
1084 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
1085 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1086 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1087 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1088 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1089
1090 static int
1091 detect_coding_utf_8 (coding, detect_info)
1092      struct coding_system *coding;
1093      struct coding_detection_info *detect_info;
1094 {
1095   const unsigned char *src = coding->source, *src_base;
1096   const unsigned char *src_end = coding->source + coding->src_bytes;
1097   int multibytep = coding->src_multibyte;
1098   int consumed_chars = 0;
1099   int found = 0;
1100
1101   detect_info->checked |= CATEGORY_MASK_UTF_8;
1102   /* A coding system of this category is always ASCII compatible.  */
1103   src += coding->head_ascii;
1104
1105   while (1)
1106     {
1107       int c, c1, c2, c3, c4;
1108
1109       src_base = src;
1110       ONE_MORE_BYTE (c);
1111       if (c < 0 || UTF_8_1_OCTET_P (c))
1112         continue;
1113       ONE_MORE_BYTE (c1);
1114       if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1115         break;
1116       if (UTF_8_2_OCTET_LEADING_P (c))
1117         {
1118           found = CATEGORY_MASK_UTF_8;
1119           continue;
1120         }
1121       ONE_MORE_BYTE (c2);
1122       if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1123         break;
1124       if (UTF_8_3_OCTET_LEADING_P (c))
1125         {
1126           found = CATEGORY_MASK_UTF_8;
1127           continue;
1128         }
1129       ONE_MORE_BYTE (c3);
1130       if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1131         break;
1132       if (UTF_8_4_OCTET_LEADING_P (c))
1133         {
1134           found = CATEGORY_MASK_UTF_8;
1135           continue;
1136         }
1137       ONE_MORE_BYTE (c4);
1138       if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1139         break;
1140       if (UTF_8_5_OCTET_LEADING_P (c))
1141         {
1142           found = CATEGORY_MASK_UTF_8;
1143           continue;
1144         }
1145       break;
1146     }
1147   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1148   return 0;
1149
1150  no_more_source:
1151   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1152     {
1153       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1154       return 0;
1155     }
1156   detect_info->found |= found;
1157   return 1;
1158 }
1159
1160
1161 static void
1162 decode_coding_utf_8 (coding)
1163      struct coding_system *coding;
1164 {
1165   const unsigned char *src = coding->source + coding->consumed;
1166   const unsigned char *src_end = coding->source + coding->src_bytes;
1167   const unsigned char *src_base;
1168   int *charbuf = coding->charbuf;
1169   int *charbuf_end = charbuf + coding->charbuf_size;
1170   int consumed_chars = 0, consumed_chars_base;
1171   int multibytep = coding->src_multibyte;
1172   Lisp_Object attr, charset_list;
1173
1174   CODING_GET_INFO (coding, attr, charset_list);
1175
1176   while (1)
1177     {
1178       int c, c1, c2, c3, c4, c5;
1179
1180       src_base = src;
1181       consumed_chars_base = consumed_chars;
1182
1183       if (charbuf >= charbuf_end)
1184         break;
1185
1186       ONE_MORE_BYTE (c1);
1187       if (c1 < 0)
1188         {
1189           c = - c1;
1190         }
1191       else if (UTF_8_1_OCTET_P(c1))
1192         {
1193           c = c1;
1194         }
1195       else
1196         {
1197           ONE_MORE_BYTE (c2);
1198           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1199             goto invalid_code;
1200           if (UTF_8_2_OCTET_LEADING_P (c1))
1201             {
1202               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1203               /* Reject overlong sequences here and below.  Encoders
1204                  producing them are incorrect, they can be misleading,
1205                  and they mess up read/write invariance.  */
1206               if (c < 128)
1207                 goto invalid_code;
1208             }
1209           else
1210             {
1211               ONE_MORE_BYTE (c3);
1212               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1213                 goto invalid_code;
1214               if (UTF_8_3_OCTET_LEADING_P (c1))
1215                 {
1216                   c = (((c1 & 0xF) << 12)
1217                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1218                   if (c < 0x800
1219                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1220                     goto invalid_code;
1221                 }
1222               else
1223                 {
1224                   ONE_MORE_BYTE (c4);
1225                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1226                     goto invalid_code;
1227                   if (UTF_8_4_OCTET_LEADING_P (c1))
1228                     {
1229                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1230                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1231                     if (c < 0x10000)
1232                       goto invalid_code;
1233                     }
1234                   else
1235                     {
1236                       ONE_MORE_BYTE (c5);
1237                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1238                         goto invalid_code;
1239                       if (UTF_8_5_OCTET_LEADING_P (c1))
1240                         {
1241                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1242                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1243                                | (c5 & 0x3F));
1244                           if ((c > MAX_CHAR) || (c < 0x200000))
1245                             goto invalid_code;
1246                         }
1247                       else
1248                         goto invalid_code;
1249                     }
1250                 }
1251             }
1252         }
1253
1254       *charbuf++ = c;
1255       continue;
1256
1257     invalid_code:
1258       src = src_base;
1259       consumed_chars = consumed_chars_base;
1260       ONE_MORE_BYTE (c);
1261       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1262       coding->errors++;
1263     }
1264
1265  no_more_source:
1266   coding->consumed_char += consumed_chars_base;
1267   coding->consumed = src_base - coding->source;
1268   coding->charbuf_used = charbuf - coding->charbuf;
1269 }
1270
1271
1272 static int
1273 encode_coding_utf_8 (coding)
1274      struct coding_system *coding;
1275 {
1276   int multibytep = coding->dst_multibyte;
1277   int *charbuf = coding->charbuf;
1278   int *charbuf_end = charbuf + coding->charbuf_used;
1279   unsigned char *dst = coding->destination + coding->produced;
1280   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1281   int produced_chars = 0;
1282   int c;
1283
1284   if (multibytep)
1285     {
1286       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1287
1288       while (charbuf < charbuf_end)
1289         {
1290           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1291
1292           ASSURE_DESTINATION (safe_room);
1293           c = *charbuf++;
1294           if (CHAR_BYTE8_P (c))
1295             {
1296               c = CHAR_TO_BYTE8 (c);
1297               EMIT_ONE_BYTE (c);
1298             }
1299           else
1300             {
1301               CHAR_STRING_ADVANCE (c, pend);
1302               for (p = str; p < pend; p++)
1303                 EMIT_ONE_BYTE (*p);
1304             }
1305         }
1306     }
1307   else
1308     {
1309       int safe_room = MAX_MULTIBYTE_LENGTH;
1310
1311       while (charbuf < charbuf_end)
1312         {
1313           ASSURE_DESTINATION (safe_room);
1314           c = *charbuf++;
1315           dst += CHAR_STRING (c, dst);
1316           produced_chars++;
1317         }
1318     }
1319   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1320   coding->produced_char += produced_chars;
1321   coding->produced = dst - coding->destination;
1322   return 0;
1323 }
1324
1325
1326 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1327    Check if a text is encoded in one of UTF-16 based coding systems.
1328    If it is, return 1, else return 0.  */
1329
1330 #define UTF_16_HIGH_SURROGATE_P(val) \
1331   (((val) & 0xFC00) == 0xD800)
1332
1333 #define UTF_16_LOW_SURROGATE_P(val) \
1334   (((val) & 0xFC00) == 0xDC00)
1335
1336 #define UTF_16_INVALID_P(val)   \
1337   (((val) == 0xFFFE)            \
1338    || ((val) == 0xFFFF)         \
1339    || UTF_16_LOW_SURROGATE_P (val))
1340
1341
1342 static int
1343 detect_coding_utf_16 (coding, detect_info)
1344      struct coding_system *coding;
1345      struct coding_detection_info *detect_info;
1346 {
1347   const unsigned char *src = coding->source, *src_base = src;
1348   const unsigned char *src_end = coding->source + coding->src_bytes;
1349   int multibytep = coding->src_multibyte;
1350   int consumed_chars = 0;
1351   int c1, c2;
1352
1353   detect_info->checked |= CATEGORY_MASK_UTF_16;
1354   if (coding->mode & CODING_MODE_LAST_BLOCK
1355       && (coding->src_chars & 1))
1356     {
1357       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1358       return 0;
1359     }
1360
1361   ONE_MORE_BYTE (c1);
1362   ONE_MORE_BYTE (c2);
1363   if ((c1 == 0xFF) && (c2 == 0xFE))
1364     {
1365       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1366                              | CATEGORY_MASK_UTF_16_AUTO);
1367       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1368                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1369                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1370     }
1371   else if ((c1 == 0xFE) && (c2 == 0xFF))
1372     {
1373       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1374                              | CATEGORY_MASK_UTF_16_AUTO);
1375       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1376                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1377                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1378     }
1379   else if (c1 >= 0 && c2 >= 0)
1380     {
1381       unsigned char b1[256], b2[256];
1382       int b1_variants = 1, b2_variants = 1;
1383       int n;
1384
1385       bzero (b1, 256), bzero (b2, 256);
1386       b1[c1]++, b2[c2]++;
1387       for (n = 0; n < 256 && src < src_end; n++)
1388         {
1389           src_base = src;
1390           ONE_MORE_BYTE (c1);
1391           ONE_MORE_BYTE (c2);
1392           if (c1 < 0 || c2 < 0)
1393             break;
1394           if (! b1[c1++]) b1_variants++;
1395           if (! b2[c2++]) b2_variants++;
1396         }
1397       if (b1_variants < b2_variants)
1398         detect_info->found |= CATEGORY_MASK_UTF_16_BE_NOSIG;
1399       else
1400         detect_info->found |= CATEGORY_MASK_UTF_16_LE_NOSIG;
1401       detect_info->rejected
1402         |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
1403     }
1404  no_more_source:
1405   return 1;
1406 }
1407
1408 static void
1409 decode_coding_utf_16 (coding)
1410      struct coding_system *coding;
1411 {
1412   const unsigned char *src = coding->source + coding->consumed;
1413   const unsigned char *src_end = coding->source + coding->src_bytes;
1414   const unsigned char *src_base;
1415   int *charbuf = coding->charbuf;
1416   int *charbuf_end = charbuf + coding->charbuf_size;
1417   int consumed_chars = 0, consumed_chars_base;
1418   int multibytep = coding->src_multibyte;
1419   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1420   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1421   int surrogate = CODING_UTF_16_SURROGATE (coding);
1422   Lisp_Object attr, charset_list;
1423
1424   CODING_GET_INFO (coding, attr, charset_list);
1425
1426   if (bom == utf_16_with_bom)
1427     {
1428       int c, c1, c2;
1429
1430       src_base = src;
1431       ONE_MORE_BYTE (c1);
1432       ONE_MORE_BYTE (c2);
1433       c = (c1 << 8) | c2;
1434
1435       if (endian == utf_16_big_endian
1436           ? c != 0xFEFF : c != 0xFFFE)
1437         {
1438           /* The first two bytes are not BOM.  Treat them as bytes
1439              for a normal character.  */
1440           src = src_base;
1441           coding->errors++;
1442         }
1443       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1444     }
1445   else if (bom == utf_16_detect_bom)
1446     {
1447       /* We have already tried to detect BOM and failed in
1448          detect_coding.  */
1449       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1450     }
1451
1452   while (1)
1453     {
1454       int c, c1, c2;
1455
1456       src_base = src;
1457       consumed_chars_base = consumed_chars;
1458
1459       if (charbuf + 2 >= charbuf_end)
1460         break;
1461
1462       ONE_MORE_BYTE (c1);
1463       if (c1 < 0)
1464         {
1465           *charbuf++ = -c1;
1466           continue;
1467         }
1468       ONE_MORE_BYTE (c2);
1469       if (c2 < 0)
1470         {
1471           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1472           *charbuf++ = -c2;
1473           continue;
1474         }
1475       c = (endian == utf_16_big_endian
1476            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1477       if (surrogate)
1478         {
1479           if (! UTF_16_LOW_SURROGATE_P (c))
1480             {
1481               if (endian == utf_16_big_endian)
1482                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1483               else
1484                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1485               *charbuf++ = c1;
1486               *charbuf++ = c2;
1487               coding->errors++;
1488               if (UTF_16_HIGH_SURROGATE_P (c))
1489                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1490               else
1491                 *charbuf++ = c;
1492             }
1493           else
1494             {
1495               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1496               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1497               *charbuf++ = c;
1498             }
1499         }
1500       else
1501         {
1502           if (UTF_16_HIGH_SURROGATE_P (c))
1503             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1504           else
1505             *charbuf++ = c;
1506         }
1507     }
1508
1509  no_more_source:
1510   coding->consumed_char += consumed_chars_base;
1511   coding->consumed = src_base - coding->source;
1512   coding->charbuf_used = charbuf - coding->charbuf;
1513 }
1514
1515 static int
1516 encode_coding_utf_16 (coding)
1517      struct coding_system *coding;
1518 {
1519   int multibytep = coding->dst_multibyte;
1520   int *charbuf = coding->charbuf;
1521   int *charbuf_end = charbuf + coding->charbuf_used;
1522   unsigned char *dst = coding->destination + coding->produced;
1523   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1524   int safe_room = 8;
1525   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1526   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1527   int produced_chars = 0;
1528   Lisp_Object attrs, charset_list;
1529   int c;
1530
1531   CODING_GET_INFO (coding, attrs, charset_list);
1532
1533   if (bom != utf_16_without_bom)
1534     {
1535       ASSURE_DESTINATION (safe_room);
1536       if (big_endian)
1537         EMIT_TWO_BYTES (0xFE, 0xFF);
1538       else
1539         EMIT_TWO_BYTES (0xFF, 0xFE);
1540       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1541     }
1542
1543   while (charbuf < charbuf_end)
1544     {
1545       ASSURE_DESTINATION (safe_room);
1546       c = *charbuf++;
1547       if (c >= MAX_UNICODE_CHAR)
1548         c = coding->default_char;
1549
1550       if (c < 0x10000)
1551         {
1552           if (big_endian)
1553             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1554           else
1555             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1556         }
1557       else
1558         {
1559           int c1, c2;
1560
1561           c -= 0x10000;
1562           c1 = (c >> 10) + 0xD800;
1563           c2 = (c & 0x3FF) + 0xDC00;
1564           if (big_endian)
1565             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1566           else
1567             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1568         }
1569     }
1570   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1571   coding->produced = dst - coding->destination;
1572   coding->produced_char += produced_chars;
1573   return 0;
1574 }
1575
1576 \f
1577 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1578
1579 /* Emacs' internal format for representation of multiple character
1580    sets is a kind of multi-byte encoding, i.e. characters are
1581    represented by variable-length sequences of one-byte codes.
1582
1583    ASCII characters and control characters (e.g. `tab', `newline') are
1584    represented by one-byte sequences which are their ASCII codes, in
1585    the range 0x00 through 0x7F.
1586
1587    8-bit characters of the range 0x80..0x9F are represented by
1588    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1589    code + 0x20).
1590
1591    8-bit characters of the range 0xA0..0xFF are represented by
1592    one-byte sequences which are their 8-bit code.
1593
1594    The other characters are represented by a sequence of `base
1595    leading-code', optional `extended leading-code', and one or two
1596    `position-code's.  The length of the sequence is determined by the
1597    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1598    whereas extended leading-code and position-code take the range 0xA0
1599    through 0xFF.  See `charset.h' for more details about leading-code
1600    and position-code.
1601
1602    --- CODE RANGE of Emacs' internal format ---
1603    character set        range
1604    -------------        -----
1605    ascii                0x00..0x7F
1606    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1607    eight-bit-graphic    0xA0..0xBF
1608    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1609    ---------------------------------------------
1610
1611    As this is the internal character representation, the format is
1612    usually not used externally (i.e. in a file or in a data sent to a
1613    process).  But, it is possible to have a text externally in this
1614    format (i.e. by encoding by the coding system `emacs-mule').
1615
1616    In that case, a sequence of one-byte codes has a slightly different
1617    form.
1618
1619    At first, all characters in eight-bit-control are represented by
1620    one-byte sequences which are their 8-bit code.
1621
1622    Next, character composition data are represented by the byte
1623    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1624    where,
1625         METHOD is 0xF0 plus one of composition method (enum
1626         composition_method),
1627
1628         BYTES is 0xA0 plus a byte length of this composition data,
1629
1630         CHARS is 0x20 plus a number of characters composed by this
1631         data,
1632
1633         COMPONENTs are characters of multibye form or composition
1634         rules encoded by two-byte of ASCII codes.
1635
1636    In addition, for backward compatibility, the following formats are
1637    also recognized as composition data on decoding.
1638
1639    0x80 MSEQ ...
1640    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1641
1642    Here,
1643         MSEQ is a multibyte form but in these special format:
1644           ASCII: 0xA0 ASCII_CODE+0x80,
1645           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1646         RULE is a one byte code of the range 0xA0..0xF0 that
1647         represents a composition rule.
1648   */
1649
1650 char emacs_mule_bytes[256];
1651
1652 int
1653 emacs_mule_char (coding, src, nbytes, nchars, id)
1654      struct coding_system *coding;
1655      const unsigned char *src;
1656      int *nbytes, *nchars, *id;
1657 {
1658   const unsigned char *src_end = coding->source + coding->src_bytes;
1659   const unsigned char *src_base = src;
1660   int multibytep = coding->src_multibyte;
1661   struct charset *charset;
1662   unsigned code;
1663   int c;
1664   int consumed_chars = 0;
1665
1666   ONE_MORE_BYTE (c);
1667   if (c < 0)
1668     {
1669       c = -c;
1670       charset = emacs_mule_charset[0];
1671     }
1672   else
1673     {
1674       switch (emacs_mule_bytes[c])
1675         {
1676         case 2:
1677           if (! (charset = emacs_mule_charset[c]))
1678             goto invalid_code;
1679           ONE_MORE_BYTE (c);
1680           if (c < 0)
1681             goto invalid_code;
1682           code = c & 0x7F;
1683           break;
1684
1685         case 3:
1686           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1687               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1688             {
1689               ONE_MORE_BYTE (c);
1690               if (c < 0 || ! (charset = emacs_mule_charset[c]))
1691                 goto invalid_code;
1692               ONE_MORE_BYTE (c);
1693               if (c < 0)
1694                 goto invalid_code;
1695               code = c & 0x7F;
1696             }
1697           else
1698             {
1699               if (! (charset = emacs_mule_charset[c]))
1700                 goto invalid_code;
1701               ONE_MORE_BYTE (c);
1702               if (c < 0)
1703                 goto invalid_code;
1704               code = (c & 0x7F) << 8;
1705               ONE_MORE_BYTE (c);
1706               if (c < 0)
1707                 goto invalid_code;
1708               code |= c & 0x7F;
1709             }
1710           break;
1711
1712         case 4:
1713           ONE_MORE_BYTE (c);
1714           if (c < 0 || ! (charset = emacs_mule_charset[c]))
1715             goto invalid_code;
1716           ONE_MORE_BYTE (c);
1717           if (c < 0)
1718             goto invalid_code;
1719           code = (c & 0x7F) << 8;
1720           ONE_MORE_BYTE (c);
1721           if (c < 0)
1722             goto invalid_code;
1723           code |= c & 0x7F;
1724           break;
1725
1726         case 1:
1727           code = c;
1728           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1729                                      ? charset_ascii : charset_eight_bit);
1730           break;
1731
1732         default:
1733           abort ();
1734         }
1735       c = DECODE_CHAR (charset, code);
1736       if (c < 0)
1737         goto invalid_code;
1738     }
1739   *nbytes = src - src_base;
1740   *nchars = consumed_chars;
1741   if (id)
1742     *id = charset->id;
1743   return c;
1744
1745  no_more_source:
1746   return -2;
1747
1748  invalid_code:
1749   return -1;
1750 }
1751
1752
1753 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1754    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1755    else return 0.  */
1756
1757 static int
1758 detect_coding_emacs_mule (coding, detect_info)
1759      struct coding_system *coding;
1760      struct coding_detection_info *detect_info;
1761 {
1762   const unsigned char *src = coding->source, *src_base;
1763   const unsigned char *src_end = coding->source + coding->src_bytes;
1764   int multibytep = coding->src_multibyte;
1765   int consumed_chars = 0;
1766   int c;
1767   int found = 0;
1768
1769   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1770   /* A coding system of this category is always ASCII compatible.  */
1771   src += coding->head_ascii;
1772
1773   while (1)
1774     {
1775       src_base = src;
1776       ONE_MORE_BYTE (c);
1777       if (c < 0)
1778         continue;
1779       if (c == 0x80)
1780         {
1781           /* Perhaps the start of composite character.  We simple skip
1782              it because analyzing it is too heavy for detecting.  But,
1783              at least, we check that the composite character
1784              constitues of more than 4 bytes.  */
1785           const unsigned char *src_base;
1786
1787         repeat:
1788           src_base = src;
1789           do
1790             {
1791               ONE_MORE_BYTE (c);
1792             }
1793           while (c >= 0xA0);
1794
1795           if (src - src_base <= 4)
1796             break;
1797           found = CATEGORY_MASK_EMACS_MULE;
1798           if (c == 0x80)
1799             goto repeat;
1800         }
1801
1802       if (c < 0x80)
1803         {
1804           if (c < 0x20
1805               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1806             break;
1807         }
1808       else
1809         {
1810           const unsigned char *src_base = src - 1;
1811
1812           do
1813             {
1814               ONE_MORE_BYTE (c);
1815             }
1816           while (c >= 0xA0);
1817           if (src - src_base != emacs_mule_bytes[*src_base])
1818             break;
1819           found = CATEGORY_MASK_EMACS_MULE;
1820         }
1821     }
1822   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1823   return 0;
1824
1825  no_more_source:
1826   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1827     {
1828       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1829       return 0;
1830     }
1831   detect_info->found |= found;
1832   return 1;
1833 }
1834
1835
1836 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1837
1838 /* Decode a character represented as a component of composition
1839    sequence of Emacs 20/21 style at SRC.  Set C to that character and
1840    update SRC to the head of next character (or an encoded composition
1841    rule).  If SRC doesn't points a composition component, set C to -1.
1842    If SRC points an invalid byte sequence, global exit by a return
1843    value 0.  */
1844
1845 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                 \
1846   if (1)                                                        \
1847     {                                                           \
1848       int c;                                                    \
1849       int nbytes, nchars;                                       \
1850                                                                 \
1851       if (src == src_end)                                       \
1852         break;                                                  \
1853       c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
1854       if (c < 0)                                                \
1855         {                                                       \
1856           if (c == -2)                                          \
1857             break;                                              \
1858           goto invalid_code;                                    \
1859         }                                                       \
1860       *buf++ = c;                                               \
1861       src += nbytes;                                            \
1862       consumed_chars += nchars;                                 \
1863     }                                                           \
1864   else
1865
1866
1867 /* Decode a composition rule represented as a component of composition
1868    sequence of Emacs 20 style at SRC.  Store the decoded rule in *BUF,
1869    and increment BUF.  If SRC points an invalid byte sequence, set C
1870    to -1.  */
1871
1872 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
1873   do {                                                  \
1874     int c, gref, nref;                                  \
1875                                                         \
1876     if (src >= src_end)                                 \
1877       goto invalid_code;                                \
1878     ONE_MORE_BYTE_NO_CHECK (c);                         \
1879     c -= 0x20;                                          \
1880     if (c < 0 || c >= 81)                               \
1881       goto invalid_code;                                \
1882                                                         \
1883     gref = c / 9, nref = c % 9;                         \
1884     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1885   } while (0)
1886
1887
1888 /* Decode a composition rule represented as a component of composition
1889    sequence of Emacs 21 style at SRC.  Store the decoded rule in *BUF,
1890    and increment BUF.  If SRC points an invalid byte sequence, set C
1891    to -1.  */
1892
1893 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
1894   do {                                                  \
1895     int gref, nref;                                     \
1896                                                         \
1897     if (src + 1>= src_end)                              \
1898       goto invalid_code;                                \
1899     ONE_MORE_BYTE_NO_CHECK (gref);                      \
1900     gref -= 0x20;                                       \
1901     ONE_MORE_BYTE_NO_CHECK (nref);                      \
1902     nref -= 0x20;                                       \
1903     if (gref < 0 || gref >= 81                          \
1904         || nref < 0 || nref >= 81)                      \
1905       goto invalid_code;                                \
1906     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1907   } while (0)
1908
1909
1910 #define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
1911   do {                                                                  \
1912     /* Emacs 21 style format.  The first three bytes at SRC are         \
1913        (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is  \
1914        the byte length of this composition information, CHARS is the    \
1915        number of characters composed by this composition.  */           \
1916     enum composition_method method = c - 0xF2;                          \
1917     int *charbuf_base = charbuf;                                        \
1918     int from, to;                                                       \
1919     int consumed_chars_limit;                                           \
1920     int nbytes, nchars;                                                 \
1921                                                                         \
1922     ONE_MORE_BYTE (c);                                                  \
1923     if (c < 0)                                                          \
1924       goto invalid_code;                                                \
1925     nbytes = c - 0xA0;                                                  \
1926     if (nbytes < 3)                                                     \
1927       goto invalid_code;                                                \
1928     ONE_MORE_BYTE (c);                                                  \
1929     if (c < 0)                                                          \
1930       goto invalid_code;                                                \
1931     nchars = c - 0xA0;                                                  \
1932     from = coding->produced + char_offset;                              \
1933     to = from + nchars;                                                 \
1934     ADD_COMPOSITION_DATA (charbuf, from, to, method);                   \
1935     consumed_chars_limit = consumed_chars_base + nbytes;                \
1936     if (method != COMPOSITION_RELATIVE)                                 \
1937       {                                                                 \
1938         int i = 0;                                                      \
1939         while (consumed_chars < consumed_chars_limit)                   \
1940           {                                                             \
1941             if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
1942               DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
1943             else                                                        \
1944               DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
1945             i++;                                                        \
1946           }                                                             \
1947         if (consumed_chars < consumed_chars_limit)                      \
1948           goto invalid_code;                                            \
1949         charbuf_base[0] -= i;                                           \
1950       }                                                                 \
1951   } while (0)
1952
1953
1954 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)            \
1955   do {                                                          \
1956     /* Emacs 20 style format for relative composition.  */      \
1957     /* Store multibyte form of characters to be composed.  */   \
1958     enum composition_method method = COMPOSITION_RELATIVE;      \
1959     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
1960     int *buf = components;                                      \
1961     int i, j;                                                   \
1962     int from, to;                                               \
1963                                                                 \
1964     src = src_base;                                             \
1965     ONE_MORE_BYTE (c);          /* skip 0x80 */                 \
1966     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
1967       DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                 \
1968     if (i < 2)                                                  \
1969       goto invalid_code;                                        \
1970     from = coding->produced_char + char_offset;                 \
1971     to = from + i;                                              \
1972     ADD_COMPOSITION_DATA (charbuf, from, to, method);           \
1973     for (j = 0; j < i; j++)                                     \
1974       *charbuf++ = components[j];                               \
1975   } while (0)
1976
1977
1978 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
1979   do {                                                          \
1980     /* Emacs 20 style format for rule-base composition.  */     \
1981     /* Store multibyte form of characters to be composed.  */   \
1982     enum composition_method method = COMPOSITION_WITH_RULE;     \
1983     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
1984     int *buf = components;                                      \
1985     int i, j;                                                   \
1986     int from, to;                                               \
1987                                                                 \
1988     DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
1989     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
1990       {                                                         \
1991         DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
1992         DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
1993       }                                                         \
1994     if (i < 1 || (buf - components) % 2 == 0)                   \
1995       goto invalid_code;                                        \
1996     if (charbuf + i + (i / 2) + 1 < charbuf_end)                \
1997       goto no_more_source;                                      \
1998     from = coding->produced_char + char_offset;                 \
1999     to = from + i;                                              \
2000     ADD_COMPOSITION_DATA (buf, from, to, method);               \
2001     for (j = 0; j < i; j++)                                     \
2002       *charbuf++ = components[j];                               \
2003     for (j = 0; j < i; j += 2)                                  \
2004       *charbuf++ = components[j];                               \
2005   } while (0)
2006
2007
2008 static void
2009 decode_coding_emacs_mule (coding)
2010      struct coding_system *coding;
2011 {
2012   const unsigned char *src = coding->source + coding->consumed;
2013   const unsigned char *src_end = coding->source + coding->src_bytes;
2014   const unsigned char *src_base;
2015   int *charbuf = coding->charbuf;
2016   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
2017   int consumed_chars = 0, consumed_chars_base;
2018   int multibytep = coding->src_multibyte;
2019   Lisp_Object attrs, charset_list;
2020   int char_offset = coding->produced_char;
2021   int last_offset = char_offset;
2022   int last_id = charset_ascii;
2023
2024   CODING_GET_INFO (coding, attrs, charset_list);
2025
2026   while (1)
2027     {
2028       int c;
2029
2030       src_base = src;
2031       consumed_chars_base = consumed_chars;
2032
2033       if (charbuf >= charbuf_end)
2034         break;
2035
2036       ONE_MORE_BYTE (c);
2037       if (c < 0)
2038         {
2039           *charbuf++ = -c;
2040           char_offset++;
2041         }
2042       else if (c < 0x80)
2043         {
2044           *charbuf++ = c;
2045           char_offset++;
2046         }
2047       else if (c == 0x80)
2048         {
2049           ONE_MORE_BYTE (c);
2050           if (c < 0)
2051             goto invalid_code;
2052           if (c - 0xF2 >= COMPOSITION_RELATIVE
2053               && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
2054             DECODE_EMACS_MULE_21_COMPOSITION (c);
2055           else if (c < 0xC0)
2056             DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2057           else if (c == 0xFF)
2058             DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2059           else
2060             goto invalid_code;
2061         }
2062       else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2063         {
2064           int nbytes, nchars;
2065           int id;
2066
2067           src = src_base;
2068           consumed_chars = consumed_chars_base;
2069           c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
2070           if (c < 0)
2071             {
2072               if (c == -2)
2073                 break;
2074               goto invalid_code;
2075             }
2076           if (last_id != id)
2077             {
2078               if (last_id != charset_ascii)
2079                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
2080               last_id = id;
2081               last_offset = char_offset;
2082             }
2083           *charbuf++ = c;
2084           src += nbytes;
2085           consumed_chars += nchars;
2086           char_offset++;
2087         }
2088       continue;
2089
2090     invalid_code:
2091       src = src_base;
2092       consumed_chars = consumed_chars_base;
2093       ONE_MORE_BYTE (c);
2094       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2095       char_offset++;
2096       coding->errors++;
2097     }
2098
2099  no_more_source:
2100   if (last_id != charset_ascii)
2101     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
2102   coding->consumed_char += consumed_chars_base;
2103   coding->consumed = src_base - coding->source;
2104   coding->charbuf_used = charbuf - coding->charbuf;
2105 }
2106
2107
2108 #define EMACS_MULE_LEADING_CODES(id, codes)     \
2109   do {                                          \
2110     if (id < 0xA0)                              \
2111       codes[0] = id, codes[1] = 0;              \
2112     else if (id < 0xE0)                         \
2113       codes[0] = 0x9A, codes[1] = id;           \
2114     else if (id < 0xF0)                         \
2115       codes[0] = 0x9B, codes[1] = id;           \
2116     else if (id < 0xF5)                         \
2117       codes[0] = 0x9C, codes[1] = id;           \
2118     else                                        \
2119       codes[0] = 0x9D, codes[1] = id;           \
2120   } while (0);
2121
2122
2123 static int
2124 encode_coding_emacs_mule (coding)
2125      struct coding_system *coding;
2126 {
2127   int multibytep = coding->dst_multibyte;
2128   int *charbuf = coding->charbuf;
2129   int *charbuf_end = charbuf + coding->charbuf_used;
2130   unsigned char *dst = coding->destination + coding->produced;
2131   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2132   int safe_room = 8;
2133   int produced_chars = 0;
2134   Lisp_Object attrs, charset_list;
2135   int c;
2136   int preferred_charset_id = -1;
2137
2138   CODING_GET_INFO (coding, attrs, charset_list);
2139   if (! EQ (charset_list, Vemacs_mule_charset_list))
2140     {
2141       CODING_ATTR_CHARSET_LIST (attrs)
2142         = charset_list = Vemacs_mule_charset_list;
2143     }
2144
2145   while (charbuf < charbuf_end)
2146     {
2147       ASSURE_DESTINATION (safe_room);
2148       c = *charbuf++;
2149
2150       if (c < 0)
2151         {
2152           /* Handle an annotation.  */
2153           switch (*charbuf)
2154             {
2155             case CODING_ANNOTATE_COMPOSITION_MASK:
2156               /* Not yet implemented.  */
2157               break;
2158             case CODING_ANNOTATE_CHARSET_MASK:
2159               preferred_charset_id = charbuf[3];
2160               if (preferred_charset_id >= 0
2161                   && NILP (Fmemq (make_number (preferred_charset_id),
2162                                   charset_list)))
2163                 preferred_charset_id = -1;
2164               break;
2165             default:
2166               abort ();
2167             }
2168           charbuf += -c - 1;
2169           continue;
2170         }
2171
2172       if (ASCII_CHAR_P (c))
2173         EMIT_ONE_ASCII_BYTE (c);
2174       else if (CHAR_BYTE8_P (c))
2175         {
2176           c = CHAR_TO_BYTE8 (c);
2177           EMIT_ONE_BYTE (c);
2178         }
2179       else
2180         {
2181           struct charset *charset;
2182           unsigned code;
2183           int dimension;
2184           int emacs_mule_id;
2185           unsigned char leading_codes[2];
2186
2187           if (preferred_charset_id >= 0)
2188             {
2189               charset = CHARSET_FROM_ID (preferred_charset_id);
2190               if (! CHAR_CHARSET_P (c, charset))
2191                 charset = char_charset (c, charset_list, NULL);
2192             }
2193           else
2194             charset = char_charset (c, charset_list, &code);
2195           if (! charset)
2196             {
2197               c = coding->default_char;
2198               if (ASCII_CHAR_P (c))
2199                 {
2200                   EMIT_ONE_ASCII_BYTE (c);
2201                   continue;
2202                 }
2203               charset = char_charset (c, charset_list, &code);
2204             }
2205           dimension = CHARSET_DIMENSION (charset);
2206           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2207           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2208           EMIT_ONE_BYTE (leading_codes[0]);
2209           if (leading_codes[1])
2210             EMIT_ONE_BYTE (leading_codes[1]);
2211           if (dimension == 1)
2212             EMIT_ONE_BYTE (code | 0x80);
2213           else
2214             {
2215               code |= 0x8080;
2216               EMIT_ONE_BYTE (code >> 8);
2217               EMIT_ONE_BYTE (code & 0xFF);
2218             }
2219         }
2220     }
2221   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2222   coding->produced_char += produced_chars;
2223   coding->produced = dst - coding->destination;
2224   return 0;
2225 }
2226
2227 \f
2228 /*** 7. ISO2022 handlers ***/
2229
2230 /* The following note describes the coding system ISO2022 briefly.
2231    Since the intention of this note is to help understand the
2232    functions in this file, some parts are NOT ACCURATE or are OVERLY
2233    SIMPLIFIED.  For thorough understanding, please refer to the
2234    original document of ISO2022.  This is equivalent to the standard
2235    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2236
2237    ISO2022 provides many mechanisms to encode several character sets
2238    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2239    is encoded using bytes less than 128.  This may make the encoded
2240    text a little bit longer, but the text passes more easily through
2241    several types of gateway, some of which strip off the MSB (Most
2242    Significant Bit).
2243
2244    There are two kinds of character sets: control character sets and
2245    graphic character sets.  The former contain control characters such
2246    as `newline' and `escape' to provide control functions (control
2247    functions are also provided by escape sequences).  The latter
2248    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2249    two control character sets and many graphic character sets.
2250
2251    Graphic character sets are classified into one of the following
2252    four classes, according to the number of bytes (DIMENSION) and
2253    number of characters in one dimension (CHARS) of the set:
2254    - DIMENSION1_CHARS94
2255    - DIMENSION1_CHARS96
2256    - DIMENSION2_CHARS94
2257    - DIMENSION2_CHARS96
2258
2259    In addition, each character set is assigned an identification tag,
2260    unique for each set, called the "final character" (denoted as <F>
2261    hereafter).  The <F> of each character set is decided by ECMA(*)
2262    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2263    (0x30..0x3F are for private use only).
2264
2265    Note (*): ECMA = European Computer Manufacturers Association
2266
2267    Here are examples of graphic character sets [NAME(<F>)]:
2268         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2269         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2270         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2271         o DIMENSION2_CHARS96 -- none for the moment
2272
2273    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2274         C0 [0x00..0x1F] -- control character plane 0
2275         GL [0x20..0x7F] -- graphic character plane 0
2276         C1 [0x80..0x9F] -- control character plane 1
2277         GR [0xA0..0xFF] -- graphic character plane 1
2278
2279    A control character set is directly designated and invoked to C0 or
2280    C1 by an escape sequence.  The most common case is that:
2281    - ISO646's  control character set is designated/invoked to C0, and
2282    - ISO6429's control character set is designated/invoked to C1,
2283    and usually these designations/invocations are omitted in encoded
2284    text.  In a 7-bit environment, only C0 can be used, and a control
2285    character for C1 is encoded by an appropriate escape sequence to
2286    fit into the environment.  All control characters for C1 are
2287    defined to have corresponding escape sequences.
2288
2289    A graphic character set is at first designated to one of four
2290    graphic registers (G0 through G3), then these graphic registers are
2291    invoked to GL or GR.  These designations and invocations can be
2292    done independently.  The most common case is that G0 is invoked to
2293    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2294    these invocations and designations are omitted in encoded text.
2295    In a 7-bit environment, only GL can be used.
2296
2297    When a graphic character set of CHARS94 is invoked to GL, codes
2298    0x20 and 0x7F of the GL area work as control characters SPACE and
2299    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2300    be used.
2301
2302    There are two ways of invocation: locking-shift and single-shift.
2303    With locking-shift, the invocation lasts until the next different
2304    invocation, whereas with single-shift, the invocation affects the
2305    following character only and doesn't affect the locking-shift
2306    state.  Invocations are done by the following control characters or
2307    escape sequences:
2308
2309    ----------------------------------------------------------------------
2310    abbrev  function                  cntrl escape seq   description
2311    ----------------------------------------------------------------------
2312    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2313    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2314    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2315    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2316    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2317    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2318    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2319    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2320    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2321    ----------------------------------------------------------------------
2322    (*) These are not used by any known coding system.
2323
2324    Control characters for these functions are defined by macros
2325    ISO_CODE_XXX in `coding.h'.
2326
2327    Designations are done by the following escape sequences:
2328    ----------------------------------------------------------------------
2329    escape sequence      description
2330    ----------------------------------------------------------------------
2331    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2332    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2333    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2334    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2335    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2336    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2337    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2338    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2339    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2340    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2341    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2342    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2343    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2344    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2345    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2346    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2347    ----------------------------------------------------------------------
2348
2349    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2350    of dimension 1, chars 94, and final character <F>, etc...
2351
2352    Note (*): Although these designations are not allowed in ISO2022,
2353    Emacs accepts them on decoding, and produces them on encoding
2354    CHARS96 character sets in a coding system which is characterized as
2355    7-bit environment, non-locking-shift, and non-single-shift.
2356
2357    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2358    '(' must be omitted.  We refer to this as "short-form" hereafter.
2359
2360    Now you may notice that there are a lot of ways of encoding the
2361    same multilingual text in ISO2022.  Actually, there exist many
2362    coding systems such as Compound Text (used in X11's inter client
2363    communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2364    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2365    localized platforms), and all of these are variants of ISO2022.
2366
2367    In addition to the above, Emacs handles two more kinds of escape
2368    sequences: ISO6429's direction specification and Emacs' private
2369    sequence for specifying character composition.
2370
2371    ISO6429's direction specification takes the following form:
2372         o CSI ']'      -- end of the current direction
2373         o CSI '0' ']'  -- end of the current direction
2374         o CSI '1' ']'  -- start of left-to-right text
2375         o CSI '2' ']'  -- start of right-to-left text
2376    The control character CSI (0x9B: control sequence introducer) is
2377    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2378
2379    Character composition specification takes the following form:
2380         o ESC '0' -- start relative composition
2381         o ESC '1' -- end composition
2382         o ESC '2' -- start rule-base composition (*)
2383         o ESC '3' -- start relative composition with alternate chars  (**)
2384         o ESC '4' -- start rule-base composition with alternate chars  (**)
2385   Since these are not standard escape sequences of any ISO standard,
2386   the use of them with these meanings is restricted to Emacs only.
2387
2388   (*) This form is used only in Emacs 20.7 and older versions,
2389   but newer versions can safely decode it.
2390   (**) This form is used only in Emacs 21.1 and newer versions,
2391   and older versions can't decode it.
2392
2393   Here's a list of example usages of these composition escape
2394   sequences (categorized by `enum composition_method').
2395
2396   COMPOSITION_RELATIVE:
2397         ESC 0 CHAR [ CHAR ] ESC 1
2398   COMPOSITION_WITH_RULE:
2399         ESC 2 CHAR [ RULE CHAR ] ESC 1
2400   COMPOSITION_WITH_ALTCHARS:
2401         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2402   COMPOSITION_WITH_RULE_ALTCHARS:
2403         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2404
2405 enum iso_code_class_type iso_code_class[256];
2406
2407 #define SAFE_CHARSET_P(coding, id)      \
2408   ((id) <= (coding)->max_charset_id     \
2409    && (coding)->safe_charsets[id] >= 0)
2410
2411
2412 #define SHIFT_OUT_OK(category)  \
2413   (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2414
2415 static void
2416 setup_iso_safe_charsets (attrs)
2417      Lisp_Object attrs;
2418 {
2419   Lisp_Object charset_list, safe_charsets;
2420   Lisp_Object request;
2421   Lisp_Object reg_usage;
2422   Lisp_Object tail;
2423   int reg94, reg96;
2424   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2425   int max_charset_id;
2426
2427   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2428   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2429       && ! EQ (charset_list, Viso_2022_charset_list))
2430     {
2431       CODING_ATTR_CHARSET_LIST (attrs)
2432         = charset_list = Viso_2022_charset_list;
2433       ASET (attrs, coding_attr_safe_charsets, Qnil);
2434     }
2435
2436   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2437     return;
2438
2439   max_charset_id = 0;
2440   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2441     {
2442       int id = XINT (XCAR (tail));
2443       if (max_charset_id < id)
2444         max_charset_id = id;
2445     }
2446
2447   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2448                                 make_number (255));
2449   request = AREF (attrs, coding_attr_iso_request);
2450   reg_usage = AREF (attrs, coding_attr_iso_usage);
2451   reg94 = XINT (XCAR (reg_usage));
2452   reg96 = XINT (XCDR (reg_usage));
2453
2454   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2455     {
2456       Lisp_Object id;
2457       Lisp_Object reg;
2458       struct charset *charset;
2459
2460       id = XCAR (tail);
2461       charset = CHARSET_FROM_ID (XINT (id));
2462       reg = Fcdr (Fassq (id, request));
2463       if (! NILP (reg))
2464         SSET (safe_charsets, XINT (id), XINT (reg));
2465       else if (charset->iso_chars_96)
2466         {
2467           if (reg96 < 4)
2468             SSET (safe_charsets, XINT (id), reg96);
2469         }
2470       else
2471         {
2472           if (reg94 < 4)
2473             SSET (safe_charsets, XINT (id), reg94);
2474         }
2475     }
2476   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2477 }
2478
2479
2480 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2481    Check if a text is encoded in one of ISO-2022 based codig systems.
2482    If it is, return 1, else return 0.  */
2483
2484 static int
2485 detect_coding_iso_2022 (coding, detect_info)
2486      struct coding_system *coding;
2487      struct coding_detection_info *detect_info;
2488 {
2489   const unsigned char *src = coding->source, *src_base = src;
2490   const unsigned char *src_end = coding->source + coding->src_bytes;
2491   int multibytep = coding->src_multibyte;
2492   int single_shifting = 0;
2493   int id;
2494   int c, c1;
2495   int consumed_chars = 0;
2496   int i;
2497   int rejected = 0;
2498   int found = 0;
2499
2500   detect_info->checked |= CATEGORY_MASK_ISO;
2501
2502   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2503     {
2504       struct coding_system *this = &(coding_categories[i]);
2505       Lisp_Object attrs, val;
2506
2507       attrs = CODING_ID_ATTRS (this->id);
2508       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2509           && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2510         setup_iso_safe_charsets (attrs);
2511       val = CODING_ATTR_SAFE_CHARSETS (attrs);
2512       this->max_charset_id = SCHARS (val) - 1;
2513       this->safe_charsets = (char *) SDATA (val);
2514     }
2515
2516   /* A coding system of this category is always ASCII compatible.  */
2517   src += coding->head_ascii;
2518
2519   while (rejected != CATEGORY_MASK_ISO)
2520     {
2521       src_base = src;
2522       ONE_MORE_BYTE (c);
2523       switch (c)
2524         {
2525         case ISO_CODE_ESC:
2526           if (inhibit_iso_escape_detection)
2527             break;
2528           single_shifting = 0;
2529           ONE_MORE_BYTE (c);
2530           if (c >= '(' && c <= '/')
2531             {
2532               /* Designation sequence for a charset of dimension 1.  */
2533               ONE_MORE_BYTE (c1);
2534               if (c1 < ' ' || c1 >= 0x80
2535                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2536                 /* Invalid designation sequence.  Just ignore.  */
2537                 break;
2538             }
2539           else if (c == '$')
2540             {
2541               /* Designation sequence for a charset of dimension 2.  */
2542               ONE_MORE_BYTE (c);
2543               if (c >= '@' && c <= 'B')
2544                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
2545                 id = iso_charset_table[1][0][c];
2546               else if (c >= '(' && c <= '/')
2547                 {
2548                   ONE_MORE_BYTE (c1);
2549                   if (c1 < ' ' || c1 >= 0x80
2550                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2551                     /* Invalid designation sequence.  Just ignore.  */
2552                     break;
2553                 }
2554               else
2555                 /* Invalid designation sequence.  Just ignore it.  */
2556                 break;
2557             }
2558           else if (c == 'N' || c == 'O')
2559             {
2560               /* ESC <Fe> for SS2 or SS3.  */
2561               single_shifting = 1;
2562               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2563               break;
2564             }
2565           else if (c >= '0' && c <= '4')
2566             {
2567               /* ESC <Fp> for start/end composition.  */
2568               found |= CATEGORY_MASK_ISO;
2569               break;
2570             }
2571           else
2572             {
2573               /* Invalid escape sequence.  Just ignore it.  */
2574               break;
2575             }
2576
2577           /* We found a valid designation sequence for CHARSET.  */
2578           rejected |= CATEGORY_MASK_ISO_8BIT;
2579           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2580                               id))
2581             found |= CATEGORY_MASK_ISO_7;
2582           else
2583             rejected |= CATEGORY_MASK_ISO_7;
2584           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2585                               id))
2586             found |= CATEGORY_MASK_ISO_7_TIGHT;
2587           else
2588             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
2589           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2590                               id))
2591             found |= CATEGORY_MASK_ISO_7_ELSE;
2592           else
2593             rejected |= CATEGORY_MASK_ISO_7_ELSE;
2594           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2595                               id))
2596             found |= CATEGORY_MASK_ISO_8_ELSE;
2597           else
2598             rejected |= CATEGORY_MASK_ISO_8_ELSE;
2599           break;
2600
2601         case ISO_CODE_SO:
2602         case ISO_CODE_SI:
2603           /* Locking shift out/in.  */
2604           if (inhibit_iso_escape_detection)
2605             break;
2606           single_shifting = 0;
2607           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2608           found |= CATEGORY_MASK_ISO_ELSE;
2609           break;
2610
2611         case ISO_CODE_CSI:
2612           /* Control sequence introducer.  */
2613           single_shifting = 0;
2614           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2615           found |= CATEGORY_MASK_ISO_8_ELSE;
2616           goto check_extra_latin;
2617
2618
2619         case ISO_CODE_SS2:
2620         case ISO_CODE_SS3:
2621           /* Single shift.   */
2622           if (inhibit_iso_escape_detection)
2623             break;
2624           single_shifting = 1;
2625           rejected |= CATEGORY_MASK_ISO_7BIT;
2626           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2627               & CODING_ISO_FLAG_SINGLE_SHIFT)
2628             found |= CATEGORY_MASK_ISO_8_1;
2629           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2630               & CODING_ISO_FLAG_SINGLE_SHIFT)
2631             found |= CATEGORY_MASK_ISO_8_2;
2632           goto check_extra_latin;
2633
2634         default:
2635           if (c < 0)
2636             continue;
2637           if (c < 0x80)
2638             {
2639               single_shifting = 0;
2640               break;
2641             }
2642           if (c >= 0xA0)
2643             {
2644               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2645               found |= CATEGORY_MASK_ISO_8_1;
2646               /* Check the length of succeeding codes of the range
2647                  0xA0..0FF.  If the byte length is even, we include
2648                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
2649                  only when we are not single shifting.  */
2650               if (! single_shifting
2651                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
2652                 {
2653                   int i = 1;
2654                   while (src < src_end)
2655                     {
2656                       ONE_MORE_BYTE (c);
2657                       if (c < 0xA0)
2658                         break;
2659                       i++;
2660                     }
2661
2662                   if (i & 1 && src < src_end)
2663                     rejected |= CATEGORY_MASK_ISO_8_2;
2664                   else
2665                     found |= CATEGORY_MASK_ISO_8_2;
2666                 }
2667               break;
2668             }
2669         check_extra_latin:
2670           single_shifting = 0;
2671           if (! VECTORP (Vlatin_extra_code_table)
2672               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2673             {
2674               rejected = CATEGORY_MASK_ISO;
2675               break;
2676             }
2677           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2678               & CODING_ISO_FLAG_LATIN_EXTRA)
2679             found |= CATEGORY_MASK_ISO_8_1;
2680           else
2681             rejected |= CATEGORY_MASK_ISO_8_1;
2682           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2683               & CODING_ISO_FLAG_LATIN_EXTRA)
2684             found |= CATEGORY_MASK_ISO_8_2;
2685           else
2686             rejected |= CATEGORY_MASK_ISO_8_2;
2687         }
2688     }
2689   detect_info->rejected |= CATEGORY_MASK_ISO;
2690   return 0;
2691
2692  no_more_source:
2693   detect_info->rejected |= rejected;
2694   detect_info->found |= (found & ~rejected);
2695   return 1;
2696 }
2697
2698
2699 /* Set designation state into CODING.  */
2700 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
2701   do {                                                                  \
2702     int id, prev;                                                       \
2703                                                                         \
2704     if (final < '0' || final >= 128                                     \
2705         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
2706         || !SAFE_CHARSET_P (coding, id))                                \
2707       {                                                                 \
2708         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
2709         goto invalid_code;                                              \
2710       }                                                                 \
2711     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
2712     if (id == charset_jisx0201_roman)                                   \
2713       {                                                                 \
2714         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
2715           id = charset_ascii;                                           \
2716       }                                                                 \
2717     else if (id == charset_jisx0208_1978)                               \
2718       {                                                                 \
2719         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
2720           id = charset_jisx0208;                                        \
2721       }                                                                 \
2722     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
2723     /* If there was an invalid designation to REG previously, and this  \
2724        designation is ASCII to REG, we should keep this designation     \
2725        sequence.  */                                                    \
2726     if (prev == -2 && id == charset_ascii)                              \
2727       goto invalid_code;                                                \
2728   } while (0)
2729
2730
2731 #define MAYBE_FINISH_COMPOSITION()                              \
2732   do {                                                          \
2733     int i;                                                      \
2734     if (composition_state == COMPOSING_NO)                      \
2735       break;                                                    \
2736     /* It is assured that we have enough room for producing     \
2737        characters stored in the table `components'.  */         \
2738     if (charbuf + component_idx > charbuf_end)                  \
2739       goto no_more_source;                                      \
2740     composition_state = COMPOSING_NO;                           \
2741     if (method == COMPOSITION_RELATIVE                          \
2742         || method == COMPOSITION_WITH_ALTCHARS)                 \
2743       {                                                         \
2744         for (i = 0; i < component_idx; i++)                     \
2745           *charbuf++ = components[i];                           \
2746         char_offset += component_idx;                           \
2747       }                                                         \
2748     else                                                        \
2749       {                                                         \
2750         for (i = 0; i < component_idx; i += 2)                  \
2751           *charbuf++ = components[i];                           \
2752         char_offset += (component_idx / 2) + 1;                 \
2753       }                                                         \
2754   } while (0)
2755
2756
2757 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2758    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2759    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2760    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2761    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2762   */
2763
2764 #define DECODE_COMPOSITION_START(c1)                                    \
2765   do {                                                                  \
2766     if (c1 == '0'                                                       \
2767         && composition_state == COMPOSING_COMPONENT_RULE)               \
2768       {                                                                 \
2769         component_len = component_idx;                                  \
2770         composition_state = COMPOSING_CHAR;                             \
2771       }                                                                 \
2772     else                                                                \
2773       {                                                                 \
2774         const unsigned char *p;                                         \
2775                                                                         \
2776         MAYBE_FINISH_COMPOSITION ();                                    \
2777         if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
2778           goto no_more_source;                                          \
2779         for (p = src; p < src_end - 1; p++)                             \
2780           if (*p == ISO_CODE_ESC && p[1] == '1')                        \
2781             break;                                                      \
2782         if (p == src_end - 1)                                           \
2783           {                                                             \
2784             if (coding->mode & CODING_MODE_LAST_BLOCK)                  \
2785               goto invalid_code;                                        \
2786             goto no_more_source;                                        \
2787           }                                                             \
2788                                                                         \
2789         /* This is surely the start of a composition.  */               \
2790         method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
2791                   : c1 == '2' ? COMPOSITION_WITH_RULE                   \
2792                   : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
2793                   : COMPOSITION_WITH_RULE_ALTCHARS);                    \
2794         composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
2795                              : COMPOSING_COMPONENT_CHAR);               \
2796         component_idx = component_len = 0;                              \
2797       }                                                                 \
2798   } while (0)
2799
2800
2801 /* Handle compositoin end sequence ESC 1.  */
2802
2803 #define DECODE_COMPOSITION_END()                                        \
2804   do {                                                                  \
2805     int nchars = (component_len > 0 ? component_idx - component_len     \
2806                   : method == COMPOSITION_RELATIVE ? component_idx      \
2807                   : (component_idx + 1) / 2);                           \
2808     int i;                                                              \
2809     int *saved_charbuf = charbuf;                                       \
2810     int from = char_offset;                                             \
2811     int to = from + nchars;                                             \
2812                                                                         \
2813     ADD_COMPOSITION_DATA (charbuf, from, to, method);                   \
2814     if (method != COMPOSITION_RELATIVE)                                 \
2815       {                                                                 \
2816         if (component_len == 0)                                         \
2817           for (i = 0; i < component_idx; i++)                           \
2818             *charbuf++ = components[i];                                 \
2819         else                                                            \
2820           for (i = 0; i < component_len; i++)                           \
2821             *charbuf++ = components[i];                                 \
2822         *saved_charbuf = saved_charbuf - charbuf;                       \
2823       }                                                                 \
2824     if (method == COMPOSITION_WITH_RULE)                                \
2825       for (i = 0; i < component_idx; i += 2, char_offset++)             \
2826         *charbuf++ = components[i];                                     \
2827     else                                                                \
2828       for (i = component_len; i < component_idx; i++, char_offset++)    \
2829         *charbuf++ = components[i];                                     \
2830     coding->annotated = 1;                                              \
2831     composition_state = COMPOSING_NO;                                   \
2832   } while (0)
2833
2834
2835 /* Decode a composition rule from the byte C1 (and maybe one more byte
2836    from SRC) and store one encoded composition rule in
2837    coding->cmp_data.  */
2838
2839 #define DECODE_COMPOSITION_RULE(c1)                                     \
2840   do {                                                                  \
2841     (c1) -= 32;                                                         \
2842     if (c1 < 81)                /* old format (before ver.21) */        \
2843       {                                                                 \
2844         int gref = (c1) / 9;                                            \
2845         int nref = (c1) % 9;                                            \
2846         if (gref == 4) gref = 10;                                       \
2847         if (nref == 4) nref = 10;                                       \
2848         c1 = COMPOSITION_ENCODE_RULE (gref, nref);                      \
2849       }                                                                 \
2850     else if (c1 < 93)           /* new format (after ver.21) */         \
2851       {                                                                 \
2852         ONE_MORE_BYTE (c2);                                             \
2853         c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);                \
2854       }                                                                 \
2855     else                                                                \
2856       c1 = 0;                                                           \
2857   } while (0)
2858
2859
2860 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2861
2862 static void
2863 decode_coding_iso_2022 (coding)
2864      struct coding_system *coding;
2865 {
2866   const unsigned char *src = coding->source + coding->consumed;
2867   const unsigned char *src_end = coding->source + coding->src_bytes;
2868   const unsigned char *src_base;
2869   int *charbuf = coding->charbuf;
2870   int *charbuf_end
2871     = charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
2872   int consumed_chars = 0, consumed_chars_base;
2873   int multibytep = coding->src_multibyte;
2874   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
2875   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2876   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2877   struct charset *charset;
2878   int c;
2879   /* For handling composition sequence.  */
2880 #define COMPOSING_NO                    0
2881 #define COMPOSING_CHAR                  1
2882 #define COMPOSING_RULE                  2
2883 #define COMPOSING_COMPONENT_CHAR        3
2884 #define COMPOSING_COMPONENT_RULE        4
2885
2886   int composition_state = COMPOSING_NO;
2887   enum composition_method method;
2888   int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2889   int component_idx;
2890   int component_len;
2891   Lisp_Object attrs, charset_list;
2892   int char_offset = coding->produced_char;
2893   int last_offset = char_offset;
2894   int last_id = charset_ascii;
2895
2896   CODING_GET_INFO (coding, attrs, charset_list);
2897   setup_iso_safe_charsets (attrs);
2898
2899   while (1)
2900     {
2901       int c1, c2;
2902
2903       src_base = src;
2904       consumed_chars_base = consumed_chars;
2905
2906       if (charbuf >= charbuf_end)
2907         break;
2908
2909       ONE_MORE_BYTE (c1);
2910       if (c1 < 0)
2911         goto invalid_code;
2912
2913       /* We produce at most one character.  */
2914       switch (iso_code_class [c1])
2915         {
2916         case ISO_0x20_or_0x7F:
2917           if (composition_state != COMPOSING_NO)
2918             {
2919               if (composition_state == COMPOSING_RULE
2920                   || composition_state == COMPOSING_COMPONENT_RULE)
2921                 {
2922                   DECODE_COMPOSITION_RULE (c1);
2923                   components[component_idx++] = c1;
2924                   composition_state--;
2925                   continue;
2926                 }
2927             }
2928           if (charset_id_0 < 0
2929               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
2930             /* This is SPACE or DEL.  */
2931             charset = CHARSET_FROM_ID (charset_ascii);
2932           else
2933             charset = CHARSET_FROM_ID (charset_id_0);
2934           break;
2935
2936         case ISO_graphic_plane_0:
2937           if (composition_state != COMPOSING_NO)
2938             {
2939               if (composition_state == COMPOSING_RULE
2940                   || composition_state == COMPOSING_COMPONENT_RULE)
2941                 {
2942                   DECODE_COMPOSITION_RULE (c1);
2943                   components[component_idx++] = c1;
2944                   composition_state--;
2945                   continue;
2946                 }
2947             }
2948           charset = CHARSET_FROM_ID (charset_id_0);
2949           break;
2950
2951         case ISO_0xA0_or_0xFF:
2952           if (charset_id_1 < 0
2953               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2954               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2955             goto invalid_code;
2956           /* This is a graphic character, we fall down ... */
2957
2958         case ISO_graphic_plane_1:
2959           if (charset_id_1 < 0)
2960             goto invalid_code;
2961           charset = CHARSET_FROM_ID (charset_id_1);
2962           break;
2963
2964         case ISO_control_0:
2965           MAYBE_FINISH_COMPOSITION ();
2966           charset = CHARSET_FROM_ID (charset_ascii);
2967           break;
2968
2969         case ISO_control_1:
2970           MAYBE_FINISH_COMPOSITION ();
2971           goto invalid_code;
2972
2973         case ISO_shift_out:
2974           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2975               || CODING_ISO_DESIGNATION (coding, 1) < 0)
2976             goto invalid_code;
2977           CODING_ISO_INVOCATION (coding, 0) = 1;
2978           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2979           continue;
2980
2981         case ISO_shift_in:
2982           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2983             goto invalid_code;
2984           CODING_ISO_INVOCATION (coding, 0) = 0;
2985           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2986           continue;
2987
2988         case ISO_single_shift_2_7:
2989         case ISO_single_shift_2:
2990           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2991             goto invalid_code;
2992           /* SS2 is handled as an escape sequence of ESC 'N' */
2993           c1 = 'N';
2994           goto label_escape_sequence;
2995
2996         case ISO_single_shift_3:
2997           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2998             goto invalid_code;
2999           /* SS2 is handled as an escape sequence of ESC 'O' */
3000           c1 = 'O';
3001           goto label_escape_sequence;
3002
3003         case ISO_control_sequence_introducer:
3004           /* CSI is handled as an escape sequence of ESC '[' ...  */
3005           c1 = '[';
3006           goto label_escape_sequence;
3007
3008         case ISO_escape:
3009           ONE_MORE_BYTE (c1);
3010         label_escape_sequence:
3011           /* Escape sequences handled here are invocation,
3012              designation, direction specification, and character
3013              composition specification.  */
3014           switch (c1)
3015             {
3016             case '&':           /* revision of following character set */
3017               ONE_MORE_BYTE (c1);
3018               if (!(c1 >= '@' && c1 <= '~'))
3019                 goto invalid_code;
3020               ONE_MORE_BYTE (c1);
3021               if (c1 != ISO_CODE_ESC)
3022                 goto invalid_code;
3023               ONE_MORE_BYTE (c1);
3024               goto label_escape_sequence;
3025
3026             case '$':           /* designation of 2-byte character set */
3027               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3028                 goto invalid_code;
3029               ONE_MORE_BYTE (c1);
3030               if (c1 >= '@' && c1 <= 'B')
3031                 {       /* designation of JISX0208.1978, GB2312.1980,
3032                            or JISX0208.1980 */
3033                   DECODE_DESIGNATION (0, 2, 0, c1);
3034                 }
3035               else if (c1 >= 0x28 && c1 <= 0x2B)
3036                 {       /* designation of DIMENSION2_CHARS94 character set */
3037                   ONE_MORE_BYTE (c2);
3038                   DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
3039                 }
3040               else if (c1 >= 0x2C && c1 <= 0x2F)
3041                 {       /* designation of DIMENSION2_CHARS96 character set */
3042                   ONE_MORE_BYTE (c2);
3043                   DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
3044                 }
3045               else
3046                 goto invalid_code;
3047               /* We must update these variables now.  */
3048               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3049               charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3050               continue;
3051
3052             case 'n':           /* invocation of locking-shift-2 */
3053               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3054                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3055                 goto invalid_code;
3056               CODING_ISO_INVOCATION (coding, 0) = 2;
3057               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3058               continue;
3059
3060             case 'o':           /* invocation of locking-shift-3 */
3061               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3062                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3063                 goto invalid_code;
3064               CODING_ISO_INVOCATION (coding, 0) = 3;
3065               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3066               continue;
3067
3068             case 'N':           /* invocation of single-shift-2 */
3069               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3070                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3071                 goto invalid_code;
3072               charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
3073               ONE_MORE_BYTE (c1);
3074               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3075                 goto invalid_code;
3076               break;
3077
3078             case 'O':           /* invocation of single-shift-3 */
3079               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3080                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3081                 goto invalid_code;
3082               charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
3083               ONE_MORE_BYTE (c1);
3084               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3085                 goto invalid_code;
3086               break;
3087
3088             case '0': case '2': case '3': case '4': /* start composition */
3089               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3090                 goto invalid_code;
3091               DECODE_COMPOSITION_START (c1);
3092               continue;
3093
3094             case '1':           /* end composition */
3095               if (composition_state == COMPOSING_NO)
3096                 goto invalid_code;
3097               DECODE_COMPOSITION_END ();
3098               continue;
3099
3100             case '[':           /* specification of direction */
3101               if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3102                 goto invalid_code;
3103               /* For the moment, nested direction is not supported.
3104                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3105                  left-to-right, and nozero means right-to-left.  */
3106               ONE_MORE_BYTE (c1);
3107               switch (c1)
3108                 {
3109                 case ']':       /* end of the current direction */
3110                   coding->mode &= ~CODING_MODE_DIRECTION;
3111
3112                 case '0':       /* end of the current direction */
3113                 case '1':       /* start of left-to-right direction */
3114                   ONE_MORE_BYTE (c1);
3115                   if (c1 == ']')
3116                     coding->mode &= ~CODING_MODE_DIRECTION;
3117                   else
3118                     goto invalid_code;
3119                   break;
3120
3121                 case '2':       /* start of right-to-left direction */
3122                   ONE_MORE_BYTE (c1);
3123                   if (c1 == ']')
3124                     coding->mode |= CODING_MODE_DIRECTION;
3125                   else
3126                     goto invalid_code;
3127                   break;
3128
3129                 default:
3130                   goto invalid_code;
3131                 }
3132               continue;
3133
3134             case '%':
3135               ONE_MORE_BYTE (c1);
3136               if (c1 == '/')
3137                 {
3138                   /* CTEXT extended segment:
3139                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3140                      We keep these bytes as is for the moment.
3141                      They may be decoded by post-read-conversion.  */
3142                   int dim, M, L;
3143                   int size;
3144
3145                   ONE_MORE_BYTE (dim);
3146                   ONE_MORE_BYTE (M);
3147                   ONE_MORE_BYTE (L);
3148                   size = ((M - 128) * 128) + (L - 128);
3149                   if (charbuf + 8 + size > charbuf_end)
3150                     goto break_loop;
3151                   *charbuf++ = ISO_CODE_ESC;
3152                   *charbuf++ = '%';
3153                   *charbuf++ = '/';
3154                   *charbuf++ = dim;
3155                   *charbuf++ = BYTE8_TO_CHAR (M);
3156                   *charbuf++ = BYTE8_TO_CHAR (L);
3157                   while (size-- > 0)
3158                     {
3159                       ONE_MORE_BYTE (c1);
3160                       *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3161                     }
3162                 }
3163               else if (c1 == 'G')
3164                 {
3165                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3166                      ESC % G --UTF-8-BYTES-- ESC % @
3167                      We keep these bytes as is for the moment.
3168                      They may be decoded by post-read-conversion.  */
3169                   int *p = charbuf;
3170
3171                   if (p + 6 > charbuf_end)
3172                     goto break_loop;
3173                   *p++ = ISO_CODE_ESC;
3174                   *p++ = '%';
3175                   *p++ = 'G';
3176                   while (p < charbuf_end)
3177                     {
3178                       ONE_MORE_BYTE (c1);
3179                       if (c1 == ISO_CODE_ESC
3180                           && src + 1 < src_end
3181                           && src[0] == '%'
3182                           && src[1] == '@')
3183                         break;
3184                       *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3185                     }
3186                   if (p + 3 > charbuf_end)
3187                     goto break_loop;
3188                   *p++ = ISO_CODE_ESC;
3189                   *p++ = '%';
3190                   *p++ = '@';
3191                   charbuf = p;
3192                 }
3193               else
3194                 goto invalid_code;
3195               continue;
3196               break;
3197
3198             default:
3199               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3200                 goto invalid_code;
3201               if (c1 >= 0x28 && c1 <= 0x2B)
3202                 {       /* designation of DIMENSION1_CHARS94 character set */
3203                   ONE_MORE_BYTE (c2);
3204                   DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
3205                 }
3206               else if (c1 >= 0x2C && c1 <= 0x2F)
3207                 {       /* designation of DIMENSION1_CHARS96 character set */
3208                   ONE_MORE_BYTE (c2);
3209                   DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
3210                 }
3211               else
3212                 goto invalid_code;
3213               /* We must update these variables now.  */
3214               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3215               charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3216               continue;
3217             }
3218         }
3219
3220       if (charset->id != charset_ascii
3221           && last_id != charset->id)
3222         {
3223           if (last_id != charset_ascii)
3224             ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3225           last_id = charset->id;
3226           last_offset = char_offset;
3227         }
3228
3229       /* Now we know CHARSET and 1st position code C1 of a character.
3230          Produce a decoded character while getting 2nd position code
3231          C2 if necessary.  */
3232       c1 &= 0x7F;
3233       if (CHARSET_DIMENSION (charset) > 1)
3234         {
3235           ONE_MORE_BYTE (c2);
3236           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3237             /* C2 is not in a valid range.  */
3238             goto invalid_code;
3239           c1 = (c1 << 8) | (c2 & 0x7F);
3240           if (CHARSET_DIMENSION (charset) > 2)
3241             {
3242               ONE_MORE_BYTE (c2);
3243               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3244                 /* C2 is not in a valid range.  */
3245                 goto invalid_code;
3246               c1 = (c1 << 8) | (c2 & 0x7F);
3247             }
3248         }
3249
3250       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3251       if (c < 0)
3252         {
3253           MAYBE_FINISH_COMPOSITION ();
3254           for (; src_base < src; src_base++, char_offset++)
3255             {
3256               if (ASCII_BYTE_P (*src_base))
3257                 *charbuf++ = *src_base;
3258               else
3259                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3260             }
3261         }
3262       else if (composition_state == COMPOSING_NO)
3263         {
3264           *charbuf++ = c;
3265           char_offset++;
3266         }
3267       else
3268         {
3269           components[component_idx++] = c;
3270           if (method == COMPOSITION_WITH_RULE
3271               || (method == COMPOSITION_WITH_RULE_ALTCHARS
3272                   && composition_state == COMPOSING_COMPONENT_CHAR))
3273             composition_state++;
3274         }
3275       continue;
3276
3277     invalid_code:
3278       MAYBE_FINISH_COMPOSITION ();
3279       src = src_base;
3280       consumed_chars = consumed_chars_base;
3281       ONE_MORE_BYTE (c);
3282       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3283       char_offset++;
3284       coding->errors++;
3285       continue;
3286
3287     break_loop:
3288       break;
3289     }
3290
3291  no_more_source:
3292   if (last_id != charset_ascii)
3293     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3294   coding->consumed_char += consumed_chars_base;
3295   coding->consumed = src_base - coding->source;
3296   coding->charbuf_used = charbuf - coding->charbuf;
3297 }
3298
3299
3300 /* ISO2022 encoding stuff.  */
3301
3302 /*
3303    It is not enough to say just "ISO2022" on encoding, we have to
3304    specify more details.  In Emacs, each coding system of ISO2022
3305    variant has the following specifications:
3306         1. Initial designation to G0 thru G3.
3307         2. Allows short-form designation?
3308         3. ASCII should be designated to G0 before control characters?
3309         4. ASCII should be designated to G0 at end of line?
3310         5. 7-bit environment or 8-bit environment?
3311         6. Use locking-shift?
3312         7. Use Single-shift?
3313    And the following two are only for Japanese:
3314         8. Use ASCII in place of JIS0201-1976-Roman?
3315         9. Use JISX0208-1983 in place of JISX0208-1978?
3316    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3317    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
3318    details.
3319 */
3320
3321 /* Produce codes (escape sequence) for designating CHARSET to graphic
3322    register REG at DST, and increment DST.  If <final-char> of CHARSET is
3323    '@', 'A', or 'B' and the coding system CODING allows, produce
3324    designation sequence of short-form.  */
3325
3326 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
3327   do {                                                                  \
3328     unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
3329     char *intermediate_char_94 = "()*+";                                \
3330     char *intermediate_char_96 = ",-./";                                \
3331     int revision = -1;                                                  \
3332     int c;                                                              \
3333                                                                         \
3334     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
3335       revision = CHARSET_ISO_REVISION (charset);                        \
3336                                                                         \
3337     if (revision >= 0)                                                  \
3338       {                                                                 \
3339         EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
3340         EMIT_ONE_BYTE ('@' + revision);                                 \
3341       }                                                                 \
3342     EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
3343     if (CHARSET_DIMENSION (charset) == 1)                               \
3344       {                                                                 \
3345         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3346           c = intermediate_char_94[reg];                                \
3347         else                                                            \
3348           c = intermediate_char_96[reg];                                \
3349         EMIT_ONE_ASCII_BYTE (c);                                        \
3350       }                                                                 \
3351     else                                                                \
3352       {                                                                 \
3353         EMIT_ONE_ASCII_BYTE ('$');                                      \
3354         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3355           {                                                             \
3356             if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM   \
3357                 || reg != 0                                             \
3358                 || final_char < '@' || final_char > 'B')                \
3359               EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);          \
3360           }                                                             \
3361         else                                                            \
3362           EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
3363       }                                                                 \
3364     EMIT_ONE_ASCII_BYTE (final_char);                                   \
3365                                                                         \
3366     CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
3367   } while (0)
3368
3369
3370 /* The following two macros produce codes (control character or escape
3371    sequence) for ISO2022 single-shift functions (single-shift-2 and
3372    single-shift-3).  */
3373
3374 #define ENCODE_SINGLE_SHIFT_2                                           \
3375   do {                                                                  \
3376     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3377       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
3378     else                                                                \
3379       EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
3380     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3381   } while (0)
3382
3383
3384 #define ENCODE_SINGLE_SHIFT_3                                           \
3385   do {                                                                  \
3386     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3387       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
3388     else                                                                \
3389       EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
3390     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3391   } while (0)
3392
3393
3394 /* The following four macros produce codes (control character or
3395    escape sequence) for ISO2022 locking-shift functions (shift-in,
3396    shift-out, locking-shift-2, and locking-shift-3).  */
3397
3398 #define ENCODE_SHIFT_IN                                 \
3399   do {                                                  \
3400     EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
3401     CODING_ISO_INVOCATION (coding, 0) = 0;              \
3402   } while (0)
3403
3404
3405 #define ENCODE_SHIFT_OUT                                \
3406   do {                                                  \
3407     EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
3408     CODING_ISO_INVOCATION (coding, 0) = 1;              \
3409   } while (0)
3410
3411
3412 #define ENCODE_LOCKING_SHIFT_2                          \
3413   do {                                                  \
3414     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3415     CODING_ISO_INVOCATION (coding, 0) = 2;              \
3416   } while (0)
3417
3418
3419 #define ENCODE_LOCKING_SHIFT_3                          \
3420   do {                                                  \
3421     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3422     CODING_ISO_INVOCATION (coding, 0) = 3;              \
3423   } while (0)
3424
3425
3426 /* Produce codes for a DIMENSION1 character whose character set is
3427    CHARSET and whose position-code is C1.  Designation and invocation
3428    sequences are also produced in advance if necessary.  */
3429
3430 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
3431   do {                                                                  \
3432     int id = CHARSET_ID (charset);                                      \
3433                                                                         \
3434     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
3435         && id == charset_ascii)                                         \
3436       {                                                                 \
3437         id = charset_jisx0201_roman;                                    \
3438         charset = CHARSET_FROM_ID (id);                                 \
3439       }                                                                 \
3440                                                                         \
3441     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3442       {                                                                 \
3443         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3444           EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                              \
3445         else                                                            \
3446           EMIT_ONE_BYTE (c1 | 0x80);                                    \
3447         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3448         break;                                                          \
3449       }                                                                 \
3450     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3451       {                                                                 \
3452         EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                                \
3453         break;                                                          \
3454       }                                                                 \
3455     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3456       {                                                                 \
3457         EMIT_ONE_BYTE (c1 | 0x80);                                      \
3458         break;                                                          \
3459       }                                                                 \
3460     else                                                                \
3461       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3462          must invoke it, or, at first, designate it to some graphic     \
3463          register.  Then repeat the loop to actually produce the        \
3464          character.  */                                                 \
3465       dst = encode_invocation_designation (charset, coding, dst,        \
3466                                            &produced_chars);            \
3467   } while (1)
3468
3469
3470 /* Produce codes for a DIMENSION2 character whose character set is
3471    CHARSET and whose position-codes are C1 and C2.  Designation and
3472    invocation codes are also produced in advance if necessary.  */
3473
3474 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
3475   do {                                                                  \
3476     int id = CHARSET_ID (charset);                                      \
3477                                                                         \
3478     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
3479         && id == charset_jisx0208)                                      \
3480       {                                                                 \
3481         id = charset_jisx0208_1978;                                     \
3482         charset = CHARSET_FROM_ID (id);                                 \
3483       }                                                                 \
3484                                                                         \
3485     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3486       {                                                                 \
3487         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3488           EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
3489         else                                                            \
3490           EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
3491         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3492         break;                                                          \
3493       }                                                                 \
3494     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3495       {                                                                 \
3496         EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
3497         break;                                                          \
3498       }                                                                 \
3499     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3500       {                                                                 \
3501         EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
3502         break;                                                          \
3503       }                                                                 \
3504     else                                                                \
3505       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3506          must invoke it, or, at first, designate it to some graphic     \
3507          register.  Then repeat the loop to actually produce the        \
3508          character.  */                                                 \
3509       dst = encode_invocation_designation (charset, coding, dst,        \
3510                                            &produced_chars);            \
3511   } while (1)
3512
3513
3514 #define ENCODE_ISO_CHARACTER(charset, c)                                   \
3515   do {                                                                     \
3516     int code = ENCODE_CHAR ((charset),(c));                                \
3517                                                                            \
3518     if (CHARSET_DIMENSION (charset) == 1)                                  \
3519       ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
3520     else                                                                   \
3521       ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3522   } while (0)
3523
3524
3525 /* Produce designation and invocation codes at a place pointed by DST
3526    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
3527    Return new DST.  */
3528
3529 unsigned char *
3530 encode_invocation_designation (charset, coding, dst, p_nchars)
3531      struct charset *charset;
3532      struct coding_system *coding;
3533      unsigned char *dst;
3534      int *p_nchars;
3535 {
3536   int multibytep = coding->dst_multibyte;
3537   int produced_chars = *p_nchars;
3538   int reg;                      /* graphic register number */
3539   int id = CHARSET_ID (charset);
3540
3541   /* At first, check designations.  */
3542   for (reg = 0; reg < 4; reg++)
3543     if (id == CODING_ISO_DESIGNATION (coding, reg))
3544       break;
3545
3546   if (reg >= 4)
3547     {
3548       /* CHARSET is not yet designated to any graphic registers.  */
3549       /* At first check the requested designation.  */
3550       reg = CODING_ISO_REQUEST (coding, id);
3551       if (reg < 0)
3552         /* Since CHARSET requests no special designation, designate it
3553            to graphic register 0.  */
3554         reg = 0;
3555
3556       ENCODE_DESIGNATION (charset, reg, coding);
3557     }
3558
3559   if (CODING_ISO_INVOCATION (coding, 0) != reg
3560       && CODING_ISO_INVOCATION (coding, 1) != reg)
3561     {
3562       /* Since the graphic register REG is not invoked to any graphic
3563          planes, invoke it to graphic plane 0.  */
3564       switch (reg)
3565         {
3566         case 0:                 /* graphic register 0 */
3567           ENCODE_SHIFT_IN;
3568           break;
3569
3570         case 1:                 /* graphic register 1 */
3571           ENCODE_SHIFT_OUT;
3572           break;
3573
3574         case 2:                 /* graphic register 2 */
3575           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3576             ENCODE_SINGLE_SHIFT_2;
3577           else
3578             ENCODE_LOCKING_SHIFT_2;
3579           break;
3580
3581         case 3:                 /* graphic register 3 */
3582           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3583             ENCODE_SINGLE_SHIFT_3;
3584           else
3585             ENCODE_LOCKING_SHIFT_3;
3586           break;
3587         }
3588     }
3589
3590   *p_nchars = produced_chars;
3591   return dst;
3592 }
3593
3594 /* The following three macros produce codes for indicating direction
3595    of text.  */
3596 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
3597   do {                                                                  \
3598     if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS)        \
3599       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
3600     else                                                                \
3601       EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
3602   } while (0)
3603
3604
3605 #define ENCODE_DIRECTION_R2L()                  \
3606   do {                                          \
3607     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3608     EMIT_TWO_ASCII_BYTES ('2', ']');            \
3609   } while (0)
3610
3611
3612 #define ENCODE_DIRECTION_L2R()                  \
3613   do {                                          \
3614     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3615     EMIT_TWO_ASCII_BYTES ('0', ']');            \
3616   } while (0)
3617
3618
3619 /* Produce codes for designation and invocation to reset the graphic
3620    planes and registers to initial state.  */
3621 #define ENCODE_RESET_PLANE_AND_REGISTER()                               \
3622   do {                                                                  \
3623     int reg;                                                            \
3624     struct charset *charset;                                            \
3625                                                                         \
3626     if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
3627       ENCODE_SHIFT_IN;                                                  \
3628     for (reg = 0; reg < 4; reg++)                                       \
3629       if (CODING_ISO_INITIAL (coding, reg) >= 0                         \
3630           && (CODING_ISO_DESIGNATION (coding, reg)                      \
3631               != CODING_ISO_INITIAL (coding, reg)))                     \
3632         {                                                               \
3633           charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3634           ENCODE_DESIGNATION (charset, reg, coding);                    \
3635         }                                                               \
3636   } while (0)
3637
3638
3639 /* Produce designation sequences of charsets in the line started from
3640    SRC to a place pointed by DST, and return updated DST.
3641
3642    If the current block ends before any end-of-line, we may fail to
3643    find all the necessary designations.  */
3644
3645 static unsigned char *
3646 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3647      struct coding_system *coding;
3648      int *charbuf, *charbuf_end;
3649      unsigned char *dst;
3650 {
3651   struct charset *charset;
3652   /* Table of charsets to be designated to each graphic register.  */
3653   int r[4];
3654   int c, found = 0, reg;
3655   int produced_chars = 0;
3656   int multibytep = coding->dst_multibyte;
3657   Lisp_Object attrs;
3658   Lisp_Object charset_list;
3659
3660   attrs = CODING_ID_ATTRS (coding->id);
3661   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3662   if (EQ (charset_list, Qiso_2022))
3663     charset_list = Viso_2022_charset_list;
3664
3665   for (reg = 0; reg < 4; reg++)
3666     r[reg] = -1;
3667
3668   while (found < 4)
3669     {
3670       int id;
3671
3672       c = *charbuf++;
3673       if (c == '\n')
3674         break;
3675       charset = char_charset (c, charset_list, NULL);
3676       id = CHARSET_ID (charset);
3677       reg = CODING_ISO_REQUEST (coding, id);
3678       if (reg >= 0 && r[reg] < 0)
3679         {
3680           found++;
3681           r[reg] = id;
3682         }
3683     }
3684
3685   if (found)
3686     {
3687       for (reg = 0; reg < 4; reg++)
3688         if (r[reg] >= 0
3689             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3690           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3691     }
3692
3693   return dst;
3694 }
3695
3696 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
3697
3698 static int
3699 encode_coding_iso_2022 (coding)
3700      struct coding_system *coding;
3701 {
3702   int multibytep = coding->dst_multibyte;
3703   int *charbuf = coding->charbuf;
3704   int *charbuf_end = charbuf + coding->charbuf_used;
3705   unsigned char *dst = coding->destination + coding->produced;
3706   unsigned char *dst_end = coding->destination + coding->dst_bytes;
3707   int safe_room = 16;
3708   int bol_designation
3709     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3710        && CODING_ISO_BOL (coding));
3711   int produced_chars = 0;
3712   Lisp_Object attrs, eol_type, charset_list;
3713   int ascii_compatible;
3714   int c;
3715   int preferred_charset_id = -1;
3716
3717   CODING_GET_INFO (coding, attrs, charset_list);
3718   eol_type = CODING_ID_EOL_TYPE (coding->id);
3719   if (VECTORP (eol_type))
3720     eol_type = Qunix;
3721
3722   setup_iso_safe_charsets (attrs);
3723   /* Charset list may have been changed.  */
3724   charset_list = CODING_ATTR_CHARSET_LIST (attrs);              \
3725   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
3726
3727   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3728
3729   while (charbuf < charbuf_end)
3730     {
3731       ASSURE_DESTINATION (safe_room);
3732
3733       if (bol_designation)
3734         {
3735           unsigned char *dst_prev = dst;
3736
3737           /* We have to produce designation sequences if any now.  */
3738           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3739           bol_designation = 0;
3740           /* We are sure that designation sequences are all ASCII bytes.  */
3741           produced_chars += dst - dst_prev;
3742         }
3743
3744       c = *charbuf++;
3745
3746       if (c < 0)
3747         {
3748           /* Handle an annotation.  */
3749           switch (*charbuf)
3750             {
3751             case CODING_ANNOTATE_COMPOSITION_MASK:
3752               /* Not yet implemented.  */
3753               break;
3754             case CODING_ANNOTATE_CHARSET_MASK:
3755               preferred_charset_id = charbuf[3];
3756               if (preferred_charset_id >= 0
3757                   && NILP (Fmemq (make_number (preferred_charset_id),
3758                                   charset_list)))
3759                 preferred_charset_id = -1;
3760               break;
3761             default:
3762               abort ();
3763             }
3764           charbuf += -c - 1;
3765           continue;
3766         }
3767
3768       /* Now encode the character C.  */
3769       if (c < 0x20 || c == 0x7F)
3770         {
3771           if (c == '\n'
3772               || (c == '\r' && EQ (eol_type, Qmac)))
3773             {
3774               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3775                 ENCODE_RESET_PLANE_AND_REGISTER ();
3776               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
3777                 {
3778                   int i;
3779
3780                   for (i = 0; i < 4; i++)
3781                     CODING_ISO_DESIGNATION (coding, i)
3782                       = CODING_ISO_INITIAL (coding, i);
3783                 }
3784               bol_designation
3785                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
3786             }
3787           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3788             ENCODE_RESET_PLANE_AND_REGISTER ();
3789           EMIT_ONE_ASCII_BYTE (c);
3790         }
3791       else if (ASCII_CHAR_P (c))
3792         {
3793           if (ascii_compatible)
3794             EMIT_ONE_ASCII_BYTE (c);
3795           else
3796             {
3797               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3798               ENCODE_ISO_CHARACTER (charset, c);
3799             }
3800         }
3801       else if (CHAR_BYTE8_P (c))
3802         {
3803           c = CHAR_TO_BYTE8 (c);
3804           EMIT_ONE_BYTE (c);
3805         }
3806       else
3807         {
3808           struct charset *charset;
3809
3810           if (preferred_charset_id >= 0)
3811             {
3812               charset = CHARSET_FROM_ID (preferred_charset_id);
3813               if (! CHAR_CHARSET_P (c, charset))
3814                 charset = char_charset (c, charset_list, NULL);
3815             }
3816           else
3817             charset = char_charset (c, charset_list, NULL);
3818           if (!charset)
3819             {
3820               if (coding->mode & CODING_MODE_SAFE_ENCODING)
3821                 {
3822                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3823                   charset = CHARSET_FROM_ID (charset_ascii);
3824                 }
3825               else
3826                 {
3827                   c = coding->default_char;
3828                   charset = char_charset (c, charset_list, NULL);
3829                 }
3830             }
3831           ENCODE_ISO_CHARACTER (charset, c);
3832         }
3833     }
3834
3835   if (coding->mode & CODING_MODE_LAST_BLOCK
3836       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3837     {
3838       ASSURE_DESTINATION (safe_room);
3839       ENCODE_RESET_PLANE_AND_REGISTER ();
3840     }
3841   record_conversion_result (coding, CODING_RESULT_SUCCESS);
3842   CODING_ISO_BOL (coding) = bol_designation;
3843   coding->produced_char += produced_chars;
3844   coding->produced = dst - coding->destination;
3845   return 0;
3846 }
3847
3848 \f
3849 /*** 8,9. SJIS and BIG5 handlers ***/
3850
3851 /* Although SJIS and BIG5 are not ISO's coding system, they are used
3852    quite widely.  So, for the moment, Emacs supports them in the bare
3853    C code.  But, in the future, they may be supported only by CCL.  */
3854
3855 /* SJIS is a coding system encoding three character sets: ASCII, right
3856    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
3857    as is.  A character of charset katakana-jisx0201 is encoded by
3858    "position-code + 0x80".  A character of charset japanese-jisx0208
3859    is encoded in 2-byte but two position-codes are divided and shifted
3860    so that it fit in the range below.
3861
3862    --- CODE RANGE of SJIS ---
3863    (character set)      (range)
3864    ASCII                0x00 .. 0x7F
3865    KATAKANA-JISX0201    0xA0 .. 0xDF
3866    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
3867             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
3868    -------------------------------
3869
3870 */
3871
3872 /* BIG5 is a coding system encoding two character sets: ASCII and
3873    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
3874    character set and is encoded in two-byte.
3875
3876    --- CODE RANGE of BIG5 ---
3877    (character set)      (range)
3878    ASCII                0x00 .. 0x7F
3879    Big5 (1st byte)      0xA1 .. 0xFE
3880         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
3881    --------------------------
3882
3883   */
3884
3885 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3886    Check if a text is encoded in SJIS.  If it is, return
3887    CATEGORY_MASK_SJIS, else return 0.  */
3888
3889 static int
3890 detect_coding_sjis (coding, detect_info)
3891      struct coding_system *coding;
3892      struct coding_detection_info *detect_info;
3893 {
3894   const unsigned char *src = coding->source, *src_base;
3895   const unsigned char *src_end = coding->source + coding->src_bytes;
3896   int multibytep = coding->src_multibyte;
3897   int consumed_chars = 0;
3898   int found = 0;
3899   int c;
3900
3901   detect_info->checked |= CATEGORY_MASK_SJIS;
3902   /* A coding system of this category is always ASCII compatible.  */
3903   src += coding->head_ascii;
3904
3905   while (1)
3906     {
3907       src_base = src;
3908       ONE_MORE_BYTE (c);
3909       if (c < 0x80)
3910         continue;
3911       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
3912         {
3913           ONE_MORE_BYTE (c);
3914           if (c < 0x40 || c == 0x7F || c > 0xFC)
3915             break;
3916           found = CATEGORY_MASK_SJIS;
3917         }
3918       else if (c >= 0xA0 && c < 0xE0)
3919         found = CATEGORY_MASK_SJIS;
3920       else
3921         break;
3922     }
3923   detect_info->rejected |= CATEGORY_MASK_SJIS;
3924   return 0;
3925
3926  no_more_source:
3927   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
3928     {
3929       detect_info->rejected |= CATEGORY_MASK_SJIS;
3930       return 0;
3931     }
3932   detect_info->found |= found;
3933   return 1;
3934 }
3935
3936 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3937    Check if a text is encoded in BIG5.  If it is, return
3938    CATEGORY_MASK_BIG5, else return 0.  */
3939
3940 static int
3941 detect_coding_big5 (coding, detect_info)
3942      struct coding_system *coding;
3943      struct coding_detection_info *detect_info;
3944 {
3945   const unsigned char *src = coding->source, *src_base;
3946   const unsigned char *src_end = coding->source + coding->src_bytes;
3947   int multibytep = coding->src_multibyte;
3948   int consumed_chars = 0;
3949   int found = 0;
3950   int c;
3951
3952   detect_info->checked |= CATEGORY_MASK_BIG5;
3953   /* A coding system of this category is always ASCII compatible.  */
3954   src += coding->head_ascii;
3955
3956   while (1)
3957     {
3958       src_base = src;
3959       ONE_MORE_BYTE (c);
3960       if (c < 0x80)
3961         continue;
3962       if (c >= 0xA1)
3963         {
3964           ONE_MORE_BYTE (c);
3965           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
3966             return 0;
3967           found = CATEGORY_MASK_BIG5;
3968         }
3969       else
3970         break;
3971     }
3972   detect_info->rejected |= CATEGORY_MASK_BIG5;
3973   return 0;
3974
3975  no_more_source:
3976   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
3977     {
3978       detect_info->rejected |= CATEGORY_MASK_BIG5;
3979       return 0;
3980     }
3981   detect_info->found |= found;
3982   return 1;
3983 }
3984
3985 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3986    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
3987
3988 static void
3989 decode_coding_sjis (coding)
3990      struct coding_system *coding;
3991 {
3992   const unsigned char *src = coding->source + coding->consumed;
3993   const unsigned char *src_end = coding->source + coding->src_bytes;
3994   const unsigned char *src_base;
3995   int *charbuf = coding->charbuf;
3996   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
3997   int consumed_chars = 0, consumed_chars_base;
3998   int multibytep = coding->src_multibyte;
3999   struct charset *charset_roman, *charset_kanji, *charset_kana;
4000   Lisp_Object attrs, charset_list, val;
4001   int char_offset = coding->produced_char;
4002   int last_offset = char_offset;
4003   int last_id = charset_ascii;
4004
4005   CODING_GET_INFO (coding, attrs, charset_list);
4006
4007   val = charset_list;
4008   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4009   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4010   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
4011
4012   while (1)
4013     {
4014       int c, c1;
4015       struct charset *charset;
4016
4017       src_base = src;
4018       consumed_chars_base = consumed_chars;
4019
4020       if (charbuf >= charbuf_end)
4021         break;
4022
4023       ONE_MORE_BYTE (c);
4024       if (c < 0)
4025         goto invalid_code;
4026       if (c < 0x80)
4027         charset = charset_roman;
4028       else
4029         {
4030           if (c >= 0xF0)
4031             goto invalid_code;
4032           if (c < 0xA0 || c >= 0xE0)
4033             {
4034               /* SJIS -> JISX0208 */
4035               ONE_MORE_BYTE (c1);
4036               if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4037                 goto invalid_code;
4038               c = (c << 8) | c1;
4039               SJIS_TO_JIS (c);
4040               charset = charset_kanji;
4041             }
4042           else if (c > 0xA0)
4043             {
4044               /* SJIS -> JISX0201-Kana */
4045               c &= 0x7F;
4046               charset = charset_kana;
4047             }
4048           else
4049             goto invalid_code;
4050         }
4051       if (charset->id != charset_ascii
4052           && last_id != charset->id)
4053         {
4054           if (last_id != charset_ascii)
4055             ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4056           last_id = charset->id;
4057           last_offset = char_offset;
4058         }
4059       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4060       *charbuf++ = c;
4061       char_offset++;
4062       continue;
4063
4064     invalid_code:
4065       src = src_base;
4066       consumed_chars = consumed_chars_base;
4067       ONE_MORE_BYTE (c);
4068       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4069       char_offset++;
4070       coding->errors++;
4071     }
4072
4073  no_more_source:
4074   if (last_id != charset_ascii)
4075     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4076   coding->consumed_char += consumed_chars_base;
4077   coding->consumed = src_base - coding->source;
4078   coding->charbuf_used = charbuf - coding->charbuf;
4079 }
4080
4081 static void
4082 decode_coding_big5 (coding)
4083      struct coding_system *coding;
4084 {
4085   const unsigned char *src = coding->source + coding->consumed;
4086   const unsigned char *src_end = coding->source + coding->src_bytes;
4087   const unsigned char *src_base;
4088   int *charbuf = coding->charbuf;
4089   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4090   int consumed_chars = 0, consumed_chars_base;
4091   int multibytep = coding->src_multibyte;
4092   struct charset *charset_roman, *charset_big5;
4093   Lisp_Object attrs, charset_list, val;
4094   int char_offset = coding->produced_char;
4095   int last_offset = char_offset;
4096   int last_id = charset_ascii;
4097
4098   CODING_GET_INFO (coding, attrs, charset_list);
4099   val = charset_list;
4100   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4101   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4102
4103   while (1)
4104     {
4105       int c, c1;
4106       struct charset *charset;
4107
4108       src_base = src;
4109       consumed_chars_base = consumed_chars;
4110
4111       if (charbuf >= charbuf_end)
4112         break;
4113
4114       ONE_MORE_BYTE (c);
4115
4116       if (c < 0)
4117         goto invalid_code;
4118       if (c < 0x80)
4119         charset = charset_roman;
4120       else
4121         {
4122           /* BIG5 -> Big5 */
4123           if (c < 0xA1 || c > 0xFE)
4124             goto invalid_code;
4125           ONE_MORE_BYTE (c1);
4126           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4127             goto invalid_code;
4128           c = c << 8 | c1;
4129           charset = charset_big5;
4130         }
4131       if (charset->id != charset_ascii
4132           && last_id != charset->id)
4133         {
4134           if (last_id != charset_ascii)
4135             ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4136           last_id = charset->id;
4137           last_offset = char_offset;
4138         }
4139       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4140       *charbuf++ = c;
4141       char_offset++;
4142       continue;
4143
4144     invalid_code:
4145       src = src_base;
4146       consumed_chars = consumed_chars_base;
4147       ONE_MORE_BYTE (c);
4148       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4149       char_offset++;
4150       coding->errors++;
4151     }
4152
4153  no_more_source:
4154   if (last_id != charset_ascii)
4155     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4156   coding->consumed_char += consumed_chars_base;
4157   coding->consumed = src_base - coding->source;
4158   coding->charbuf_used = charbuf - coding->charbuf;
4159 }
4160
4161 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4162    This function can encode charsets `ascii', `katakana-jisx0201',
4163    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4164    are sure that all these charsets are registered as official charset
4165    (i.e. do not have extended leading-codes).  Characters of other
4166    charsets are produced without any encoding.  If SJIS_P is 1, encode
4167    SJIS text, else encode BIG5 text.  */
4168
4169 static int
4170 encode_coding_sjis (coding)
4171      struct coding_system *coding;
4172 {
4173   int multibytep = coding->dst_multibyte;
4174   int *charbuf = coding->charbuf;
4175   int *charbuf_end = charbuf + coding->charbuf_used;
4176   unsigned char *dst = coding->destination + coding->produced;
4177   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4178   int safe_room = 4;
4179   int produced_chars = 0;
4180   Lisp_Object attrs, charset_list, val;
4181   int ascii_compatible;
4182   struct charset *charset_roman, *charset_kanji, *charset_kana;
4183   int c;
4184
4185   CODING_GET_INFO (coding, attrs, charset_list);
4186   val = charset_list;
4187   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4188   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4189   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
4190
4191   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4192
4193   while (charbuf < charbuf_end)
4194     {
4195       ASSURE_DESTINATION (safe_room);
4196       c = *charbuf++;
4197       /* Now encode the character C.  */
4198       if (ASCII_CHAR_P (c) && ascii_compatible)
4199         EMIT_ONE_ASCII_BYTE (c);
4200       else if (CHAR_BYTE8_P (c))
4201         {
4202           c = CHAR_TO_BYTE8 (c);
4203           EMIT_ONE_BYTE (c);
4204         }
4205       else
4206         {
4207           unsigned code;
4208           struct charset *charset = char_charset (c, charset_list, &code);
4209
4210           if (!charset)
4211             {
4212               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4213                 {
4214                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4215                   charset = CHARSET_FROM_ID (charset_ascii);
4216                 }
4217               else
4218                 {
4219                   c = coding->default_char;
4220                   charset = char_charset (c, charset_list, &code);
4221                 }
4222             }
4223           if (code == CHARSET_INVALID_CODE (charset))
4224             abort ();
4225           if (charset == charset_kanji)
4226             {
4227               int c1, c2;
4228               JIS_TO_SJIS (code);
4229               c1 = code >> 8, c2 = code & 0xFF;
4230               EMIT_TWO_BYTES (c1, c2);
4231             }
4232           else if (charset == charset_kana)
4233             EMIT_ONE_BYTE (code | 0x80);
4234           else
4235             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4236         }
4237     }
4238   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4239   coding->produced_char += produced_chars;
4240   coding->produced = dst - coding->destination;
4241   return 0;
4242 }
4243
4244 static int
4245 encode_coding_big5 (coding)
4246      struct coding_system *coding;
4247 {
4248   int multibytep = coding->dst_multibyte;
4249   int *charbuf = coding->charbuf;
4250   int *charbuf_end = charbuf + coding->charbuf_used;
4251   unsigned char *dst = coding->destination + coding->produced;
4252   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4253   int safe_room = 4;
4254   int produced_chars = 0;
4255   Lisp_Object attrs, charset_list, val;
4256   int ascii_compatible;
4257   struct charset *charset_roman, *charset_big5;
4258   int c;
4259
4260   CODING_GET_INFO (coding, attrs, charset_list);
4261   val = charset_list;
4262   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4263   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4264   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4265
4266   while (charbuf < charbuf_end)
4267     {
4268       ASSURE_DESTINATION (safe_room);
4269       c = *charbuf++;
4270       /* Now encode the character C.  */
4271       if (ASCII_CHAR_P (c) && ascii_compatible)
4272         EMIT_ONE_ASCII_BYTE (c);
4273       else if (CHAR_BYTE8_P (c))
4274         {
4275           c = CHAR_TO_BYTE8 (c);
4276           EMIT_ONE_BYTE (c);
4277         }
4278       else
4279         {
4280           unsigned code;
4281           struct charset *charset = char_charset (c, charset_list, &code);
4282
4283           if (! charset)
4284             {
4285               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4286                 {
4287                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4288                   charset = CHARSET_FROM_ID (charset_ascii);
4289                 }
4290               else
4291                 {
4292                   c = coding->default_char;
4293                   charset = char_charset (c, charset_list, &code);
4294                 }
4295             }
4296           if (code == CHARSET_INVALID_CODE (charset))
4297             abort ();
4298           if (charset == charset_big5)
4299             {
4300               int c1, c2;
4301
4302               c1 = code >> 8, c2 = code & 0xFF;
4303               EMIT_TWO_BYTES (c1, c2);
4304             }
4305           else
4306             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4307         }
4308     }
4309   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4310   coding->produced_char += produced_chars;
4311   coding->produced = dst - coding->destination;
4312   return 0;
4313 }
4314
4315 \f
4316 /*** 10. CCL handlers ***/
4317
4318 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4319    Check if a text is encoded in a coding system of which
4320    encoder/decoder are written in CCL program.  If it is, return
4321    CATEGORY_MASK_CCL, else return 0.  */
4322
4323 static int
4324 detect_coding_ccl (coding, detect_info)
4325      struct coding_system *coding;
4326      struct coding_detection_info *detect_info;
4327 {
4328   const unsigned char *src = coding->source, *src_base;
4329   const unsigned char *src_end = coding->source + coding->src_bytes;
4330   int multibytep = coding->src_multibyte;
4331   int consumed_chars = 0;
4332   int found = 0;
4333   unsigned char *valids = CODING_CCL_VALIDS (coding);
4334   int head_ascii = coding->head_ascii;
4335   Lisp_Object attrs;
4336
4337   detect_info->checked |= CATEGORY_MASK_CCL;
4338
4339   coding = &coding_categories[coding_category_ccl];
4340   attrs = CODING_ID_ATTRS (coding->id);
4341   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4342     src += head_ascii;
4343
4344   while (1)
4345     {
4346       int c;
4347
4348       src_base = src;
4349       ONE_MORE_BYTE (c);
4350       if (c < 0 || ! valids[c])
4351         break;
4352       if ((valids[c] > 1))
4353         found = CATEGORY_MASK_CCL;
4354     }
4355   detect_info->rejected |= CATEGORY_MASK_CCL;
4356   return 0;
4357
4358  no_more_source:
4359   detect_info->found |= found;
4360   return 1;
4361 }
4362
4363 static void
4364 decode_coding_ccl (coding)
4365      struct coding_system *coding;
4366 {
4367   const unsigned char *src = coding->source + coding->consumed;
4368   const unsigned char *src_end = coding->source + coding->src_bytes;
4369   int *charbuf = coding->charbuf;
4370   int *charbuf_end = charbuf + coding->charbuf_size;
4371   int consumed_chars = 0;
4372   int multibytep = coding->src_multibyte;
4373   struct ccl_program ccl;
4374   int source_charbuf[1024];
4375   int source_byteidx[1024];
4376   Lisp_Object attrs, charset_list;
4377
4378   CODING_GET_INFO (coding, attrs, charset_list);
4379   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4380
4381   while (src < src_end)
4382     {
4383       const unsigned char *p = src;
4384       int *source, *source_end;
4385       int i = 0;
4386
4387       if (multibytep)
4388         while (i < 1024 && p < src_end)
4389           {
4390             source_byteidx[i] = p - src;
4391             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4392           }
4393       else
4394         while (i < 1024 && p < src_end)
4395           source_charbuf[i++] = *p++;
4396
4397       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4398         ccl.last_block = 1;
4399
4400       source = source_charbuf;
4401       source_end = source + i;
4402       while (source < source_end)
4403         {
4404           ccl_driver (&ccl, source, charbuf,
4405                       source_end - source, charbuf_end - charbuf,
4406                       charset_list);
4407           source += ccl.consumed;
4408           charbuf += ccl.produced;
4409           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4410             break;
4411         }
4412       if (source < source_end)
4413         src += source_byteidx[source - source_charbuf];
4414       else
4415         src = p;
4416       consumed_chars += source - source_charbuf;
4417
4418       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4419           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4420         break;
4421     }
4422
4423   switch (ccl.status)
4424     {
4425     case CCL_STAT_SUSPEND_BY_SRC:
4426       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4427       break;
4428     case CCL_STAT_SUSPEND_BY_DST:
4429       break;
4430     case CCL_STAT_QUIT:
4431     case CCL_STAT_INVALID_CMD:
4432       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4433       break;
4434     default:
4435       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4436       break;
4437     }
4438   coding->consumed_char += consumed_chars;
4439   coding->consumed = src - coding->source;
4440   coding->charbuf_used = charbuf - coding->charbuf;
4441 }
4442
4443 static int
4444 encode_coding_ccl (coding)
4445      struct coding_system *coding;
4446 {
4447   struct ccl_program ccl;
4448   int multibytep = coding->dst_multibyte;
4449   int *charbuf = coding->charbuf;
4450   int *charbuf_end = charbuf + coding->charbuf_used;
4451   unsigned char *dst = coding->destination + coding->produced;
4452   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4453   unsigned char *adjusted_dst_end = dst_end - 1;
4454   int destination_charbuf[1024];
4455   int i, produced_chars = 0;
4456   Lisp_Object attrs, charset_list;
4457
4458   CODING_GET_INFO (coding, attrs, charset_list);
4459   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4460
4461   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4462   ccl.dst_multibyte = coding->dst_multibyte;
4463
4464   while (charbuf < charbuf_end && dst < adjusted_dst_end)
4465     {
4466       int dst_bytes = dst_end - dst;
4467       if (dst_bytes > 1024)
4468         dst_bytes = 1024;
4469
4470       ccl_driver (&ccl, charbuf, destination_charbuf,
4471                   charbuf_end - charbuf, dst_bytes, charset_list);
4472       charbuf += ccl.consumed;
4473       if (multibytep)
4474         for (i = 0; i < ccl.produced; i++)
4475           EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4476       else
4477         {
4478           for (i = 0; i < ccl.produced; i++)
4479             *dst++ = destination_charbuf[i] & 0xFF;
4480           produced_chars += ccl.produced;
4481         }
4482     }
4483
4484   switch (ccl.status)
4485     {
4486     case CCL_STAT_SUSPEND_BY_SRC:
4487       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4488       break;
4489     case CCL_STAT_SUSPEND_BY_DST:
4490       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
4491       break;
4492     case CCL_STAT_QUIT:
4493     case CCL_STAT_INVALID_CMD:
4494       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4495       break;
4496     default:
4497       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4498       break;
4499     }
4500
4501   coding->produced_char += produced_chars;
4502   coding->produced = dst - coding->destination;
4503   return 0;
4504 }
4505
4506
4507 \f
4508 /*** 10, 11. no-conversion handlers ***/
4509
4510 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
4511
4512 static void
4513 decode_coding_raw_text (coding)
4514      struct coding_system *coding;
4515 {
4516   coding->chars_at_source = 1;
4517   coding->consumed_char = 0;
4518   coding->consumed = 0;
4519   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4520 }
4521
4522 static int
4523 encode_coding_raw_text (coding)
4524      struct coding_system *coding;
4525 {
4526   int multibytep = coding->dst_multibyte;
4527   int *charbuf = coding->charbuf;
4528   int *charbuf_end = coding->charbuf + coding->charbuf_used;
4529   unsigned char *dst = coding->destination + coding->produced;
4530   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4531   int produced_chars = 0;
4532   int c;
4533
4534   if (multibytep)
4535     {
4536       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4537
4538       if (coding->src_multibyte)
4539         while (charbuf < charbuf_end)
4540           {
4541             ASSURE_DESTINATION (safe_room);
4542             c = *charbuf++;
4543             if (ASCII_CHAR_P (c))
4544               EMIT_ONE_ASCII_BYTE (c);
4545             else if (CHAR_BYTE8_P (c))
4546               {
4547                 c = CHAR_TO_BYTE8 (c);
4548                 EMIT_ONE_BYTE (c);
4549               }
4550             else
4551               {
4552                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4553
4554                 CHAR_STRING_ADVANCE (c, p1);
4555                 while (p0 < p1)
4556                   {
4557                     EMIT_ONE_BYTE (*p0);
4558                     p0++;
4559                   }
4560               }
4561           }
4562       else
4563         while (charbuf < charbuf_end)
4564           {
4565             ASSURE_DESTINATION (safe_room);
4566             c = *charbuf++;
4567             EMIT_ONE_BYTE (c);
4568           }
4569     }
4570   else
4571     {
4572       if (coding->src_multibyte)
4573         {
4574           int safe_room = MAX_MULTIBYTE_LENGTH;
4575
4576           while (charbuf < charbuf_end)
4577             {
4578               ASSURE_DESTINATION (safe_room);
4579               c = *charbuf++;
4580               if (ASCII_CHAR_P (c))
4581                 *dst++ = c;
4582               else if (CHAR_BYTE8_P (c))
4583                 *dst++ = CHAR_TO_BYTE8 (c);
4584               else
4585                 CHAR_STRING_ADVANCE (c, dst);
4586               produced_chars++;
4587             }
4588         }
4589       else
4590         {
4591           ASSURE_DESTINATION (charbuf_end - charbuf);
4592           while (charbuf < charbuf_end && dst < dst_end)
4593             *dst++ = *charbuf++;
4594           produced_chars = dst - (coding->destination + coding->dst_bytes);
4595         }
4596     }
4597   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4598   coding->produced_char += produced_chars;
4599   coding->produced = dst - coding->destination;
4600   return 0;
4601 }
4602
4603 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4604    Check if a text is encoded in a charset-based coding system.  If it
4605    is, return 1, else return 0.  */
4606
4607 static int
4608 detect_coding_charset (coding, detect_info)
4609      struct coding_system *coding;
4610      struct coding_detection_info *detect_info;
4611 {
4612   const unsigned char *src = coding->source, *src_base;
4613   const unsigned char *src_end = coding->source + coding->src_bytes;
4614   int multibytep = coding->src_multibyte;
4615   int consumed_chars = 0;
4616   Lisp_Object attrs, valids;
4617   int found = 0;
4618
4619   detect_info->checked |= CATEGORY_MASK_CHARSET;
4620
4621   coding = &coding_categories[coding_category_charset];
4622   attrs = CODING_ID_ATTRS (coding->id);
4623   valids = AREF (attrs, coding_attr_charset_valids);
4624
4625   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4626     src += coding->head_ascii;
4627
4628   while (1)
4629     {
4630       int c;
4631
4632       src_base = src;
4633       ONE_MORE_BYTE (c);
4634       if (c < 0)
4635         continue;
4636       if (NILP (AREF (valids, c)))
4637         break;
4638       if (c >= 0x80)
4639         found = CATEGORY_MASK_CHARSET;
4640     }
4641   detect_info->rejected |= CATEGORY_MASK_CHARSET;
4642   return 0;
4643
4644  no_more_source:
4645   detect_info->found |= found;
4646   return 1;
4647 }
4648
4649 static void
4650 decode_coding_charset (coding)
4651      struct coding_system *coding;
4652 {
4653   const unsigned char *src = coding->source + coding->consumed;
4654   const unsigned char *src_end = coding->source + coding->src_bytes;
4655   const unsigned char *src_base;
4656   int *charbuf = coding->charbuf;
4657   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4658   int consumed_chars = 0, consumed_chars_base;
4659   int multibytep = coding->src_multibyte;
4660   Lisp_Object attrs, charset_list, valids;
4661   int char_offset = coding->produced_char;
4662   int last_offset = char_offset;
4663   int last_id = charset_ascii;
4664
4665   CODING_GET_INFO (coding, attrs, charset_list);
4666   valids = AREF (attrs, coding_attr_charset_valids);
4667
4668   while (1)
4669     {
4670       int c;
4671       Lisp_Object val;
4672       struct charset *charset;
4673       int dim;
4674       int len = 1;
4675       unsigned code;
4676
4677       src_base = src;
4678       consumed_chars_base = consumed_chars;
4679
4680       if (charbuf >= charbuf_end)
4681         break;
4682
4683       ONE_MORE_BYTE (c);
4684       if (c < 0)
4685         goto invalid_code;
4686       code = c;
4687
4688       val = AREF (valids, c);
4689       if (NILP (val))
4690         goto invalid_code;
4691       if (INTEGERP (val))
4692         {
4693           charset = CHARSET_FROM_ID (XFASTINT (val));
4694           dim = CHARSET_DIMENSION (charset);
4695           while (len < dim)
4696             {
4697               ONE_MORE_BYTE (c);
4698               code = (code << 8) | c;
4699               len++;
4700             }
4701           CODING_DECODE_CHAR (coding, src, src_base, src_end,
4702                               charset, code, c);
4703         }
4704       else
4705         {
4706           /* VAL is a list of charset IDs.  It is assured that the
4707              list is sorted by charset dimensions (smaller one
4708              comes first).  */
4709           while (CONSP (val))
4710             {
4711               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4712               dim = CHARSET_DIMENSION (charset);
4713               while (len < dim)
4714                 {
4715                   ONE_MORE_BYTE (c);
4716                   code = (code << 8) | c;
4717                   len++;
4718                 }
4719               CODING_DECODE_CHAR (coding, src, src_base,
4720                                   src_end, charset, code, c);
4721               if (c >= 0)
4722                 break;
4723               val = XCDR (val);
4724             }
4725         }
4726       if (c < 0)
4727         goto invalid_code;
4728       if (charset->id != charset_ascii
4729           && last_id != charset->id)
4730         {
4731           if (last_id != charset_ascii)
4732             ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4733           last_id = charset->id;
4734           last_offset = char_offset;
4735         }
4736
4737       *charbuf++ = c;
4738       char_offset++;
4739       continue;
4740
4741     invalid_code:
4742       src = src_base;
4743       consumed_chars = consumed_chars_base;
4744       ONE_MORE_BYTE (c);
4745       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4746       char_offset++;
4747       coding->errors++;
4748     }
4749
4750  no_more_source:
4751   if (last_id != charset_ascii)
4752     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4753   coding->consumed_char += consumed_chars_base;
4754   coding->consumed = src_base - coding->source;
4755   coding->charbuf_used = charbuf - coding->charbuf;
4756 }
4757
4758 static int
4759 encode_coding_charset (coding)
4760      struct coding_system *coding;
4761 {
4762   int multibytep = coding->dst_multibyte;
4763   int *charbuf = coding->charbuf;
4764   int *charbuf_end = charbuf + coding->charbuf_used;
4765   unsigned char *dst = coding->destination + coding->produced;
4766   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4767   int safe_room = MAX_MULTIBYTE_LENGTH;
4768   int produced_chars = 0;
4769   Lisp_Object attrs, charset_list;
4770   int ascii_compatible;
4771   int c;
4772
4773   CODING_GET_INFO (coding, attrs, charset_list);
4774   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4775
4776   while (charbuf < charbuf_end)
4777     {
4778       struct charset *charset;
4779       unsigned code;
4780
4781       ASSURE_DESTINATION (safe_room);
4782       c = *charbuf++;
4783       if (ascii_compatible && ASCII_CHAR_P (c))
4784         EMIT_ONE_ASCII_BYTE (c);
4785       else if (CHAR_BYTE8_P (c))
4786         {
4787           c = CHAR_TO_BYTE8 (c);
4788           EMIT_ONE_BYTE (c);
4789         }
4790       else
4791         {
4792           charset = char_charset (c, charset_list, &code);
4793           if (charset)
4794             {
4795               if (CHARSET_DIMENSION (charset) == 1)
4796                 EMIT_ONE_BYTE (code);
4797               else if (CHARSET_DIMENSION (charset) == 2)
4798                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4799               else if (CHARSET_DIMENSION (charset) == 3)
4800                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4801               else
4802                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4803                                  (code >> 8) & 0xFF, code & 0xFF);
4804             }
4805           else
4806             {
4807               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4808                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4809               else
4810                 c = coding->default_char;
4811               EMIT_ONE_BYTE (c);
4812             }
4813         }
4814     }
4815
4816   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4817   coding->produced_char += produced_chars;
4818   coding->produced = dst - coding->destination;
4819   return 0;
4820 }
4821
4822 \f
4823 /*** 7. C library functions ***/
4824
4825 /* Setup coding context CODING from information about CODING_SYSTEM.
4826    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
4827    CODING_SYSTEM is invalid, signal an error.  */
4828
4829 void
4830 setup_coding_system (coding_system, coding)
4831      Lisp_Object coding_system;
4832      struct coding_system *coding;
4833 {
4834   Lisp_Object attrs;
4835   Lisp_Object eol_type;
4836   Lisp_Object coding_type;
4837   Lisp_Object val;
4838
4839   if (NILP (coding_system))
4840     coding_system = Qno_conversion;
4841
4842   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4843
4844   attrs = CODING_ID_ATTRS (coding->id);
4845   eol_type = CODING_ID_EOL_TYPE (coding->id);
4846
4847   coding->mode = 0;
4848   coding->head_ascii = -1;
4849   coding->common_flags
4850     = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4851   if (! NILP (CODING_ATTR_POST_READ (attrs)))
4852     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
4853   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
4854     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
4855   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
4856     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
4857
4858   val = CODING_ATTR_SAFE_CHARSETS (attrs);
4859   coding->max_charset_id = SCHARS (val) - 1;
4860   coding->safe_charsets = (char *) SDATA (val);
4861   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4862
4863   coding_type = CODING_ATTR_TYPE (attrs);
4864   if (EQ (coding_type, Qundecided))
4865     {
4866       coding->detector = NULL;
4867       coding->decoder = decode_coding_raw_text;
4868       coding->encoder = encode_coding_raw_text;
4869       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4870     }
4871   else if (EQ (coding_type, Qiso_2022))
4872     {
4873       int i;
4874       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4875
4876       /* Invoke graphic register 0 to plane 0.  */
4877       CODING_ISO_INVOCATION (coding, 0) = 0;
4878       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
4879       CODING_ISO_INVOCATION (coding, 1)
4880         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4881       /* Setup the initial status of designation.  */
4882       for (i = 0; i < 4; i++)
4883         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4884       /* Not single shifting initially.  */
4885       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4886       /* Beginning of buffer should also be regarded as bol. */
4887       CODING_ISO_BOL (coding) = 1;
4888       coding->detector = detect_coding_iso_2022;
4889       coding->decoder = decode_coding_iso_2022;
4890       coding->encoder = encode_coding_iso_2022;
4891       if (flags & CODING_ISO_FLAG_SAFE)
4892         coding->mode |= CODING_MODE_SAFE_ENCODING;
4893       coding->common_flags
4894         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4895             | CODING_REQUIRE_FLUSHING_MASK);
4896       if (flags & CODING_ISO_FLAG_COMPOSITION)
4897         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4898       if (flags & CODING_ISO_FLAG_DESIGNATION)
4899         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
4900       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4901         {
4902           setup_iso_safe_charsets (attrs);
4903           val = CODING_ATTR_SAFE_CHARSETS (attrs);
4904           coding->max_charset_id = SCHARS (val) - 1;
4905           coding->safe_charsets = (char *) SDATA (val);
4906         }
4907       CODING_ISO_FLAGS (coding) = flags;
4908     }
4909   else if (EQ (coding_type, Qcharset))
4910     {
4911       coding->detector = detect_coding_charset;
4912       coding->decoder = decode_coding_charset;
4913       coding->encoder = encode_coding_charset;
4914       coding->common_flags
4915         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4916     }
4917   else if (EQ (coding_type, Qutf_8))
4918     {
4919       coding->detector = detect_coding_utf_8;
4920       coding->decoder = decode_coding_utf_8;
4921       coding->encoder = encode_coding_utf_8;
4922       coding->common_flags
4923         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4924     }
4925   else if (EQ (coding_type, Qutf_16))
4926     {
4927       val = AREF (attrs, coding_attr_utf_16_bom);
4928       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4929                                     : EQ (val, Qt) ? utf_16_with_bom
4930                                     : utf_16_without_bom);
4931       val = AREF (attrs, coding_attr_utf_16_endian);
4932       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
4933                                        : utf_16_little_endian);
4934       CODING_UTF_16_SURROGATE (coding) = 0;
4935       coding->detector = detect_coding_utf_16;
4936       coding->decoder = decode_coding_utf_16;
4937       coding->encoder = encode_coding_utf_16;
4938       coding->common_flags
4939         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4940       if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
4941         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4942     }
4943   else if (EQ (coding_type, Qccl))
4944     {
4945       coding->detector = detect_coding_ccl;
4946       coding->decoder = decode_coding_ccl;
4947       coding->encoder = encode_coding_ccl;
4948       coding->common_flags
4949         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4950             | CODING_REQUIRE_FLUSHING_MASK);
4951     }
4952   else if (EQ (coding_type, Qemacs_mule))
4953     {
4954       coding->detector = detect_coding_emacs_mule;
4955       coding->decoder = decode_coding_emacs_mule;
4956       coding->encoder = encode_coding_emacs_mule;
4957       coding->common_flags
4958         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4959       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4960           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4961         {
4962           Lisp_Object tail, safe_charsets;
4963           int max_charset_id = 0;
4964
4965           for (tail = Vemacs_mule_charset_list; CONSP (tail);
4966                tail = XCDR (tail))
4967             if (max_charset_id < XFASTINT (XCAR (tail)))
4968               max_charset_id = XFASTINT (XCAR (tail));
4969           safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4970                                         make_number (255));
4971           for (tail = Vemacs_mule_charset_list; CONSP (tail);
4972                tail = XCDR (tail))
4973             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
4974           coding->max_charset_id = max_charset_id;
4975           coding->safe_charsets = (char *) SDATA (safe_charsets);
4976         }
4977     }
4978   else if (EQ (coding_type, Qshift_jis))
4979     {
4980       coding->detector = detect_coding_sjis;
4981       coding->decoder = decode_coding_sjis;
4982       coding->encoder = encode_coding_sjis;
4983       coding->common_flags
4984         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4985     }
4986   else if (EQ (coding_type, Qbig5))
4987     {
4988       coding->detector = detect_coding_big5;
4989       coding->decoder = decode_coding_big5;
4990       coding->encoder = encode_coding_big5;
4991       coding->common_flags
4992         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4993     }
4994   else                          /* EQ (coding_type, Qraw_text) */
4995     {
4996       coding->detector = NULL;
4997       coding->decoder = decode_coding_raw_text;
4998       coding->encoder = encode_coding_raw_text;
4999     }
5000
5001   return;
5002 }
5003
5004 /* Return raw-text or one of its subsidiaries that has the same
5005    eol_type as CODING-SYSTEM.  */
5006
5007 Lisp_Object
5008 raw_text_coding_system (coding_system)
5009      Lisp_Object coding_system;
5010 {
5011   Lisp_Object spec, attrs;
5012   Lisp_Object eol_type, raw_text_eol_type;
5013
5014   if (NILP (coding_system))
5015     return Qraw_text;
5016   spec = CODING_SYSTEM_SPEC (coding_system);
5017   attrs = AREF (spec, 0);
5018
5019   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5020     return coding_system;
5021
5022   eol_type = AREF (spec, 2);
5023   if (VECTORP (eol_type))
5024     return Qraw_text;
5025   spec = CODING_SYSTEM_SPEC (Qraw_text);
5026   raw_text_eol_type = AREF (spec, 2);
5027   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5028           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5029           : AREF (raw_text_eol_type, 2));
5030 }
5031
5032
5033 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5034    does, return one of the subsidiary that has the same eol-spec as
5035    PARENT.  Otherwise, return CODING_SYSTEM.  */
5036
5037 Lisp_Object
5038 coding_inherit_eol_type (coding_system, parent)
5039      Lisp_Object coding_system, parent;
5040 {
5041   Lisp_Object spec, eol_type;
5042
5043   if (NILP (coding_system))
5044     coding_system = Qraw_text;
5045   spec = CODING_SYSTEM_SPEC (coding_system);
5046   eol_type = AREF (spec, 2);
5047   if (VECTORP (eol_type)
5048       && ! NILP (parent))
5049     {
5050       Lisp_Object parent_spec;
5051       Lisp_Object parent_eol_type;
5052
5053       parent_spec
5054         = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
5055       parent_eol_type = AREF (parent_spec, 2);
5056       if (EQ (parent_eol_type, Qunix))
5057         coding_system = AREF (eol_type, 0);
5058       else if (EQ (parent_eol_type, Qdos))
5059         coding_system = AREF (eol_type, 1);
5060       else if (EQ (parent_eol_type, Qmac))
5061         coding_system = AREF (eol_type, 2);
5062     }
5063   return coding_system;
5064 }
5065
5066 /* Emacs has a mechanism to automatically detect a coding system if it
5067    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
5068    it's impossible to distinguish some coding systems accurately
5069    because they use the same range of codes.  So, at first, coding
5070    systems are categorized into 7, those are:
5071
5072    o coding-category-emacs-mule
5073
5074         The category for a coding system which has the same code range
5075         as Emacs' internal format.  Assigned the coding-system (Lisp
5076         symbol) `emacs-mule' by default.
5077
5078    o coding-category-sjis
5079
5080         The category for a coding system which has the same code range
5081         as SJIS.  Assigned the coding-system (Lisp
5082         symbol) `japanese-shift-jis' by default.
5083
5084    o coding-category-iso-7
5085
5086         The category for a coding system which has the same code range
5087         as ISO2022 of 7-bit environment.  This doesn't use any locking
5088         shift and single shift functions.  This can encode/decode all
5089         charsets.  Assigned the coding-system (Lisp symbol)
5090         `iso-2022-7bit' by default.
5091
5092    o coding-category-iso-7-tight
5093
5094         Same as coding-category-iso-7 except that this can
5095         encode/decode only the specified charsets.
5096
5097    o coding-category-iso-8-1
5098
5099         The category for a coding system which has the same code range
5100         as ISO2022 of 8-bit environment and graphic plane 1 used only
5101         for DIMENSION1 charset.  This doesn't use any locking shift
5102         and single shift functions.  Assigned the coding-system (Lisp
5103         symbol) `iso-latin-1' by default.
5104
5105    o coding-category-iso-8-2
5106
5107         The category for a coding system which has the same code range
5108         as ISO2022 of 8-bit environment and graphic plane 1 used only
5109         for DIMENSION2 charset.  This doesn't use any locking shift
5110         and single shift functions.  Assigned the coding-system (Lisp
5111         symbol) `japanese-iso-8bit' by default.
5112
5113    o coding-category-iso-7-else
5114
5115         The category for a coding system which has the same code range
5116         as ISO2022 of 7-bit environemnt but uses locking shift or
5117         single shift functions.  Assigned the coding-system (Lisp
5118         symbol) `iso-2022-7bit-lock' by default.
5119
5120    o coding-category-iso-8-else
5121
5122         The category for a coding system which has the same code range
5123         as ISO2022 of 8-bit environemnt but uses locking shift or
5124         single shift functions.  Assigned the coding-system (Lisp
5125         symbol) `iso-2022-8bit-ss2' by default.
5126
5127    o coding-category-big5
5128
5129         The category for a coding system which has the same code range
5130         as BIG5.  Assigned the coding-system (Lisp symbol)
5131         `cn-big5' by default.
5132
5133    o coding-category-utf-8
5134
5135         The category for a coding system which has the same code range
5136         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
5137         symbol) `utf-8' by default.
5138
5139    o coding-category-utf-16-be
5140
5141         The category for a coding system in which a text has an
5142         Unicode signature (cf. Unicode Standard) in the order of BIG
5143         endian at the head.  Assigned the coding-system (Lisp symbol)
5144         `utf-16-be' by default.
5145
5146    o coding-category-utf-16-le
5147
5148         The category for a coding system in which a text has an
5149         Unicode signature (cf. Unicode Standard) in the order of
5150         LITTLE endian at the head.  Assigned the coding-system (Lisp
5151         symbol) `utf-16-le' by default.
5152
5153    o coding-category-ccl
5154
5155         The category for a coding system of which encoder/decoder is
5156         written in CCL programs.  The default value is nil, i.e., no
5157         coding system is assigned.
5158
5159    o coding-category-binary
5160
5161         The category for a coding system not categorized in any of the
5162         above.  Assigned the coding-system (Lisp symbol)
5163         `no-conversion' by default.
5164
5165    Each of them is a Lisp symbol and the value is an actual
5166    `coding-system's (this is also a Lisp symbol) assigned by a user.
5167    What Emacs does actually is to detect a category of coding system.
5168    Then, it uses a `coding-system' assigned to it.  If Emacs can't
5169    decide only one possible category, it selects a category of the
5170    highest priority.  Priorities of categories are also specified by a
5171    user in a Lisp variable `coding-category-list'.
5172
5173 */
5174
5175 #define EOL_SEEN_NONE   0
5176 #define EOL_SEEN_LF     1
5177 #define EOL_SEEN_CR     2
5178 #define EOL_SEEN_CRLF   4
5179
5180 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5181    SOURCE is encoded.  If CATEGORY is one of
5182    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5183    two-byte, else they are encoded by one-byte.
5184
5185    Return one of EOL_SEEN_XXX.  */
5186
5187 #define MAX_EOL_CHECK_COUNT 3
5188
5189 static int
5190 detect_eol (source, src_bytes, category)
5191      unsigned char *source;
5192      EMACS_INT src_bytes;
5193      enum coding_category category;
5194 {
5195   unsigned char *src = source, *src_end = src + src_bytes;
5196   unsigned char c;
5197   int total  = 0;
5198   int eol_seen = EOL_SEEN_NONE;
5199
5200   if ((1 << category) & CATEGORY_MASK_UTF_16)
5201     {
5202       int msb, lsb;
5203
5204       msb = category == (coding_category_utf_16_le
5205                          | coding_category_utf_16_le_nosig);
5206       lsb = 1 - msb;
5207
5208       while (src + 1 < src_end)
5209         {
5210           c = src[lsb];
5211           if (src[msb] == 0 && (c == '\n' || c == '\r'))
5212             {
5213               int this_eol;
5214
5215               if (c == '\n')
5216                 this_eol = EOL_SEEN_LF;
5217               else if (src + 3 >= src_end
5218                        || src[msb + 2] != 0
5219                        || src[lsb + 2] != '\n')
5220                 this_eol = EOL_SEEN_CR;
5221               else
5222                 this_eol = EOL_SEEN_CRLF;
5223
5224               if (eol_seen == EOL_SEEN_NONE)
5225                 /* This is the first end-of-line.  */
5226                 eol_seen = this_eol;
5227               else if (eol_seen != this_eol)
5228                 {
5229                   /* The found type is different from what found before.  */
5230                   eol_seen = EOL_SEEN_LF;
5231                   break;
5232                 }
5233               if (++total == MAX_EOL_CHECK_COUNT)
5234                 break;
5235             }
5236           src += 2;
5237         }
5238     }
5239   else
5240     {
5241       while (src < src_end)
5242         {
5243           c = *src++;
5244           if (c == '\n' || c == '\r')
5245             {
5246               int this_eol;
5247
5248               if (c == '\n')
5249                 this_eol = EOL_SEEN_LF;
5250               else if (src >= src_end || *src != '\n')
5251                 this_eol = EOL_SEEN_CR;
5252               else
5253                 this_eol = EOL_SEEN_CRLF, src++;
5254
5255               if (eol_seen == EOL_SEEN_NONE)
5256                 /* This is the first end-of-line.  */
5257                 eol_seen = this_eol;
5258               else if (eol_seen != this_eol)
5259                 {
5260                   /* The found type is different from what found before.  */
5261                   eol_seen = EOL_SEEN_LF;
5262                   break;
5263                 }
5264               if (++total == MAX_EOL_CHECK_COUNT)
5265                 break;
5266             }
5267         }
5268     }
5269   return eol_seen;
5270 }
5271
5272
5273 static Lisp_Object
5274 adjust_coding_eol_type (coding, eol_seen)
5275      struct coding_system *coding;
5276      int eol_seen;
5277 {
5278   Lisp_Object eol_type;
5279
5280   eol_type = CODING_ID_EOL_TYPE (coding->id);
5281   if (eol_seen & EOL_SEEN_LF)
5282     {
5283       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5284       eol_type = Qunix;
5285     }
5286   else if (eol_seen & EOL_SEEN_CRLF)
5287     {
5288       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5289       eol_type = Qdos;
5290     }
5291   else if (eol_seen & EOL_SEEN_CR)
5292     {
5293       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5294       eol_type = Qmac;
5295     }
5296   return eol_type;
5297 }
5298
5299 /* Detect how a text specified in CODING is encoded.  If a coding
5300    system is detected, update fields of CODING by the detected coding
5301    system.  */
5302
5303 void
5304 detect_coding (coding)
5305      struct coding_system *coding;
5306 {
5307   const unsigned char *src, *src_end;
5308   Lisp_Object attrs, coding_type;
5309
5310   coding->consumed = coding->consumed_char = 0;
5311   coding->produced = coding->produced_char = 0;
5312   coding_set_source (coding);
5313
5314   src_end = coding->source + coding->src_bytes;
5315
5316   /* If we have not yet decided the text encoding type, detect it
5317      now.  */
5318   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5319     {
5320       int c, i;
5321
5322       for (i = 0, src = coding->source; src < src_end; i++, src++)
5323         {
5324           c = *src;
5325           if (c & 0x80 || (c < 0x20 && (c == 0
5326                                         || c == ISO_CODE_ESC
5327                                         || c == ISO_CODE_SI
5328                                         || c == ISO_CODE_SO)))
5329             break;
5330         }
5331       coding->head_ascii = src - (coding->source + coding->consumed);
5332
5333       if (coding->head_ascii < coding->src_bytes)
5334         {
5335           struct coding_detection_info detect_info;
5336           enum coding_category category;
5337           struct coding_system *this;
5338
5339           detect_info.checked = detect_info.found = detect_info.rejected = 0;
5340           for (i = 0; i < coding_category_raw_text; i++)
5341             {
5342               category = coding_priorities[i];
5343               this = coding_categories + category;
5344               if (this->id < 0)
5345                 {
5346                   /* No coding system of this category is defined.  */
5347                   detect_info.rejected |= (1 << category);
5348                 }
5349               else if (category >= coding_category_raw_text)
5350                 continue;
5351               else if (detect_info.checked & (1 << category))
5352                 {
5353                   if (detect_info.found & (1 << category))
5354                     break;
5355                 }
5356               else if ((*(this->detector)) (coding, &detect_info)
5357                        && detect_info.found & (1 << category))
5358                 {
5359                   if (category == coding_category_utf_16_auto)
5360                     {
5361                       if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5362                         category = coding_category_utf_16_le;
5363                       else
5364                         category = coding_category_utf_16_be;
5365                     }
5366                   break;
5367                 }
5368             }
5369           if (i < coding_category_raw_text)
5370             setup_coding_system (CODING_ID_NAME (this->id), coding);
5371           else if (detect_info.rejected == CATEGORY_MASK_ANY)
5372             setup_coding_system (Qraw_text, coding);
5373           else if (detect_info.rejected)
5374             for (i = 0; i < coding_category_raw_text; i++)
5375               if (! (detect_info.rejected & (1 << coding_priorities[i])))
5376                 {
5377                   this = coding_categories + coding_priorities[i];
5378                   setup_coding_system (CODING_ID_NAME (this->id), coding);
5379                   break;
5380                 }
5381         }
5382     }
5383   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5384            == coding_category_utf_16_auto)
5385     {
5386       Lisp_Object coding_systems;
5387       struct coding_detection_info detect_info;
5388
5389       coding_systems
5390         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5391       detect_info.found = detect_info.rejected = 0;
5392       if (CONSP (coding_systems)
5393           && detect_coding_utf_16 (coding, &detect_info))
5394         {
5395           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5396             setup_coding_system (XCAR (coding_systems), coding);
5397           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
5398             setup_coding_system (XCDR (coding_systems), coding);
5399         }
5400     }
5401 }
5402
5403
5404 static void
5405 decode_eol (coding)
5406      struct coding_system *coding;
5407 {
5408   Lisp_Object eol_type;
5409   unsigned char *p, *pbeg, *pend;
5410
5411   eol_type = CODING_ID_EOL_TYPE (coding->id);
5412   if (EQ (eol_type, Qunix))
5413     return;
5414
5415   if (NILP (coding->dst_object))
5416     pbeg = coding->destination;
5417   else
5418     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
5419   pend = pbeg + coding->produced;
5420
5421   if (VECTORP (eol_type))
5422     {
5423       int eol_seen = EOL_SEEN_NONE;
5424
5425       for (p = pbeg; p < pend; p++)
5426         {
5427           if (*p == '\n')
5428             eol_seen |= EOL_SEEN_LF;
5429           else if (*p == '\r')
5430             {
5431               if (p + 1 < pend && *(p + 1) == '\n')
5432                 {
5433                   eol_seen |= EOL_SEEN_CRLF;
5434                   p++;
5435                 }
5436               else
5437                 eol_seen |= EOL_SEEN_CR;
5438             }
5439         }
5440       if (eol_seen != EOL_SEEN_NONE
5441           && eol_seen != EOL_SEEN_LF
5442           && eol_seen != EOL_SEEN_CRLF
5443           && eol_seen != EOL_SEEN_CR)
5444         eol_seen = EOL_SEEN_LF;
5445       if (eol_seen != EOL_SEEN_NONE)
5446         eol_type = adjust_coding_eol_type (coding, eol_seen);
5447     }
5448
5449   if (EQ (eol_type, Qmac))
5450     {
5451       for (p = pbeg; p < pend; p++)
5452         if (*p == '\r')
5453           *p = '\n';
5454     }
5455   else if (EQ (eol_type, Qdos))
5456     {
5457       int n = 0;
5458
5459       if (NILP (coding->dst_object))
5460         {
5461           for (p = pend - 2; p >= pbeg; p--)
5462             if (*p == '\r')
5463               {
5464                 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
5465                 n++;
5466               }
5467         }
5468       else
5469         {
5470           for (p = pend - 2; p >= pbeg; p--)
5471             if (*p == '\r')
5472               {
5473                 int pos_byte = coding->dst_pos_byte + (p - pbeg);
5474                 int pos = BYTE_TO_CHAR (pos_byte);
5475
5476                 del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
5477                 n++;
5478               }
5479         }
5480       coding->produced -= n;
5481       coding->produced_char -= n;
5482     }
5483 }
5484
5485 static void
5486 translate_chars (coding, table)
5487      struct coding_system *coding;
5488      Lisp_Object table;
5489 {
5490   int *charbuf = coding->charbuf;
5491   int *charbuf_end = charbuf + coding->charbuf_used;
5492   int c;
5493
5494   if (coding->chars_at_source)
5495     return;
5496
5497   while (charbuf < charbuf_end)
5498     {
5499       c = *charbuf;
5500       if (c < 0)
5501         charbuf += c;
5502       else
5503         *charbuf++ = translate_char (table, c);
5504     }
5505 }
5506
5507 static int
5508 produce_chars (coding)
5509      struct coding_system *coding;
5510 {
5511   unsigned char *dst = coding->destination + coding->produced;
5512   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5513   int produced;
5514   int produced_chars = 0;
5515
5516   if (! coding->chars_at_source)
5517     {
5518       /* Characters are in coding->charbuf.  */
5519       int *buf = coding->charbuf;
5520       int *buf_end = buf + coding->charbuf_used;
5521       unsigned char *adjusted_dst_end;
5522
5523       if (BUFFERP (coding->src_object)
5524           && EQ (coding->src_object, coding->dst_object))
5525         dst_end = ((unsigned char *) coding->source) + coding->consumed;
5526       adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5527
5528       while (buf < buf_end)
5529         {
5530           int c = *buf++;
5531
5532           if (dst >= adjusted_dst_end)
5533             {
5534               dst = alloc_destination (coding,
5535                                        buf_end - buf + MAX_MULTIBYTE_LENGTH,
5536                                        dst);
5537               dst_end = coding->destination + coding->dst_bytes;
5538               adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5539             }
5540           if (c >= 0)
5541             {
5542               if (coding->dst_multibyte
5543                   || ! CHAR_BYTE8_P (c))
5544                 CHAR_STRING_ADVANCE (c, dst);
5545               else
5546                 *dst++ = CHAR_TO_BYTE8 (c);
5547               produced_chars++;
5548             }
5549           else
5550             /* This is an annotation datum.  (-C) is the length of
5551                it.  */
5552             buf += -c - 1;
5553         }
5554     }
5555   else
5556     {
5557       const unsigned char *src = coding->source;
5558       const unsigned char *src_end = src + coding->src_bytes;
5559       Lisp_Object eol_type;
5560
5561       eol_type = CODING_ID_EOL_TYPE (coding->id);
5562
5563       if (coding->src_multibyte != coding->dst_multibyte)
5564         {
5565           if (coding->src_multibyte)
5566             {
5567               int multibytep = 1;
5568               int consumed_chars;
5569
5570               while (1)
5571                 {
5572                   const unsigned char *src_base = src;
5573                   int c;
5574
5575                   ONE_MORE_BYTE (c);
5576                   if (c == '\r')
5577                     {
5578                       if (EQ (eol_type, Qdos))
5579                         {
5580                           if (src == src_end)
5581                             {
5582                               record_conversion_result
5583                                 (coding, CODING_RESULT_INSUFFICIENT_SRC);
5584                               goto no_more_source;
5585                             }
5586                           if (*src == '\n')
5587                             c = *src++;
5588                         }
5589                       else if (EQ (eol_type, Qmac))
5590                         c = '\n';
5591                     }
5592                   if (dst == dst_end)
5593                     {
5594                       coding->consumed = src - coding->source;
5595
5596                     if (EQ (coding->src_object, coding->dst_object))
5597                       dst_end = (unsigned char *) src;
5598                     if (dst == dst_end)
5599                       {
5600                         dst = alloc_destination (coding, src_end - src + 1,
5601                                                  dst);
5602                         dst_end = coding->destination + coding->dst_bytes;
5603                         coding_set_source (coding);
5604                         src = coding->source + coding->consumed;
5605                         src_end = coding->source + coding->src_bytes;
5606                       }
5607                     }
5608                   *dst++ = c;
5609                   produced_chars++;
5610                 }
5611             no_more_source:
5612               ;
5613             }
5614           else
5615             while (src < src_end)
5616               {
5617                 int multibytep = 1;
5618                 int c = *src++;
5619
5620                 if (c == '\r')
5621                   {
5622                     if (EQ (eol_type, Qdos))
5623                       {
5624                         if (src < src_end
5625                             && *src == '\n')
5626                           c = *src++;
5627                       }
5628                     else if (EQ (eol_type, Qmac))
5629                       c = '\n';
5630                   }
5631                 if (dst >= dst_end - 1)
5632                   {
5633                     coding->consumed = src - coding->source;
5634
5635                     if (EQ (coding->src_object, coding->dst_object))
5636                       dst_end = (unsigned char *) src;
5637                     if (dst >= dst_end - 1)
5638                       {
5639                         dst = alloc_destination (coding, src_end - src + 2,
5640                                                  dst);
5641                         dst_end = coding->destination + coding->dst_bytes;
5642                         coding_set_source (coding);
5643                         src = coding->source + coding->consumed;
5644                         src_end = coding->source + coding->src_bytes;
5645                       }
5646                   }
5647                 EMIT_ONE_BYTE (c);
5648               }
5649         }
5650       else
5651         {
5652           if (!EQ (coding->src_object, coding->dst_object))
5653             {
5654               int require = coding->src_bytes - coding->dst_bytes;
5655
5656               if (require > 0)
5657                 {
5658                   EMACS_INT offset = src - coding->source;
5659
5660                   dst = alloc_destination (coding, require, dst);
5661                   coding_set_source (coding);
5662                   src = coding->source + offset;
5663                   src_end = coding->source + coding->src_bytes;
5664                 }
5665             }
5666           produced_chars = coding->src_chars;
5667           while (src < src_end)
5668             {
5669               int c = *src++;
5670
5671               if (c == '\r')
5672                 {
5673                   if (EQ (eol_type, Qdos))
5674                     {
5675                       if (src < src_end
5676                           && *src == '\n')
5677                         c = *src++;
5678                       produced_chars--;
5679                     }
5680                   else if (EQ (eol_type, Qmac))
5681                     c = '\n';
5682                 }
5683               *dst++ = c;
5684             }
5685         }
5686       coding->consumed = coding->src_bytes;
5687       coding->consumed_char = coding->src_chars;
5688     }
5689
5690   produced = dst - (coding->destination + coding->produced);
5691   if (BUFFERP (coding->dst_object))
5692     insert_from_gap (produced_chars, produced);
5693   coding->produced += produced;
5694   coding->produced_char += produced_chars;
5695   return produced_chars;
5696 }
5697
5698 /* Compose text in CODING->object according to the annotation data at
5699    CHARBUF.  CHARBUF is an array:
5700      [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
5701  */
5702
5703 static INLINE void
5704 produce_composition (coding, charbuf)
5705      struct coding_system *coding;
5706      int *charbuf;
5707 {
5708   int len;
5709   EMACS_INT from, to;
5710   enum composition_method method;
5711   Lisp_Object components;
5712
5713   len = -charbuf[0];
5714   from = coding->dst_pos + charbuf[2];
5715   to = coding->dst_pos + charbuf[3];
5716   method = (enum composition_method) (charbuf[4]);
5717
5718   if (method == COMPOSITION_RELATIVE)
5719     components = Qnil;
5720   else
5721     {
5722       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5723       int i;
5724
5725       len -= 5;
5726       charbuf += 5;
5727       for (i = 0; i < len; i++)
5728         args[i] = make_number (charbuf[i]);
5729       components = (method == COMPOSITION_WITH_ALTCHARS
5730                     ? Fstring (len, args) : Fvector (len, args));
5731     }
5732   compose_text (from, to, components, Qnil, coding->dst_object);
5733 }
5734
5735
5736 /* Put `charset' property on text in CODING->object according to
5737    the annotation data at CHARBUF.  CHARBUF is an array:
5738      [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ]
5739  */
5740
5741 static INLINE void
5742 produce_charset (coding, charbuf)
5743      struct coding_system *coding;
5744      int *charbuf;
5745 {
5746   EMACS_INT from = coding->dst_pos + charbuf[2];
5747   EMACS_INT to = coding->dst_pos + charbuf[3];
5748   struct charset *charset = CHARSET_FROM_ID (charbuf[4]);
5749
5750   Fput_text_property (make_number (from), make_number (to),
5751                       Qcharset, CHARSET_NAME (charset),
5752                       coding->dst_object);
5753 }
5754
5755
5756 #define CHARBUF_SIZE 0x4000
5757
5758 #define ALLOC_CONVERSION_WORK_AREA(coding)                              \
5759   do {                                                                  \
5760     int size = CHARBUF_SIZE;;                                           \
5761                                                                         \
5762     coding->charbuf = NULL;                                             \
5763     while (size > 1024)                                                 \
5764       {                                                                 \
5765         coding->charbuf = (int *) alloca (sizeof (int) * size);         \
5766         if (coding->charbuf)                                            \
5767           break;                                                        \
5768         size >>= 1;                                                     \
5769       }                                                                 \
5770     if (! coding->charbuf)                                              \
5771       {                                                                 \
5772         record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
5773         return coding->result;                                          \
5774       }                                                                 \
5775     coding->charbuf_size = size;                                        \
5776   } while (0)
5777
5778
5779 static void
5780 produce_annotation (coding)
5781      struct coding_system *coding;
5782 {
5783   int *charbuf = coding->charbuf;
5784   int *charbuf_end = charbuf + coding->charbuf_used;
5785
5786   if (NILP (coding->dst_object))
5787     return;
5788
5789   while (charbuf < charbuf_end)
5790     {
5791       if (*charbuf >= 0)
5792         charbuf++;
5793       else
5794         {
5795           int len = -*charbuf;
5796           switch (charbuf[1])
5797             {
5798             case CODING_ANNOTATE_COMPOSITION_MASK:
5799               produce_composition (coding, charbuf);
5800               break;
5801             case CODING_ANNOTATE_CHARSET_MASK:
5802               produce_charset (coding, charbuf);
5803               break;
5804             default:
5805               abort ();
5806             }
5807           charbuf += len;
5808         }
5809     }
5810 }
5811
5812 /* Decode the data at CODING->src_object into CODING->dst_object.
5813    CODING->src_object is a buffer, a string, or nil.
5814    CODING->dst_object is a buffer.
5815
5816    If CODING->src_object is a buffer, it must be the current buffer.
5817    In this case, if CODING->src_pos is positive, it is a position of
5818    the source text in the buffer, otherwise, the source text is in the
5819    gap area of the buffer, and CODING->src_pos specifies the offset of
5820    the text from GPT (which must be the same as PT).  If this is the
5821    same buffer as CODING->dst_object, CODING->src_pos must be
5822    negative.
5823
5824    If CODING->src_object is a string, CODING->src_pos in an index to
5825    that string.
5826
5827    If CODING->src_object is nil, CODING->source must already point to
5828    the non-relocatable memory area.  In this case, CODING->src_pos is
5829    an offset from CODING->source.
5830
5831    The decoded data is inserted at the current point of the buffer
5832    CODING->dst_object.
5833 */
5834
5835 static int
5836 decode_coding (coding)
5837      struct coding_system *coding;
5838 {
5839   Lisp_Object attrs;
5840   Lisp_Object undo_list;
5841
5842   if (BUFFERP (coding->src_object)
5843       && coding->src_pos > 0
5844       && coding->src_pos < GPT
5845       && coding->src_pos + coding->src_chars > GPT)
5846     move_gap_both (coding->src_pos, coding->src_pos_byte);
5847
5848   undo_list = Qt;
5849   if (BUFFERP (coding->dst_object))
5850     {
5851       if (current_buffer != XBUFFER (coding->dst_object))
5852         set_buffer_internal (XBUFFER (coding->dst_object));
5853       if (GPT != PT)
5854         move_gap_both (PT, PT_BYTE);
5855       undo_list = current_buffer->undo_list;
5856       current_buffer->undo_list = Qt;
5857     }
5858
5859   coding->consumed = coding->consumed_char = 0;
5860   coding->produced = coding->produced_char = 0;
5861   coding->chars_at_source = 0;
5862   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5863   coding->errors = 0;
5864
5865   ALLOC_CONVERSION_WORK_AREA (coding);
5866
5867   attrs = CODING_ID_ATTRS (coding->id);
5868
5869   do
5870     {
5871       coding_set_source (coding);
5872       coding->annotated = 0;
5873       (*(coding->decoder)) (coding);
5874       if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
5875         translate_chars (coding, CODING_ATTR_DECODE_TBL (attrs));
5876       else if (!NILP (Vstandard_translation_table_for_decode))
5877         translate_chars (coding, Vstandard_translation_table_for_decode);
5878       coding_set_destination (coding);
5879       produce_chars (coding);
5880       if (coding->annotated)
5881         produce_annotation (coding);
5882     }
5883   while (coding->consumed < coding->src_bytes
5884          && ! coding->result);
5885
5886   coding->carryover_bytes = 0;
5887   if (coding->consumed < coding->src_bytes)
5888     {
5889       int nbytes = coding->src_bytes - coding->consumed;
5890       const unsigned char *src;
5891
5892       coding_set_source (coding);
5893       coding_set_destination (coding);
5894       src = coding->source + coding->consumed;
5895
5896       if (coding->mode & CODING_MODE_LAST_BLOCK)
5897         {
5898           /* Flush out unprocessed data as binary chars.  We are sure
5899              that the number of data is less than the size of
5900              coding->charbuf.  */
5901           coding->charbuf_used = 0;
5902           while (nbytes-- > 0)
5903             {
5904               int c = *src++;
5905
5906               coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
5907             }
5908           produce_chars (coding);
5909         }
5910       else
5911         {
5912           /* Record unprocessed bytes in coding->carryover.  We are
5913              sure that the number of data is less than the size of
5914              coding->carryover.  */
5915           unsigned char *p = coding->carryover;
5916
5917           coding->carryover_bytes = nbytes;
5918           while (nbytes-- > 0)
5919             *p++ = *src++;
5920         }
5921       coding->consumed = coding->src_bytes;
5922     }
5923
5924   if (BUFFERP (coding->dst_object))
5925     {
5926       current_buffer->undo_list = undo_list;
5927       record_insert (coding->dst_pos, coding->produced_char);
5928     }
5929   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5930     decode_eol (coding);
5931   return coding->result;
5932 }
5933
5934
5935 /* Extract an annotation datum from a composition starting at POS and
5936    ending before LIMIT of CODING->src_object (buffer or string), store
5937    the data in BUF, set *STOP to a starting position of the next
5938    composition (if any) or to LIMIT, and return the address of the
5939    next element of BUF.
5940
5941    If such an annotation is not found, set *STOP to a starting
5942    position of a composition after POS (if any) or to LIMIT, and
5943    return BUF.  */
5944
5945 static INLINE int *
5946 handle_composition_annotation (pos, limit, coding, buf, stop)
5947      EMACS_INT pos, limit;
5948      struct coding_system *coding;
5949      int *buf;
5950      EMACS_INT *stop;
5951 {
5952   EMACS_INT start, end;
5953   Lisp_Object prop;
5954
5955   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
5956       || end > limit)
5957     *stop = limit;
5958   else if (start > pos)
5959     *stop = start;
5960   else
5961     {
5962       if (start == pos)
5963         {
5964           /* We found a composition.  Store the corresponding
5965              annotation data in BUF.  */
5966           int *head = buf;
5967           enum composition_method method = COMPOSITION_METHOD (prop);
5968           int nchars = COMPOSITION_LENGTH (prop);
5969
5970           ADD_COMPOSITION_DATA (buf, 0, nchars, method);
5971           if (method != COMPOSITION_RELATIVE)
5972             {
5973               Lisp_Object components;
5974               int len, i, i_byte;
5975
5976               components = COMPOSITION_COMPONENTS (prop);
5977               if (VECTORP (components))
5978                 {
5979                   len = XVECTOR (components)->size;
5980                   for (i = 0; i < len; i++)
5981                     *buf++ = XINT (AREF (components, i));
5982                 }
5983               else if (STRINGP (components))
5984                 {
5985                   len = SCHARS (components);
5986                   i = i_byte = 0;
5987                   while (i < len)
5988                     {
5989                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
5990                       buf++;
5991                     }
5992                 }
5993               else if (INTEGERP (components))
5994                 {
5995                   len = 1;
5996                   *buf++ = XINT (components);
5997                 }
5998               else if (CONSP (components))
5999                 {
6000                   for (len = 0; CONSP (components);
6001                        len++, components = XCDR (components))
6002                     *buf++ = XINT (XCAR (components));
6003                 }
6004               else
6005                 abort ();
6006               *head -= len;
6007             }
6008         }
6009
6010       if (find_composition (end, limit, &start, &end, &prop,
6011                             coding->src_object)
6012           && end <= limit)
6013         *stop = start;
6014       else
6015         *stop = limit;
6016     }
6017   return buf;
6018 }
6019
6020
6021 /* Extract an annotation datum from a text property `charset' at POS of
6022    CODING->src_object (buffer of string), store the data in BUF, set
6023    *STOP to the position where the value of `charset' property changes
6024    (limiting by LIMIT), and return the address of the next element of
6025    BUF.
6026
6027    If the property value is nil, set *STOP to the position where the
6028    property value is non-nil (limiting by LIMIT), and return BUF.  */
6029
6030 static INLINE int *
6031 handle_charset_annotation (pos, limit, coding, buf, stop)
6032      EMACS_INT pos, limit;
6033      struct coding_system *coding;
6034      int *buf;
6035      EMACS_INT *stop;
6036 {
6037   Lisp_Object val, next;
6038   int id;
6039
6040   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
6041   if (! NILP (val) && CHARSETP (val))
6042     id = XINT (CHARSET_SYMBOL_ID (val));
6043   else
6044     id = -1;
6045   ADD_CHARSET_DATA (buf, 0, 0, id);
6046   next = Fnext_single_property_change (make_number (pos), Qcharset,
6047                                        coding->src_object,
6048                                        make_number (limit));
6049   *stop = XINT (next);
6050   return buf;
6051 }
6052
6053
6054 static void
6055 consume_chars (coding)
6056      struct coding_system *coding;
6057 {
6058   int *buf = coding->charbuf;
6059   int *buf_end = coding->charbuf + coding->charbuf_size;
6060   const unsigned char *src = coding->source + coding->consumed;
6061   const unsigned char *src_end = coding->source + coding->src_bytes;
6062   EMACS_INT pos = coding->src_pos + coding->consumed_char;
6063   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
6064   int multibytep = coding->src_multibyte;
6065   Lisp_Object eol_type;
6066   int c;
6067   EMACS_INT stop, stop_composition, stop_charset;
6068
6069   eol_type = CODING_ID_EOL_TYPE (coding->id);
6070   if (VECTORP (eol_type))
6071     eol_type = Qunix;
6072
6073   /* Note: composition handling is not yet implemented.  */
6074   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6075
6076   if (NILP (coding->src_object))
6077     stop = stop_composition = stop_charset = end_pos;
6078   else
6079     {
6080       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6081         stop = stop_composition = pos;
6082       else
6083         stop = stop_composition = end_pos;
6084       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6085         stop = stop_charset = pos;
6086       else
6087         stop_charset = end_pos;
6088     }
6089
6090   /* Compensate for CRLF and conversion.  */
6091   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
6092   while (buf < buf_end)
6093     {
6094       if (pos == stop)
6095         {
6096           if (pos == end_pos)
6097             break;
6098           if (pos == stop_composition)
6099             buf = handle_composition_annotation (pos, end_pos, coding,
6100                                                  buf, &stop_composition);
6101           if (pos == stop_charset)
6102             buf = handle_charset_annotation (pos, end_pos, coding,
6103                                              buf, &stop_charset);
6104           stop = (stop_composition < stop_charset
6105                   ? stop_composition : stop_charset);
6106         }
6107
6108       if (! multibytep)
6109         {
6110           EMACS_INT bytes;
6111
6112           if (! CODING_FOR_UNIBYTE (coding)
6113               && (bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
6114             c = STRING_CHAR_ADVANCE (src), pos += bytes;
6115           else
6116             c = *src++, pos++;
6117         }
6118       else
6119         c = STRING_CHAR_ADVANCE (src), pos++;
6120       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6121         c = '\n';
6122       if (! EQ (eol_type, Qunix))
6123         {
6124           if (c == '\n')
6125             {
6126               if (EQ (eol_type, Qdos))
6127                 *buf++ = '\r';
6128               else
6129                 c = '\r';
6130             }
6131         }
6132       *buf++ = c;
6133     }
6134
6135   coding->consumed = src - coding->source;
6136   coding->consumed_char = pos - coding->src_pos;
6137   coding->charbuf_used = buf - coding->charbuf;
6138   coding->chars_at_source = 0;
6139 }
6140
6141
6142 /* Encode the text at CODING->src_object into CODING->dst_object.
6143    CODING->src_object is a buffer or a string.
6144    CODING->dst_object is a buffer or nil.
6145
6146    If CODING->src_object is a buffer, it must be the current buffer.
6147    In this case, if CODING->src_pos is positive, it is a position of
6148    the source text in the buffer, otherwise. the source text is in the
6149    gap area of the buffer, and coding->src_pos specifies the offset of
6150    the text from GPT (which must be the same as PT).  If this is the
6151    same buffer as CODING->dst_object, CODING->src_pos must be
6152    negative and CODING should not have `pre-write-conversion'.
6153
6154    If CODING->src_object is a string, CODING should not have
6155    `pre-write-conversion'.
6156
6157    If CODING->dst_object is a buffer, the encoded data is inserted at
6158    the current point of that buffer.
6159
6160    If CODING->dst_object is nil, the encoded data is placed at the
6161    memory area specified by CODING->destination.  */
6162
6163 static int
6164 encode_coding (coding)
6165      struct coding_system *coding;
6166 {
6167   Lisp_Object attrs;
6168
6169   attrs = CODING_ID_ATTRS (coding->id);
6170
6171   if (BUFFERP (coding->dst_object))
6172     {
6173       set_buffer_internal (XBUFFER (coding->dst_object));
6174       coding->dst_multibyte
6175         = ! NILP (current_buffer->enable_multibyte_characters);
6176     }
6177
6178   coding->consumed = coding->consumed_char = 0;
6179   coding->produced = coding->produced_char = 0;
6180   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6181   coding->errors = 0;
6182
6183   ALLOC_CONVERSION_WORK_AREA (coding);
6184
6185   do {
6186     coding_set_source (coding);
6187     consume_chars (coding);
6188
6189     if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
6190       translate_chars (coding, CODING_ATTR_ENCODE_TBL (attrs));
6191     else if (!NILP (Vstandard_translation_table_for_encode))
6192       translate_chars (coding, Vstandard_translation_table_for_encode);
6193
6194     coding_set_destination (coding);
6195     (*(coding->encoder)) (coding);
6196   } while (coding->consumed_char < coding->src_chars);
6197
6198   if (BUFFERP (coding->dst_object))
6199     insert_from_gap (coding->produced_char, coding->produced);
6200
6201   return (coding->result);
6202 }
6203
6204
6205 /* Name (or base name) of work buffer for code conversion.  */
6206 static Lisp_Object Vcode_conversion_workbuf_name;
6207
6208 /* A working buffer used by the top level conversion.  Once it is
6209    created, it is never destroyed.  It has the name
6210    Vcode_conversion_workbuf_name.  The other working buffers are
6211    destroyed after the use is finished, and their names are modified
6212    versions of Vcode_conversion_workbuf_name.  */
6213 static Lisp_Object Vcode_conversion_reused_workbuf;
6214
6215 /* 1 iff Vcode_conversion_reused_workbuf is already in use.  */
6216 static int reused_workbuf_in_use;
6217
6218
6219 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
6220    multibyteness of returning buffer.  */
6221
6222 Lisp_Object
6223 make_conversion_work_buffer (multibyte)
6224 {
6225   Lisp_Object name, workbuf;
6226   struct buffer *current;
6227
6228   if (reused_workbuf_in_use++)
6229     {
6230       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
6231       workbuf = Fget_buffer_create (name);
6232     }
6233   else
6234     {
6235       name = Vcode_conversion_workbuf_name;
6236       workbuf = Fget_buffer_create (name);
6237       if (NILP (Vcode_conversion_reused_workbuf))
6238         Vcode_conversion_reused_workbuf = workbuf;
6239     }
6240   current = current_buffer;
6241   set_buffer_internal (XBUFFER (workbuf));
6242   Ferase_buffer ();
6243   current_buffer->undo_list = Qt;
6244   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
6245   set_buffer_internal (current);
6246   return workbuf;
6247 }
6248
6249
6250 static Lisp_Object
6251 code_conversion_restore (arg)
6252      Lisp_Object arg;
6253 {
6254   Lisp_Object current, workbuf;
6255
6256   current = XCAR (arg);
6257   workbuf = XCDR (arg);
6258   if (! NILP (workbuf))
6259     {
6260       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
6261         reused_workbuf_in_use = 0;
6262       else if (! NILP (Fbuffer_live_p (workbuf)))
6263         Fkill_buffer (workbuf);
6264     }
6265   set_buffer_internal (XBUFFER (current));
6266   return Qnil;
6267 }
6268
6269 Lisp_Object
6270 code_conversion_save (with_work_buf, multibyte)
6271      int with_work_buf, multibyte;
6272 {
6273   Lisp_Object workbuf = Qnil;
6274
6275   if (with_work_buf)
6276     workbuf = make_conversion_work_buffer (multibyte);
6277   record_unwind_protect (code_conversion_restore,
6278                          Fcons (Fcurrent_buffer (), workbuf));
6279   return workbuf;
6280 }
6281
6282 int
6283 decode_coding_gap (coding, chars, bytes)
6284      struct coding_system *coding;
6285      EMACS_INT chars, bytes;
6286 {
6287   int count = specpdl_ptr - specpdl;
6288   Lisp_Object attrs;
6289
6290   code_conversion_save (0, 0);
6291
6292   coding->src_object = Fcurrent_buffer ();
6293   coding->src_chars = chars;
6294   coding->src_bytes = bytes;
6295   coding->src_pos = -chars;
6296   coding->src_pos_byte = -bytes;
6297   coding->src_multibyte = chars < bytes;
6298   coding->dst_object = coding->src_object;
6299   coding->dst_pos = PT;
6300   coding->dst_pos_byte = PT_BYTE;
6301   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
6302   coding->mode |= CODING_MODE_LAST_BLOCK;
6303
6304   if (CODING_REQUIRE_DETECTION (coding))
6305     detect_coding (coding);
6306
6307   decode_coding (coding);
6308
6309   attrs = CODING_ID_ATTRS (coding->id);
6310   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6311     {
6312       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6313       Lisp_Object val;
6314
6315       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6316       val = call1 (CODING_ATTR_POST_READ (attrs),
6317                    make_number (coding->produced_char));
6318       CHECK_NATNUM (val);
6319       coding->produced_char += Z - prev_Z;
6320       coding->produced += Z_BYTE - prev_Z_BYTE;
6321     }
6322
6323   unbind_to (count, Qnil);
6324   return coding->result;
6325 }
6326
6327 int
6328 encode_coding_gap (coding, chars, bytes)
6329      struct coding_system *coding;
6330      EMACS_INT chars, bytes;
6331 {
6332   int count = specpdl_ptr - specpdl;
6333
6334   code_conversion_save (0, 0);
6335
6336   coding->src_object = Fcurrent_buffer ();
6337   coding->src_chars = chars;
6338   coding->src_bytes = bytes;
6339   coding->src_pos = -chars;
6340   coding->src_pos_byte = -bytes;
6341   coding->src_multibyte = chars < bytes;
6342   coding->dst_object = coding->src_object;
6343   coding->dst_pos = PT;
6344   coding->dst_pos_byte = PT_BYTE;
6345
6346   encode_coding (coding);
6347
6348   unbind_to (count, Qnil);
6349   return coding->result;
6350 }
6351
6352
6353 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6354    SRC_OBJECT into DST_OBJECT by coding context CODING.
6355
6356    SRC_OBJECT is a buffer, a string, or Qnil.
6357
6358    If it is a buffer, the text is at point of the buffer.  FROM and TO
6359    are positions in the buffer.
6360
6361    If it is a string, the text is at the beginning of the string.
6362    FROM and TO are indices to the string.
6363
6364    If it is nil, the text is at coding->source.  FROM and TO are
6365    indices to coding->source.
6366
6367    DST_OBJECT is a buffer, Qt, or Qnil.
6368
6369    If it is a buffer, the decoded text is inserted at point of the
6370    buffer.  If the buffer is the same as SRC_OBJECT, the source text
6371    is deleted.
6372
6373    If it is Qt, a string is made from the decoded text, and
6374    set in CODING->dst_object.
6375
6376    If it is Qnil, the decoded text is stored at CODING->destination.
6377    The caller must allocate CODING->dst_bytes bytes at
6378    CODING->destination by xmalloc.  If the decoded text is longer than
6379    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6380  */
6381
6382 void
6383 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6384                       dst_object)
6385      struct coding_system *coding;
6386      Lisp_Object src_object;
6387      EMACS_INT from, from_byte, to, to_byte;
6388      Lisp_Object dst_object;
6389 {
6390   int count = specpdl_ptr - specpdl;
6391   unsigned char *destination;
6392   EMACS_INT dst_bytes;
6393   EMACS_INT chars = to - from;
6394   EMACS_INT bytes = to_byte - from_byte;
6395   Lisp_Object attrs;
6396   Lisp_Object buffer;
6397   int saved_pt = -1, saved_pt_byte;
6398
6399   buffer = Fcurrent_buffer ();
6400
6401   if (NILP (dst_object))
6402     {
6403       destination = coding->destination;
6404       dst_bytes = coding->dst_bytes;
6405     }
6406
6407   coding->src_object = src_object;
6408   coding->src_chars = chars;
6409   coding->src_bytes = bytes;
6410   coding->src_multibyte = chars < bytes;
6411
6412   if (STRINGP (src_object))
6413     {
6414       coding->src_pos = from;
6415       coding->src_pos_byte = from_byte;
6416     }
6417   else if (BUFFERP (src_object))
6418     {
6419       set_buffer_internal (XBUFFER (src_object));
6420       if (from != GPT)
6421         move_gap_both (from, from_byte);
6422       if (EQ (src_object, dst_object))
6423         {
6424           saved_pt = PT, saved_pt_byte = PT_BYTE;
6425           TEMP_SET_PT_BOTH (from, from_byte);
6426           del_range_both (from, from_byte, to, to_byte, 1);
6427           coding->src_pos = -chars;
6428           coding->src_pos_byte = -bytes;
6429         }
6430       else
6431         {
6432           coding->src_pos = from;
6433           coding->src_pos_byte = from_byte;
6434         }
6435     }
6436
6437   if (CODING_REQUIRE_DETECTION (coding))
6438     detect_coding (coding);
6439   attrs = CODING_ID_ATTRS (coding->id);
6440
6441   if (EQ (dst_object, Qt)
6442       || (! NILP (CODING_ATTR_POST_READ (attrs))
6443           && NILP (dst_object)))
6444     {
6445       coding->dst_object = code_conversion_save (1, 1);
6446       coding->dst_pos = BEG;
6447       coding->dst_pos_byte = BEG_BYTE;
6448       coding->dst_multibyte = 1;
6449     }
6450   else if (BUFFERP (dst_object))
6451     {
6452       code_conversion_save (0, 0);
6453       coding->dst_object = dst_object;
6454       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6455       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6456       coding->dst_multibyte
6457         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6458     }
6459   else
6460     {
6461       code_conversion_save (0, 0);
6462       coding->dst_object = Qnil;
6463       coding->dst_multibyte = 1;
6464     }
6465
6466   decode_coding (coding);
6467
6468   if (BUFFERP (coding->dst_object))
6469     set_buffer_internal (XBUFFER (coding->dst_object));
6470
6471   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6472     {
6473       struct gcpro gcpro1, gcpro2;
6474       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6475       Lisp_Object val;
6476
6477       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6478       GCPRO2 (coding->src_object, coding->dst_object);
6479       val = call1 (CODING_ATTR_POST_READ (attrs),
6480                    make_number (coding->produced_char));
6481       UNGCPRO;
6482       CHECK_NATNUM (val);
6483       coding->produced_char += Z - prev_Z;
6484       coding->produced += Z_BYTE - prev_Z_BYTE;
6485     }
6486
6487   if (EQ (dst_object, Qt))
6488     {
6489       coding->dst_object = Fbuffer_string ();
6490     }
6491   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6492     {
6493       set_buffer_internal (XBUFFER (coding->dst_object));
6494       if (dst_bytes < coding->produced)
6495         {
6496           destination
6497             = (unsigned char *) xrealloc (destination, coding->produced);
6498           if (! destination)
6499             {
6500               record_conversion_result (coding,
6501                                         CODING_RESULT_INSUFFICIENT_DST);
6502               unbind_to (count, Qnil);
6503               return;
6504             }
6505           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6506             move_gap_both (BEGV, BEGV_BYTE);
6507           bcopy (BEGV_ADDR, destination, coding->produced);
6508           coding->destination = destination;
6509         }
6510     }
6511
6512   if (saved_pt >= 0)
6513     {
6514       /* This is the case of:
6515          (BUFFERP (src_object) && EQ (src_object, dst_object))
6516          As we have moved PT while replacing the original buffer
6517          contents, we must recover it now.  */
6518       set_buffer_internal (XBUFFER (src_object));
6519       if (saved_pt < from)
6520         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6521       else if (saved_pt < from + chars)
6522         TEMP_SET_PT_BOTH (from, from_byte);
6523       else if (! NILP (current_buffer->enable_multibyte_characters))
6524         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6525                           saved_pt_byte + (coding->produced - bytes));
6526       else
6527         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6528                           saved_pt_byte + (coding->produced - bytes));
6529     }
6530
6531   unbind_to (count, coding->dst_object);
6532 }
6533
6534
6535 void
6536 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6537                       dst_object)
6538      struct coding_system *coding;
6539      Lisp_Object src_object;
6540      EMACS_INT from, from_byte, to, to_byte;
6541      Lisp_Object dst_object;
6542 {
6543   int count = specpdl_ptr - specpdl;
6544   EMACS_INT chars = to - from;
6545   EMACS_INT bytes = to_byte - from_byte;
6546   Lisp_Object attrs;
6547   Lisp_Object buffer;
6548   int saved_pt = -1, saved_pt_byte;
6549
6550   buffer = Fcurrent_buffer ();
6551
6552   coding->src_object = src_object;
6553   coding->src_chars = chars;
6554   coding->src_bytes = bytes;
6555   coding->src_multibyte = chars < bytes;
6556
6557   attrs = CODING_ID_ATTRS (coding->id);
6558
6559   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6560     {
6561       coding->src_object = code_conversion_save (1, coding->src_multibyte);
6562       set_buffer_internal (XBUFFER (coding->src_object));
6563       if (STRINGP (src_object))
6564         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6565       else if (BUFFERP (src_object))
6566         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6567       else
6568         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6569
6570       if (EQ (src_object, dst_object))
6571         {
6572           set_buffer_internal (XBUFFER (src_object));
6573           saved_pt = PT, saved_pt_byte = PT_BYTE;
6574           del_range_both (from, from_byte, to, to_byte, 1);
6575           set_buffer_internal (XBUFFER (coding->src_object));
6576         }
6577
6578       call2 (CODING_ATTR_PRE_WRITE (attrs),
6579              make_number (BEG), make_number (Z));
6580       coding->src_object = Fcurrent_buffer ();
6581       if (BEG != GPT)
6582         move_gap_both (BEG, BEG_BYTE);
6583       coding->src_chars = Z - BEG;
6584       coding->src_bytes = Z_BYTE - BEG_BYTE;
6585       coding->src_pos = BEG;
6586       coding->src_pos_byte = BEG_BYTE;
6587       coding->src_multibyte = Z < Z_BYTE;
6588     }
6589   else if (STRINGP (src_object))
6590     {
6591       code_conversion_save (0, 0);
6592       coding->src_pos = from;
6593       coding->src_pos_byte = from_byte;
6594     }
6595   else if (BUFFERP (src_object))
6596     {
6597       code_conversion_save (0, 0);
6598       set_buffer_internal (XBUFFER (src_object));
6599       if (EQ (src_object, dst_object))
6600         {
6601           saved_pt = PT, saved_pt_byte = PT_BYTE;
6602           coding->src_object = del_range_1 (from, to, 1, 1);
6603           coding->src_pos = 0;
6604           coding->src_pos_byte = 0;
6605         }
6606       else
6607         {
6608           if (from < GPT && to >= GPT)
6609             move_gap_both (from, from_byte);
6610           coding->src_pos = from;
6611           coding->src_pos_byte = from_byte;
6612         }
6613     }
6614   else
6615     code_conversion_save (0, 0);
6616
6617   if (BUFFERP (dst_object))
6618     {
6619       coding->dst_object = dst_object;
6620       if (EQ (src_object, dst_object))
6621         {
6622           coding->dst_pos = from;
6623           coding->dst_pos_byte = from_byte;
6624         }
6625       else
6626         {
6627           coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6628           coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6629         }
6630       coding->dst_multibyte
6631         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6632     }
6633   else if (EQ (dst_object, Qt))
6634     {
6635       coding->dst_object = Qnil;
6636       coding->dst_bytes = coding->src_chars;
6637       if (coding->dst_bytes == 0)
6638         coding->dst_bytes = 1;
6639       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
6640       coding->dst_multibyte = 0;
6641     }
6642   else
6643     {
6644       coding->dst_object = Qnil;
6645       coding->dst_multibyte = 0;
6646     }
6647
6648   encode_coding (coding);
6649
6650   if (EQ (dst_object, Qt))
6651     {
6652       if (BUFFERP (coding->dst_object))
6653         coding->dst_object = Fbuffer_string ();
6654       else
6655         {
6656           coding->dst_object
6657             = make_unibyte_string ((char *) coding->destination,
6658                                    coding->produced);
6659           xfree (coding->destination);
6660         }
6661     }
6662
6663   if (saved_pt >= 0)
6664     {
6665       /* This is the case of:
6666          (BUFFERP (src_object) && EQ (src_object, dst_object))
6667          As we have moved PT while replacing the original buffer
6668          contents, we must recover it now.  */
6669       set_buffer_internal (XBUFFER (src_object));
6670       if (saved_pt < from)
6671         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6672       else if (saved_pt < from + chars)
6673         TEMP_SET_PT_BOTH (from, from_byte);
6674       else if (! NILP (current_buffer->enable_multibyte_characters))
6675         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6676                           saved_pt_byte + (coding->produced - bytes));
6677       else
6678         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6679                           saved_pt_byte + (coding->produced - bytes));
6680     }
6681
6682   unbind_to (count, Qnil);
6683 }
6684
6685
6686 Lisp_Object
6687 preferred_coding_system ()
6688 {
6689   int id = coding_categories[coding_priorities[0]].id;
6690
6691   return CODING_ID_NAME (id);
6692 }
6693
6694 \f
6695 #ifdef emacs
6696 /*** 8. Emacs Lisp library functions ***/
6697
6698 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6699        doc: /* Return t if OBJECT is nil or a coding-system.
6700 See the documentation of `define-coding-system' for information
6701 about coding-system objects.  */)
6702      (obj)
6703      Lisp_Object obj;
6704 {
6705   return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
6706 }
6707
6708 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6709        Sread_non_nil_coding_system, 1, 1, 0,
6710        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
6711      (prompt)
6712      Lisp_Object prompt;
6713 {
6714   Lisp_Object val;
6715   do
6716     {
6717       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6718                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6719     }
6720   while (SCHARS (val) == 0);
6721   return (Fintern (val, Qnil));
6722 }
6723
6724 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6725        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6726 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.  */)
6727      (prompt, default_coding_system)
6728      Lisp_Object prompt, default_coding_system;
6729 {
6730   Lisp_Object val;
6731   if (SYMBOLP (default_coding_system))
6732     XSETSTRING (default_coding_system, XPNTR (SYMBOL_NAME (default_coding_system)));
6733   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6734                           Qt, Qnil, Qcoding_system_history,
6735                           default_coding_system, Qnil);
6736   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
6737 }
6738
6739 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6740        1, 1, 0,
6741        doc: /* Check validity of CODING-SYSTEM.
6742 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.  */)
6743   (coding_system)
6744      Lisp_Object coding_system;
6745 {
6746   CHECK_SYMBOL (coding_system);
6747   if (!NILP (Fcoding_system_p (coding_system)))
6748     return coding_system;
6749   while (1)
6750     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6751 }
6752
6753 \f
6754 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
6755    HIGHEST is nonzero, return the coding system of the highest
6756    priority among the detected coding systems.  Otherwize return a
6757    list of detected coding systems sorted by their priorities.  If
6758    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
6759    multibyte form but contains only ASCII and eight-bit chars.
6760    Otherwise, the bytes are raw bytes.
6761
6762    CODING-SYSTEM controls the detection as below:
6763
6764    If it is nil, detect both text-format and eol-format.  If the
6765    text-format part of CODING-SYSTEM is already specified
6766    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
6767    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6768    detect only text-format.  */
6769
6770 Lisp_Object
6771 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
6772                       coding_system)
6773      const unsigned char *src;
6774      int src_chars, src_bytes, highest;
6775      int multibytep;
6776      Lisp_Object coding_system;
6777 {
6778   const unsigned char *src_end = src + src_bytes;
6779   Lisp_Object attrs, eol_type;
6780   Lisp_Object val;
6781   struct coding_system coding;
6782   int id;
6783   struct coding_detection_info detect_info;
6784   enum coding_category base_category;
6785
6786   if (NILP (coding_system))
6787     coding_system = Qundecided;
6788   setup_coding_system (coding_system, &coding);
6789   attrs = CODING_ID_ATTRS (coding.id);
6790   eol_type = CODING_ID_EOL_TYPE (coding.id);
6791   coding_system = CODING_ATTR_BASE_NAME (attrs);
6792
6793   coding.source = src;
6794   coding.src_chars = src_chars;
6795   coding.src_bytes = src_bytes;
6796   coding.src_multibyte = multibytep;
6797   coding.consumed = 0;
6798   coding.mode |= CODING_MODE_LAST_BLOCK;
6799
6800   detect_info.checked = detect_info.found = detect_info.rejected = 0;
6801
6802   /* At first, detect text-format if necessary.  */
6803   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
6804   if (base_category == coding_category_undecided)
6805     {
6806       enum coding_category category;
6807       struct coding_system *this;
6808       int c, i;
6809
6810       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
6811       for (i = 0; src < src_end; i++, src++)
6812         {
6813           c = *src;
6814           if (c & 0x80 || (c < 0x20 && (c == 0
6815                                         || c == ISO_CODE_ESC
6816                                         || c == ISO_CODE_SI
6817                                         || c == ISO_CODE_SO)))
6818             break;
6819         }
6820       coding.head_ascii = src - coding.source;
6821
6822       if (src < src_end)
6823         for (i = 0; i < coding_category_raw_text; i++)
6824           {
6825             category = coding_priorities[i];
6826             this = coding_categories + category;
6827
6828             if (this->id < 0)
6829               {
6830                 /* No coding system of this category is defined.  */
6831                 detect_info.rejected |= (1 << category);
6832               }
6833             else if (category >= coding_category_raw_text)
6834               continue;
6835             else if (detect_info.checked & (1 << category))
6836               {
6837                 if (highest
6838                     && (detect_info.found & (1 << category)))
6839                   break;
6840               }
6841             else
6842               {
6843                 if ((*(this->detector)) (&coding, &detect_info)
6844                     && highest
6845                     && (detect_info.found & (1 << category)))
6846                   {
6847                     if (category == coding_category_utf_16_auto)
6848                       {
6849                         if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6850                           category = coding_category_utf_16_le;
6851                         else
6852                           category = coding_category_utf_16_be;
6853                       }
6854                     break;
6855                   }
6856               }
6857           }
6858
6859       if (detect_info.rejected == CATEGORY_MASK_ANY)
6860         {
6861           detect_info.found = CATEGORY_MASK_RAW_TEXT;
6862           id = coding_categories[coding_category_raw_text].id;
6863           val = Fcons (make_number (id), Qnil);
6864         }
6865       else if (! detect_info.rejected && ! detect_info.found)
6866         {
6867           detect_info.found = CATEGORY_MASK_ANY;
6868           id = coding_categories[coding_category_undecided].id;
6869           val = Fcons (make_number (id), Qnil);
6870         }
6871       else if (highest)
6872         {
6873           if (detect_info.found)
6874             {
6875               detect_info.found = 1 << category;
6876               val = Fcons (make_number (this->id), Qnil);
6877             }
6878           else
6879             for (i = 0; i < coding_category_raw_text; i++)
6880               if (! (detect_info.rejected & (1 << coding_priorities[i])))
6881                 {
6882                   detect_info.found = 1 << coding_priorities[i];
6883                   id = coding_categories[coding_priorities[i]].id;
6884                   val = Fcons (make_number (id), Qnil);
6885                   break;
6886                 }
6887         }
6888       else
6889         {
6890           int mask = detect_info.rejected | detect_info.found;
6891           int found = 0;
6892           val = Qnil;
6893
6894           for (i = coding_category_raw_text - 1; i >= 0; i--)
6895             {
6896               category = coding_priorities[i];
6897               if (! (mask & (1 << category)))
6898                 {
6899                   found |= 1 << category;
6900                   id = coding_categories[category].id;
6901                   val = Fcons (make_number (id), val);
6902                 }
6903             }
6904           for (i = coding_category_raw_text - 1; i >= 0; i--)
6905             {
6906               category = coding_priorities[i];
6907               if (detect_info.found & (1 << category))
6908                 {
6909                   id = coding_categories[category].id;
6910                   val = Fcons (make_number (id), val);
6911                 }
6912             }
6913           detect_info.found |= found;
6914         }
6915     }
6916   else if (base_category == coding_category_utf_16_auto)
6917     {
6918       if (detect_coding_utf_16 (&coding, &detect_info))
6919         {
6920           enum coding_category category;
6921           struct coding_system *this;
6922
6923           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
6924             this = coding_categories + coding_category_utf_16_le;
6925           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
6926             this = coding_categories + coding_category_utf_16_be;
6927           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
6928             this = coding_categories + coding_category_utf_16_be_nosig;
6929           else
6930             this = coding_categories + coding_category_utf_16_le_nosig;
6931           val = Fcons (make_number (this->id), Qnil);
6932         }
6933     }
6934   else
6935     {
6936       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
6937       val = Fcons (make_number (coding.id), Qnil);
6938     }
6939
6940   /* Then, detect eol-format if necessary.  */
6941   {
6942     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
6943     Lisp_Object tail;
6944
6945     if (VECTORP (eol_type))
6946       {
6947         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
6948           normal_eol = detect_eol (coding.source, src_bytes,
6949                                    coding_category_raw_text);
6950         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
6951                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
6952           utf_16_be_eol = detect_eol (coding.source, src_bytes,
6953                                       coding_category_utf_16_be);
6954         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
6955                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
6956           utf_16_le_eol = detect_eol (coding.source, src_bytes,
6957                                       coding_category_utf_16_le);
6958       }
6959     else
6960       {
6961         if (EQ (eol_type, Qunix))
6962           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
6963         else if (EQ (eol_type, Qdos))
6964           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
6965         else
6966           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
6967       }
6968
6969     for (tail = val; CONSP (tail); tail = XCDR (tail))
6970       {
6971         enum coding_category category;
6972         int this_eol;
6973
6974         id = XINT (XCAR (tail));
6975         attrs = CODING_ID_ATTRS (id);
6976         category = XINT (CODING_ATTR_CATEGORY (attrs));
6977         eol_type = CODING_ID_EOL_TYPE (id);
6978         if (VECTORP (eol_type))
6979           {
6980             if (category == coding_category_utf_16_be
6981                 || category == coding_category_utf_16_be_nosig)
6982               this_eol = utf_16_be_eol;
6983             else if (category == coding_category_utf_16_le
6984                      || category == coding_category_utf_16_le_nosig)
6985               this_eol = utf_16_le_eol;
6986             else
6987               this_eol = normal_eol;
6988
6989             if (this_eol == EOL_SEEN_LF)
6990               XSETCAR (tail, AREF (eol_type, 0));
6991             else if (this_eol == EOL_SEEN_CRLF)
6992               XSETCAR (tail, AREF (eol_type, 1));
6993             else if (this_eol == EOL_SEEN_CR)
6994               XSETCAR (tail, AREF (eol_type, 2));
6995             else
6996               XSETCAR (tail, CODING_ID_NAME (id));
6997           }
6998         else
6999           XSETCAR (tail, CODING_ID_NAME (id));
7000       }
7001   }
7002
7003   return (highest ? XCAR (val) : val);
7004 }
7005
7006
7007 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
7008        2, 3, 0,
7009        doc: /* Detect coding system of the text in the region between START and END.
7010 Return a list of possible coding systems ordered by priority.
7011
7012 If only ASCII characters are found, it returns a list of single element
7013 `undecided' or its subsidiary coding system according to a detected
7014 end-of-line format.
7015
7016 If optional argument HIGHEST is non-nil, return the coding system of
7017 highest priority.  */)
7018      (start, end, highest)
7019      Lisp_Object start, end, highest;
7020 {
7021   int from, to;
7022   int from_byte, to_byte;
7023
7024   CHECK_NUMBER_COERCE_MARKER (start);
7025   CHECK_NUMBER_COERCE_MARKER (end);
7026
7027   validate_region (&start, &end);
7028   from = XINT (start), to = XINT (end);
7029   from_byte = CHAR_TO_BYTE (from);
7030   to_byte = CHAR_TO_BYTE (to);
7031
7032   if (from < GPT && to >= GPT)
7033     move_gap_both (to, to_byte);
7034
7035   return detect_coding_system (BYTE_POS_ADDR (from_byte),
7036                                to - from, to_byte - from_byte,
7037                                !NILP (highest),
7038                                !NILP (current_buffer
7039                                       ->enable_multibyte_characters),
7040                                Qnil);
7041 }
7042
7043 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
7044        1, 2, 0,
7045        doc: /* Detect coding system of the text in STRING.
7046 Return a list of possible coding systems ordered by priority.
7047
7048 If only ASCII characters are found, it returns a list of single element
7049 `undecided' or its subsidiary coding system according to a detected
7050 end-of-line format.
7051
7052 If optional argument HIGHEST is non-nil, return the coding system of
7053 highest priority.  */)
7054      (string, highest)
7055      Lisp_Object string, highest;
7056 {
7057   CHECK_STRING (string);
7058
7059   return detect_coding_system (SDATA (string),
7060                                SCHARS (string), SBYTES (string),
7061                                !NILP (highest), STRING_MULTIBYTE (string),
7062                                Qnil);
7063 }
7064
7065
7066 static INLINE int
7067 char_encodable_p (c, attrs)
7068      int c;
7069      Lisp_Object attrs;
7070 {
7071   Lisp_Object tail;
7072   struct charset *charset;
7073
7074   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
7075        CONSP (tail); tail = XCDR (tail))
7076     {
7077       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7078       if (CHAR_CHARSET_P (c, charset))
7079         break;
7080     }
7081   return (! NILP (tail));
7082 }
7083
7084
7085 /* Return a list of coding systems that safely encode the text between
7086    START and END.  If EXCLUDE is non-nil, it is a list of coding
7087    systems not to check.  The returned list doesn't contain any such
7088    coding systems.  In any case, if the text contains only ASCII or is
7089    unibyte, return t.  */
7090
7091 DEFUN ("find-coding-systems-region-internal",
7092        Ffind_coding_systems_region_internal,
7093        Sfind_coding_systems_region_internal, 2, 3, 0,
7094        doc: /* Internal use only.  */)
7095      (start, end, exclude)
7096      Lisp_Object start, end, exclude;
7097 {
7098   Lisp_Object coding_attrs_list, safe_codings;
7099   EMACS_INT start_byte, end_byte;
7100   const unsigned char *p, *pbeg, *pend;
7101   int c;
7102   Lisp_Object tail, elt;
7103
7104   if (STRINGP (start))
7105     {
7106       if (!STRING_MULTIBYTE (start)
7107           || SCHARS (start) == SBYTES (start))
7108         return Qt;
7109       start_byte = 0;
7110       end_byte = SBYTES (start);
7111     }
7112   else
7113     {
7114       CHECK_NUMBER_COERCE_MARKER (start);
7115       CHECK_NUMBER_COERCE_MARKER (end);
7116       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7117         args_out_of_range (start, end);
7118       if (NILP (current_buffer->enable_multibyte_characters))
7119         return Qt;
7120       start_byte = CHAR_TO_BYTE (XINT (start));
7121       end_byte = CHAR_TO_BYTE (XINT (end));
7122       if (XINT (end) - XINT (start) == end_byte - start_byte)
7123         return Qt;
7124
7125       if (XINT (start) < GPT && XINT (end) > GPT)
7126         {
7127           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7128             move_gap_both (XINT (start), start_byte);
7129           else
7130             move_gap_both (XINT (end), end_byte);
7131         }
7132     }
7133
7134   coding_attrs_list = Qnil;
7135   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
7136     if (NILP (exclude)
7137         || NILP (Fmemq (XCAR (tail), exclude)))
7138       {
7139         Lisp_Object attrs;
7140
7141         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
7142         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
7143             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7144           coding_attrs_list = Fcons (attrs, coding_attrs_list);
7145       }
7146
7147   if (STRINGP (start))
7148     p = pbeg = SDATA (start);
7149   else
7150     p = pbeg = BYTE_POS_ADDR (start_byte);
7151   pend = p + (end_byte - start_byte);
7152
7153   while (p < pend && ASCII_BYTE_P (*p)) p++;
7154   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7155
7156   while (p < pend)
7157     {
7158       if (ASCII_BYTE_P (*p))
7159         p++;
7160       else
7161         {
7162           c = STRING_CHAR_ADVANCE (p);
7163
7164           charset_map_loaded = 0;
7165           for (tail = coding_attrs_list; CONSP (tail);)
7166             {
7167               elt = XCAR (tail);
7168               if (NILP (elt))
7169                 tail = XCDR (tail);
7170               else if (char_encodable_p (c, elt))
7171                 tail = XCDR (tail);
7172               else if (CONSP (XCDR (tail)))
7173                 {
7174                   XSETCAR (tail, XCAR (XCDR (tail)));
7175                   XSETCDR (tail, XCDR (XCDR (tail)));
7176                 }
7177               else
7178                 {
7179                   XSETCAR (tail, Qnil);
7180                   tail = XCDR (tail);
7181                 }
7182             }
7183           if (charset_map_loaded)
7184             {
7185               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7186
7187               if (STRINGP (start))
7188                 pbeg = SDATA (start);
7189               else
7190                 pbeg = BYTE_POS_ADDR (start_byte);
7191               p = pbeg + p_offset;
7192               pend = pbeg + pend_offset;
7193             }
7194         }
7195     }
7196
7197   safe_codings = Qnil;
7198   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
7199     if (! NILP (XCAR (tail)))
7200       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
7201
7202   return safe_codings;
7203 }
7204
7205
7206 DEFUN ("unencodable-char-position", Funencodable_char_position,
7207        Sunencodable_char_position, 3, 5, 0,
7208        doc: /*
7209 Return position of first un-encodable character in a region.
7210 START and END specfiy the region and CODING-SYSTEM specifies the
7211 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
7212
7213 If optional 4th argument COUNT is non-nil, it specifies at most how
7214 many un-encodable characters to search.  In this case, the value is a
7215 list of positions.
7216
7217 If optional 5th argument STRING is non-nil, it is a string to search
7218 for un-encodable characters.  In that case, START and END are indexes
7219 to the string.  */)
7220      (start, end, coding_system, count, string)
7221      Lisp_Object start, end, coding_system, count, string;
7222 {
7223   int n;
7224   struct coding_system coding;
7225   Lisp_Object attrs, charset_list;
7226   Lisp_Object positions;
7227   int from, to;
7228   const unsigned char *p, *stop, *pend;
7229   int ascii_compatible;
7230
7231   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
7232   attrs = CODING_ID_ATTRS (coding.id);
7233   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
7234     return Qnil;
7235   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
7236   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7237
7238   if (NILP (string))
7239     {
7240       validate_region (&start, &end);
7241       from = XINT (start);
7242       to = XINT (end);
7243       if (NILP (current_buffer->enable_multibyte_characters)
7244           || (ascii_compatible
7245               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
7246         return Qnil;
7247       p = CHAR_POS_ADDR (from);
7248       pend = CHAR_POS_ADDR (to);
7249       if (from < GPT && to >= GPT)
7250         stop = GPT_ADDR;
7251       else
7252         stop = pend;
7253     }
7254   else
7255     {
7256       CHECK_STRING (string);
7257       CHECK_NATNUM (start);
7258       CHECK_NATNUM (end);
7259       from = XINT (start);
7260       to = XINT (end);
7261       if (from > to
7262           || to > SCHARS (string))
7263         args_out_of_range_3 (string, start, end);
7264       if (! STRING_MULTIBYTE (string))
7265         return Qnil;
7266       p = SDATA (string) + string_char_to_byte (string, from);
7267       stop = pend = SDATA (string) + string_char_to_byte (string, to);
7268       if (ascii_compatible && (to - from) == (pend - p))
7269         return Qnil;
7270     }
7271
7272   if (NILP (count))
7273     n = 1;
7274   else
7275     {
7276       CHECK_NATNUM (count);
7277       n = XINT (count);
7278     }
7279
7280   positions = Qnil;
7281   while (1)
7282     {
7283       int c;
7284
7285       if (ascii_compatible)
7286         while (p < stop && ASCII_BYTE_P (*p))
7287           p++, from++;
7288       if (p >= stop)
7289         {
7290           if (p >= pend)
7291             break;
7292           stop = pend;
7293           p = GAP_END_ADDR;
7294         }
7295
7296       c = STRING_CHAR_ADVANCE (p);
7297       if (! (ASCII_CHAR_P (c) && ascii_compatible)
7298           && ! char_charset (c, charset_list, NULL))
7299         {
7300           positions = Fcons (make_number (from), positions);
7301           n--;
7302           if (n == 0)
7303             break;
7304         }
7305
7306       from++;
7307     }
7308
7309   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
7310 }
7311
7312
7313 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
7314        Scheck_coding_systems_region, 3, 3, 0,
7315        doc: /* Check if the region is encodable by coding systems.
7316
7317 START and END are buffer positions specifying the region.
7318 CODING-SYSTEM-LIST is a list of coding systems to check.
7319
7320 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
7321 CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
7322 whole region, POS0, POS1, ... are buffer positions where non-encodable
7323 characters are found.
7324
7325 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7326 value is nil.
7327
7328 START may be a string.  In that case, check if the string is
7329 encodable, and the value contains indices to the string instead of
7330 buffer positions.  END is ignored.  */)
7331      (start, end, coding_system_list)
7332      Lisp_Object start, end, coding_system_list;
7333 {
7334   Lisp_Object list;
7335   EMACS_INT start_byte, end_byte;
7336   int pos;
7337   const unsigned char *p, *pbeg, *pend;
7338   int c;
7339   Lisp_Object tail, elt;
7340
7341   if (STRINGP (start))
7342     {
7343       if (!STRING_MULTIBYTE (start)
7344           && SCHARS (start) != SBYTES (start))
7345         return Qnil;
7346       start_byte = 0;
7347       end_byte = SBYTES (start);
7348       pos = 0;
7349     }
7350   else
7351     {
7352       CHECK_NUMBER_COERCE_MARKER (start);
7353       CHECK_NUMBER_COERCE_MARKER (end);
7354       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7355         args_out_of_range (start, end);
7356       if (NILP (current_buffer->enable_multibyte_characters))
7357         return Qnil;
7358       start_byte = CHAR_TO_BYTE (XINT (start));
7359       end_byte = CHAR_TO_BYTE (XINT (end));
7360       if (XINT (end) - XINT (start) == end_byte - start_byte)
7361         return Qt;
7362
7363       if (XINT (start) < GPT && XINT (end) > GPT)
7364         {
7365           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7366             move_gap_both (XINT (start), start_byte);
7367           else
7368             move_gap_both (XINT (end), end_byte);
7369         }
7370       pos = XINT (start);
7371     }
7372
7373   list = Qnil;
7374   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
7375     {
7376       elt = XCAR (tail);
7377       list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
7378                                        Qnil)),
7379                     list);
7380     }
7381
7382   if (STRINGP (start))
7383     p = pbeg = SDATA (start);
7384   else
7385     p = pbeg = BYTE_POS_ADDR (start_byte);
7386   pend = p + (end_byte - start_byte);
7387
7388   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
7389   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7390
7391   while (p < pend)
7392     {
7393       if (ASCII_BYTE_P (*p))
7394         p++;
7395       else
7396         {
7397           c = STRING_CHAR_ADVANCE (p);
7398
7399           charset_map_loaded = 0;
7400           for (tail = list; CONSP (tail); tail = XCDR (tail))
7401             {
7402               elt = XCDR (XCAR (tail));
7403               if (! char_encodable_p (c, XCAR (elt)))
7404                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
7405             }
7406           if (charset_map_loaded)
7407             {
7408               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7409
7410               if (STRINGP (start))
7411                 pbeg = SDATA (start);
7412               else
7413                 pbeg = BYTE_POS_ADDR (start_byte);
7414               p = pbeg + p_offset;
7415               pend = pbeg + pend_offset;
7416             }
7417         }
7418       pos++;
7419     }
7420
7421   tail = list;
7422   list = Qnil;
7423   for (; CONSP (tail); tail = XCDR (tail))
7424     {
7425       elt = XCAR (tail);
7426       if (CONSP (XCDR (XCDR (elt))))
7427         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
7428                       list);
7429     }
7430
7431   return list;
7432 }
7433
7434
7435
7436 Lisp_Object
7437 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
7438      Lisp_Object start, end, coding_system, dst_object;
7439      int encodep, norecord;
7440 {
7441   struct coding_system coding;
7442   EMACS_INT from, from_byte, to, to_byte;
7443   Lisp_Object src_object;
7444
7445   CHECK_NUMBER_COERCE_MARKER (start);
7446   CHECK_NUMBER_COERCE_MARKER (end);
7447   if (NILP (coding_system))
7448     coding_system = Qno_conversion;
7449   else
7450     CHECK_CODING_SYSTEM (coding_system);
7451   src_object = Fcurrent_buffer ();
7452   if (NILP (dst_object))
7453     dst_object = src_object;
7454   else if (! EQ (dst_object, Qt))
7455     CHECK_BUFFER (dst_object);
7456
7457   validate_region (&start, &end);
7458   from = XFASTINT (start);
7459   from_byte = CHAR_TO_BYTE (from);
7460   to = XFASTINT (end);
7461   to_byte = CHAR_TO_BYTE (to);
7462
7463   setup_coding_system (coding_system, &coding);
7464   coding.mode |= CODING_MODE_LAST_BLOCK;
7465
7466   if (encodep)
7467     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7468                           dst_object);
7469   else
7470     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7471                           dst_object);
7472   if (! norecord)
7473     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7474
7475   return (BUFFERP (dst_object)
7476           ? make_number (coding.produced_char)
7477           : coding.dst_object);
7478 }
7479
7480
7481 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
7482        3, 4, "r\nzCoding system: ",
7483        doc: /* Decode the current region from the specified coding system.
7484 When called from a program, takes four arguments:
7485         START, END, CODING-SYSTEM, and DESTINATION.
7486 START and END are buffer positions.
7487
7488 Optional 4th arguments DESTINATION specifies where the decoded text goes.
7489 If nil, the region between START and END is replace by the decoded text.
7490 If buffer, the decoded text is inserted in the buffer.
7491 If t, the decoded text is returned.
7492
7493 This function sets `last-coding-system-used' to the precise coding system
7494 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7495 not fully specified.)
7496 It returns the length of the decoded text.  */)
7497      (start, end, coding_system, destination)
7498      Lisp_Object start, end, coding_system, destination;
7499 {
7500   return code_convert_region (start, end, coding_system, destination, 0, 0);
7501 }
7502
7503 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
7504        3, 4, "r\nzCoding system: ",
7505        doc: /* Encode the current region by specified coding system.
7506 When called from a program, takes three arguments:
7507 START, END, and CODING-SYSTEM.  START and END are buffer positions.
7508
7509 Optional 4th arguments DESTINATION specifies where the encoded text goes.
7510 If nil, the region between START and END is replace by the encoded text.
7511 If buffer, the encoded text is inserted in the buffer.
7512 If t, the encoded text is returned.
7513
7514 This function sets `last-coding-system-used' to the precise coding system
7515 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7516 not fully specified.)
7517 It returns the length of the encoded text.  */)
7518   (start, end, coding_system, destination)
7519      Lisp_Object start, end, coding_system, destination;
7520 {
7521   return code_convert_region (start, end, coding_system, destination, 1, 0);
7522 }
7523
7524 Lisp_Object
7525 code_convert_string (string, coding_system, dst_object,
7526                      encodep, nocopy, norecord)
7527      Lisp_Object string, coding_system, dst_object;
7528      int encodep, nocopy, norecord;
7529 {
7530   struct coding_system coding;
7531   EMACS_INT chars, bytes;
7532
7533   CHECK_STRING (string);
7534   if (NILP (coding_system))
7535     {
7536       if (! norecord)
7537         Vlast_coding_system_used = Qno_conversion;
7538       if (NILP (dst_object))
7539         return (nocopy ? Fcopy_sequence (string) : string);
7540     }
7541
7542   if (NILP (coding_system))
7543     coding_system = Qno_conversion;
7544   else
7545     CHECK_CODING_SYSTEM (coding_system);
7546   if (NILP (dst_object))
7547     dst_object = Qt;
7548   else if (! EQ (dst_object, Qt))
7549     CHECK_BUFFER (dst_object);
7550
7551   setup_coding_system (coding_system, &coding);
7552   coding.mode |= CODING_MODE_LAST_BLOCK;
7553   chars = SCHARS (string);
7554   bytes = SBYTES (string);
7555   if (encodep)
7556     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7557   else
7558     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7559   if (! norecord)
7560     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7561
7562   return (BUFFERP (dst_object)
7563           ? make_number (coding.produced_char)
7564           : coding.dst_object);
7565 }
7566
7567
7568 /* Encode or decode STRING according to CODING_SYSTEM.
7569    Do not set Vlast_coding_system_used.
7570
7571    This function is called only from macros DECODE_FILE and
7572    ENCODE_FILE, thus we ignore character composition.  */
7573
7574 Lisp_Object
7575 code_convert_string_norecord (string, coding_system, encodep)
7576      Lisp_Object string, coding_system;
7577      int encodep;
7578 {
7579   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
7580 }
7581
7582
7583 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
7584        2, 4, 0,
7585        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7586
7587 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7588 if the decoding operation is trivial.
7589
7590 Optional fourth arg BUFFER non-nil meant that the decoded text is
7591 inserted in BUFFER instead of returned as a string.  In this case,
7592 the return value is BUFFER.
7593
7594 This function sets `last-coding-system-used' to the precise coding system
7595 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7596 not fully specified.  */)
7597   (string, coding_system, nocopy, buffer)
7598      Lisp_Object string, coding_system, nocopy, buffer;
7599 {
7600   return code_convert_string (string, coding_system, buffer,
7601                               0, ! NILP (nocopy), 0);
7602 }
7603
7604 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
7605        2, 4, 0,
7606        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
7607
7608 Optional third arg NOCOPY non-nil means it is OK to return STRING
7609 itself if the encoding operation is trivial.
7610
7611 Optional fourth arg BUFFER non-nil meant that the encoded text is
7612 inserted in BUFFER instead of returned as a string.  In this case,
7613 the return value is BUFFER.
7614
7615 This function sets `last-coding-system-used' to the precise coding system
7616 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7617 not fully specified.)  */)
7618      (string, coding_system, nocopy, buffer)
7619      Lisp_Object string, coding_system, nocopy, buffer;
7620 {
7621   return code_convert_string (string, coding_system, buffer,
7622                               1, ! NILP (nocopy), 1);
7623 }
7624
7625 \f
7626 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
7627        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7628 Return the corresponding character.  */)
7629      (code)
7630      Lisp_Object code;
7631 {
7632   Lisp_Object spec, attrs, val;
7633   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
7634   int c;
7635
7636   CHECK_NATNUM (code);
7637   c = XFASTINT (code);
7638   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7639   attrs = AREF (spec, 0);
7640
7641   if (ASCII_BYTE_P (c)
7642       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7643     return code;
7644
7645   val = CODING_ATTR_CHARSET_LIST (attrs);
7646   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7647   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7648   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
7649
7650   if (c <= 0x7F)
7651     charset = charset_roman;
7652   else if (c >= 0xA0 && c < 0xDF)
7653     {
7654       charset = charset_kana;
7655       c -= 0x80;
7656     }
7657   else
7658     {
7659       int s1 = c >> 8, s2 = c & 0xFF;
7660
7661       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
7662           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
7663         error ("Invalid code: %d", code);
7664       SJIS_TO_JIS (c);
7665       charset = charset_kanji;
7666     }
7667   c = DECODE_CHAR (charset, c);
7668   if (c < 0)
7669     error ("Invalid code: %d", code);
7670   return make_number (c);
7671 }
7672
7673
7674 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
7675        doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7676 Return the corresponding code in SJIS.  */)
7677      (ch)
7678     Lisp_Object ch;
7679 {
7680   Lisp_Object spec, attrs, charset_list;
7681   int c;
7682   struct charset *charset;
7683   unsigned code;
7684
7685   CHECK_CHARACTER (ch);
7686   c = XFASTINT (ch);
7687   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7688   attrs = AREF (spec, 0);
7689
7690   if (ASCII_CHAR_P (c)
7691       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7692     return ch;
7693
7694   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7695   charset = char_charset (c, charset_list, &code);
7696   if (code == CHARSET_INVALID_CODE (charset))
7697     error ("Can't encode by shift_jis encoding: %d", c);
7698   JIS_TO_SJIS (code);
7699
7700   return make_number (code);
7701 }
7702
7703 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
7704        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7705 Return the corresponding character.  */)
7706      (code)
7707      Lisp_Object code;
7708 {
7709   Lisp_Object spec, attrs, val;
7710   struct charset *charset_roman, *charset_big5, *charset;
7711   int c;
7712
7713   CHECK_NATNUM (code);
7714   c = XFASTINT (code);
7715   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7716   attrs = AREF (spec, 0);
7717
7718   if (ASCII_BYTE_P (c)
7719       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7720     return code;
7721
7722   val = CODING_ATTR_CHARSET_LIST (attrs);
7723   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7724   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
7725
7726   if (c <= 0x7F)
7727     charset = charset_roman;
7728   else
7729     {
7730       int b1 = c >> 8, b2 = c & 0x7F;
7731       if (b1 < 0xA1 || b1 > 0xFE
7732           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
7733         error ("Invalid code: %d", code);
7734       charset = charset_big5;
7735     }
7736   c = DECODE_CHAR (charset, (unsigned )c);
7737   if (c < 0)
7738     error ("Invalid code: %d", code);
7739   return make_number (c);
7740 }
7741
7742 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
7743        doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7744 Return the corresponding character code in Big5.  */)
7745      (ch)
7746      Lisp_Object ch;
7747 {
7748   Lisp_Object spec, attrs, charset_list;
7749   struct charset *charset;
7750   int c;
7751   unsigned code;
7752
7753   CHECK_CHARACTER (ch);
7754   c = XFASTINT (ch);
7755   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7756   attrs = AREF (spec, 0);
7757   if (ASCII_CHAR_P (c)
7758       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7759     return ch;
7760
7761   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7762   charset = char_charset (c, charset_list, &code);
7763   if (code == CHARSET_INVALID_CODE (charset))
7764     error ("Can't encode by Big5 encoding: %d", c);
7765
7766   return make_number (code);
7767 }
7768
7769 \f
7770 DEFUN ("set-terminal-coding-system-internal",
7771        Fset_terminal_coding_system_internal,
7772        Sset_terminal_coding_system_internal, 1, 1, 0,
7773        doc: /* Internal use only.  */)
7774      (coding_system)
7775      Lisp_Object coding_system;
7776 {
7777   CHECK_SYMBOL (coding_system);
7778   setup_coding_system (Fcheck_coding_system (coding_system),
7779                         &terminal_coding);
7780
7781   /* We had better not send unsafe characters to terminal.  */
7782   terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7783   /* Characer composition should be disabled.  */
7784   terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7785   terminal_coding.src_multibyte = 1;
7786   terminal_coding.dst_multibyte = 0;
7787   return Qnil;
7788 }
7789
7790 DEFUN ("set-safe-terminal-coding-system-internal",
7791        Fset_safe_terminal_coding_system_internal,
7792        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
7793        doc: /* Internal use only.  */)
7794      (coding_system)
7795      Lisp_Object coding_system;
7796 {
7797   CHECK_SYMBOL (coding_system);
7798   setup_coding_system (Fcheck_coding_system (coding_system),
7799                        &safe_terminal_coding);
7800   /* Characer composition should be disabled.  */
7801   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7802   safe_terminal_coding.src_multibyte = 1;
7803   safe_terminal_coding.dst_multibyte = 0;
7804   return Qnil;
7805 }
7806
7807 DEFUN ("terminal-coding-system",
7808        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
7809        doc: /* Return coding system specified for terminal output.  */)
7810      ()
7811 {
7812   return CODING_ID_NAME (terminal_coding.id);
7813 }
7814
7815 DEFUN ("set-keyboard-coding-system-internal",
7816        Fset_keyboard_coding_system_internal,
7817        Sset_keyboard_coding_system_internal, 1, 1, 0,
7818        doc: /* Internal use only.  */)
7819      (coding_system)
7820      Lisp_Object coding_system;
7821 {
7822   CHECK_SYMBOL (coding_system);
7823   setup_coding_system (Fcheck_coding_system (coding_system),
7824                        &keyboard_coding);
7825   /* Characer composition should be disabled.  */
7826   keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7827   return Qnil;
7828 }
7829
7830 DEFUN ("keyboard-coding-system",
7831        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
7832        doc: /* Return coding system specified for decoding keyboard input.  */)
7833      ()
7834 {
7835   return CODING_ID_NAME (keyboard_coding.id);
7836 }
7837
7838 \f
7839 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7840        Sfind_operation_coding_system,  1, MANY, 0,
7841        doc: /* Choose a coding system for an operation based on the target name.
7842 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7843 DECODING-SYSTEM is the coding system to use for decoding
7844 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7845 for encoding (in case OPERATION does encoding).
7846
7847 The first argument OPERATION specifies an I/O primitive:
7848   For file I/O, `insert-file-contents' or `write-region'.
7849   For process I/O, `call-process', `call-process-region', or `start-process'.
7850   For network I/O, `open-network-stream'.
7851
7852 The remaining arguments should be the same arguments that were passed
7853 to the primitive.  Depending on which primitive, one of those arguments
7854 is selected as the TARGET.  For example, if OPERATION does file I/O,
7855 whichever argument specifies the file name is TARGET.
7856
7857 TARGET has a meaning which depends on OPERATION:
7858   For file I/O, TARGET is a file name.
7859   For process I/O, TARGET is a process name.
7860   For network I/O, TARGET is a service name or a port number
7861
7862 This function looks up what specified for TARGET in,
7863 `file-coding-system-alist', `process-coding-system-alist',
7864 or `network-coding-system-alist' depending on OPERATION.
7865 They may specify a coding system, a cons of coding systems,
7866 or a function symbol to call.
7867 In the last case, we call the function with one argument,
7868 which is a list of all the arguments given to this function.
7869
7870 usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
7871      (nargs, args)
7872      int nargs;
7873      Lisp_Object *args;
7874 {
7875   Lisp_Object operation, target_idx, target, val;
7876   register Lisp_Object chain;
7877
7878   if (nargs < 2)
7879     error ("Too few arguments");
7880   operation = args[0];
7881   if (!SYMBOLP (operation)
7882       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
7883     error ("Invalid first arguement");
7884   if (nargs < 1 + XINT (target_idx))
7885     error ("Too few arguments for operation: %s",
7886            SDATA (SYMBOL_NAME (operation)));
7887   target = args[XINT (target_idx) + 1];
7888   if (!(STRINGP (target)
7889         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
7890     error ("Invalid %dth argument", XINT (target_idx) + 1);
7891
7892   chain = ((EQ (operation, Qinsert_file_contents)
7893             || EQ (operation, Qwrite_region))
7894            ? Vfile_coding_system_alist
7895            : (EQ (operation, Qopen_network_stream)
7896               ? Vnetwork_coding_system_alist
7897               : Vprocess_coding_system_alist));
7898   if (NILP (chain))
7899     return Qnil;
7900
7901   for (; CONSP (chain); chain = XCDR (chain))
7902     {
7903       Lisp_Object elt;
7904
7905       elt = XCAR (chain);
7906       if (CONSP (elt)
7907           && ((STRINGP (target)
7908                && STRINGP (XCAR (elt))
7909                && fast_string_match (XCAR (elt), target) >= 0)
7910               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
7911         {
7912           val = XCDR (elt);
7913           /* Here, if VAL is both a valid coding system and a valid
7914              function symbol, we return VAL as a coding system.  */
7915           if (CONSP (val))
7916             return val;
7917           if (! SYMBOLP (val))
7918             return Qnil;
7919           if (! NILP (Fcoding_system_p (val)))
7920             return Fcons (val, val);
7921           if (! NILP (Ffboundp (val)))
7922             {
7923               val = call1 (val, Flist (nargs, args));
7924               if (CONSP (val))
7925                 return val;
7926               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7927                 return Fcons (val, val);
7928             }
7929           return Qnil;
7930         }
7931     }
7932   return Qnil;
7933 }
7934
7935 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
7936        Sset_coding_system_priority, 0, MANY, 0,
7937        doc: /* Assign higher priority to the coding systems given as arguments.
7938 If multiple coding systems belongs to the same category,
7939 all but the first one are ignored.
7940
7941 usage: (set-coding-system-priority ...)  */)
7942      (nargs, args)
7943      int nargs;
7944      Lisp_Object *args;
7945 {
7946   int i, j;
7947   int changed[coding_category_max];
7948   enum coding_category priorities[coding_category_max];
7949
7950   bzero (changed, sizeof changed);
7951
7952   for (i = j = 0; i < nargs; i++)
7953     {
7954       enum coding_category category;
7955       Lisp_Object spec, attrs;
7956
7957       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7958       attrs = AREF (spec, 0);
7959       category = XINT (CODING_ATTR_CATEGORY (attrs));
7960       if (changed[category])
7961         /* Ignore this coding system because a coding system of the
7962            same category already had a higher priority.  */
7963         continue;
7964       changed[category] = 1;
7965       priorities[j++] = category;
7966       if (coding_categories[category].id >= 0
7967           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7968         setup_coding_system (args[i], &coding_categories[category]);
7969       Fset (AREF (Vcoding_category_table, category), args[i]);
7970     }
7971
7972   /* Now we have decided top J priorities.  Reflect the order of the
7973      original priorities to the remaining priorities.  */
7974
7975   for (i = j, j = 0; i < coding_category_max; i++, j++)
7976     {
7977       while (j < coding_category_max
7978              && changed[coding_priorities[j]])
7979         j++;
7980       if (j == coding_category_max)
7981         abort ();
7982       priorities[i] = coding_priorities[j];
7983     }
7984
7985   bcopy (priorities, coding_priorities, sizeof priorities);
7986
7987   /* Update `coding-category-list'.  */
7988   Vcoding_category_list = Qnil;
7989   for (i = coding_category_max - 1; i >= 0; i--)
7990     Vcoding_category_list
7991       = Fcons (AREF (Vcoding_category_table, priorities[i]),
7992                Vcoding_category_list);
7993
7994   return Qnil;
7995 }
7996
7997 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7998        Scoding_system_priority_list, 0, 1, 0,
7999        doc: /* Return a list of coding systems ordered by their priorities.
8000 HIGHESTP non-nil means just return the highest priority one.  */)
8001      (highestp)
8002      Lisp_Object highestp;
8003 {
8004   int i;
8005   Lisp_Object val;
8006
8007   for (i = 0, val = Qnil; i < coding_category_max; i++)
8008     {
8009       enum coding_category category = coding_priorities[i];
8010       int id = coding_categories[category].id;
8011       Lisp_Object attrs;
8012
8013       if (id < 0)
8014         continue;
8015       attrs = CODING_ID_ATTRS (id);
8016       if (! NILP (highestp))
8017         return CODING_ATTR_BASE_NAME (attrs);
8018       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
8019     }
8020   return Fnreverse (val);
8021 }
8022
8023 static char *suffixes[] = { "-unix", "-dos", "-mac" };
8024
8025 static Lisp_Object
8026 make_subsidiaries (base)
8027      Lisp_Object base;
8028 {
8029   Lisp_Object subsidiaries;
8030   int base_name_len = SBYTES (SYMBOL_NAME (base));
8031   char *buf = (char *) alloca (base_name_len + 6);
8032   int i;
8033
8034   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
8035   subsidiaries = Fmake_vector (make_number (3), Qnil);
8036   for (i = 0; i < 3; i++)
8037     {
8038       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
8039       ASET (subsidiaries, i, intern (buf));
8040     }
8041   return subsidiaries;
8042 }
8043
8044
8045 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
8046        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
8047        doc: /* For internal use only.
8048 usage: (define-coding-system-internal ...)  */)
8049      (nargs, args)
8050      int nargs;
8051      Lisp_Object *args;
8052 {
8053   Lisp_Object name;
8054   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
8055   Lisp_Object attrs;            /* Vector of attributes.  */
8056   Lisp_Object eol_type;
8057   Lisp_Object aliases;
8058   Lisp_Object coding_type, charset_list, safe_charsets;
8059   enum coding_category category;
8060   Lisp_Object tail, val;
8061   int max_charset_id = 0;
8062   int i;
8063
8064   if (nargs < coding_arg_max)
8065     goto short_args;
8066
8067   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
8068
8069   name = args[coding_arg_name];
8070   CHECK_SYMBOL (name);
8071   CODING_ATTR_BASE_NAME (attrs) = name;
8072
8073   val = args[coding_arg_mnemonic];
8074   if (! STRINGP (val))
8075     CHECK_CHARACTER (val);
8076   CODING_ATTR_MNEMONIC (attrs) = val;
8077
8078   coding_type = args[coding_arg_coding_type];
8079   CHECK_SYMBOL (coding_type);
8080   CODING_ATTR_TYPE (attrs) = coding_type;
8081
8082   charset_list = args[coding_arg_charset_list];
8083   if (SYMBOLP (charset_list))
8084     {
8085       if (EQ (charset_list, Qiso_2022))
8086         {
8087           if (! EQ (coding_type, Qiso_2022))
8088             error ("Invalid charset-list");
8089           charset_list = Viso_2022_charset_list;
8090         }
8091       else if (EQ (charset_list, Qemacs_mule))
8092         {
8093           if (! EQ (coding_type, Qemacs_mule))
8094             error ("Invalid charset-list");
8095           charset_list = Vemacs_mule_charset_list;
8096         }
8097       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8098         if (max_charset_id < XFASTINT (XCAR (tail)))
8099           max_charset_id = XFASTINT (XCAR (tail));
8100     }
8101   else
8102     {
8103       charset_list = Fcopy_sequence (charset_list);
8104       for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
8105         {
8106           struct charset *charset;
8107
8108           val = Fcar (tail);
8109           CHECK_CHARSET_GET_CHARSET (val, charset);
8110           if (EQ (coding_type, Qiso_2022)
8111               ? CHARSET_ISO_FINAL (charset) < 0
8112               : EQ (coding_type, Qemacs_mule)
8113               ? CHARSET_EMACS_MULE_ID (charset) < 0
8114               : 0)
8115             error ("Can't handle charset `%s'",
8116                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8117
8118           XSETCAR (tail, make_number (charset->id));
8119           if (max_charset_id < charset->id)
8120             max_charset_id = charset->id;
8121         }
8122     }
8123   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
8124
8125   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
8126                                 make_number (255));
8127   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8128     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
8129   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
8130
8131   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
8132
8133   val = args[coding_arg_decode_translation_table];
8134   if (! NILP (val))
8135     CHECK_CHAR_TABLE (val);
8136   CODING_ATTR_DECODE_TBL (attrs) = val;
8137
8138   val = args[coding_arg_encode_translation_table];
8139   if (! NILP (val))
8140     CHECK_CHAR_TABLE (val);
8141   CODING_ATTR_ENCODE_TBL (attrs) = val;
8142
8143   val = args[coding_arg_post_read_conversion];
8144   CHECK_SYMBOL (val);
8145   CODING_ATTR_POST_READ (attrs) = val;
8146
8147   val = args[coding_arg_pre_write_conversion];
8148   CHECK_SYMBOL (val);
8149   CODING_ATTR_PRE_WRITE (attrs) = val;
8150
8151   val = args[coding_arg_default_char];
8152   if (NILP (val))
8153     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
8154   else
8155     {
8156       CHECK_CHARACTER (val);
8157       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8158     }
8159
8160   val = args[coding_arg_for_unibyte];
8161   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
8162
8163   val = args[coding_arg_plist];
8164   CHECK_LIST (val);
8165   CODING_ATTR_PLIST (attrs) = val;
8166
8167   if (EQ (coding_type, Qcharset))
8168     {
8169       /* Generate a lisp vector of 256 elements.  Each element is nil,
8170          integer, or a list of charset IDs.
8171
8172          If Nth element is nil, the byte code N is invalid in this
8173          coding system.
8174
8175          If Nth element is a number NUM, N is the first byte of a
8176          charset whose ID is NUM.
8177
8178          If Nth element is a list of charset IDs, N is the first byte
8179          of one of them.  The list is sorted by dimensions of the
8180          charsets.  A charset of smaller dimension comes firtst. */
8181       val = Fmake_vector (make_number (256), Qnil);
8182
8183       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8184         {
8185           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
8186           int dim = CHARSET_DIMENSION (charset);
8187           int idx = (dim - 1) * 4;
8188
8189           if (CHARSET_ASCII_COMPATIBLE_P (charset))
8190             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8191
8192           for (i = charset->code_space[idx];
8193                i <= charset->code_space[idx + 1]; i++)
8194             {
8195               Lisp_Object tmp, tmp2;
8196               int dim2;
8197
8198               tmp = AREF (val, i);
8199               if (NILP (tmp))
8200                 tmp = XCAR (tail);
8201               else if (NUMBERP (tmp))
8202                 {
8203                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
8204                   if (dim < dim2)
8205                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
8206                   else
8207                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
8208                 }
8209               else
8210                 {
8211                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
8212                     {
8213                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
8214                       if (dim < dim2)
8215                         break;
8216                     }
8217                   if (NILP (tmp2))
8218                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
8219                   else
8220                     {
8221                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
8222                       XSETCAR (tmp2, XCAR (tail));
8223                     }
8224                 }
8225               ASET (val, i, tmp);
8226             }
8227         }
8228       ASET (attrs, coding_attr_charset_valids, val);
8229       category = coding_category_charset;
8230     }
8231   else if (EQ (coding_type, Qccl))
8232     {
8233       Lisp_Object valids;
8234
8235       if (nargs < coding_arg_ccl_max)
8236         goto short_args;
8237
8238       val = args[coding_arg_ccl_decoder];
8239       CHECK_CCL_PROGRAM (val);
8240       if (VECTORP (val))
8241         val = Fcopy_sequence (val);
8242       ASET (attrs, coding_attr_ccl_decoder, val);
8243
8244       val = args[coding_arg_ccl_encoder];
8245       CHECK_CCL_PROGRAM (val);
8246       if (VECTORP (val))
8247         val = Fcopy_sequence (val);
8248       ASET (attrs, coding_attr_ccl_encoder, val);
8249
8250       val = args[coding_arg_ccl_valids];
8251       valids = Fmake_string (make_number (256), make_number (0));
8252       for (tail = val; !NILP (tail); tail = Fcdr (tail))
8253         {
8254           int from, to;
8255
8256           val = Fcar (tail);
8257           if (INTEGERP (val))
8258             {
8259               from = to = XINT (val);
8260               if (from < 0 || from > 255)
8261                 args_out_of_range_3 (val, make_number (0), make_number (255));
8262             }
8263           else
8264             {
8265               CHECK_CONS (val);
8266               CHECK_NATNUM_CAR (val);
8267               CHECK_NATNUM_CDR (val);
8268               from = XINT (XCAR (val));
8269               if (from > 255)
8270                 args_out_of_range_3 (XCAR (val),
8271                                      make_number (0), make_number (255));
8272               to = XINT (XCDR (val));
8273               if (to < from || to > 255)
8274                 args_out_of_range_3 (XCDR (val),
8275                                      XCAR (val), make_number (255));
8276             }
8277           for (i = from; i <= to; i++)
8278             SSET (valids, i, 1);
8279         }
8280       ASET (attrs, coding_attr_ccl_valids, valids);
8281
8282       category = coding_category_ccl;
8283     }
8284   else if (EQ (coding_type, Qutf_16))
8285     {
8286       Lisp_Object bom, endian;
8287
8288       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8289
8290       if (nargs < coding_arg_utf16_max)
8291         goto short_args;
8292
8293       bom = args[coding_arg_utf16_bom];
8294       if (! NILP (bom) && ! EQ (bom, Qt))
8295         {
8296           CHECK_CONS (bom);
8297           val = XCAR (bom);
8298           CHECK_CODING_SYSTEM (val);
8299           val = XCDR (bom);
8300           CHECK_CODING_SYSTEM (val);
8301         }
8302       ASET (attrs, coding_attr_utf_16_bom, bom);
8303
8304       endian = args[coding_arg_utf16_endian];
8305       CHECK_SYMBOL (endian);
8306       if (NILP (endian))
8307         endian = Qbig;
8308       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8309         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
8310       ASET (attrs, coding_attr_utf_16_endian, endian);
8311
8312       category = (CONSP (bom)
8313                   ? coding_category_utf_16_auto
8314                   : NILP (bom)
8315                   ? (EQ (endian, Qbig)
8316                      ? coding_category_utf_16_be_nosig
8317                      : coding_category_utf_16_le_nosig)
8318                   : (EQ (endian, Qbig)
8319                      ? coding_category_utf_16_be
8320                      : coding_category_utf_16_le));
8321     }
8322   else if (EQ (coding_type, Qiso_2022))
8323     {
8324       Lisp_Object initial, reg_usage, request, flags;
8325       int i;
8326
8327       if (nargs < coding_arg_iso2022_max)
8328         goto short_args;
8329
8330       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
8331       CHECK_VECTOR (initial);
8332       for (i = 0; i < 4; i++)
8333         {
8334           val = Faref (initial, make_number (i));
8335           if (! NILP (val))
8336             {
8337               struct charset *charset;
8338
8339               CHECK_CHARSET_GET_CHARSET (val, charset);
8340               ASET (initial, i, make_number (CHARSET_ID (charset)));
8341               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
8342                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8343             }
8344           else
8345             ASET (initial, i, make_number (-1));
8346         }
8347
8348       reg_usage = args[coding_arg_iso2022_reg_usage];
8349       CHECK_CONS (reg_usage);
8350       CHECK_NUMBER_CAR (reg_usage);
8351       CHECK_NUMBER_CDR (reg_usage);
8352
8353       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
8354       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
8355         {
8356           int id;
8357           Lisp_Object tmp;
8358
8359           val = Fcar (tail);
8360           CHECK_CONS (val);
8361           tmp = XCAR (val);
8362           CHECK_CHARSET_GET_ID (tmp, id);
8363           CHECK_NATNUM_CDR (val);
8364           if (XINT (XCDR (val)) >= 4)
8365             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8366           XSETCAR (val, make_number (id));
8367         }
8368
8369       flags = args[coding_arg_iso2022_flags];
8370       CHECK_NATNUM (flags);
8371       i = XINT (flags);
8372       if (EQ (args[coding_arg_charset_list], Qiso_2022))
8373         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
8374
8375       ASET (attrs, coding_attr_iso_initial, initial);
8376       ASET (attrs, coding_attr_iso_usage, reg_usage);
8377       ASET (attrs, coding_attr_iso_request, request);
8378       ASET (attrs, coding_attr_iso_flags, flags);
8379       setup_iso_safe_charsets (attrs);
8380
8381       if (i & CODING_ISO_FLAG_SEVEN_BITS)
8382         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
8383                           | CODING_ISO_FLAG_SINGLE_SHIFT))
8384                     ? coding_category_iso_7_else
8385                     : EQ (args[coding_arg_charset_list], Qiso_2022)
8386                     ? coding_category_iso_7
8387                     : coding_category_iso_7_tight);
8388       else
8389         {
8390           int id = XINT (AREF (initial, 1));
8391
8392           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
8393                        || EQ (args[coding_arg_charset_list], Qiso_2022)
8394                        || id < 0)
8395                       ? coding_category_iso_8_else
8396                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
8397                       ? coding_category_iso_8_1
8398                       : coding_category_iso_8_2);
8399         }
8400       if (category != coding_category_iso_8_1
8401           && category != coding_category_iso_8_2)
8402         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8403     }
8404   else if (EQ (coding_type, Qemacs_mule))
8405     {
8406       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
8407         ASET (attrs, coding_attr_emacs_mule_full, Qt);
8408       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8409       category = coding_category_emacs_mule;
8410     }
8411   else if (EQ (coding_type, Qshift_jis))
8412     {
8413
8414       struct charset *charset;
8415
8416       if (XINT (Flength (charset_list)) != 3)
8417         error ("There should be just three charsets");
8418
8419       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8420       if (CHARSET_DIMENSION (charset) != 1)
8421         error ("Dimension of charset %s is not one",
8422                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8423       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8424         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8425
8426       charset_list = XCDR (charset_list);
8427       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8428       if (CHARSET_DIMENSION (charset) != 1)
8429         error ("Dimension of charset %s is not one",
8430                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8431
8432       charset_list = XCDR (charset_list);
8433       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8434       if (CHARSET_DIMENSION (charset) != 2)
8435         error ("Dimension of charset %s is not two",
8436                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8437
8438       category = coding_category_sjis;
8439       Vsjis_coding_system = name;
8440     }
8441   else if (EQ (coding_type, Qbig5))
8442     {
8443       struct charset *charset;
8444
8445       if (XINT (Flength (charset_list)) != 2)
8446         error ("There should be just two charsets");
8447
8448       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8449       if (CHARSET_DIMENSION (charset) != 1)
8450         error ("Dimension of charset %s is not one",
8451                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8452       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8453         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8454
8455       charset_list = XCDR (charset_list);
8456       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8457       if (CHARSET_DIMENSION (charset) != 2)
8458         error ("Dimension of charset %s is not two",
8459                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8460
8461       category = coding_category_big5;
8462       Vbig5_coding_system = name;
8463     }
8464   else if (EQ (coding_type, Qraw_text))
8465     {
8466       category = coding_category_raw_text;
8467       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8468     }
8469   else if (EQ (coding_type, Qutf_8))
8470     {
8471       category = coding_category_utf_8;
8472       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8473     }
8474   else if (EQ (coding_type, Qundecided))
8475     category = coding_category_undecided;
8476   else
8477     error ("Invalid coding system type: %s",
8478            SDATA (SYMBOL_NAME (coding_type)));
8479
8480   CODING_ATTR_CATEGORY (attrs) = make_number (category);
8481   CODING_ATTR_PLIST (attrs)
8482     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
8483                                 CODING_ATTR_PLIST (attrs)));
8484
8485   eol_type = args[coding_arg_eol_type];
8486   if (! NILP (eol_type)
8487       && ! EQ (eol_type, Qunix)
8488       && ! EQ (eol_type, Qdos)
8489       && ! EQ (eol_type, Qmac))
8490     error ("Invalid eol-type");
8491
8492   aliases = Fcons (name, Qnil);
8493
8494   if (NILP (eol_type))
8495     {
8496       eol_type = make_subsidiaries (name);
8497       for (i = 0; i < 3; i++)
8498         {
8499           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
8500
8501           this_name = AREF (eol_type, i);
8502           this_aliases = Fcons (this_name, Qnil);
8503           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
8504           this_spec = Fmake_vector (make_number (3), attrs);
8505           ASET (this_spec, 1, this_aliases);
8506           ASET (this_spec, 2, this_eol_type);
8507           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
8508           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
8509           Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
8510                                         Vcoding_system_alist);
8511         }
8512     }
8513
8514   spec_vec = Fmake_vector (make_number (3), attrs);
8515   ASET (spec_vec, 1, aliases);
8516   ASET (spec_vec, 2, eol_type);
8517
8518   Fputhash (name, spec_vec, Vcoding_system_hash_table);
8519   Vcoding_system_list = Fcons (name, Vcoding_system_list);
8520   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
8521                                 Vcoding_system_alist);
8522
8523   {
8524     int id = coding_categories[category].id;
8525
8526     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
8527       setup_coding_system (name, &coding_categories[category]);
8528   }
8529
8530   return Qnil;
8531
8532  short_args:
8533   return Fsignal (Qwrong_number_of_arguments,
8534                   Fcons (intern ("define-coding-system-internal"),
8535                          make_number (nargs)));
8536 }
8537
8538
8539 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
8540        Sdefine_coding_system_alias, 2, 2, 0,
8541        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
8542      (alias, coding_system)
8543      Lisp_Object alias, coding_system;
8544 {
8545   Lisp_Object spec, aliases, eol_type;
8546
8547   CHECK_SYMBOL (alias);
8548   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8549   aliases = AREF (spec, 1);
8550   /* ALISES should be a list of length more than zero, and the first
8551      element is a base coding system.  Append ALIAS at the tail of the
8552      list.  */
8553   while (!NILP (XCDR (aliases)))
8554     aliases = XCDR (aliases);
8555   XSETCDR (aliases, Fcons (alias, Qnil));
8556
8557   eol_type = AREF (spec, 2);
8558   if (VECTORP (eol_type))
8559     {
8560       Lisp_Object subsidiaries;
8561       int i;
8562
8563       subsidiaries = make_subsidiaries (alias);
8564       for (i = 0; i < 3; i++)
8565         Fdefine_coding_system_alias (AREF (subsidiaries, i),
8566                                      AREF (eol_type, i));
8567     }
8568
8569   Fputhash (alias, spec, Vcoding_system_hash_table);
8570   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
8571   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
8572                                 Vcoding_system_alist);
8573
8574   return Qnil;
8575 }
8576
8577 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
8578        1, 1, 0,
8579        doc: /* Return the base of CODING-SYSTEM.
8580 Any alias or subsidiary coding system is not a base coding system.  */)
8581   (coding_system)
8582      Lisp_Object coding_system;
8583 {
8584   Lisp_Object spec, attrs;
8585
8586   if (NILP (coding_system))
8587     return (Qno_conversion);
8588   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8589   attrs = AREF (spec, 0);
8590   return CODING_ATTR_BASE_NAME (attrs);
8591 }
8592
8593 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
8594        1, 1, 0,
8595        doc: "Return the property list of CODING-SYSTEM.")
8596      (coding_system)
8597      Lisp_Object coding_system;
8598 {
8599   Lisp_Object spec, attrs;
8600
8601   if (NILP (coding_system))
8602     coding_system = Qno_conversion;
8603   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8604   attrs = AREF (spec, 0);
8605   return CODING_ATTR_PLIST (attrs);
8606 }
8607
8608
8609 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
8610        1, 1, 0,
8611        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
8612      (coding_system)
8613      Lisp_Object coding_system;
8614 {
8615   Lisp_Object spec;
8616
8617   if (NILP (coding_system))
8618     coding_system = Qno_conversion;
8619   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8620   return AREF (spec, 1);
8621 }
8622
8623 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
8624        Scoding_system_eol_type, 1, 1, 0,
8625        doc: /* Return eol-type of CODING-SYSTEM.
8626 An eol-type is integer 0, 1, 2, or a vector of coding systems.
8627
8628 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8629 and CR respectively.
8630
8631 A vector value indicates that a format of end-of-line should be
8632 detected automatically.  Nth element of the vector is the subsidiary
8633 coding system whose eol-type is N.  */)
8634      (coding_system)
8635      Lisp_Object coding_system;
8636 {
8637   Lisp_Object spec, eol_type;
8638   int n;
8639
8640   if (NILP (coding_system))
8641     coding_system = Qno_conversion;
8642   if (! CODING_SYSTEM_P (coding_system))
8643     return Qnil;
8644   spec = CODING_SYSTEM_SPEC (coding_system);
8645   eol_type = AREF (spec, 2);
8646   if (VECTORP (eol_type))
8647     return Fcopy_sequence (eol_type);
8648   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
8649   return make_number (n);
8650 }
8651
8652 #endif /* emacs */
8653
8654 \f
8655 /*** 9. Post-amble ***/
8656
8657 void
8658 init_coding_once ()
8659 {
8660   int i;
8661
8662   for (i = 0; i < coding_category_max; i++)
8663     {
8664       coding_categories[i].id = -1;
8665       coding_priorities[i] = i;
8666     }
8667
8668   /* ISO2022 specific initialize routine.  */
8669   for (i = 0; i < 0x20; i++)
8670     iso_code_class[i] = ISO_control_0;
8671   for (i = 0x21; i < 0x7F; i++)
8672     iso_code_class[i] = ISO_graphic_plane_0;
8673   for (i = 0x80; i < 0xA0; i++)
8674     iso_code_class[i] = ISO_control_1;
8675   for (i = 0xA1; i < 0xFF; i++)
8676     iso_code_class[i] = ISO_graphic_plane_1;
8677   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
8678   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
8679   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
8680   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
8681   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
8682   iso_code_class[ISO_CODE_ESC] = ISO_escape;
8683   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
8684   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
8685   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
8686
8687   for (i = 0; i < 256; i++)
8688     {
8689       emacs_mule_bytes[i] = 1;
8690     }
8691   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
8692   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
8693   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
8694   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
8695 }
8696
8697 #ifdef emacs
8698
8699 void
8700 syms_of_coding ()
8701 {
8702   staticpro (&Vcoding_system_hash_table);
8703   {
8704     Lisp_Object args[2];
8705     args[0] = QCtest;
8706     args[1] = Qeq;
8707     Vcoding_system_hash_table = Fmake_hash_table (2, args);
8708   }
8709
8710   staticpro (&Vsjis_coding_system);
8711   Vsjis_coding_system = Qnil;
8712
8713   staticpro (&Vbig5_coding_system);
8714   Vbig5_coding_system = Qnil;
8715
8716   staticpro (&Vcode_conversion_reused_workbuf);
8717   Vcode_conversion_reused_workbuf = Qnil;
8718
8719   staticpro (&Vcode_conversion_workbuf_name);
8720   Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
8721
8722   reused_workbuf_in_use = 0;
8723
8724   DEFSYM (Qcharset, "charset");
8725   DEFSYM (Qtarget_idx, "target-idx");
8726   DEFSYM (Qcoding_system_history, "coding-system-history");
8727   Fset (Qcoding_system_history, Qnil);
8728
8729   /* Target FILENAME is the first argument.  */
8730   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
8731   /* Target FILENAME is the third argument.  */
8732   Fput (Qwrite_region, Qtarget_idx, make_number (2));
8733
8734   DEFSYM (Qcall_process, "call-process");
8735   /* Target PROGRAM is the first argument.  */
8736   Fput (Qcall_process, Qtarget_idx, make_number (0));
8737
8738   DEFSYM (Qcall_process_region, "call-process-region");
8739   /* Target PROGRAM is the third argument.  */
8740   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
8741
8742   DEFSYM (Qstart_process, "start-process");
8743   /* Target PROGRAM is the third argument.  */
8744   Fput (Qstart_process, Qtarget_idx, make_number (2));
8745
8746   DEFSYM (Qopen_network_stream, "open-network-stream");
8747   /* Target SERVICE is the fourth argument.  */
8748   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
8749
8750   DEFSYM (Qcoding_system, "coding-system");
8751   DEFSYM (Qcoding_aliases, "coding-aliases");
8752
8753   DEFSYM (Qeol_type, "eol-type");
8754   DEFSYM (Qunix, "unix");
8755   DEFSYM (Qdos, "dos");
8756
8757   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
8758   DEFSYM (Qpost_read_conversion, "post-read-conversion");
8759   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
8760   DEFSYM (Qdefault_char, "default-char");
8761   DEFSYM (Qundecided, "undecided");
8762   DEFSYM (Qno_conversion, "no-conversion");
8763   DEFSYM (Qraw_text, "raw-text");
8764
8765   DEFSYM (Qiso_2022, "iso-2022");
8766
8767   DEFSYM (Qutf_8, "utf-8");
8768   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
8769
8770   DEFSYM (Qutf_16, "utf-16");
8771   DEFSYM (Qbig, "big");
8772   DEFSYM (Qlittle, "little");
8773
8774   DEFSYM (Qshift_jis, "shift-jis");
8775   DEFSYM (Qbig5, "big5");
8776
8777   DEFSYM (Qcoding_system_p, "coding-system-p");
8778
8779   DEFSYM (Qcoding_system_error, "coding-system-error");
8780   Fput (Qcoding_system_error, Qerror_conditions,
8781         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
8782   Fput (Qcoding_system_error, Qerror_message,
8783         build_string ("Invalid coding system"));
8784
8785   /* Intern this now in case it isn't already done.
8786      Setting this variable twice is harmless.
8787      But don't staticpro it here--that is done in alloc.c.  */
8788   Qchar_table_extra_slots = intern ("char-table-extra-slots");
8789
8790   DEFSYM (Qtranslation_table, "translation-table");
8791   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
8792   DEFSYM (Qtranslation_table_id, "translation-table-id");
8793   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
8794   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
8795
8796   DEFSYM (Qvalid_codes, "valid-codes");
8797
8798   DEFSYM (Qemacs_mule, "emacs-mule");
8799
8800   DEFSYM (QCcategory, ":category");
8801
8802   Vcoding_category_table
8803     = Fmake_vector (make_number (coding_category_max), Qnil);
8804   staticpro (&Vcoding_category_table);
8805   /* Followings are target of code detection.  */
8806   ASET (Vcoding_category_table, coding_category_iso_7,
8807         intern ("coding-category-iso-7"));
8808   ASET (Vcoding_category_table, coding_category_iso_7_tight,
8809         intern ("coding-category-iso-7-tight"));
8810   ASET (Vcoding_category_table, coding_category_iso_8_1,
8811         intern ("coding-category-iso-8-1"));
8812   ASET (Vcoding_category_table, coding_category_iso_8_2,
8813         intern ("coding-category-iso-8-2"));
8814   ASET (Vcoding_category_table, coding_category_iso_7_else,
8815         intern ("coding-category-iso-7-else"));
8816   ASET (Vcoding_category_table, coding_category_iso_8_else,
8817         intern ("coding-category-iso-8-else"));
8818   ASET (Vcoding_category_table, coding_category_utf_8,
8819         intern ("coding-category-utf-8"));
8820   ASET (Vcoding_category_table, coding_category_utf_16_be,
8821         intern ("coding-category-utf-16-be"));
8822   ASET (Vcoding_category_table, coding_category_utf_16_auto,
8823         intern ("coding-category-utf-16-auto"));
8824   ASET (Vcoding_category_table, coding_category_utf_16_le,
8825         intern ("coding-category-utf-16-le"));
8826   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
8827         intern ("coding-category-utf-16-be-nosig"));
8828   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
8829         intern ("coding-category-utf-16-le-nosig"));
8830   ASET (Vcoding_category_table, coding_category_charset,
8831         intern ("coding-category-charset"));
8832   ASET (Vcoding_category_table, coding_category_sjis,
8833         intern ("coding-category-sjis"));
8834   ASET (Vcoding_category_table, coding_category_big5,
8835         intern ("coding-category-big5"));
8836   ASET (Vcoding_category_table, coding_category_ccl,
8837         intern ("coding-category-ccl"));
8838   ASET (Vcoding_category_table, coding_category_emacs_mule,
8839         intern ("coding-category-emacs-mule"));
8840   /* Followings are NOT target of code detection.  */
8841   ASET (Vcoding_category_table, coding_category_raw_text,
8842         intern ("coding-category-raw-text"));
8843   ASET (Vcoding_category_table, coding_category_undecided,
8844         intern ("coding-category-undecided"));
8845
8846   DEFSYM (Qinsufficient_source, "insufficient-source");
8847   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
8848   DEFSYM (Qinvalid_source, "invalid-source");
8849   DEFSYM (Qinterrupted, "interrupted");
8850   DEFSYM (Qinsufficient_memory, "insufficient-memory");
8851
8852   defsubr (&Scoding_system_p);
8853   defsubr (&Sread_coding_system);
8854   defsubr (&Sread_non_nil_coding_system);
8855   defsubr (&Scheck_coding_system);
8856   defsubr (&Sdetect_coding_region);
8857   defsubr (&Sdetect_coding_string);
8858   defsubr (&Sfind_coding_systems_region_internal);
8859   defsubr (&Sunencodable_char_position);
8860   defsubr (&Scheck_coding_systems_region);
8861   defsubr (&Sdecode_coding_region);
8862   defsubr (&Sencode_coding_region);
8863   defsubr (&Sdecode_coding_string);
8864   defsubr (&Sencode_coding_string);
8865   defsubr (&Sdecode_sjis_char);
8866   defsubr (&Sencode_sjis_char);
8867   defsubr (&Sdecode_big5_char);
8868   defsubr (&Sencode_big5_char);
8869   defsubr (&Sset_terminal_coding_system_internal);
8870   defsubr (&Sset_safe_terminal_coding_system_internal);
8871   defsubr (&Sterminal_coding_system);
8872   defsubr (&Sset_keyboard_coding_system_internal);
8873   defsubr (&Skeyboard_coding_system);
8874   defsubr (&Sfind_operation_coding_system);
8875   defsubr (&Sset_coding_system_priority);
8876   defsubr (&Sdefine_coding_system_internal);
8877   defsubr (&Sdefine_coding_system_alias);
8878   defsubr (&Scoding_system_base);
8879   defsubr (&Scoding_system_plist);
8880   defsubr (&Scoding_system_aliases);
8881   defsubr (&Scoding_system_eol_type);
8882   defsubr (&Scoding_system_priority_list);
8883
8884   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
8885                doc: /* List of coding systems.
8886
8887 Do not alter the value of this variable manually.  This variable should be
8888 updated by the functions `define-coding-system' and
8889 `define-coding-system-alias'.  */);
8890   Vcoding_system_list = Qnil;
8891
8892   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
8893                doc: /* Alist of coding system names.
8894 Each element is one element list of coding system name.
8895 This variable is given to `completing-read' as TABLE argument.
8896
8897 Do not alter the value of this variable manually.  This variable should be
8898 updated by the functions `make-coding-system' and
8899 `define-coding-system-alias'.  */);
8900   Vcoding_system_alist = Qnil;
8901
8902   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
8903                doc: /* List of coding-categories (symbols) ordered by priority.
8904
8905 On detecting a coding system, Emacs tries code detection algorithms
8906 associated with each coding-category one by one in this order.  When
8907 one algorithm agrees with a byte sequence of source text, the coding
8908 system bound to the corresponding coding-category is selected.  */);
8909   {
8910     int i;
8911
8912     Vcoding_category_list = Qnil;
8913     for (i = coding_category_max - 1; i >= 0; i--)
8914       Vcoding_category_list
8915         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
8916                  Vcoding_category_list);
8917   }
8918
8919   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
8920                doc: /* Specify the coding system for read operations.
8921 It is useful to bind this variable with `let', but do not set it globally.
8922 If the value is a coding system, it is used for decoding on read operation.
8923 If not, an appropriate element is used from one of the coding system alists:
8924 There are three such tables, `file-coding-system-alist',
8925 `process-coding-system-alist', and `network-coding-system-alist'.  */);
8926   Vcoding_system_for_read = Qnil;
8927
8928   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
8929                doc: /* Specify the coding system for write operations.
8930 Programs bind this variable with `let', but you should not set it globally.
8931 If the value is a coding system, it is used for encoding of output,
8932 when writing it to a file and when sending it to a file or subprocess.
8933
8934 If this does not specify a coding system, an appropriate element
8935 is used from one of the coding system alists:
8936 There are three such tables, `file-coding-system-alist',
8937 `process-coding-system-alist', and `network-coding-system-alist'.
8938 For output to files, if the above procedure does not specify a coding system,
8939 the value of `buffer-file-coding-system' is used.  */);
8940   Vcoding_system_for_write = Qnil;
8941
8942   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
8943                doc: /*
8944 Coding system used in the latest file or process I/O.  */);
8945   Vlast_coding_system_used = Qnil;
8946
8947   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
8948                doc: /*
8949 Error status of the last code conversion.
8950
8951 When an error was detected in the last code conversion, this variable
8952 is set to one of the following symbols.
8953   `insufficient-source'
8954   `inconsistent-eol'
8955   `invalid-source'
8956   `interrupted'
8957   `insufficient-memory'
8958 When no error was detected, the value doesn't change.  So, to check
8959 the error status of a code conversion by this variable, you must
8960 explicitly set this variable to nil before performing code
8961 conversion.  */);
8962   Vlast_code_conversion_error = Qnil;
8963
8964   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
8965                doc: /*
8966 *Non-nil means always inhibit code conversion of end-of-line format.
8967 See info node `Coding Systems' and info node `Text and Binary' concerning
8968 such conversion.  */);
8969   inhibit_eol_conversion = 0;
8970
8971   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
8972                doc: /*
8973 Non-nil means process buffer inherits coding system of process output.
8974 Bind it to t if the process output is to be treated as if it were a file
8975 read from some filesystem.  */);
8976   inherit_process_coding_system = 0;
8977
8978   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
8979                doc: /*
8980 Alist to decide a coding system to use for a file I/O operation.
8981 The format is ((PATTERN . VAL) ...),
8982 where PATTERN is a regular expression matching a file name,
8983 VAL is a coding system, a cons of coding systems, or a function symbol.
8984 If VAL is a coding system, it is used for both decoding and encoding
8985 the file contents.
8986 If VAL is a cons of coding systems, the car part is used for decoding,
8987 and the cdr part is used for encoding.
8988 If VAL is a function symbol, the function must return a coding system
8989 or a cons of coding systems which are used as above.  The function gets
8990 the arguments with which `find-operation-coding-systems' was called.
8991
8992 See also the function `find-operation-coding-system'
8993 and the variable `auto-coding-alist'.  */);
8994   Vfile_coding_system_alist = Qnil;
8995
8996   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
8997                doc: /*
8998 Alist to decide a coding system to use for a process I/O operation.
8999 The format is ((PATTERN . VAL) ...),
9000 where PATTERN is a regular expression matching a program name,
9001 VAL is a coding system, a cons of coding systems, or a function symbol.
9002 If VAL is a coding system, it is used for both decoding what received
9003 from the program and encoding what sent to the program.
9004 If VAL is a cons of coding systems, the car part is used for decoding,
9005 and the cdr part is used for encoding.
9006 If VAL is a function symbol, the function must return a coding system
9007 or a cons of coding systems which are used as above.
9008
9009 See also the function `find-operation-coding-system'.  */);
9010   Vprocess_coding_system_alist = Qnil;
9011
9012   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
9013                doc: /*
9014 Alist to decide a coding system to use for a network I/O operation.
9015 The format is ((PATTERN . VAL) ...),
9016 where PATTERN is a regular expression matching a network service name
9017 or is a port number to connect to,
9018 VAL is a coding system, a cons of coding systems, or a function symbol.
9019 If VAL is a coding system, it is used for both decoding what received
9020 from the network stream and encoding what sent to the network stream.
9021 If VAL is a cons of coding systems, the car part is used for decoding,
9022 and the cdr part is used for encoding.
9023 If VAL is a function symbol, the function must return a coding system
9024 or a cons of coding systems which are used as above.
9025
9026 See also the function `find-operation-coding-system'.  */);
9027   Vnetwork_coding_system_alist = Qnil;
9028
9029   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
9030                doc: /* Coding system to use with system messages.
9031 Also used for decoding keyboard input on X Window system.  */);
9032   Vlocale_coding_system = Qnil;
9033
9034   /* The eol mnemonics are reset in startup.el system-dependently.  */
9035   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
9036                doc: /*
9037 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
9038   eol_mnemonic_unix = build_string (":");
9039
9040   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
9041                doc: /*
9042 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
9043   eol_mnemonic_dos = build_string ("\\");
9044
9045   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
9046                doc: /*
9047 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
9048   eol_mnemonic_mac = build_string ("/");
9049
9050   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
9051                doc: /*
9052 *String displayed in mode line when end-of-line format is not yet determined.  */);
9053   eol_mnemonic_undecided = build_string (":");
9054
9055   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
9056                doc: /*
9057 *Non-nil enables character translation while encoding and decoding.  */);
9058   Venable_character_translation = Qt;
9059
9060   DEFVAR_LISP ("standard-translation-table-for-decode",
9061                &Vstandard_translation_table_for_decode,
9062                doc: /* Table for translating characters while decoding.  */);
9063   Vstandard_translation_table_for_decode = Qnil;
9064
9065   DEFVAR_LISP ("standard-translation-table-for-encode",
9066                &Vstandard_translation_table_for_encode,
9067                doc: /* Table for translating characters while encoding.  */);
9068   Vstandard_translation_table_for_encode = Qnil;
9069
9070   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
9071                doc: /* Alist of charsets vs revision numbers.
9072 While encoding, if a charset (car part of an element) is found,
9073 designate it with the escape sequence identifying revision (cdr part
9074 of the element).  */);
9075   Vcharset_revision_table = Qnil;
9076
9077   DEFVAR_LISP ("default-process-coding-system",
9078                &Vdefault_process_coding_system,
9079                doc: /* Cons of coding systems used for process I/O by default.
9080 The car part is used for decoding a process output,
9081 the cdr part is used for encoding a text to be sent to a process.  */);
9082   Vdefault_process_coding_system = Qnil;
9083
9084   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
9085                doc: /*
9086 Table of extra Latin codes in the range 128..159 (inclusive).
9087 This is a vector of length 256.
9088 If Nth element is non-nil, the existence of code N in a file
9089 \(or output of subprocess) doesn't prevent it to be detected as
9090 a coding system of ISO 2022 variant which has a flag
9091 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9092 or reading output of a subprocess.
9093 Only 128th through 159th elements has a meaning.  */);
9094   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
9095
9096   DEFVAR_LISP ("select-safe-coding-system-function",
9097                &Vselect_safe_coding_system_function,
9098                doc: /*
9099 Function to call to select safe coding system for encoding a text.
9100
9101 If set, this function is called to force a user to select a proper
9102 coding system which can encode the text in the case that a default
9103 coding system used in each operation can't encode the text.
9104
9105 The default value is `select-safe-coding-system' (which see).  */);
9106   Vselect_safe_coding_system_function = Qnil;
9107
9108   DEFVAR_BOOL ("coding-system-require-warning",
9109                &coding_system_require_warning,
9110                doc: /* Internal use only.
9111 If non-nil, on writing a file, `select-safe-coding-system-function' is
9112 called even if `coding-system-for-write' is non-nil.  The command
9113 `universal-coding-system-argument' binds this variable to t temporarily.  */);
9114   coding_system_require_warning = 0;
9115
9116
9117   DEFVAR_BOOL ("inhibit-iso-escape-detection",
9118                &inhibit_iso_escape_detection,
9119                doc: /*
9120 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
9121
9122 By default, on reading a file, Emacs tries to detect how the text is
9123 encoded.  This code detection is sensitive to escape sequences.  If
9124 the sequence is valid as ISO2022, the code is determined as one of
9125 the ISO2022 encodings, and the file is decoded by the corresponding
9126 coding system (e.g. `iso-2022-7bit').
9127
9128 However, there may be a case that you want to read escape sequences in
9129 a file as is.  In such a case, you can set this variable to non-nil.
9130 Then, as the code detection ignores any escape sequences, no file is
9131 detected as encoded in some ISO2022 encoding.  The result is that all
9132 escape sequences become visible in a buffer.
9133
9134 The default value is nil, and it is strongly recommended not to change
9135 it.  That is because many Emacs Lisp source files that contain
9136 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9137 in Emacs's distribution, and they won't be decoded correctly on
9138 reading if you suppress escape sequence detection.
9139
9140 The other way to read escape sequences in a file without decoding is
9141 to explicitly specify some coding system that doesn't use ISO2022's
9142 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
9143   inhibit_iso_escape_detection = 0;
9144
9145   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
9146                doc: /* Char table for translating self-inserting characters.
9147 This is applied to the result of input methods, not their input.  See also
9148 `keyboard-translate-table'.  */);
9149     Vtranslation_table_for_input = Qnil;
9150
9151   {
9152     Lisp_Object args[coding_arg_max];
9153     Lisp_Object plist[16];
9154     int i;
9155
9156     for (i = 0; i < coding_arg_max; i++)
9157       args[i] = Qnil;
9158
9159     plist[0] = intern (":name");
9160     plist[1] = args[coding_arg_name] = Qno_conversion;
9161     plist[2] = intern (":mnemonic");
9162     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
9163     plist[4] = intern (":coding-type");
9164     plist[5] = args[coding_arg_coding_type] = Qraw_text;
9165     plist[6] = intern (":ascii-compatible-p");
9166     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
9167     plist[8] = intern (":default-char");
9168     plist[9] = args[coding_arg_default_char] = make_number (0);
9169     plist[10] = intern (":for-unibyte");
9170     plist[11] = args[coding_arg_for_unibyte] = Qt;
9171     plist[12] = intern (":docstring");
9172     plist[13] = build_string ("Do no conversion.\n\
9173 \n\
9174 When you visit a file with this coding, the file is read into a\n\
9175 unibyte buffer as is, thus each byte of a file is treated as a\n\
9176 character.");
9177     plist[14] = intern (":eol-type");
9178     plist[15] = args[coding_arg_eol_type] = Qunix;
9179     args[coding_arg_plist] = Flist (16, plist);
9180     Fdefine_coding_system_internal (coding_arg_max, args);
9181   }
9182
9183   setup_coding_system (Qno_conversion, &keyboard_coding);
9184   setup_coding_system (Qno_conversion, &terminal_coding);
9185   setup_coding_system (Qno_conversion, &safe_terminal_coding);
9186
9187   {
9188     int i;
9189
9190     for (i = 0; i < coding_category_max; i++)
9191       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
9192   }
9193 }
9194
9195 char *
9196 emacs_strerror (error_number)
9197      int error_number;
9198 {
9199   char *str;
9200
9201   synchronize_system_messages_locale ();
9202   str = strerror (error_number);
9203
9204   if (! NILP (Vlocale_coding_system))
9205     {
9206       Lisp_Object dec = code_convert_string_norecord (build_string (str),
9207                                                       Vlocale_coding_system,
9208                                                       0);
9209       str = (char *) SDATA (dec);
9210     }
9211
9212   return str;
9213 }
9214
9215 #endif /* emacs */