src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3      Licensed to the Free Software Foundation.
   4    Copyright (C) 2001, 2002 Free Software Foundation, Inc.
   5    Copyright (C) 2003
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H13PRO009
   8
   9 This file is part of GNU Emacs.
  10
  11 GNU Emacs is free software; you can redistribute it and/or modify
  12 it under the terms of the GNU General Public License as published by
  13 the Free Software Foundation; either version 2, or (at your option)
  14 any later version.
  15
  16 GNU Emacs is distributed in the hope that it will be useful,
  17 but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 GNU General Public License for more details.
  20
  21 You should have received a copy of the GNU General Public License
  22 along with GNU Emacs; see the file COPYING.  If not, write to
  23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  24 Boston, MA 02111-1307, USA.  */
  25
  26 /*** TABLE OF CONTENTS ***
  27
  28   0. General comments
  29   1. Preamble
  30   2. Emacs' internal format (emacs-utf-8) handlers
  31   3. UTF-8 handlers
  32   4. UTF-16 handlers
  33   5. Charset-base coding systems handlers
  34   6. emacs-mule (old Emacs' internal format) handlers
  35   7. ISO2022 handlers
  36   8. Shift-JIS and BIG5 handlers
  37   9. CCL handlers
  38   10. C library functions
  39   11. Emacs Lisp library functions
  40   12. Postamble
  41
  42 */
  43
  44 /*** 0. General comments ***
  45
  46
  47 CODING SYSTEM
  48
  49   A coding system is an object for an encoding mechanism that contains
  50   information about how to convert byte sequences to character
  51   sequences and vice versa.  When we say "decode", it means converting
  52   a byte sequence of a specific coding system into a character
  53   sequence that is represented by Emacs' internal coding system
  54   `emacs-utf-8', and when we say "encode", it means converting a
  55   character sequence of emacs-utf-8 to a byte sequence of a specific
  56   coding system.
  57
  58   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  59   C level, a coding system is represented by a vector of attributes
  60   stored in the hash table Vcharset_hash_table.  The conversion from
  61   coding system symbol to attributes vector is done by looking up
  62   Vcharset_hash_table by the symbol.
  63
  64   Coding systems are classified into the following types depending on
  65   the encoding mechanism.  Here's a brief description of the types.
  66
  67   o UTF-8
  68
  69   o UTF-16
  70
  71   o Charset-base coding system
  72
  73   A coding system defined by one or more (coded) character sets.
  74   Decoding and encoding are done by a code converter defined for each
  75   character set.
  76
  77   o Old Emacs internal format (emacs-mule)
  78
  79   The coding system adopted by old versions of Emacs (20 and 21).
  80
  81   o ISO2022-base coding system
  82
  83   The most famous coding system for multiple character sets.  X's
  84   Compound Text, various EUCs (Extended Unix Code), and coding systems
  85   used in the Internet communication such as ISO-2022-JP are all
  86   variants of ISO2022.
  87
  88   o SJIS (or Shift-JIS or MS-Kanji-Code)
  89
  90   A coding system to encode character sets: ASCII, JISX0201, and
  91   JISX0208.  Widely used for PC's in Japan.  Details are described in
  92   section 8.
  93
  94   o BIG5
  95
  96   A coding system to encode character sets: ASCII and Big5.  Widely
  97   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  98   described in section 8.  In this file, when we write "big5" (all
  99   lowercase), we mean the coding system, and when we write "Big5"
 100   (capitalized), we mean the character set.
 101
 102   o CCL
 103
 104   If a user wants to decode/encode text encoded in a coding system
 105   not listed above, he can supply a decoder and an encoder for it in
 106   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 107   program while decoding/encoding.
 108
 109   o Raw-text
 110
 111   A coding system for text containing raw eight-bit data.  Emacs
 112   treats each byte of source text as a character (except for
 113   end-of-line conversion).
 114
 115   o No-conversion
 116
 117   Like raw text, but don't do end-of-line conversion.
 118
 119
 120 END-OF-LINE FORMAT
 121
 122   How text end-of-line is encoded depends on operating system.  For
 123   instance, Unix's format is just one byte of LF (line-feed) code,
 124   whereas DOS's format is two-byte sequence of `carriage-return' and
 125   `line-feed' codes.  MacOS's format is usually one byte of
 126   `carriage-return'.
 127
 128   Since text character encoding and end-of-line encoding are
 129   independent, any coding system described above can take any format
 130   of end-of-line (except for no-conversion).
 131
 132 STRUCT CODING_SYSTEM
 133
 134   Before using a coding system for code conversion (i.e. decoding and
 135   encoding), we setup a structure of type `struct coding_system'.
 136   This structure keeps various information about a specific code
 137   conversion (e.g. the location of source and destination data).
 138
 139 */
 140
 141 /* COMMON MACROS */
 142
 143
 144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 145
 146   These functions check if a byte sequence specified as a source in
 147   CODING conforms to the format of XXX, and update the members of
 148   DETECT_INFO.
 149
 150   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 151
 152   Below is the template of these functions.  */
 153
 154 #if 0
 155 static int
 156 detect_coding_XXX (coding, detect_info)
 157      struct coding_system *coding;
 158      struct coding_detection_info *detect_info;
 159 {
 160   unsigned char *src = coding->source;
 161   unsigned char *src_end = coding->source + coding->src_bytes;
 162   int multibytep = coding->src_multibyte;
 163   int consumed_chars = 0;
 164   int found = 0;
 165   ...;
 166
 167   while (1)
 168     {
 169       /* Get one byte from the source.  If the souce is exausted, jump
 170          to no_more_source:.  */
 171       ONE_MORE_BYTE (c);
 172
 173       if (! __C_conforms_to_XXX___ (c))
 174         break;
 175       if (! __C_strongly_suggests_XXX__ (c))
 176         found = CATEGORY_MASK_XXX;
 177     }
 178   /* The byte sequence is invalid for XXX.  */
 179   detect_info->rejected |= CATEGORY_MASK_XXX;
 180   return 0;
 181
 182  no_more_source:
 183   /* The source exausted successfully.  */
 184   detect_info->found |= found;
 185   return 1;
 186 }
 187 #endif
 188
 189 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 190
 191   These functions decode a byte sequence specified as a source by
 192   CODING.  The resulting multibyte text goes to a place pointed to by
 193   CODING->charbuf, the length of which should not exceed
 194   CODING->charbuf_size;
 195
 196   These functions set the information of original and decoded texts in
 197   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 198   They also set CODING->result to one of CODING_RESULT_XXX indicating
 199   how the decoding is finished.
 200
 201   Below is the template of these functions.  */
 202
 203 #if 0
 204 static void
 205 decode_coding_XXXX (coding)
 206      struct coding_system *coding;
 207 {
 208   unsigned char *src = coding->source + coding->consumed;
 209   unsigned char *src_end = coding->source + coding->src_bytes;
 210   /* SRC_BASE remembers the start position in source in each loop.
 211      The loop will be exited when there's not enough source code, or
 212      when there's no room in CHARBUF for a decoded character.  */
 213   unsigned char *src_base;
 214   /* A buffer to produce decoded characters.  */
 215   int *charbuf = coding->charbuf;
 216   int *charbuf_end = charbuf + coding->charbuf_size;
 217   int multibytep = coding->src_multibyte;
 218
 219   while (1)
 220     {
 221       src_base = src;
 222       if (charbuf < charbuf_end)
 223         /* No more room to produce a decoded character.  */
 224         break;
 225       ONE_MORE_BYTE (c);
 226       /* Decode it. */
 227     }
 228
 229  no_more_source:
 230   if (src_base < src_end
 231       && coding->mode & CODING_MODE_LAST_BLOCK)
 232     /* If the source ends by partial bytes to construct a character,
 233        treat them as eight-bit raw data.  */
 234     while (src_base < src_end && charbuf < charbuf_end)
 235       *charbuf++ = *src_base++;
 236   /* Remember how many bytes and characters we consumed.  If the
 237      source is multibyte, the bytes and chars are not identical.  */
 238   coding->consumed = coding->consumed_char = src_base - coding->source;
 239   /* Remember how many characters we produced.  */
 240   coding->charbuf_used = charbuf - coding->charbuf;
 241 }
 242 #endif
 243
 244 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 245
 246   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 247   internal multibyte format by CODING.  The resulting byte sequence
 248   goes to a place pointed to by DESTINATION, the length of which
 249   should not exceed DST_BYTES.
 250
 251   These functions set the information of original and encoded texts in
 252   the members produced, produced_char, consumed, and consumed_char of
 253   the structure *CODING.  They also set the member result to one of
 254   CODING_RESULT_XXX indicating how the encoding finished.
 255
 256   DST_BYTES zero means that source area and destination area are
 257   overlapped, which means that we can produce a encoded text until it
 258   reaches at the head of not-yet-encoded source text.
 259
 260   Below is a template of these functions.  */
 261 #if 0
 262 static void
 263 encode_coding_XXX (coding)
 264      struct coding_system *coding;
 265 {
 266   int multibytep = coding->dst_multibyte;
 267   int *charbuf = coding->charbuf;
 268   int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
 269   unsigned char *dst = coding->destination + coding->produced;
 270   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 271   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 272   int produced_chars = 0;
 273
 274   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 275     {
 276       int c = *charbuf;
 277       /* Encode C into DST, and increment DST.  */
 278     }
 279  label_no_more_destination:
 280   /* How many chars and bytes we produced.  */
 281   coding->produced_char += produced_chars;
 282   coding->produced = dst - coding->destination;
 283 }
 284 #endif
 285
 286 \f
 287 /*** 1. Preamble ***/
 288
 289 #include <config.h>
 290 #include <stdio.h>
 291
 292 #include "lisp.h"
 293 #include "buffer.h"
 294 #include "character.h"
 295 #include "charset.h"
 296 #include "ccl.h"
 297 #include "composite.h"
 298 #include "coding.h"
 299 #include "window.h"
 300
 301 Lisp_Object Vcoding_system_hash_table;
 302
 303 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 304 Lisp_Object Qunix, Qdos;
 305 extern Lisp_Object Qmac;        /* frame.c */
 306 Lisp_Object Qbuffer_file_coding_system;
 307 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 308 Lisp_Object Qdefault_char;
 309 Lisp_Object Qno_conversion, Qundecided;
 310 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 311 Lisp_Object Qbig, Qlittle;
 312 Lisp_Object Qcoding_system_history;
 313 Lisp_Object Qvalid_codes;
 314 Lisp_Object QCcategory;
 315
 316 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 317 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 318 Lisp_Object Qstart_process, Qopen_network_stream;
 319 Lisp_Object Qtarget_idx;
 320
 321 int coding_system_require_warning;
 322
 323 Lisp_Object Vselect_safe_coding_system_function;
 324
 325 /* Mnemonic string for each format of end-of-line.  */
 326 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 327 /* Mnemonic string to indicate format of end-of-line is not yet
 328    decided.  */
 329 Lisp_Object eol_mnemonic_undecided;
 330
 331 #ifdef emacs
 332
 333 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 334
 335 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 336
 337 /* Coding system emacs-mule and raw-text are for converting only
 338    end-of-line format.  */
 339 Lisp_Object Qemacs_mule, Qraw_text;
 340 Lisp_Object Qutf_8_emacs;
 341
 342 /* Coding-systems are handed between Emacs Lisp programs and C internal
 343    routines by the following three variables.  */
 344 /* Coding-system for reading files and receiving data from process.  */
 345 Lisp_Object Vcoding_system_for_read;
 346 /* Coding-system for writing files and sending data to process.  */
 347 Lisp_Object Vcoding_system_for_write;
 348 /* Coding-system actually used in the latest I/O.  */
 349 Lisp_Object Vlast_coding_system_used;
 350
 351 /* A vector of length 256 which contains information about special
 352    Latin codes (especially for dealing with Microsoft codes).  */
 353 Lisp_Object Vlatin_extra_code_table;
 354
 355 /* Flag to inhibit code conversion of end-of-line format.  */
 356 int inhibit_eol_conversion;
 357
 358 /* Flag to inhibit ISO2022 escape sequence detection.  */
 359 int inhibit_iso_escape_detection;
 360
 361 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 362 int inherit_process_coding_system;
 363
 364 /* Coding system to be used to encode text for terminal display.  */
 365 struct coding_system terminal_coding;
 366
 367 /* Coding system to be used to encode text for terminal display when
 368    terminal coding system is nil.  */
 369 struct coding_system safe_terminal_coding;
 370
 371 /* Coding system of what is sent from terminal keyboard.  */
 372 struct coding_system keyboard_coding;
 373
 374 Lisp_Object Vfile_coding_system_alist;
 375 Lisp_Object Vprocess_coding_system_alist;
 376 Lisp_Object Vnetwork_coding_system_alist;
 377
 378 Lisp_Object Vlocale_coding_system;
 379
 380 #endif /* emacs */
 381
 382 /* Flag to tell if we look up translation table on character code
 383    conversion.  */
 384 Lisp_Object Venable_character_translation;
 385 /* Standard translation table to look up on decoding (reading).  */
 386 Lisp_Object Vstandard_translation_table_for_decode;
 387 /* Standard translation table to look up on encoding (writing).  */
 388 Lisp_Object Vstandard_translation_table_for_encode;
 389
 390 Lisp_Object Qtranslation_table;
 391 Lisp_Object Qtranslation_table_id;
 392 Lisp_Object Qtranslation_table_for_decode;
 393 Lisp_Object Qtranslation_table_for_encode;
 394
 395 /* Alist of charsets vs revision number.  */
 396 static Lisp_Object Vcharset_revision_table;
 397
 398 /* Default coding systems used for process I/O.  */
 399 Lisp_Object Vdefault_process_coding_system;
 400
 401 /* Char table for translating Quail and self-inserting input.  */
 402 Lisp_Object Vtranslation_table_for_input;
 403
 404 /* Global flag to tell that we can't call post-read-conversion and
 405    pre-write-conversion functions.  Usually the value is zero, but it
 406    is set to 1 temporarily while such functions are running.  This is
 407    to avoid infinite recursive call.  */
 408 static int inhibit_pre_post_conversion;
 409
 410 /* Two special coding systems.  */
 411 Lisp_Object Vsjis_coding_system;
 412 Lisp_Object Vbig5_coding_system;
 413
 414
 415 static int detect_coding_utf_8 P_ ((struct coding_system *,
 416                                     struct coding_detection_info *info));
 417 static void decode_coding_utf_8 P_ ((struct coding_system *));
 418 static int encode_coding_utf_8 P_ ((struct coding_system *));
 419
 420 static int detect_coding_utf_16 P_ ((struct coding_system *,
 421                                      struct coding_detection_info *info));
 422 static void decode_coding_utf_16 P_ ((struct coding_system *));
 423 static int encode_coding_utf_16 P_ ((struct coding_system *));
 424
 425 static int detect_coding_iso_2022 P_ ((struct coding_system *,
 426                                        struct coding_detection_info *info));
 427 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 428 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 429
 430 static int detect_coding_emacs_mule P_ ((struct coding_system *,
 431                                          struct coding_detection_info *info));
 432 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 433 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 434
 435 static int detect_coding_sjis P_ ((struct coding_system *,
 436                                    struct coding_detection_info *info));
 437 static void decode_coding_sjis P_ ((struct coding_system *));
 438 static int encode_coding_sjis P_ ((struct coding_system *));
 439
 440 static int detect_coding_big5 P_ ((struct coding_system *,
 441                                    struct coding_detection_info *info));
 442 static void decode_coding_big5 P_ ((struct coding_system *));
 443 static int encode_coding_big5 P_ ((struct coding_system *));
 444
 445 static int detect_coding_ccl P_ ((struct coding_system *,
 446                                   struct coding_detection_info *info));
 447 static void decode_coding_ccl P_ ((struct coding_system *));
 448 static int encode_coding_ccl P_ ((struct coding_system *));
 449
 450 static void decode_coding_raw_text P_ ((struct coding_system *));
 451 static int encode_coding_raw_text P_ ((struct coding_system *));
 452
 453
 454 /* ISO2022 section */
 455
 456 #define CODING_ISO_INITIAL(coding, reg)                 \
 457   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 458                      coding_attr_iso_initial),          \
 459                reg)))
 460
 461
 462 #define CODING_ISO_REQUEST(coding, charset_id)  \
 463   ((charset_id <= (coding)->max_charset_id      \
 464     ? (coding)->safe_charsets[charset_id]       \
 465     : -1))
 466
 467
 468 #define CODING_ISO_FLAGS(coding)        \
 469   ((coding)->spec.iso_2022.flags)
 470 #define CODING_ISO_DESIGNATION(coding, reg)     \
 471   ((coding)->spec.iso_2022.current_designation[reg])
 472 #define CODING_ISO_INVOCATION(coding, plane)    \
 473   ((coding)->spec.iso_2022.current_invocation[plane])
 474 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 475   ((coding)->spec.iso_2022.single_shifting)
 476 #define CODING_ISO_BOL(coding)  \
 477   ((coding)->spec.iso_2022.bol)
 478 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 479   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 480
 481 /* Control characters of ISO2022.  */
 482                         /* code */      /* function */
 483 #define ISO_CODE_LF     0x0A            /* line-feed */
 484 #define ISO_CODE_CR     0x0D            /* carriage-return */
 485 #define ISO_CODE_SO     0x0E            /* shift-out */
 486 #define ISO_CODE_SI     0x0F            /* shift-in */
 487 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 488 #define ISO_CODE_ESC    0x1B            /* escape */
 489 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 490 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 491 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 492
 493 /* All code (1-byte) of ISO2022 is classified into one of the
 494    followings.  */
 495 enum iso_code_class_type
 496   {
 497     ISO_control_0,              /* Control codes in the range
 498                                    0x00..0x1F and 0x7F, except for the
 499                                    following 5 codes.  */
 500     ISO_carriage_return,        /* ISO_CODE_CR (0x0D) */
 501     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 502     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 503     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 504     ISO_escape,                 /* ISO_CODE_SO (0x1B) */
 505     ISO_control_1,              /* Control codes in the range
 506                                    0x80..0x9F, except for the
 507                                    following 3 codes.  */
 508     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 509     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 510     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 511     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 512     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 513     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 514     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 515   };
 516
 517 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
 518     `iso-flags' attribute of an iso2022 coding system.  */
 519
 520 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 521    instead of the correct short-form sequence (e.g. ESC $ A).  */
 522 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 523
 524 /* If set, reset graphic planes and registers at end-of-line to the
 525    initial state.  */
 526 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 527
 528 /* If set, reset graphic planes and registers before any control
 529    characters to the initial state.  */
 530 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 531
 532 /* If set, encode by 7-bit environment.  */
 533 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 534
 535 /* If set, use locking-shift function.  */
 536 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 537
 538 /* If set, use single-shift function.  Overwrite
 539    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 540 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 541
 542 /* If set, use designation escape sequence.  */
 543 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 544
 545 /* If set, produce revision number sequence.  */
 546 #define CODING_ISO_FLAG_REVISION        0x0080
 547
 548 /* If set, produce ISO6429's direction specifying sequence.  */
 549 #define CODING_ISO_FLAG_DIRECTION       0x0100
 550
 551 /* If set, assume designation states are reset at beginning of line on
 552    output.  */
 553 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 554
 555 /* If set, designation sequence should be placed at beginning of line
 556    on output.  */
 557 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 558
 559 /* If set, do not encode unsafe charactes on output.  */
 560 #define CODING_ISO_FLAG_SAFE            0x0800
 561
 562 /* If set, extra latin codes (128..159) are accepted as a valid code
 563    on input.  */
 564 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 565
 566 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 567
 568 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 569
 570 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 571
 572 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 573
 574 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 575
 576 /* A character to be produced on output if encoding of the original
 577    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 578 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 579
 580
 581 /* UTF-16 section */
 582 #define CODING_UTF_16_BOM(coding)       \
 583   ((coding)->spec.utf_16.bom)
 584
 585 #define CODING_UTF_16_ENDIAN(coding)    \
 586   ((coding)->spec.utf_16.endian)
 587
 588 #define CODING_UTF_16_SURROGATE(coding) \
 589   ((coding)->spec.utf_16.surrogate)
 590
 591
 592 /* CCL section */
 593 #define CODING_CCL_DECODER(coding)      \
 594   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 595 #define CODING_CCL_ENCODER(coding)      \
 596   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 597 #define CODING_CCL_VALIDS(coding)                                          \
 598   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 599
 600 /* Index for each coding category in `coding_categories' */
 601
 602 enum coding_category
 603   {
 604     coding_category_iso_7,
 605     coding_category_iso_7_tight,
 606     coding_category_iso_8_1,
 607     coding_category_iso_8_2,
 608     coding_category_iso_7_else,
 609     coding_category_iso_8_else,
 610     coding_category_utf_8,
 611     coding_category_utf_16_auto,
 612     coding_category_utf_16_be,
 613     coding_category_utf_16_le,
 614     coding_category_utf_16_be_nosig,
 615     coding_category_utf_16_le_nosig,
 616     coding_category_charset,
 617     coding_category_sjis,
 618     coding_category_big5,
 619     coding_category_ccl,
 620     coding_category_emacs_mule,
 621     /* All above are targets of code detection.  */
 622     coding_category_raw_text,
 623     coding_category_undecided,
 624     coding_category_max
 625   };
 626
 627 /* Definitions of flag bits used in detect_coding_XXXX.  */
 628 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 629 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 630 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 631 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 632 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 633 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 634 #define CATEGORY_MASK_UTF_8             (1 << coding_category_utf_8)
 635 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 636 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 637 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 638 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 639 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 640 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 641 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 642 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 643 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 644 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 645 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 646
 647 /* This value is returned if detect_coding_mask () find nothing other
 648    than ASCII characters.  */
 649 #define CATEGORY_MASK_ANY               \
 650   (CATEGORY_MASK_ISO_7                  \
 651    | CATEGORY_MASK_ISO_7_TIGHT          \
 652    | CATEGORY_MASK_ISO_8_1              \
 653    | CATEGORY_MASK_ISO_8_2              \
 654    | CATEGORY_MASK_ISO_7_ELSE           \
 655    | CATEGORY_MASK_ISO_8_ELSE           \
 656    | CATEGORY_MASK_UTF_8                \
 657    | CATEGORY_MASK_UTF_16_BE            \
 658    | CATEGORY_MASK_UTF_16_LE            \
 659    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 660    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 661    | CATEGORY_MASK_CHARSET              \
 662    | CATEGORY_MASK_SJIS                 \
 663    | CATEGORY_MASK_BIG5                 \
 664    | CATEGORY_MASK_CCL                  \
 665    | CATEGORY_MASK_EMACS_MULE)
 666
 667
 668 #define CATEGORY_MASK_ISO_7BIT \
 669   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 670
 671 #define CATEGORY_MASK_ISO_8BIT \
 672   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 673
 674 #define CATEGORY_MASK_ISO_ELSE \
 675   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 676
 677 #define CATEGORY_MASK_ISO_ESCAPE        \
 678   (CATEGORY_MASK_ISO_7                  \
 679    | CATEGORY_MASK_ISO_7_TIGHT          \
 680    | CATEGORY_MASK_ISO_7_ELSE           \
 681    | CATEGORY_MASK_ISO_8_ELSE)
 682
 683 #define CATEGORY_MASK_ISO       \
 684   (  CATEGORY_MASK_ISO_7BIT     \
 685      | CATEGORY_MASK_ISO_8BIT   \
 686      | CATEGORY_MASK_ISO_ELSE)
 687
 688 #define CATEGORY_MASK_UTF_16            \
 689   (CATEGORY_MASK_UTF_16_BE              \
 690    | CATEGORY_MASK_UTF_16_LE            \
 691    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 692    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 693
 694
 695 /* List of symbols `coding-category-xxx' ordered by priority.  This
 696    variable is exposed to Emacs Lisp.  */
 697 static Lisp_Object Vcoding_category_list;
 698
 699 /* Table of coding categories (Lisp symbols).  This variable is for
 700    internal use oly.  */
 701 static Lisp_Object Vcoding_category_table;
 702
 703 /* Table of coding-categories ordered by priority.  */
 704 static enum coding_category coding_priorities[coding_category_max];
 705
 706 /* Nth element is a coding context for the coding system bound to the
 707    Nth coding category.  */
 708 static struct coding_system coding_categories[coding_category_max];
 709
 710 /*** Commonly used macros and functions ***/
 711
 712 #ifndef min
 713 #define min(a, b) ((a) < (b) ? (a) : (b))
 714 #endif
 715 #ifndef max
 716 #define max(a, b) ((a) > (b) ? (a) : (b))
 717 #endif
 718
 719 #define CODING_GET_INFO(coding, attrs, eol_type, charset_list)  \
 720   do {                                                          \
 721     attrs = CODING_ID_ATTRS (coding->id);                       \
 722     eol_type = CODING_ID_EOL_TYPE (coding->id);                 \
 723     if (VECTORP (eol_type))                                     \
 724       eol_type = Qunix;                                         \
 725     charset_list = CODING_ATTR_CHARSET_LIST (attrs);            \
 726   } while (0)
 727
 728
 729 /* Safely get one byte from the source text pointed by SRC which ends
 730    at SRC_END, and set C to that byte.  If there are not enough bytes
 731    in the source, it jumps to `no_more_source'.  The caller
 732    should declare and set these variables appropriately in advance:
 733         src, src_end, multibytep
 734 */
 735
 736 #define ONE_MORE_BYTE(c)                                        \
 737   do {                                                          \
 738     if (src == src_end)                                         \
 739       {                                                         \
 740         if (src_base < src)                                     \
 741           coding->result = CODING_RESULT_INSUFFICIENT_SRC;      \
 742         goto no_more_source;                                    \
 743       }                                                         \
 744     c = *src++;                                                 \
 745     if (multibytep && (c & 0x80))                               \
 746       {                                                         \
 747         if ((c & 0xFE) != 0xC0)                                 \
 748           error ("Undecodable char found");                     \
 749         c = ((c & 1) << 6) | *src++;                            \
 750       }                                                         \
 751     consumed_chars++;                                           \
 752   } while (0)
 753
 754
 755 #define ONE_MORE_BYTE_NO_CHECK(c)               \
 756   do {                                          \
 757     c = *src++;                                 \
 758     if (multibytep && (c & 0x80))               \
 759       {                                         \
 760         if ((c & 0xFE) != 0xC0)                 \
 761           error ("Undecodable char found");     \
 762         c = ((c & 1) << 6) | *src++;            \
 763       }                                         \
 764     consumed_chars++;                           \
 765   } while (0)
 766
 767
 768 /* Store a byte C in the place pointed by DST and increment DST to the
 769    next free point, and increment PRODUCED_CHARS.  The caller should
 770    assure that C is 0..127, and declare and set the variable `dst'
 771    appropriately in advance.
 772 */
 773
 774
 775 #define EMIT_ONE_ASCII_BYTE(c)  \
 776   do {                          \
 777     produced_chars++;           \
 778     *dst++ = (c);               \
 779   } while (0)
 780
 781
 782 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 783
 784 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 785   do {                                  \
 786     produced_chars += 2;                \
 787     *dst++ = (c1), *dst++ = (c2);       \
 788   } while (0)
 789
 790
 791 /* Store a byte C in the place pointed by DST and increment DST to the
 792    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 793    nonzero, store in an appropriate multibyte from.  The caller should
 794    declare and set the variables `dst' and `multibytep' appropriately
 795    in advance.  */
 796
 797 #define EMIT_ONE_BYTE(c)                \
 798   do {                                  \
 799     produced_chars++;                   \
 800     if (multibytep)                     \
 801       {                                 \
 802         int ch = (c);                   \
 803         if (ch >= 0x80)                 \
 804           ch = BYTE8_TO_CHAR (ch);      \
 805         CHAR_STRING_ADVANCE (ch, dst);  \
 806       }                                 \
 807     else                                \
 808       *dst++ = (c);                     \
 809   } while (0)
 810
 811
 812 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 813
 814 #define EMIT_TWO_BYTES(c1, c2)          \
 815   do {                                  \
 816     produced_chars += 2;                \
 817     if (multibytep)                     \
 818       {                                 \
 819         int ch;                         \
 820                                         \
 821         ch = (c1);                      \
 822         if (ch >= 0x80)                 \
 823           ch = BYTE8_TO_CHAR (ch);      \
 824         CHAR_STRING_ADVANCE (ch, dst);  \
 825         ch = (c2);                      \
 826         if (ch >= 0x80)                 \
 827           ch = BYTE8_TO_CHAR (ch);      \
 828         CHAR_STRING_ADVANCE (ch, dst);  \
 829       }                                 \
 830     else                                \
 831       {                                 \
 832         *dst++ = (c1);                  \
 833         *dst++ = (c2);                  \
 834       }                                 \
 835   } while (0)
 836
 837
 838 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 839   do {                                  \
 840     EMIT_ONE_BYTE (c1);                 \
 841     EMIT_TWO_BYTES (c2, c3);            \
 842   } while (0)
 843
 844
 845 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 846   do {                                          \
 847     EMIT_TWO_BYTES (c1, c2);                    \
 848     EMIT_TWO_BYTES (c3, c4);                    \
 849   } while (0)
 850
 851
 852 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 853   do {                                                                       \
 854     charset_map_loaded = 0;                                                  \
 855     c = DECODE_CHAR (charset, code);                                         \
 856     if (charset_map_loaded)                                                  \
 857       {                                                                      \
 858         const unsigned char *orig = coding->source;                          \
 859         EMACS_INT offset;                                                    \
 860                                                                              \
 861         coding_set_source (coding);                                          \
 862         offset = coding->source - orig;                                      \
 863         src += offset;                                                       \
 864         src_base += offset;                                                  \
 865         src_end += offset;                                                   \
 866       }                                                                      \
 867   } while (0)
 868
 869
 870 #define ASSURE_DESTINATION(bytes)                               \
 871   do {                                                          \
 872     if (dst + (bytes) >= dst_end)                               \
 873       {                                                         \
 874         int more_bytes = charbuf_end - charbuf + (bytes);       \
 875                                                                 \
 876         dst = alloc_destination (coding, more_bytes, dst);      \
 877         dst_end = coding->destination + coding->dst_bytes;      \
 878       }                                                         \
 879   } while (0)
 880
 881
 882
 883 static void
 884 coding_set_source (coding)
 885      struct coding_system *coding;
 886 {
 887   if (BUFFERP (coding->src_object))
 888     {
 889       struct buffer *buf = XBUFFER (coding->src_object);
 890
 891       if (coding->src_pos < 0)
 892         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
 893       else
 894         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
 895     }
 896   else if (STRINGP (coding->src_object))
 897     {
 898       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
 899     }
 900   else
 901     /* Otherwise, the source is C string and is never relocated
 902        automatically.  Thus we don't have to update anything.  */
 903     ;
 904 }
 905
 906 static void
 907 coding_set_destination (coding)
 908      struct coding_system *coding;
 909 {
 910   if (BUFFERP (coding->dst_object))
 911     {
 912       if (coding->src_pos < 0)
 913         {
 914           coding->destination = BEG_ADDR + coding->dst_pos_byte - 1;
 915           coding->dst_bytes = (GAP_END_ADDR
 916                                - (coding->src_bytes - coding->consumed)
 917                                - coding->destination);
 918         }
 919       else
 920         {
 921           /* We are sure that coding->dst_pos_byte is before the gap
 922              of the buffer. */
 923           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
 924                                  + coding->dst_pos_byte - 1);
 925           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
 926                                - coding->destination);
 927         }
 928     }
 929   else
 930     /* Otherwise, the destination is C string and is never relocated
 931        automatically.  Thus we don't have to update anything.  */
 932     ;
 933 }
 934
 935
 936 static void
 937 coding_alloc_by_realloc (coding, bytes)
 938      struct coding_system *coding;
 939      EMACS_INT bytes;
 940 {
 941   coding->destination = (unsigned char *) xrealloc (coding->destination,
 942                                                     coding->dst_bytes + bytes);
 943   coding->dst_bytes += bytes;
 944 }
 945
 946 static void
 947 coding_alloc_by_making_gap (coding, bytes)
 948      struct coding_system *coding;
 949      EMACS_INT bytes;
 950 {
 951   if (BUFFERP (coding->dst_object)
 952       && EQ (coding->src_object, coding->dst_object))
 953     {
 954       EMACS_INT add = coding->src_bytes - coding->consumed;
 955
 956       GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
 957       make_gap (bytes);
 958       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
 959     }
 960   else
 961     {
 962       Lisp_Object this_buffer;
 963
 964       this_buffer = Fcurrent_buffer ();
 965       set_buffer_internal (XBUFFER (coding->dst_object));
 966       make_gap (bytes);
 967       set_buffer_internal (XBUFFER (this_buffer));
 968     }
 969 }
 970
 971
 972 static unsigned char *
 973 alloc_destination (coding, nbytes, dst)
 974      struct coding_system *coding;
 975      int nbytes;
 976      unsigned char *dst;
 977 {
 978   EMACS_INT offset = dst - coding->destination;
 979
 980   if (BUFFERP (coding->dst_object))
 981     coding_alloc_by_making_gap (coding, nbytes);
 982   else
 983     coding_alloc_by_realloc (coding, nbytes);
 984   coding->result = CODING_RESULT_SUCCESS;
 985   coding_set_destination (coding);
 986   dst = coding->destination + offset;
 987   return dst;
 988 }
 989
 990 /** Macros for annotations.  */
 991
 992 /* Maximum length of annotation data (sum of annotations for
 993    composition and charset).  */
 994 #define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5)
 995
 996 /* An annotation data is stored in the array coding->charbuf in this
 997    format:
 998      [ -LENGTH ANNOTATION_MASK FROM TO ... ]
 999    LENGTH is the number of elements in the annotation.
1000    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1001    FROM and TO specify the range of text annotated.  They are relative
1002    to coding->src_pos (on encoding) or coding->dst_pos (on decoding).
1003
1004    The format of the following elements depend on ANNOTATION_MASK.
1005
1006    In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1007    follows:
1008      ... METHOD [ COMPOSITION-COMPONENTS ... ]
1009    METHOD is one of enum composition_method.
1010    Optionnal COMPOSITION-COMPONENTS are characters and composition
1011    rules.
1012
1013    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1014    follows.  */
1015
1016 #define ADD_ANNOTATION_DATA(buf, len, mask, from, to)   \
1017   do {                                                  \
1018     *(buf)++ = -(len);                                  \
1019     *(buf)++ = (mask);                                  \
1020     *(buf)++ = (from);                                  \
1021     *(buf)++ = (to);                                    \
1022     coding->annotated = 1;                              \
1023   } while (0);
1024
1025 #define ADD_COMPOSITION_DATA(buf, from, to, method)                           \
1026   do {                                                                        \
1027     ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
1028     *buf++ = method;                                                          \
1029   } while (0)
1030
1031
1032 #define ADD_CHARSET_DATA(buf, from, to, id)                               \
1033   do {                                                                    \
1034     ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \
1035     *buf++ = id;                                                          \
1036   } while (0)
1037
1038 \f
1039 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1040
1041
1042
1043 \f
1044 /*** 3. UTF-8 ***/
1045
1046 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1047    Check if a text is encoded in UTF-8.  If it is, return 1, else
1048    return 0.  */
1049
1050 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
1051 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
1052 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1053 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1054 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1055 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1056
1057 static int
1058 detect_coding_utf_8 (coding, detect_info)
1059      struct coding_system *coding;
1060      struct coding_detection_info *detect_info;
1061 {
1062   const unsigned char *src = coding->source, *src_base = src;
1063   const unsigned char *src_end = coding->source + coding->src_bytes;
1064   int multibytep = coding->src_multibyte;
1065   int consumed_chars = 0;
1066   int found = 0;
1067   int incomplete;
1068
1069   detect_info->checked |= CATEGORY_MASK_UTF_8;
1070   /* A coding system of this category is always ASCII compatible.  */
1071   src += coding->head_ascii;
1072
1073   while (1)
1074     {
1075       int c, c1, c2, c3, c4;
1076
1077       incomplete = 0;
1078       ONE_MORE_BYTE (c);
1079       if (UTF_8_1_OCTET_P (c))
1080         continue;
1081       incomplete = 1;
1082       ONE_MORE_BYTE (c1);
1083       if (! UTF_8_EXTRA_OCTET_P (c1))
1084         break;
1085       if (UTF_8_2_OCTET_LEADING_P (c))
1086         {
1087           found = CATEGORY_MASK_UTF_8;
1088           continue;
1089         }
1090       ONE_MORE_BYTE (c2);
1091       if (! UTF_8_EXTRA_OCTET_P (c2))
1092         break;
1093       if (UTF_8_3_OCTET_LEADING_P (c))
1094         {
1095           found = CATEGORY_MASK_UTF_8;
1096           continue;
1097         }
1098       ONE_MORE_BYTE (c3);
1099       if (! UTF_8_EXTRA_OCTET_P (c3))
1100         break;
1101       if (UTF_8_4_OCTET_LEADING_P (c))
1102         {
1103           found = CATEGORY_MASK_UTF_8;
1104           continue;
1105         }
1106       ONE_MORE_BYTE (c4);
1107       if (! UTF_8_EXTRA_OCTET_P (c4))
1108         break;
1109       if (UTF_8_5_OCTET_LEADING_P (c))
1110         {
1111           found = CATEGORY_MASK_UTF_8;
1112           continue;
1113         }
1114       break;
1115     }
1116   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1117   return 0;
1118
1119  no_more_source:
1120   if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
1121     {
1122       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1123       return 0;
1124     }
1125   detect_info->found |= found;
1126   return 1;
1127 }
1128
1129
1130 static void
1131 decode_coding_utf_8 (coding)
1132      struct coding_system *coding;
1133 {
1134   const unsigned char *src = coding->source + coding->consumed;
1135   const unsigned char *src_end = coding->source + coding->src_bytes;
1136   const unsigned char *src_base;
1137   int *charbuf = coding->charbuf;
1138   int *charbuf_end = charbuf + coding->charbuf_size;
1139   int consumed_chars = 0, consumed_chars_base;
1140   int multibytep = coding->src_multibyte;
1141   Lisp_Object attr, eol_type, charset_list;
1142
1143   CODING_GET_INFO (coding, attr, eol_type, charset_list);
1144
1145   while (1)
1146     {
1147       int c, c1, c2, c3, c4, c5;
1148
1149       src_base = src;
1150       consumed_chars_base = consumed_chars;
1151
1152       if (charbuf >= charbuf_end)
1153         break;
1154
1155       ONE_MORE_BYTE (c1);
1156       if (UTF_8_1_OCTET_P(c1))
1157         {
1158           c = c1;
1159           if (c == '\r')
1160             {
1161               if (EQ (eol_type, Qdos))
1162                 {
1163                   if (src == src_end)
1164                     {
1165                       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1166                       goto no_more_source;
1167                     }
1168                   if (*src == '\n')
1169                     ONE_MORE_BYTE (c);
1170                 }
1171               else if (EQ (eol_type, Qmac))
1172                 c = '\n';
1173             }
1174         }
1175       else
1176         {
1177           ONE_MORE_BYTE (c2);
1178           if (! UTF_8_EXTRA_OCTET_P (c2))
1179             goto invalid_code;
1180           if (UTF_8_2_OCTET_LEADING_P (c1))
1181             {
1182               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1183               /* Reject overlong sequences here and below.  Encoders
1184                  producing them are incorrect, they can be misleading,
1185                  and they mess up read/write invariance.  */
1186               if (c < 128)
1187                 goto invalid_code;
1188             }
1189           else
1190             {
1191               ONE_MORE_BYTE (c3);
1192               if (! UTF_8_EXTRA_OCTET_P (c3))
1193                 goto invalid_code;
1194               if (UTF_8_3_OCTET_LEADING_P (c1))
1195                 {
1196                   c = (((c1 & 0xF) << 12)
1197                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1198                   if (c < 0x800
1199                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1200                     goto invalid_code;
1201                 }
1202               else
1203                 {
1204                   ONE_MORE_BYTE (c4);
1205                   if (! UTF_8_EXTRA_OCTET_P (c4))
1206                     goto invalid_code;
1207                   if (UTF_8_4_OCTET_LEADING_P (c1))
1208                     {
1209                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1210                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1211                     if (c < 0x10000)
1212                       goto invalid_code;
1213                     }
1214                   else
1215                     {
1216                       ONE_MORE_BYTE (c5);
1217                       if (! UTF_8_EXTRA_OCTET_P (c5))
1218                         goto invalid_code;
1219                       if (UTF_8_5_OCTET_LEADING_P (c1))
1220                         {
1221                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1222                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1223                                | (c5 & 0x3F));
1224                           if ((c > MAX_CHAR) || (c < 0x200000))
1225                             goto invalid_code;
1226                         }
1227                       else
1228                         goto invalid_code;
1229                     }
1230                 }
1231             }
1232         }
1233
1234       *charbuf++ = c;
1235       continue;
1236
1237     invalid_code:
1238       src = src_base;
1239       consumed_chars = consumed_chars_base;
1240       ONE_MORE_BYTE (c);
1241       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1242       coding->errors++;
1243     }
1244
1245  no_more_source:
1246   coding->consumed_char += consumed_chars_base;
1247   coding->consumed = src_base - coding->source;
1248   coding->charbuf_used = charbuf - coding->charbuf;
1249 }
1250
1251
1252 static int
1253 encode_coding_utf_8 (coding)
1254      struct coding_system *coding;
1255 {
1256   int multibytep = coding->dst_multibyte;
1257   int *charbuf = coding->charbuf;
1258   int *charbuf_end = charbuf + coding->charbuf_used;
1259   unsigned char *dst = coding->destination + coding->produced;
1260   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1261   int produced_chars = 0;
1262   int c;
1263
1264   if (multibytep)
1265     {
1266       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1267
1268       while (charbuf < charbuf_end)
1269         {
1270           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1271
1272           ASSURE_DESTINATION (safe_room);
1273           c = *charbuf++;
1274           if (CHAR_BYTE8_P (c))
1275             {
1276               c = CHAR_TO_BYTE8 (c);
1277               EMIT_ONE_BYTE (c);
1278             }
1279           else
1280             {
1281               CHAR_STRING_ADVANCE (c, pend);
1282               for (p = str; p < pend; p++)
1283                 EMIT_ONE_BYTE (*p);
1284             }
1285         }
1286     }
1287   else
1288     {
1289       int safe_room = MAX_MULTIBYTE_LENGTH;
1290
1291       while (charbuf < charbuf_end)
1292         {
1293           ASSURE_DESTINATION (safe_room);
1294           c = *charbuf++;
1295           dst += CHAR_STRING (c, dst);
1296           produced_chars++;
1297         }
1298     }
1299   coding->result = CODING_RESULT_SUCCESS;
1300   coding->produced_char += produced_chars;
1301   coding->produced = dst - coding->destination;
1302   return 0;
1303 }
1304
1305
1306 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1307    Check if a text is encoded in one of UTF-16 based coding systems.
1308    If it is, return 1, else return 0.  */
1309
1310 #define UTF_16_HIGH_SURROGATE_P(val) \
1311   (((val) & 0xFC00) == 0xD800)
1312
1313 #define UTF_16_LOW_SURROGATE_P(val) \
1314   (((val) & 0xFC00) == 0xDC00)
1315
1316 #define UTF_16_INVALID_P(val)   \
1317   (((val) == 0xFFFE)            \
1318    || ((val) == 0xFFFF)         \
1319    || UTF_16_LOW_SURROGATE_P (val))
1320
1321
1322 static int
1323 detect_coding_utf_16 (coding, detect_info)
1324      struct coding_system *coding;
1325      struct coding_detection_info *detect_info;
1326 {
1327   const unsigned char *src = coding->source, *src_base = src;
1328   const unsigned char *src_end = coding->source + coding->src_bytes;
1329   int multibytep = coding->src_multibyte;
1330   int consumed_chars = 0;
1331   int c1, c2;
1332
1333   detect_info->checked |= CATEGORY_MASK_UTF_16;
1334
1335   if (coding->mode & CODING_MODE_LAST_BLOCK
1336       && (coding->src_bytes & 1))
1337     {
1338       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1339       return 0;
1340     }
1341   ONE_MORE_BYTE (c1);
1342   ONE_MORE_BYTE (c2);
1343
1344   if ((c1 == 0xFF) && (c2 == 0xFE))
1345     {
1346       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1347                              | CATEGORY_MASK_UTF_16_AUTO);
1348       detect_info->rejected |= CATEGORY_MASK_UTF_16_BE;
1349     }
1350   else if ((c1 == 0xFE) && (c2 == 0xFF))
1351     {
1352       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1353                              | CATEGORY_MASK_UTF_16_AUTO);
1354       detect_info->rejected |= CATEGORY_MASK_UTF_16_LE;
1355     }
1356  no_more_source:
1357   return 1;
1358 }
1359
1360 static void
1361 decode_coding_utf_16 (coding)
1362      struct coding_system *coding;
1363 {
1364   const unsigned char *src = coding->source + coding->consumed;
1365   const unsigned char *src_end = coding->source + coding->src_bytes;
1366   const unsigned char *src_base;
1367   int *charbuf = coding->charbuf;
1368   int *charbuf_end = charbuf + coding->charbuf_size;
1369   int consumed_chars = 0, consumed_chars_base;
1370   int multibytep = coding->src_multibyte;
1371   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1372   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1373   int surrogate = CODING_UTF_16_SURROGATE (coding);
1374   Lisp_Object attr, eol_type, charset_list;
1375
1376   CODING_GET_INFO (coding, attr, eol_type, charset_list);
1377
1378   if (bom == utf_16_with_bom)
1379     {
1380       int c, c1, c2;
1381
1382       src_base = src;
1383       ONE_MORE_BYTE (c1);
1384       ONE_MORE_BYTE (c2);
1385       c = (c1 << 8) | c2;
1386
1387       if (endian == utf_16_big_endian
1388           ? c != 0xFEFF : c != 0xFFFE)
1389         {
1390           /* The first two bytes are not BOM.  Treat them as bytes
1391              for a normal character.  */
1392           src = src_base;
1393           coding->errors++;
1394         }
1395       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1396     }
1397   else if (bom == utf_16_detect_bom)
1398     {
1399       /* We have already tried to detect BOM and failed in
1400          detect_coding.  */
1401       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1402     }
1403
1404   while (1)
1405     {
1406       int c, c1, c2;
1407
1408       src_base = src;
1409       consumed_chars_base = consumed_chars;
1410
1411       if (charbuf + 2 >= charbuf_end)
1412         break;
1413
1414       ONE_MORE_BYTE (c1);
1415       ONE_MORE_BYTE (c2);
1416       c = (endian == utf_16_big_endian
1417            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1418       if (surrogate)
1419         {
1420           if (! UTF_16_LOW_SURROGATE_P (c))
1421             {
1422               if (endian == utf_16_big_endian)
1423                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1424               else
1425                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1426               *charbuf++ = c1;
1427               *charbuf++ = c2;
1428               coding->errors++;
1429               if (UTF_16_HIGH_SURROGATE_P (c))
1430                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1431               else
1432                 *charbuf++ = c;
1433             }
1434           else
1435             {
1436               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1437               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1438               *charbuf++ = c;
1439             }
1440         }
1441       else
1442         {
1443           if (UTF_16_HIGH_SURROGATE_P (c))
1444             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1445           else
1446             *charbuf++ = c;
1447         }
1448     }
1449
1450  no_more_source:
1451   coding->consumed_char += consumed_chars_base;
1452   coding->consumed = src_base - coding->source;
1453   coding->charbuf_used = charbuf - coding->charbuf;
1454 }
1455
1456 static int
1457 encode_coding_utf_16 (coding)
1458      struct coding_system *coding;
1459 {
1460   int multibytep = coding->dst_multibyte;
1461   int *charbuf = coding->charbuf;
1462   int *charbuf_end = charbuf + coding->charbuf_used;
1463   unsigned char *dst = coding->destination + coding->produced;
1464   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1465   int safe_room = 8;
1466   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1467   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1468   int produced_chars = 0;
1469   Lisp_Object attrs, eol_type, charset_list;
1470   int c;
1471
1472   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1473
1474   if (bom != utf_16_without_bom)
1475     {
1476       ASSURE_DESTINATION (safe_room);
1477       if (big_endian)
1478         EMIT_TWO_BYTES (0xFE, 0xFF);
1479       else
1480         EMIT_TWO_BYTES (0xFF, 0xFE);
1481       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1482     }
1483
1484   while (charbuf < charbuf_end)
1485     {
1486       ASSURE_DESTINATION (safe_room);
1487       c = *charbuf++;
1488       if (c >= MAX_UNICODE_CHAR)
1489         c = coding->default_char;
1490
1491       if (c < 0x10000)
1492         {
1493           if (big_endian)
1494             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1495           else
1496             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1497         }
1498       else
1499         {
1500           int c1, c2;
1501
1502           c -= 0x10000;
1503           c1 = (c >> 10) + 0xD800;
1504           c2 = (c & 0x3FF) + 0xDC00;
1505           if (big_endian)
1506             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1507           else
1508             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1509         }
1510     }
1511   coding->result = CODING_RESULT_SUCCESS;
1512   coding->produced = dst - coding->destination;
1513   coding->produced_char += produced_chars;
1514   return 0;
1515 }
1516
1517 \f
1518 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1519
1520 /* Emacs' internal format for representation of multiple character
1521    sets is a kind of multi-byte encoding, i.e. characters are
1522    represented by variable-length sequences of one-byte codes.
1523
1524    ASCII characters and control characters (e.g. `tab', `newline') are
1525    represented by one-byte sequences which are their ASCII codes, in
1526    the range 0x00 through 0x7F.
1527
1528    8-bit characters of the range 0x80..0x9F are represented by
1529    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1530    code + 0x20).
1531
1532    8-bit characters of the range 0xA0..0xFF are represented by
1533    one-byte sequences which are their 8-bit code.
1534
1535    The other characters are represented by a sequence of `base
1536    leading-code', optional `extended leading-code', and one or two
1537    `position-code's.  The length of the sequence is determined by the
1538    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1539    whereas extended leading-code and position-code take the range 0xA0
1540    through 0xFF.  See `charset.h' for more details about leading-code
1541    and position-code.
1542
1543    --- CODE RANGE of Emacs' internal format ---
1544    character set        range
1545    -------------        -----
1546    ascii                0x00..0x7F
1547    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1548    eight-bit-graphic    0xA0..0xBF
1549    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1550    ---------------------------------------------
1551
1552    As this is the internal character representation, the format is
1553    usually not used externally (i.e. in a file or in a data sent to a
1554    process).  But, it is possible to have a text externally in this
1555    format (i.e. by encoding by the coding system `emacs-mule').
1556
1557    In that case, a sequence of one-byte codes has a slightly different
1558    form.
1559
1560    At first, all characters in eight-bit-control are represented by
1561    one-byte sequences which are their 8-bit code.
1562
1563    Next, character composition data are represented by the byte
1564    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1565    where,
1566         METHOD is 0xF0 plus one of composition method (enum
1567         composition_method),
1568
1569         BYTES is 0xA0 plus a byte length of this composition data,
1570
1571         CHARS is 0x20 plus a number of characters composed by this
1572         data,
1573
1574         COMPONENTs are characters of multibye form or composition
1575         rules encoded by two-byte of ASCII codes.
1576
1577    In addition, for backward compatibility, the following formats are
1578    also recognized as composition data on decoding.
1579
1580    0x80 MSEQ ...
1581    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1582
1583    Here,
1584         MSEQ is a multibyte form but in these special format:
1585           ASCII: 0xA0 ASCII_CODE+0x80,
1586           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1587         RULE is a one byte code of the range 0xA0..0xF0 that
1588         represents a composition rule.
1589   */
1590
1591 char emacs_mule_bytes[256];
1592
1593 int
1594 emacs_mule_char (coding, src, nbytes, nchars, id)
1595      struct coding_system *coding;
1596      unsigned char *src;
1597      int *nbytes, *nchars, *id;
1598 {
1599   const unsigned char *src_end = coding->source + coding->src_bytes;
1600   const unsigned char *src_base = src;
1601   int multibytep = coding->src_multibyte;
1602   struct charset *charset;
1603   unsigned code;
1604   int c;
1605   int consumed_chars = 0;
1606
1607   ONE_MORE_BYTE (c);
1608   switch (emacs_mule_bytes[c])
1609     {
1610     case 2:
1611       if (! (charset = emacs_mule_charset[c]))
1612         goto invalid_code;
1613       ONE_MORE_BYTE (c);
1614       code = c & 0x7F;
1615       break;
1616
1617     case 3:
1618       if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1619           || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1620         {
1621           ONE_MORE_BYTE (c);
1622           if (! (charset = emacs_mule_charset[c]))
1623             goto invalid_code;
1624           ONE_MORE_BYTE (c);
1625           code = c & 0x7F;
1626         }
1627       else
1628         {
1629           if (! (charset = emacs_mule_charset[c]))
1630             goto invalid_code;
1631           ONE_MORE_BYTE (c);
1632           code = (c & 0x7F) << 8;
1633           ONE_MORE_BYTE (c);
1634           code |= c & 0x7F;
1635         }
1636       break;
1637
1638     case 4:
1639       ONE_MORE_BYTE (c);
1640       if (! (charset = emacs_mule_charset[c]))
1641         goto invalid_code;
1642       ONE_MORE_BYTE (c);
1643       code = (c & 0x7F) << 8;
1644       ONE_MORE_BYTE (c);
1645       code |= c & 0x7F;
1646       break;
1647
1648     case 1:
1649       code = c;
1650       charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1651                                  ? charset_ascii : charset_eight_bit);
1652       break;
1653
1654     default:
1655       abort ();
1656     }
1657   c = DECODE_CHAR (charset, code);
1658   if (c < 0)
1659     goto invalid_code;
1660   *nbytes = src - src_base;
1661   *nchars = consumed_chars;
1662   if (id)
1663     *id = charset->id;
1664   return c;
1665
1666  no_more_source:
1667   return -2;
1668
1669  invalid_code:
1670   return -1;
1671 }
1672
1673
1674 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1675    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1676    else return 0.  */
1677
1678 static int
1679 detect_coding_emacs_mule (coding, detect_info)
1680      struct coding_system *coding;
1681      struct coding_detection_info *detect_info;
1682 {
1683   const unsigned char *src = coding->source, *src_base = src;
1684   const unsigned char *src_end = coding->source + coding->src_bytes;
1685   int multibytep = coding->src_multibyte;
1686   int consumed_chars = 0;
1687   int c;
1688   int found = 0;
1689   int incomplete;
1690
1691   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1692   /* A coding system of this category is always ASCII compatible.  */
1693   src += coding->head_ascii;
1694
1695   while (1)
1696     {
1697       incomplete = 0;
1698       ONE_MORE_BYTE (c);
1699       incomplete = 1;
1700
1701       if (c == 0x80)
1702         {
1703           /* Perhaps the start of composite character.  We simple skip
1704              it because analyzing it is too heavy for detecting.  But,
1705              at least, we check that the composite character
1706              constitues of more than 4 bytes.  */
1707           const unsigned char *src_base;
1708
1709         repeat:
1710           src_base = src;
1711           do
1712             {
1713               ONE_MORE_BYTE (c);
1714             }
1715           while (c >= 0xA0);
1716
1717           if (src - src_base <= 4)
1718             break;
1719           found = CATEGORY_MASK_EMACS_MULE;
1720           if (c == 0x80)
1721             goto repeat;
1722         }
1723
1724       if (c < 0x80)
1725         {
1726           if (c < 0x20
1727               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1728             break;
1729         }
1730       else
1731         {
1732           const unsigned char *src_base = src - 1;
1733
1734           do
1735             {
1736               ONE_MORE_BYTE (c);
1737             }
1738           while (c >= 0xA0);
1739           if (src - src_base != emacs_mule_bytes[*src_base])
1740             break;
1741           found = CATEGORY_MASK_EMACS_MULE;
1742         }
1743     }
1744   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1745   return 0;
1746
1747  no_more_source:
1748   if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
1749     {
1750       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1751       return 0;
1752     }
1753   detect_info->found |= found;
1754   return 1;
1755 }
1756
1757
1758 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1759
1760 /* Decode a character represented as a component of composition
1761    sequence of Emacs 20/21 style at SRC.  Set C to that character and
1762    update SRC to the head of next character (or an encoded composition
1763    rule).  If SRC doesn't points a composition component, set C to -1.
1764    If SRC points an invalid byte sequence, global exit by a return
1765    value 0.  */
1766
1767 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                 \
1768   if (1)                                                        \
1769     {                                                           \
1770       int c;                                                    \
1771       int nbytes, nchars;                                       \
1772                                                                 \
1773       if (src == src_end)                                       \
1774         break;                                                  \
1775       c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
1776       if (c < 0)                                                \
1777         {                                                       \
1778           if (c == -2)                                          \
1779             break;                                              \
1780           goto invalid_code;                                    \
1781         }                                                       \
1782       *buf++ = c;                                               \
1783       src += nbytes;                                            \
1784       consumed_chars += nchars;                                 \
1785     }                                                           \
1786   else
1787
1788
1789 /* Decode a composition rule represented as a component of composition
1790    sequence of Emacs 20 style at SRC.  Store the decoded rule in *BUF,
1791    and increment BUF.  If SRC points an invalid byte sequence, set C
1792    to -1.  */
1793
1794 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
1795   do {                                                  \
1796     int c, gref, nref;                                  \
1797                                                         \
1798     if (src >= src_end)                                 \
1799       goto invalid_code;                                \
1800     ONE_MORE_BYTE_NO_CHECK (c);                         \
1801     c -= 0x20;                                          \
1802     if (c < 0 || c >= 81)                               \
1803       goto invalid_code;                                \
1804                                                         \
1805     gref = c / 9, nref = c % 9;                         \
1806     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1807   } while (0)
1808
1809
1810 /* Decode a composition rule represented as a component of composition
1811    sequence of Emacs 21 style at SRC.  Store the decoded rule in *BUF,
1812    and increment BUF.  If SRC points an invalid byte sequence, set C
1813    to -1.  */
1814
1815 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
1816   do {                                                  \
1817     int gref, nref;                                     \
1818                                                         \
1819     if (src + 1>= src_end)                              \
1820       goto invalid_code;                                \
1821     ONE_MORE_BYTE_NO_CHECK (gref);                      \
1822     gref -= 0x20;                                       \
1823     ONE_MORE_BYTE_NO_CHECK (nref);                      \
1824     nref -= 0x20;                                       \
1825     if (gref < 0 || gref >= 81                          \
1826         || nref < 0 || nref >= 81)                      \
1827       goto invalid_code;                                \
1828     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1829   } while (0)
1830
1831
1832 #define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
1833   do {                                                                  \
1834     /* Emacs 21 style format.  The first three bytes at SRC are         \
1835        (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is  \
1836        the byte length of this composition information, CHARS is the    \
1837        number of characters composed by this composition.  */           \
1838     enum composition_method method = c - 0xF2;                          \
1839     int *charbuf_base = charbuf;                                        \
1840     int from, to;                                                       \
1841     int consumed_chars_limit;                                           \
1842     int nbytes, nchars;                                                 \
1843                                                                         \
1844     ONE_MORE_BYTE (c);                                                  \
1845     nbytes = c - 0xA0;                                                  \
1846     if (nbytes < 3)                                                     \
1847       goto invalid_code;                                                \
1848     ONE_MORE_BYTE (c);                                                  \
1849     nchars = c - 0xA0;                                                  \
1850     from = coding->produced + char_offset;                              \
1851     to = from + nchars;                                                 \
1852     ADD_COMPOSITION_DATA (charbuf, from, to, method);                   \
1853     consumed_chars_limit = consumed_chars_base + nbytes;                \
1854     if (method != COMPOSITION_RELATIVE)                                 \
1855       {                                                                 \
1856         int i = 0;                                                      \
1857         while (consumed_chars < consumed_chars_limit)                   \
1858           {                                                             \
1859             if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
1860               DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
1861             else                                                        \
1862               DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
1863             i++;                                                        \
1864           }                                                             \
1865         if (consumed_chars < consumed_chars_limit)                      \
1866           goto invalid_code;                                            \
1867         charbuf_base[0] -= i;                                           \
1868       }                                                                 \
1869   } while (0)
1870
1871
1872 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)            \
1873   do {                                                          \
1874     /* Emacs 20 style format for relative composition.  */      \
1875     /* Store multibyte form of characters to be composed.  */   \
1876     enum composition_method method = COMPOSITION_RELATIVE;      \
1877     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
1878     int *buf = components;                                      \
1879     int i, j;                                                   \
1880     int from, to;                                               \
1881                                                                 \
1882     src = src_base;                                             \
1883     ONE_MORE_BYTE (c);          /* skip 0x80 */                 \
1884     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
1885       DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                 \
1886     if (i < 2)                                                  \
1887       goto invalid_code;                                        \
1888     from = coding->produced_char + char_offset;                 \
1889     to = from + i;                                              \
1890     ADD_COMPOSITION_DATA (charbuf, from, to, method);           \
1891     for (j = 0; j < i; j++)                                     \
1892       *charbuf++ = components[j];                               \
1893   } while (0)
1894
1895
1896 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
1897   do {                                                          \
1898     /* Emacs 20 style format for rule-base composition.  */     \
1899     /* Store multibyte form of characters to be composed.  */   \
1900     enum composition_method method = COMPOSITION_WITH_RULE;     \
1901     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
1902     int *buf = components;                                      \
1903     int i, j;                                                   \
1904     int from, to;                                               \
1905                                                                 \
1906     DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
1907     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
1908       {                                                         \
1909         DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
1910         DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
1911       }                                                         \
1912     if (i < 1 || (buf - components) % 2 == 0)                   \
1913       goto invalid_code;                                        \
1914     if (charbuf + i + (i / 2) + 1 < charbuf_end)                \
1915       goto no_more_source;                                      \
1916     from = coding->produced_char + char_offset;                 \
1917     to = from + i;                                              \
1918     ADD_COMPOSITION_DATA (buf, from, to, method);               \
1919     for (j = 0; j < i; j++)                                     \
1920       *charbuf++ = components[j];                               \
1921     for (j = 0; j < i; j += 2)                                  \
1922       *charbuf++ = components[j];                               \
1923   } while (0)
1924
1925
1926 static void
1927 decode_coding_emacs_mule (coding)
1928      struct coding_system *coding;
1929 {
1930   const unsigned char *src = coding->source + coding->consumed;
1931   const unsigned char *src_end = coding->source + coding->src_bytes;
1932   const unsigned char *src_base;
1933   int *charbuf = coding->charbuf;
1934   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
1935   int consumed_chars = 0, consumed_chars_base;
1936   int multibytep = coding->src_multibyte;
1937   Lisp_Object attrs, eol_type, charset_list;
1938   int char_offset = coding->produced_char;
1939   int last_offset = char_offset;
1940   int last_id = charset_ascii;
1941
1942   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1943
1944   while (1)
1945     {
1946       int c;
1947
1948       src_base = src;
1949       consumed_chars_base = consumed_chars;
1950
1951       if (charbuf >= charbuf_end)
1952         break;
1953
1954       ONE_MORE_BYTE (c);
1955
1956       if (c < 0x80)
1957         {
1958           if (c == '\r')
1959             {
1960               if (EQ (eol_type, Qdos))
1961                 {
1962                   if (src == src_end)
1963                     {
1964                       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
1965                       goto no_more_source;
1966                     }
1967                   if (*src == '\n')
1968                     ONE_MORE_BYTE (c);
1969                 }
1970               else if (EQ (eol_type, Qmac))
1971                 c = '\n';
1972             }
1973           *charbuf++ = c;
1974           char_offset++;
1975         }
1976       else if (c == 0x80)
1977         {
1978           ONE_MORE_BYTE (c);
1979           if (c - 0xF2 >= COMPOSITION_RELATIVE
1980               && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
1981             DECODE_EMACS_MULE_21_COMPOSITION (c);
1982           else if (c < 0xC0)
1983             DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
1984           else if (c == 0xFF)
1985             DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
1986           else
1987             goto invalid_code;
1988         }
1989       else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
1990         {
1991           int nbytes, nchars;
1992           int id;
1993
1994           src = src_base;
1995           consumed_chars = consumed_chars_base;
1996           c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
1997           if (c < 0)
1998             {
1999               if (c == -2)
2000                 break;
2001               goto invalid_code;
2002             }
2003           if (last_id != id)
2004             {
2005               if (last_id != charset_ascii)
2006                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
2007               last_id = id;
2008               last_offset = char_offset;
2009             }
2010           *charbuf++ = c;
2011           src += nbytes;
2012           consumed_chars += nchars;
2013           char_offset++;
2014         }
2015       continue;
2016
2017     invalid_code:
2018       src = src_base;
2019       consumed_chars = consumed_chars_base;
2020       ONE_MORE_BYTE (c);
2021       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2022       char_offset++;
2023       coding->errors++;
2024     }
2025
2026  no_more_source:
2027   if (last_id != charset_ascii)
2028     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
2029   coding->consumed_char += consumed_chars_base;
2030   coding->consumed = src_base - coding->source;
2031   coding->charbuf_used = charbuf - coding->charbuf;
2032 }
2033
2034
2035 #define EMACS_MULE_LEADING_CODES(id, codes)     \
2036   do {                                          \
2037     if (id < 0xA0)                              \
2038       codes[0] = id, codes[1] = 0;              \
2039     else if (id < 0xE0)                         \
2040       codes[0] = 0x9A, codes[1] = id;           \
2041     else if (id < 0xF0)                         \
2042       codes[0] = 0x9B, codes[1] = id;           \
2043     else if (id < 0xF5)                         \
2044       codes[0] = 0x9C, codes[1] = id;           \
2045     else                                        \
2046       codes[0] = 0x9D, codes[1] = id;           \
2047   } while (0);
2048
2049
2050 static int
2051 encode_coding_emacs_mule (coding)
2052      struct coding_system *coding;
2053 {
2054   int multibytep = coding->dst_multibyte;
2055   int *charbuf = coding->charbuf;
2056   int *charbuf_end = charbuf + coding->charbuf_used;
2057   unsigned char *dst = coding->destination + coding->produced;
2058   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2059   int safe_room = 8;
2060   int produced_chars = 0;
2061   Lisp_Object attrs, eol_type, charset_list;
2062   int c;
2063   int preferred_charset_id = -1;
2064
2065   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2066
2067   while (charbuf < charbuf_end)
2068     {
2069       ASSURE_DESTINATION (safe_room);
2070       c = *charbuf++;
2071
2072       if (c < 0)
2073         {
2074           /* Handle an annotation.  */
2075           switch (*charbuf)
2076             {
2077             case CODING_ANNOTATE_COMPOSITION_MASK:
2078               /* Not yet implemented.  */
2079               break;
2080             case CODING_ANNOTATE_CHARSET_MASK:
2081               preferred_charset_id = charbuf[3];
2082               if (preferred_charset_id >= 0
2083                   && NILP (Fmemq (make_number (preferred_charset_id),
2084                                   charset_list)))
2085                 preferred_charset_id = -1;
2086               break;
2087             default:
2088               abort ();
2089             }
2090           charbuf += -c - 1;
2091           continue;
2092         }
2093
2094       if (ASCII_CHAR_P (c))
2095         EMIT_ONE_ASCII_BYTE (c);
2096       else if (CHAR_BYTE8_P (c))
2097         {
2098           c = CHAR_TO_BYTE8 (c);
2099           EMIT_ONE_BYTE (c);
2100         }
2101       else
2102         {
2103           struct charset *charset;
2104           unsigned code;
2105           int dimension;
2106           int emacs_mule_id;
2107           unsigned char leading_codes[2];
2108
2109           if (preferred_charset_id >= 0)
2110             {
2111               charset = CHARSET_FROM_ID (preferred_charset_id);
2112               if (! CHAR_CHARSET_P (c, charset))
2113                 charset = char_charset (c, charset_list, NULL);
2114             }
2115           else
2116             charset = char_charset (c, charset_list, &code);
2117           if (! charset)
2118             {
2119               c = coding->default_char;
2120               if (ASCII_CHAR_P (c))
2121                 {
2122                   EMIT_ONE_ASCII_BYTE (c);
2123                   continue;
2124                 }
2125               charset = char_charset (c, charset_list, &code);
2126             }
2127           dimension = CHARSET_DIMENSION (charset);
2128           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2129           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2130           EMIT_ONE_BYTE (leading_codes[0]);
2131           if (leading_codes[1])
2132             EMIT_ONE_BYTE (leading_codes[1]);
2133           if (dimension == 1)
2134             EMIT_ONE_BYTE (code);
2135           else
2136             {
2137               EMIT_ONE_BYTE (code >> 8);
2138               EMIT_ONE_BYTE (code & 0xFF);
2139             }
2140         }
2141     }
2142   coding->result = CODING_RESULT_SUCCESS;
2143   coding->produced_char += produced_chars;
2144   coding->produced = dst - coding->destination;
2145   return 0;
2146 }
2147
2148 \f
2149 /*** 7. ISO2022 handlers ***/
2150
2151 /* The following note describes the coding system ISO2022 briefly.
2152    Since the intention of this note is to help understand the
2153    functions in this file, some parts are NOT ACCURATE or are OVERLY
2154    SIMPLIFIED.  For thorough understanding, please refer to the
2155    original document of ISO2022.  This is equivalent to the standard
2156    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2157
2158    ISO2022 provides many mechanisms to encode several character sets
2159    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2160    is encoded using bytes less than 128.  This may make the encoded
2161    text a little bit longer, but the text passes more easily through
2162    several types of gateway, some of which strip off the MSB (Most
2163    Significant Bit).
2164
2165    There are two kinds of character sets: control character sets and
2166    graphic character sets.  The former contain control characters such
2167    as `newline' and `escape' to provide control functions (control
2168    functions are also provided by escape sequences).  The latter
2169    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2170    two control character sets and many graphic character sets.
2171
2172    Graphic character sets are classified into one of the following
2173    four classes, according to the number of bytes (DIMENSION) and
2174    number of characters in one dimension (CHARS) of the set:
2175    - DIMENSION1_CHARS94
2176    - DIMENSION1_CHARS96
2177    - DIMENSION2_CHARS94
2178    - DIMENSION2_CHARS96
2179
2180    In addition, each character set is assigned an identification tag,
2181    unique for each set, called the "final character" (denoted as <F>
2182    hereafter).  The <F> of each character set is decided by ECMA(*)
2183    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2184    (0x30..0x3F are for private use only).
2185
2186    Note (*): ECMA = European Computer Manufacturers Association
2187
2188    Here are examples of graphic character sets [NAME(<F>)]:
2189         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2190         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2191         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2192         o DIMENSION2_CHARS96 -- none for the moment
2193
2194    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2195         C0 [0x00..0x1F] -- control character plane 0
2196         GL [0x20..0x7F] -- graphic character plane 0
2197         C1 [0x80..0x9F] -- control character plane 1
2198         GR [0xA0..0xFF] -- graphic character plane 1
2199
2200    A control character set is directly designated and invoked to C0 or
2201    C1 by an escape sequence.  The most common case is that:
2202    - ISO646's  control character set is designated/invoked to C0, and
2203    - ISO6429's control character set is designated/invoked to C1,
2204    and usually these designations/invocations are omitted in encoded
2205    text.  In a 7-bit environment, only C0 can be used, and a control
2206    character for C1 is encoded by an appropriate escape sequence to
2207    fit into the environment.  All control characters for C1 are
2208    defined to have corresponding escape sequences.
2209
2210    A graphic character set is at first designated to one of four
2211    graphic registers (G0 through G3), then these graphic registers are
2212    invoked to GL or GR.  These designations and invocations can be
2213    done independently.  The most common case is that G0 is invoked to
2214    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2215    these invocations and designations are omitted in encoded text.
2216    In a 7-bit environment, only GL can be used.
2217
2218    When a graphic character set of CHARS94 is invoked to GL, codes
2219    0x20 and 0x7F of the GL area work as control characters SPACE and
2220    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2221    be used.
2222
2223    There are two ways of invocation: locking-shift and single-shift.
2224    With locking-shift, the invocation lasts until the next different
2225    invocation, whereas with single-shift, the invocation affects the
2226    following character only and doesn't affect the locking-shift
2227    state.  Invocations are done by the following control characters or
2228    escape sequences:
2229
2230    ----------------------------------------------------------------------
2231    abbrev  function                  cntrl escape seq   description
2232    ----------------------------------------------------------------------
2233    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2234    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2235    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2236    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2237    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2238    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2239    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2240    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2241    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2242    ----------------------------------------------------------------------
2243    (*) These are not used by any known coding system.
2244
2245    Control characters for these functions are defined by macros
2246    ISO_CODE_XXX in `coding.h'.
2247
2248    Designations are done by the following escape sequences:
2249    ----------------------------------------------------------------------
2250    escape sequence      description
2251    ----------------------------------------------------------------------
2252    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2253    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2254    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2255    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2256    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2257    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2258    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2259    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2260    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2261    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2262    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2263    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2264    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2265    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2266    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2267    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2268    ----------------------------------------------------------------------
2269
2270    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2271    of dimension 1, chars 94, and final character <F>, etc...
2272
2273    Note (*): Although these designations are not allowed in ISO2022,
2274    Emacs accepts them on decoding, and produces them on encoding
2275    CHARS96 character sets in a coding system which is characterized as
2276    7-bit environment, non-locking-shift, and non-single-shift.
2277
2278    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2279    '(' must be omitted.  We refer to this as "short-form" hereafter.
2280
2281    Now you may notice that there are a lot of ways of encoding the
2282    same multilingual text in ISO2022.  Actually, there exist many
2283    coding systems such as Compound Text (used in X11's inter client
2284    communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2285    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2286    localized platforms), and all of these are variants of ISO2022.
2287
2288    In addition to the above, Emacs handles two more kinds of escape
2289    sequences: ISO6429's direction specification and Emacs' private
2290    sequence for specifying character composition.
2291
2292    ISO6429's direction specification takes the following form:
2293         o CSI ']'      -- end of the current direction
2294         o CSI '0' ']'  -- end of the current direction
2295         o CSI '1' ']'  -- start of left-to-right text
2296         o CSI '2' ']'  -- start of right-to-left text
2297    The control character CSI (0x9B: control sequence introducer) is
2298    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2299
2300    Character composition specification takes the following form:
2301         o ESC '0' -- start relative composition
2302         o ESC '1' -- end composition
2303         o ESC '2' -- start rule-base composition (*)
2304         o ESC '3' -- start relative composition with alternate chars  (**)
2305         o ESC '4' -- start rule-base composition with alternate chars  (**)
2306   Since these are not standard escape sequences of any ISO standard,
2307   the use of them with these meanings is restricted to Emacs only.
2308
2309   (*) This form is used only in Emacs 20.7 and older versions,
2310   but newer versions can safely decode it.
2311   (**) This form is used only in Emacs 21.1 and newer versions,
2312   and older versions can't decode it.
2313
2314   Here's a list of example usages of these composition escape
2315   sequences (categorized by `enum composition_method').
2316
2317   COMPOSITION_RELATIVE:
2318         ESC 0 CHAR [ CHAR ] ESC 1
2319   COMPOSITION_WITH_RULE:
2320         ESC 2 CHAR [ RULE CHAR ] ESC 1
2321   COMPOSITION_WITH_ALTCHARS:
2322         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2323   COMPOSITION_WITH_RULE_ALTCHARS:
2324         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2325
2326 enum iso_code_class_type iso_code_class[256];
2327
2328 #define SAFE_CHARSET_P(coding, id)      \
2329   ((id) <= (coding)->max_charset_id     \
2330    && (coding)->safe_charsets[id] >= 0)
2331
2332
2333 #define SHIFT_OUT_OK(category)  \
2334   (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2335
2336 static void
2337 setup_iso_safe_charsets (attrs)
2338      Lisp_Object attrs;
2339 {
2340   Lisp_Object charset_list, safe_charsets;
2341   Lisp_Object request;
2342   Lisp_Object reg_usage;
2343   Lisp_Object tail;
2344   int reg94, reg96;
2345   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2346   int max_charset_id;
2347
2348   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2349   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2350       && ! EQ (charset_list, Viso_2022_charset_list))
2351     {
2352       CODING_ATTR_CHARSET_LIST (attrs)
2353         = charset_list = Viso_2022_charset_list;
2354       ASET (attrs, coding_attr_safe_charsets, Qnil);
2355     }
2356
2357   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2358     return;
2359
2360   max_charset_id = 0;
2361   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2362     {
2363       int id = XINT (XCAR (tail));
2364       if (max_charset_id < id)
2365         max_charset_id = id;
2366     }
2367
2368   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2369                                 make_number (255));
2370   request = AREF (attrs, coding_attr_iso_request);
2371   reg_usage = AREF (attrs, coding_attr_iso_usage);
2372   reg94 = XINT (XCAR (reg_usage));
2373   reg96 = XINT (XCDR (reg_usage));
2374
2375   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2376     {
2377       Lisp_Object id;
2378       Lisp_Object reg;
2379       struct charset *charset;
2380
2381       id = XCAR (tail);
2382       charset = CHARSET_FROM_ID (XINT (id));
2383       reg = Fcdr (Fassq (id, request));
2384       if (! NILP (reg))
2385         SSET (safe_charsets, XINT (id), XINT (reg));
2386       else if (charset->iso_chars_96)
2387         {
2388           if (reg96 < 4)
2389             SSET (safe_charsets, XINT (id), reg96);
2390         }
2391       else
2392         {
2393           if (reg94 < 4)
2394             SSET (safe_charsets, XINT (id), reg94);
2395         }
2396     }
2397   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2398 }
2399
2400
2401 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2402    Check if a text is encoded in one of ISO-2022 based codig systems.
2403    If it is, return 1, else return 0.  */
2404
2405 static int
2406 detect_coding_iso_2022 (coding, detect_info)
2407      struct coding_system *coding;
2408      struct coding_detection_info *detect_info;
2409 {
2410   const unsigned char *src = coding->source, *src_base = src;
2411   const unsigned char *src_end = coding->source + coding->src_bytes;
2412   int multibytep = coding->src_multibyte;
2413   int single_shifting = 0;
2414   int id;
2415   int c, c1;
2416   int consumed_chars = 0;
2417   int i;
2418   int rejected = 0;
2419   int found = 0;
2420
2421   detect_info->checked |= CATEGORY_MASK_ISO;
2422
2423   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2424     {
2425       struct coding_system *this = &(coding_categories[i]);
2426       Lisp_Object attrs, val;
2427
2428       attrs = CODING_ID_ATTRS (this->id);
2429       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2430           && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2431         setup_iso_safe_charsets (attrs);
2432       val = CODING_ATTR_SAFE_CHARSETS (attrs);
2433       this->max_charset_id = SCHARS (val) - 1;
2434       this->safe_charsets = (char *) SDATA (val);
2435     }
2436
2437   /* A coding system of this category is always ASCII compatible.  */
2438   src += coding->head_ascii;
2439
2440   while (rejected != CATEGORY_MASK_ISO)
2441     {
2442       ONE_MORE_BYTE (c);
2443       switch (c)
2444         {
2445         case ISO_CODE_ESC:
2446           if (inhibit_iso_escape_detection)
2447             break;
2448           single_shifting = 0;
2449           ONE_MORE_BYTE (c);
2450           if (c >= '(' && c <= '/')
2451             {
2452               /* Designation sequence for a charset of dimension 1.  */
2453               ONE_MORE_BYTE (c1);
2454               if (c1 < ' ' || c1 >= 0x80
2455                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2456                 /* Invalid designation sequence.  Just ignore.  */
2457                 break;
2458             }
2459           else if (c == '$')
2460             {
2461               /* Designation sequence for a charset of dimension 2.  */
2462               ONE_MORE_BYTE (c);
2463               if (c >= '@' && c <= 'B')
2464                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
2465                 id = iso_charset_table[1][0][c];
2466               else if (c >= '(' && c <= '/')
2467                 {
2468                   ONE_MORE_BYTE (c1);
2469                   if (c1 < ' ' || c1 >= 0x80
2470                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2471                     /* Invalid designation sequence.  Just ignore.  */
2472                     break;
2473                 }
2474               else
2475                 /* Invalid designation sequence.  Just ignore it.  */
2476                 break;
2477             }
2478           else if (c == 'N' || c == 'O')
2479             {
2480               /* ESC <Fe> for SS2 or SS3.  */
2481               single_shifting = 1;
2482               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2483               break;
2484             }
2485           else if (c >= '0' && c <= '4')
2486             {
2487               /* ESC <Fp> for start/end composition.  */
2488               found |= CATEGORY_MASK_ISO;
2489               break;
2490             }
2491           else
2492             {
2493               /* Invalid escape sequence.  Just ignore it.  */
2494               break;
2495             }
2496
2497           /* We found a valid designation sequence for CHARSET.  */
2498           rejected |= CATEGORY_MASK_ISO_8BIT;
2499           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2500                               id))
2501             found |= CATEGORY_MASK_ISO_7;
2502           else
2503             rejected |= CATEGORY_MASK_ISO_7;
2504           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2505                               id))
2506             found |= CATEGORY_MASK_ISO_7_TIGHT;
2507           else
2508             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
2509           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2510                               id))
2511             found |= CATEGORY_MASK_ISO_7_ELSE;
2512           else
2513             rejected |= CATEGORY_MASK_ISO_7_ELSE;
2514           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2515                               id))
2516             found |= CATEGORY_MASK_ISO_8_ELSE;
2517           else
2518             rejected |= CATEGORY_MASK_ISO_8_ELSE;
2519           break;
2520
2521         case ISO_CODE_SO:
2522         case ISO_CODE_SI:
2523           /* Locking shift out/in.  */
2524           if (inhibit_iso_escape_detection)
2525             break;
2526           single_shifting = 0;
2527           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2528           found |= CATEGORY_MASK_ISO_ELSE;
2529           break;
2530
2531         case ISO_CODE_CSI:
2532           /* Control sequence introducer.  */
2533           single_shifting = 0;
2534           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2535           found |= CATEGORY_MASK_ISO_8_ELSE;
2536           goto check_extra_latin;
2537
2538
2539         case ISO_CODE_SS2:
2540         case ISO_CODE_SS3:
2541           /* Single shift.   */
2542           if (inhibit_iso_escape_detection)
2543             break;
2544           single_shifting = 1;
2545           rejected |= CATEGORY_MASK_ISO_7BIT;
2546           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2547               & CODING_ISO_FLAG_SINGLE_SHIFT)
2548             found |= CATEGORY_MASK_ISO_8_1;
2549           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2550               & CODING_ISO_FLAG_SINGLE_SHIFT)
2551             found |= CATEGORY_MASK_ISO_8_2;
2552           goto check_extra_latin;
2553
2554         default:
2555           if (c < 0x80)
2556             {
2557               single_shifting = 0;
2558               break;
2559             }
2560           if (c >= 0xA0)
2561             {
2562               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2563               found |= CATEGORY_MASK_ISO_8_1;
2564               /* Check the length of succeeding codes of the range
2565                  0xA0..0FF.  If the byte length is even, we include
2566                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
2567                  only when we are not single shifting.  */
2568               if (! single_shifting
2569                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
2570                 {
2571                   int i = 1;
2572                   while (src < src_end)
2573                     {
2574                       ONE_MORE_BYTE (c);
2575                       if (c < 0xA0)
2576                         break;
2577                       i++;
2578                     }
2579
2580                   if (i & 1 && src < src_end)
2581                     rejected |= CATEGORY_MASK_ISO_8_2;
2582                   else
2583                     found |= CATEGORY_MASK_ISO_8_2;
2584                 }
2585               break;
2586             }
2587         check_extra_latin:
2588           single_shifting = 0;
2589           if (! VECTORP (Vlatin_extra_code_table)
2590               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2591             {
2592               rejected = CATEGORY_MASK_ISO;
2593               break;
2594             }
2595           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2596               & CODING_ISO_FLAG_LATIN_EXTRA)
2597             found |= CATEGORY_MASK_ISO_8_1;
2598           else
2599             rejected |= CATEGORY_MASK_ISO_8_1;
2600           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2601               & CODING_ISO_FLAG_LATIN_EXTRA)
2602             found |= CATEGORY_MASK_ISO_8_2;
2603           else
2604             rejected |= CATEGORY_MASK_ISO_8_2;
2605         }
2606     }
2607   detect_info->rejected |= CATEGORY_MASK_ISO;
2608   return 0;
2609
2610  no_more_source:
2611   detect_info->rejected |= rejected;
2612   detect_info->found |= (found & ~rejected);
2613   return 1;
2614 }
2615
2616
2617 /* Set designation state into CODING.  */
2618 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
2619   do {                                                                  \
2620     int id, prev;                                                       \
2621                                                                         \
2622     if (final < '0' || final >= 128                                     \
2623         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
2624         || !SAFE_CHARSET_P (coding, id))                                \
2625       {                                                                 \
2626         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
2627         goto invalid_code;                                              \
2628       }                                                                 \
2629     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
2630     if (id == charset_jisx0201_roman)                                   \
2631       {                                                                 \
2632         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
2633           id = charset_ascii;                                           \
2634       }                                                                 \
2635     else if (id == charset_jisx0208_1978)                               \
2636       {                                                                 \
2637         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
2638           id = charset_jisx0208;                                        \
2639       }                                                                 \
2640     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
2641     /* If there was an invalid designation to REG previously, and this  \
2642        designation is ASCII to REG, we should keep this designation     \
2643        sequence.  */                                                    \
2644     if (prev == -2 && id == charset_ascii)                              \
2645       goto invalid_code;                                                \
2646   } while (0)
2647
2648
2649 #define MAYBE_FINISH_COMPOSITION()                              \
2650   do {                                                          \
2651     int i;                                                      \
2652     if (composition_state == COMPOSING_NO)                      \
2653       break;                                                    \
2654     /* It is assured that we have enough room for producing     \
2655        characters stored in the table `components'.  */         \
2656     if (charbuf + component_idx > charbuf_end)                  \
2657       goto no_more_source;                                      \
2658     composition_state = COMPOSING_NO;                           \
2659     if (method == COMPOSITION_RELATIVE                          \
2660         || method == COMPOSITION_WITH_ALTCHARS)                 \
2661       {                                                         \
2662         for (i = 0; i < component_idx; i++)                     \
2663           *charbuf++ = components[i];                           \
2664         char_offset += component_idx;                           \
2665       }                                                         \
2666     else                                                        \
2667       {                                                         \
2668         for (i = 0; i < component_idx; i += 2)                  \
2669           *charbuf++ = components[i];                           \
2670         char_offset += (component_idx / 2) + 1;                 \
2671       }                                                         \
2672   } while (0)
2673
2674
2675 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2676    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2677    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2678    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2679    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2680   */
2681
2682 #define DECODE_COMPOSITION_START(c1)                                    \
2683   do {                                                                  \
2684     if (c1 == '0'                                                       \
2685         && composition_state == COMPOSING_COMPONENT_RULE)               \
2686       {                                                                 \
2687         component_len = component_idx;                                  \
2688         composition_state = COMPOSING_CHAR;                             \
2689       }                                                                 \
2690     else                                                                \
2691       {                                                                 \
2692         const unsigned char *p;                                         \
2693                                                                         \
2694         MAYBE_FINISH_COMPOSITION ();                                    \
2695         if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
2696           goto no_more_source;                                          \
2697         for (p = src; p < src_end - 1; p++)                             \
2698           if (*p == ISO_CODE_ESC && p[1] == '1')                        \
2699             break;                                                      \
2700         if (p == src_end - 1)                                           \
2701           {                                                             \
2702             if (coding->mode & CODING_MODE_LAST_BLOCK)                  \
2703               goto invalid_code;                                        \
2704             goto no_more_source;                                        \
2705           }                                                             \
2706                                                                         \
2707         /* This is surely the start of a composition.  */               \
2708         method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
2709                   : c1 == '2' ? COMPOSITION_WITH_RULE                   \
2710                   : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
2711                   : COMPOSITION_WITH_RULE_ALTCHARS);                    \
2712         composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
2713                              : COMPOSING_COMPONENT_CHAR);               \
2714         component_idx = component_len = 0;                              \
2715       }                                                                 \
2716   } while (0)
2717
2718
2719 /* Handle compositoin end sequence ESC 1.  */
2720
2721 #define DECODE_COMPOSITION_END()                                        \
2722   do {                                                                  \
2723     int nchars = (component_len > 0 ? component_idx - component_len     \
2724                   : method == COMPOSITION_RELATIVE ? component_idx      \
2725                   : (component_idx + 1) / 2);                           \
2726     int i;                                                              \
2727     int *saved_charbuf = charbuf;                                       \
2728     int from = char_offset;                                             \
2729     int to = from + nchars;                                             \
2730                                                                         \
2731     ADD_COMPOSITION_DATA (charbuf, from, to, method);                   \
2732     if (method != COMPOSITION_RELATIVE)                                 \
2733       {                                                                 \
2734         if (component_len == 0)                                         \
2735           for (i = 0; i < component_idx; i++)                           \
2736             *charbuf++ = components[i];                                 \
2737         else                                                            \
2738           for (i = 0; i < component_len; i++)                           \
2739             *charbuf++ = components[i];                                 \
2740         *saved_charbuf = saved_charbuf - charbuf;                       \
2741       }                                                                 \
2742     if (method == COMPOSITION_WITH_RULE)                                \
2743       for (i = 0; i < component_idx; i += 2, char_offset++)             \
2744         *charbuf++ = components[i];                                     \
2745     else                                                                \
2746       for (i = component_len; i < component_idx; i++, char_offset++)    \
2747         *charbuf++ = components[i];                                     \
2748     coding->annotated = 1;                                              \
2749     composition_state = COMPOSING_NO;                                   \
2750   } while (0)
2751
2752
2753 /* Decode a composition rule from the byte C1 (and maybe one more byte
2754    from SRC) and store one encoded composition rule in
2755    coding->cmp_data.  */
2756
2757 #define DECODE_COMPOSITION_RULE(c1)                                     \
2758   do {                                                                  \
2759     (c1) -= 32;                                                         \
2760     if (c1 < 81)                /* old format (before ver.21) */        \
2761       {                                                                 \
2762         int gref = (c1) / 9;                                            \
2763         int nref = (c1) % 9;                                            \
2764         if (gref == 4) gref = 10;                                       \
2765         if (nref == 4) nref = 10;                                       \
2766         c1 = COMPOSITION_ENCODE_RULE (gref, nref);                      \
2767       }                                                                 \
2768     else if (c1 < 93)           /* new format (after ver.21) */         \
2769       {                                                                 \
2770         ONE_MORE_BYTE (c2);                                             \
2771         c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);                \
2772       }                                                                 \
2773     else                                                                \
2774       c1 = 0;                                                           \
2775   } while (0)
2776
2777
2778 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2779
2780 static void
2781 decode_coding_iso_2022 (coding)
2782      struct coding_system *coding;
2783 {
2784   const unsigned char *src = coding->source + coding->consumed;
2785   const unsigned char *src_end = coding->source + coding->src_bytes;
2786   const unsigned char *src_base;
2787   int *charbuf = coding->charbuf;
2788   int *charbuf_end
2789     = charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
2790   int consumed_chars = 0, consumed_chars_base;
2791   int multibytep = coding->src_multibyte;
2792   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
2793   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2794   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2795   struct charset *charset;
2796   int c;
2797   /* For handling composition sequence.  */
2798 #define COMPOSING_NO                    0
2799 #define COMPOSING_CHAR                  1
2800 #define COMPOSING_RULE                  2
2801 #define COMPOSING_COMPONENT_CHAR        3
2802 #define COMPOSING_COMPONENT_RULE        4
2803
2804   int composition_state = COMPOSING_NO;
2805   enum composition_method method;
2806   int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2807   int component_idx;
2808   int component_len;
2809   Lisp_Object attrs, eol_type, charset_list;
2810   int char_offset = coding->produced_char;
2811   int last_offset = char_offset;
2812   int last_id = charset_ascii;
2813
2814   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2815   setup_iso_safe_charsets (attrs);
2816
2817   while (1)
2818     {
2819       int c1, c2;
2820
2821       src_base = src;
2822       consumed_chars_base = consumed_chars;
2823
2824       if (charbuf >= charbuf_end)
2825         break;
2826
2827       ONE_MORE_BYTE (c1);
2828
2829       /* We produce at most one character.  */
2830       switch (iso_code_class [c1])
2831         {
2832         case ISO_0x20_or_0x7F:
2833           if (composition_state != COMPOSING_NO)
2834             {
2835               if (composition_state == COMPOSING_RULE
2836                   || composition_state == COMPOSING_COMPONENT_RULE)
2837                 {
2838                   DECODE_COMPOSITION_RULE (c1);
2839                   components[component_idx++] = c1;
2840                   composition_state--;
2841                   continue;
2842                 }
2843             }
2844           if (charset_id_0 < 0
2845               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
2846             /* This is SPACE or DEL.  */
2847             charset = CHARSET_FROM_ID (charset_ascii);
2848           else
2849             charset = CHARSET_FROM_ID (charset_id_0);
2850           break;
2851
2852         case ISO_graphic_plane_0:
2853           if (composition_state != COMPOSING_NO)
2854             {
2855               if (composition_state == COMPOSING_RULE
2856                   || composition_state == COMPOSING_COMPONENT_RULE)
2857                 {
2858                   DECODE_COMPOSITION_RULE (c1);
2859                   components[component_idx++] = c1;
2860                   composition_state--;
2861                   continue;
2862                 }
2863             }
2864           charset = CHARSET_FROM_ID (charset_id_0);
2865           break;
2866
2867         case ISO_0xA0_or_0xFF:
2868           if (charset_id_1 < 0
2869               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2870               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2871             goto invalid_code;
2872           /* This is a graphic character, we fall down ... */
2873
2874         case ISO_graphic_plane_1:
2875           if (charset_id_1 < 0)
2876             goto invalid_code;
2877           charset = CHARSET_FROM_ID (charset_id_1);
2878           break;
2879
2880         case ISO_carriage_return:
2881           if (c1 == '\r')
2882             {
2883               if (EQ (eol_type, Qdos))
2884                 {
2885                   if (src == src_end)
2886                     {
2887                       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
2888                       goto no_more_source;
2889                     }
2890                   if (*src == '\n')
2891                     ONE_MORE_BYTE (c1);
2892                 }
2893               else if (EQ (eol_type, Qmac))
2894                 c1 = '\n';
2895             }
2896           /* fall through */
2897
2898         case ISO_control_0:
2899           MAYBE_FINISH_COMPOSITION ();
2900           charset = CHARSET_FROM_ID (charset_ascii);
2901           break;
2902
2903         case ISO_control_1:
2904           MAYBE_FINISH_COMPOSITION ();
2905           goto invalid_code;
2906
2907         case ISO_shift_out:
2908           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2909               || CODING_ISO_DESIGNATION (coding, 1) < 0)
2910             goto invalid_code;
2911           CODING_ISO_INVOCATION (coding, 0) = 1;
2912           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2913           continue;
2914
2915         case ISO_shift_in:
2916           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2917             goto invalid_code;
2918           CODING_ISO_INVOCATION (coding, 0) = 0;
2919           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2920           continue;
2921
2922         case ISO_single_shift_2_7:
2923         case ISO_single_shift_2:
2924           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2925             goto invalid_code;
2926           /* SS2 is handled as an escape sequence of ESC 'N' */
2927           c1 = 'N';
2928           goto label_escape_sequence;
2929
2930         case ISO_single_shift_3:
2931           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2932             goto invalid_code;
2933           /* SS2 is handled as an escape sequence of ESC 'O' */
2934           c1 = 'O';
2935           goto label_escape_sequence;
2936
2937         case ISO_control_sequence_introducer:
2938           /* CSI is handled as an escape sequence of ESC '[' ...  */
2939           c1 = '[';
2940           goto label_escape_sequence;
2941
2942         case ISO_escape:
2943           ONE_MORE_BYTE (c1);
2944         label_escape_sequence:
2945           /* Escape sequences handled here are invocation,
2946              designation, direction specification, and character
2947              composition specification.  */
2948           switch (c1)
2949             {
2950             case '&':           /* revision of following character set */
2951               ONE_MORE_BYTE (c1);
2952               if (!(c1 >= '@' && c1 <= '~'))
2953                 goto invalid_code;
2954               ONE_MORE_BYTE (c1);
2955               if (c1 != ISO_CODE_ESC)
2956                 goto invalid_code;
2957               ONE_MORE_BYTE (c1);
2958               goto label_escape_sequence;
2959
2960             case '$':           /* designation of 2-byte character set */
2961               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2962                 goto invalid_code;
2963               ONE_MORE_BYTE (c1);
2964               if (c1 >= '@' && c1 <= 'B')
2965                 {       /* designation of JISX0208.1978, GB2312.1980,
2966                            or JISX0208.1980 */
2967                   DECODE_DESIGNATION (0, 2, 0, c1);
2968                 }
2969               else if (c1 >= 0x28 && c1 <= 0x2B)
2970                 {       /* designation of DIMENSION2_CHARS94 character set */
2971                   ONE_MORE_BYTE (c2);
2972                   DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
2973                 }
2974               else if (c1 >= 0x2C && c1 <= 0x2F)
2975                 {       /* designation of DIMENSION2_CHARS96 character set */
2976                   ONE_MORE_BYTE (c2);
2977                   DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
2978                 }
2979               else
2980                 goto invalid_code;
2981               /* We must update these variables now.  */
2982               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2983               charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2984               continue;
2985
2986             case 'n':           /* invocation of locking-shift-2 */
2987               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2988                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
2989                 goto invalid_code;
2990               CODING_ISO_INVOCATION (coding, 0) = 2;
2991               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2992               continue;
2993
2994             case 'o':           /* invocation of locking-shift-3 */
2995               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2996                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
2997                 goto invalid_code;
2998               CODING_ISO_INVOCATION (coding, 0) = 3;
2999               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3000               continue;
3001
3002             case 'N':           /* invocation of single-shift-2 */
3003               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3004                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3005                 goto invalid_code;
3006               charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
3007               ONE_MORE_BYTE (c1);
3008               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3009                 goto invalid_code;
3010               break;
3011
3012             case 'O':           /* invocation of single-shift-3 */
3013               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3014                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3015                 goto invalid_code;
3016               charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
3017               ONE_MORE_BYTE (c1);
3018               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3019                 goto invalid_code;
3020               break;
3021
3022             case '0': case '2': case '3': case '4': /* start composition */
3023               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3024                 goto invalid_code;
3025               DECODE_COMPOSITION_START (c1);
3026               continue;
3027
3028             case '1':           /* end composition */
3029               if (composition_state == COMPOSING_NO)
3030                 goto invalid_code;
3031               DECODE_COMPOSITION_END ();
3032               continue;
3033
3034             case '[':           /* specification of direction */
3035               if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3036                 goto invalid_code;
3037               /* For the moment, nested direction is not supported.
3038                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3039                  left-to-right, and nozero means right-to-left.  */
3040               ONE_MORE_BYTE (c1);
3041               switch (c1)
3042                 {
3043                 case ']':       /* end of the current direction */
3044                   coding->mode &= ~CODING_MODE_DIRECTION;
3045
3046                 case '0':       /* end of the current direction */
3047                 case '1':       /* start of left-to-right direction */
3048                   ONE_MORE_BYTE (c1);
3049                   if (c1 == ']')
3050                     coding->mode &= ~CODING_MODE_DIRECTION;
3051                   else
3052                     goto invalid_code;
3053                   break;
3054
3055                 case '2':       /* start of right-to-left direction */
3056                   ONE_MORE_BYTE (c1);
3057                   if (c1 == ']')
3058                     coding->mode |= CODING_MODE_DIRECTION;
3059                   else
3060                     goto invalid_code;
3061                   break;
3062
3063                 default:
3064                   goto invalid_code;
3065                 }
3066               continue;
3067
3068             case '%':
3069               ONE_MORE_BYTE (c1);
3070               if (c1 == '/')
3071                 {
3072                   /* CTEXT extended segment:
3073                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3074                      We keep these bytes as is for the moment.
3075                      They may be decoded by post-read-conversion.  */
3076                   int dim, M, L;
3077                   int size;
3078
3079                   ONE_MORE_BYTE (dim);
3080                   ONE_MORE_BYTE (M);
3081                   ONE_MORE_BYTE (L);
3082                   size = ((M - 128) * 128) + (L - 128);
3083                   if (charbuf + 8 + size > charbuf_end)
3084                     goto break_loop;
3085                   *charbuf++ = ISO_CODE_ESC;
3086                   *charbuf++ = '%';
3087                   *charbuf++ = '/';
3088                   *charbuf++ = dim;
3089                   *charbuf++ = BYTE8_TO_CHAR (M);
3090                   *charbuf++ = BYTE8_TO_CHAR (L);
3091                   while (size-- > 0)
3092                     {
3093                       ONE_MORE_BYTE (c1);
3094                       *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3095                     }
3096                 }
3097               else if (c1 == 'G')
3098                 {
3099                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3100                      ESC % G --UTF-8-BYTES-- ESC % @
3101                      We keep these bytes as is for the moment.
3102                      They may be decoded by post-read-conversion.  */
3103                   int *p = charbuf;
3104
3105                   if (p + 6 > charbuf_end)
3106                     goto break_loop;
3107                   *p++ = ISO_CODE_ESC;
3108                   *p++ = '%';
3109                   *p++ = 'G';
3110                   while (p < charbuf_end)
3111                     {
3112                       ONE_MORE_BYTE (c1);
3113                       if (c1 == ISO_CODE_ESC
3114                           && src + 1 < src_end
3115                           && src[0] == '%'
3116                           && src[1] == '@')
3117                         break;
3118                       *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3119                     }
3120                   if (p + 3 > charbuf_end)
3121                     goto break_loop;
3122                   *p++ = ISO_CODE_ESC;
3123                   *p++ = '%';
3124                   *p++ = '@';
3125                   charbuf = p;
3126                 }
3127               else
3128                 goto invalid_code;
3129               continue;
3130               break;
3131
3132             default:
3133               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3134                 goto invalid_code;
3135               if (c1 >= 0x28 && c1 <= 0x2B)
3136                 {       /* designation of DIMENSION1_CHARS94 character set */
3137                   ONE_MORE_BYTE (c2);
3138                   DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
3139                 }
3140               else if (c1 >= 0x2C && c1 <= 0x2F)
3141                 {       /* designation of DIMENSION1_CHARS96 character set */
3142                   ONE_MORE_BYTE (c2);
3143                   DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
3144                 }
3145               else
3146                 goto invalid_code;
3147               /* We must update these variables now.  */
3148               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3149               charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3150               continue;
3151             }
3152         }
3153
3154       if (charset->id != charset_ascii
3155           && last_id != charset->id)
3156         {
3157           if (last_id != charset_ascii)
3158             ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3159           last_id = charset->id;
3160           last_offset = char_offset;
3161         }
3162
3163       /* Now we know CHARSET and 1st position code C1 of a character.
3164          Produce a decoded character while getting 2nd position code
3165          C2 if necessary.  */
3166       c1 &= 0x7F;
3167       if (CHARSET_DIMENSION (charset) > 1)
3168         {
3169           ONE_MORE_BYTE (c2);
3170           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3171             /* C2 is not in a valid range.  */
3172             goto invalid_code;
3173           c1 = (c1 << 8) | (c2 & 0x7F);
3174           if (CHARSET_DIMENSION (charset) > 2)
3175             {
3176               ONE_MORE_BYTE (c2);
3177               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3178                 /* C2 is not in a valid range.  */
3179                 goto invalid_code;
3180               c1 = (c1 << 8) | (c2 & 0x7F);
3181             }
3182         }
3183
3184       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3185       if (c < 0)
3186         {
3187           MAYBE_FINISH_COMPOSITION ();
3188           for (; src_base < src; src_base++, char_offset++)
3189             {
3190               if (ASCII_BYTE_P (*src_base))
3191                 *charbuf++ = *src_base;
3192               else
3193                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3194             }
3195         }
3196       else if (composition_state == COMPOSING_NO)
3197         {
3198           *charbuf++ = c;
3199           char_offset++;
3200         }
3201       else
3202         {
3203           components[component_idx++] = c;
3204           if (method == COMPOSITION_WITH_RULE
3205               || (method == COMPOSITION_WITH_RULE_ALTCHARS
3206                   && composition_state == COMPOSING_COMPONENT_CHAR))
3207             composition_state++;
3208         }
3209       continue;
3210
3211     invalid_code:
3212       MAYBE_FINISH_COMPOSITION ();
3213       src = src_base;
3214       consumed_chars = consumed_chars_base;
3215       ONE_MORE_BYTE (c);
3216       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3217       char_offset++;
3218       coding->errors++;
3219       continue;
3220
3221     break_loop:
3222       break;
3223     }
3224
3225  no_more_source:
3226   if (last_id != charset_ascii)
3227     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
3228   coding->consumed_char += consumed_chars_base;
3229   coding->consumed = src_base - coding->source;
3230   coding->charbuf_used = charbuf - coding->charbuf;
3231 }
3232
3233
3234 /* ISO2022 encoding stuff.  */
3235
3236 /*
3237    It is not enough to say just "ISO2022" on encoding, we have to
3238    specify more details.  In Emacs, each coding system of ISO2022
3239    variant has the following specifications:
3240         1. Initial designation to G0 thru G3.
3241         2. Allows short-form designation?
3242         3. ASCII should be designated to G0 before control characters?
3243         4. ASCII should be designated to G0 at end of line?
3244         5. 7-bit environment or 8-bit environment?
3245         6. Use locking-shift?
3246         7. Use Single-shift?
3247    And the following two are only for Japanese:
3248         8. Use ASCII in place of JIS0201-1976-Roman?
3249         9. Use JISX0208-1983 in place of JISX0208-1978?
3250    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3251    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
3252    details.
3253 */
3254
3255 /* Produce codes (escape sequence) for designating CHARSET to graphic
3256    register REG at DST, and increment DST.  If <final-char> of CHARSET is
3257    '@', 'A', or 'B' and the coding system CODING allows, produce
3258    designation sequence of short-form.  */
3259
3260 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
3261   do {                                                                  \
3262     unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
3263     char *intermediate_char_94 = "()*+";                                \
3264     char *intermediate_char_96 = ",-./";                                \
3265     int revision = -1;                                                  \
3266     int c;                                                              \
3267                                                                         \
3268     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
3269       revision = CHARSET_ISO_REVISION (charset);                        \
3270                                                                         \
3271     if (revision >= 0)                                                  \
3272       {                                                                 \
3273         EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
3274         EMIT_ONE_BYTE ('@' + revision);                                 \
3275       }                                                                 \
3276     EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
3277     if (CHARSET_DIMENSION (charset) == 1)                               \
3278       {                                                                 \
3279         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3280           c = intermediate_char_94[reg];                                \
3281         else                                                            \
3282           c = intermediate_char_96[reg];                                \
3283         EMIT_ONE_ASCII_BYTE (c);                                        \
3284       }                                                                 \
3285     else                                                                \
3286       {                                                                 \
3287         EMIT_ONE_ASCII_BYTE ('$');                                      \
3288         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3289           {                                                             \
3290             if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM   \
3291                 || reg != 0                                             \
3292                 || final_char < '@' || final_char > 'B')                \
3293               EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);          \
3294           }                                                             \
3295         else                                                            \
3296           EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
3297       }                                                                 \
3298     EMIT_ONE_ASCII_BYTE (final_char);                                   \
3299                                                                         \
3300     CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
3301   } while (0)
3302
3303
3304 /* The following two macros produce codes (control character or escape
3305    sequence) for ISO2022 single-shift functions (single-shift-2 and
3306    single-shift-3).  */
3307
3308 #define ENCODE_SINGLE_SHIFT_2                                           \
3309   do {                                                                  \
3310     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3311       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
3312     else                                                                \
3313       EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
3314     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3315   } while (0)
3316
3317
3318 #define ENCODE_SINGLE_SHIFT_3                                           \
3319   do {                                                                  \
3320     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3321       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
3322     else                                                                \
3323       EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
3324     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3325   } while (0)
3326
3327
3328 /* The following four macros produce codes (control character or
3329    escape sequence) for ISO2022 locking-shift functions (shift-in,
3330    shift-out, locking-shift-2, and locking-shift-3).  */
3331
3332 #define ENCODE_SHIFT_IN                                 \
3333   do {                                                  \
3334     EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
3335     CODING_ISO_INVOCATION (coding, 0) = 0;              \
3336   } while (0)
3337
3338
3339 #define ENCODE_SHIFT_OUT                                \
3340   do {                                                  \
3341     EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
3342     CODING_ISO_INVOCATION (coding, 0) = 1;              \
3343   } while (0)
3344
3345
3346 #define ENCODE_LOCKING_SHIFT_2                          \
3347   do {                                                  \
3348     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3349     CODING_ISO_INVOCATION (coding, 0) = 2;              \
3350   } while (0)
3351
3352
3353 #define ENCODE_LOCKING_SHIFT_3                          \
3354   do {                                                  \
3355     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3356     CODING_ISO_INVOCATION (coding, 0) = 3;              \
3357   } while (0)
3358
3359
3360 /* Produce codes for a DIMENSION1 character whose character set is
3361    CHARSET and whose position-code is C1.  Designation and invocation
3362    sequences are also produced in advance if necessary.  */
3363
3364 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
3365   do {                                                                  \
3366     int id = CHARSET_ID (charset);                                      \
3367                                                                         \
3368     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
3369         && id == charset_ascii)                                         \
3370       {                                                                 \
3371         id = charset_jisx0201_roman;                                    \
3372         charset = CHARSET_FROM_ID (id);                                 \
3373       }                                                                 \
3374                                                                         \
3375     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3376       {                                                                 \
3377         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3378           EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                              \
3379         else                                                            \
3380           EMIT_ONE_BYTE (c1 | 0x80);                                    \
3381         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3382         break;                                                          \
3383       }                                                                 \
3384     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3385       {                                                                 \
3386         EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                                \
3387         break;                                                          \
3388       }                                                                 \
3389     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3390       {                                                                 \
3391         EMIT_ONE_BYTE (c1 | 0x80);                                      \
3392         break;                                                          \
3393       }                                                                 \
3394     else                                                                \
3395       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3396          must invoke it, or, at first, designate it to some graphic     \
3397          register.  Then repeat the loop to actually produce the        \
3398          character.  */                                                 \
3399       dst = encode_invocation_designation (charset, coding, dst,        \
3400                                            &produced_chars);            \
3401   } while (1)
3402
3403
3404 /* Produce codes for a DIMENSION2 character whose character set is
3405    CHARSET and whose position-codes are C1 and C2.  Designation and
3406    invocation codes are also produced in advance if necessary.  */
3407
3408 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
3409   do {                                                                  \
3410     int id = CHARSET_ID (charset);                                      \
3411                                                                         \
3412     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
3413         && id == charset_jisx0208)                                      \
3414       {                                                                 \
3415         id = charset_jisx0208_1978;                                     \
3416         charset = CHARSET_FROM_ID (id);                                 \
3417       }                                                                 \
3418                                                                         \
3419     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3420       {                                                                 \
3421         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3422           EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
3423         else                                                            \
3424           EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
3425         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3426         break;                                                          \
3427       }                                                                 \
3428     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3429       {                                                                 \
3430         EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
3431         break;                                                          \
3432       }                                                                 \
3433     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3434       {                                                                 \
3435         EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
3436         break;                                                          \
3437       }                                                                 \
3438     else                                                                \
3439       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3440          must invoke it, or, at first, designate it to some graphic     \
3441          register.  Then repeat the loop to actually produce the        \
3442          character.  */                                                 \
3443       dst = encode_invocation_designation (charset, coding, dst,        \
3444                                            &produced_chars);            \
3445   } while (1)
3446
3447
3448 #define ENCODE_ISO_CHARACTER(charset, c)                                   \
3449   do {                                                                     \
3450     int code = ENCODE_CHAR ((charset),(c));                                \
3451                                                                            \
3452     if (CHARSET_DIMENSION (charset) == 1)                                  \
3453       ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
3454     else                                                                   \
3455       ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3456   } while (0)
3457
3458
3459 /* Produce designation and invocation codes at a place pointed by DST
3460    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
3461    Return new DST.  */
3462
3463 unsigned char *
3464 encode_invocation_designation (charset, coding, dst, p_nchars)
3465      struct charset *charset;
3466      struct coding_system *coding;
3467      unsigned char *dst;
3468      int *p_nchars;
3469 {
3470   int multibytep = coding->dst_multibyte;
3471   int produced_chars = *p_nchars;
3472   int reg;                      /* graphic register number */
3473   int id = CHARSET_ID (charset);
3474
3475   /* At first, check designations.  */
3476   for (reg = 0; reg < 4; reg++)
3477     if (id == CODING_ISO_DESIGNATION (coding, reg))
3478       break;
3479
3480   if (reg >= 4)
3481     {
3482       /* CHARSET is not yet designated to any graphic registers.  */
3483       /* At first check the requested designation.  */
3484       reg = CODING_ISO_REQUEST (coding, id);
3485       if (reg < 0)
3486         /* Since CHARSET requests no special designation, designate it
3487            to graphic register 0.  */
3488         reg = 0;
3489
3490       ENCODE_DESIGNATION (charset, reg, coding);
3491     }
3492
3493   if (CODING_ISO_INVOCATION (coding, 0) != reg
3494       && CODING_ISO_INVOCATION (coding, 1) != reg)
3495     {
3496       /* Since the graphic register REG is not invoked to any graphic
3497          planes, invoke it to graphic plane 0.  */
3498       switch (reg)
3499         {
3500         case 0:                 /* graphic register 0 */
3501           ENCODE_SHIFT_IN;
3502           break;
3503
3504         case 1:                 /* graphic register 1 */
3505           ENCODE_SHIFT_OUT;
3506           break;
3507
3508         case 2:                 /* graphic register 2 */
3509           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3510             ENCODE_SINGLE_SHIFT_2;
3511           else
3512             ENCODE_LOCKING_SHIFT_2;
3513           break;
3514
3515         case 3:                 /* graphic register 3 */
3516           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3517             ENCODE_SINGLE_SHIFT_3;
3518           else
3519             ENCODE_LOCKING_SHIFT_3;
3520           break;
3521         }
3522     }
3523
3524   *p_nchars = produced_chars;
3525   return dst;
3526 }
3527
3528 /* The following three macros produce codes for indicating direction
3529    of text.  */
3530 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
3531   do {                                                                  \
3532     if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS)        \
3533       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
3534     else                                                                \
3535       EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
3536   } while (0)
3537
3538
3539 #define ENCODE_DIRECTION_R2L()                  \
3540   do {                                          \
3541     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3542     EMIT_TWO_ASCII_BYTES ('2', ']');            \
3543   } while (0)
3544
3545
3546 #define ENCODE_DIRECTION_L2R()                  \
3547   do {                                          \
3548     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3549     EMIT_TWO_ASCII_BYTES ('0', ']');            \
3550   } while (0)
3551
3552
3553 /* Produce codes for designation and invocation to reset the graphic
3554    planes and registers to initial state.  */
3555 #define ENCODE_RESET_PLANE_AND_REGISTER()                               \
3556   do {                                                                  \
3557     int reg;                                                            \
3558     struct charset *charset;                                            \
3559                                                                         \
3560     if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
3561       ENCODE_SHIFT_IN;                                                  \
3562     for (reg = 0; reg < 4; reg++)                                       \
3563       if (CODING_ISO_INITIAL (coding, reg) >= 0                         \
3564           && (CODING_ISO_DESIGNATION (coding, reg)                      \
3565               != CODING_ISO_INITIAL (coding, reg)))                     \
3566         {                                                               \
3567           charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3568           ENCODE_DESIGNATION (charset, reg, coding);                    \
3569         }                                                               \
3570   } while (0)
3571
3572
3573 /* Produce designation sequences of charsets in the line started from
3574    SRC to a place pointed by DST, and return updated DST.
3575
3576    If the current block ends before any end-of-line, we may fail to
3577    find all the necessary designations.  */
3578
3579 static unsigned char *
3580 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3581      struct coding_system *coding;
3582      int *charbuf, *charbuf_end;
3583      unsigned char *dst;
3584 {
3585   struct charset *charset;
3586   /* Table of charsets to be designated to each graphic register.  */
3587   int r[4];
3588   int c, found = 0, reg;
3589   int produced_chars = 0;
3590   int multibytep = coding->dst_multibyte;
3591   Lisp_Object attrs;
3592   Lisp_Object charset_list;
3593
3594   attrs = CODING_ID_ATTRS (coding->id);
3595   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3596   if (EQ (charset_list, Qiso_2022))
3597     charset_list = Viso_2022_charset_list;
3598
3599   for (reg = 0; reg < 4; reg++)
3600     r[reg] = -1;
3601
3602   while (found < 4)
3603     {
3604       int id;
3605
3606       c = *charbuf++;
3607       if (c == '\n')
3608         break;
3609       charset = char_charset (c, charset_list, NULL);
3610       id = CHARSET_ID (charset);
3611       reg = CODING_ISO_REQUEST (coding, id);
3612       if (reg >= 0 && r[reg] < 0)
3613         {
3614           found++;
3615           r[reg] = id;
3616         }
3617     }
3618
3619   if (found)
3620     {
3621       for (reg = 0; reg < 4; reg++)
3622         if (r[reg] >= 0
3623             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3624           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3625     }
3626
3627   return dst;
3628 }
3629
3630 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
3631
3632 static int
3633 encode_coding_iso_2022 (coding)
3634      struct coding_system *coding;
3635 {
3636   int multibytep = coding->dst_multibyte;
3637   int *charbuf = coding->charbuf;
3638   int *charbuf_end = charbuf + coding->charbuf_used;
3639   unsigned char *dst = coding->destination + coding->produced;
3640   unsigned char *dst_end = coding->destination + coding->dst_bytes;
3641   int safe_room = 16;
3642   int bol_designation
3643     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3644        && CODING_ISO_BOL (coding));
3645   int produced_chars = 0;
3646   Lisp_Object attrs, eol_type, charset_list;
3647   int ascii_compatible;
3648   int c;
3649   int preferred_charset_id = -1;
3650
3651   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3652   setup_iso_safe_charsets (attrs);
3653   /* Charset list may have been changed.  */
3654   charset_list = CODING_ATTR_CHARSET_LIST (attrs);              \
3655   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
3656
3657   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3658
3659   while (charbuf < charbuf_end)
3660     {
3661       ASSURE_DESTINATION (safe_room);
3662
3663       if (bol_designation)
3664         {
3665           unsigned char *dst_prev = dst;
3666
3667           /* We have to produce designation sequences if any now.  */
3668           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3669           bol_designation = 0;
3670           /* We are sure that designation sequences are all ASCII bytes.  */
3671           produced_chars += dst - dst_prev;
3672         }
3673
3674       c = *charbuf++;
3675
3676       if (c < 0)
3677         {
3678           /* Handle an annotation.  */
3679           switch (*charbuf)
3680             {
3681             case CODING_ANNOTATE_COMPOSITION_MASK:
3682               /* Not yet implemented.  */
3683               break;
3684             case CODING_ANNOTATE_CHARSET_MASK:
3685               preferred_charset_id = charbuf[3];
3686               if (preferred_charset_id >= 0
3687                   && NILP (Fmemq (make_number (preferred_charset_id),
3688                                   charset_list)))
3689                 preferred_charset_id = -1;
3690               break;
3691             default:
3692               abort ();
3693             }
3694           charbuf += -c - 1;
3695           continue;
3696         }
3697
3698       /* Now encode the character C.  */
3699       if (c < 0x20 || c == 0x7F)
3700         {
3701           if (c == '\n'
3702               || (c == '\r' && EQ (eol_type, Qmac)))
3703             {
3704               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3705                 ENCODE_RESET_PLANE_AND_REGISTER ();
3706               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
3707                 {
3708                   int i;
3709
3710                   for (i = 0; i < 4; i++)
3711                     CODING_ISO_DESIGNATION (coding, i)
3712                       = CODING_ISO_INITIAL (coding, i);
3713                 }
3714               bol_designation
3715                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
3716             }
3717           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3718             ENCODE_RESET_PLANE_AND_REGISTER ();
3719           EMIT_ONE_ASCII_BYTE (c);
3720         }
3721       else if (ASCII_CHAR_P (c))
3722         {
3723           if (ascii_compatible)
3724             EMIT_ONE_ASCII_BYTE (c);
3725           else
3726             {
3727               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3728               ENCODE_ISO_CHARACTER (charset, c);
3729             }
3730         }
3731       else if (CHAR_BYTE8_P (c))
3732         {
3733           c = CHAR_TO_BYTE8 (c);
3734           EMIT_ONE_BYTE (c);
3735         }
3736       else
3737         {
3738           struct charset *charset;
3739
3740           if (preferred_charset_id >= 0)
3741             {
3742               charset = CHARSET_FROM_ID (preferred_charset_id);
3743               if (! CHAR_CHARSET_P (c, charset))
3744                 charset = char_charset (c, charset_list, NULL);
3745             }
3746           else
3747             charset = char_charset (c, charset_list, NULL);
3748           if (!charset)
3749             {
3750               if (coding->mode & CODING_MODE_SAFE_ENCODING)
3751                 {
3752                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3753                   charset = CHARSET_FROM_ID (charset_ascii);
3754                 }
3755               else
3756                 {
3757                   c = coding->default_char;
3758                   charset = char_charset (c, charset_list, NULL);
3759                 }
3760             }
3761           ENCODE_ISO_CHARACTER (charset, c);
3762         }
3763     }
3764
3765   if (coding->mode & CODING_MODE_LAST_BLOCK
3766       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3767     {
3768       ASSURE_DESTINATION (safe_room);
3769       ENCODE_RESET_PLANE_AND_REGISTER ();
3770     }
3771   coding->result = CODING_RESULT_SUCCESS;
3772   CODING_ISO_BOL (coding) = bol_designation;
3773   coding->produced_char += produced_chars;
3774   coding->produced = dst - coding->destination;
3775   return 0;
3776 }
3777
3778 \f
3779 /*** 8,9. SJIS and BIG5 handlers ***/
3780
3781 /* Although SJIS and BIG5 are not ISO's coding system, they are used
3782    quite widely.  So, for the moment, Emacs supports them in the bare
3783    C code.  But, in the future, they may be supported only by CCL.  */
3784
3785 /* SJIS is a coding system encoding three character sets: ASCII, right
3786    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
3787    as is.  A character of charset katakana-jisx0201 is encoded by
3788    "position-code + 0x80".  A character of charset japanese-jisx0208
3789    is encoded in 2-byte but two position-codes are divided and shifted
3790    so that it fit in the range below.
3791
3792    --- CODE RANGE of SJIS ---
3793    (character set)      (range)
3794    ASCII                0x00 .. 0x7F
3795    KATAKANA-JISX0201    0xA0 .. 0xDF
3796    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
3797             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
3798    -------------------------------
3799
3800 */
3801
3802 /* BIG5 is a coding system encoding two character sets: ASCII and
3803    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
3804    character set and is encoded in two-byte.
3805
3806    --- CODE RANGE of BIG5 ---
3807    (character set)      (range)
3808    ASCII                0x00 .. 0x7F
3809    Big5 (1st byte)      0xA1 .. 0xFE
3810         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
3811    --------------------------
3812
3813   */
3814
3815 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3816    Check if a text is encoded in SJIS.  If it is, return
3817    CATEGORY_MASK_SJIS, else return 0.  */
3818
3819 static int
3820 detect_coding_sjis (coding, detect_info)
3821      struct coding_system *coding;
3822      struct coding_detection_info *detect_info;
3823 {
3824   const unsigned char *src = coding->source, *src_base = src;
3825   const unsigned char *src_end = coding->source + coding->src_bytes;
3826   int multibytep = coding->src_multibyte;
3827   int consumed_chars = 0;
3828   int found = 0;
3829   int c;
3830   int incomplete;
3831
3832   detect_info->checked |= CATEGORY_MASK_SJIS;
3833   /* A coding system of this category is always ASCII compatible.  */
3834   src += coding->head_ascii;
3835
3836   while (1)
3837     {
3838       incomplete = 0;
3839       ONE_MORE_BYTE (c);
3840       incomplete = 1;
3841       if (c < 0x80)
3842         continue;
3843       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
3844         {
3845           ONE_MORE_BYTE (c);
3846           if (c < 0x40 || c == 0x7F || c > 0xFC)
3847             break;
3848           found = CATEGORY_MASK_SJIS;
3849         }
3850       else if (c >= 0xA0 && c < 0xE0)
3851         found = CATEGORY_MASK_SJIS;
3852       else
3853         break;
3854     }
3855   detect_info->rejected |= CATEGORY_MASK_SJIS;
3856   return 0;
3857
3858  no_more_source:
3859   if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3860     {
3861       detect_info->rejected |= CATEGORY_MASK_SJIS;
3862       return 0;
3863     }
3864   detect_info->found |= found;
3865   return 1;
3866 }
3867
3868 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3869    Check if a text is encoded in BIG5.  If it is, return
3870    CATEGORY_MASK_BIG5, else return 0.  */
3871
3872 static int
3873 detect_coding_big5 (coding, detect_info)
3874      struct coding_system *coding;
3875      struct coding_detection_info *detect_info;
3876 {
3877   const unsigned char *src = coding->source, *src_base = src;
3878   const unsigned char *src_end = coding->source + coding->src_bytes;
3879   int multibytep = coding->src_multibyte;
3880   int consumed_chars = 0;
3881   int found = 0;
3882   int c;
3883   int incomplete;
3884
3885   detect_info->checked |= CATEGORY_MASK_BIG5;
3886   /* A coding system of this category is always ASCII compatible.  */
3887   src += coding->head_ascii;
3888
3889   while (1)
3890     {
3891       incomplete = 0;
3892       ONE_MORE_BYTE (c);
3893       incomplete = 1;
3894       if (c < 0x80)
3895         continue;
3896       if (c >= 0xA1)
3897         {
3898           ONE_MORE_BYTE (c);
3899           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
3900             return 0;
3901           found = CATEGORY_MASK_BIG5;
3902         }
3903       else
3904         break;
3905     }
3906   detect_info->rejected |= CATEGORY_MASK_BIG5;
3907   return 0;
3908
3909  no_more_source:
3910   if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK)
3911     {
3912       detect_info->rejected |= CATEGORY_MASK_BIG5;
3913       return 0;
3914     }
3915   detect_info->found |= found;
3916   return 1;
3917 }
3918
3919 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3920    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
3921
3922 static void
3923 decode_coding_sjis (coding)
3924      struct coding_system *coding;
3925 {
3926   const unsigned char *src = coding->source + coding->consumed;
3927   const unsigned char *src_end = coding->source + coding->src_bytes;
3928   const unsigned char *src_base;
3929   int *charbuf = coding->charbuf;
3930   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
3931   int consumed_chars = 0, consumed_chars_base;
3932   int multibytep = coding->src_multibyte;
3933   struct charset *charset_roman, *charset_kanji, *charset_kana;
3934   Lisp_Object attrs, eol_type, charset_list, val;
3935   int char_offset = coding->produced_char;
3936   int last_offset = char_offset;
3937   int last_id = charset_ascii;
3938
3939   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3940
3941   val = charset_list;
3942   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3943   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3944   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
3945
3946   while (1)
3947     {
3948       int c, c1;
3949
3950       src_base = src;
3951       consumed_chars_base = consumed_chars;
3952
3953       if (charbuf >= charbuf_end)
3954         break;
3955
3956       ONE_MORE_BYTE (c);
3957
3958       if (c == '\r')
3959         {
3960           if (EQ (eol_type, Qdos))
3961             {
3962               if (src == src_end)
3963                 {
3964                   coding->result = CODING_RESULT_INSUFFICIENT_SRC;
3965                   goto no_more_source;
3966                 }
3967               if (*src == '\n')
3968                 ONE_MORE_BYTE (c);
3969             }
3970           else if (EQ (eol_type, Qmac))
3971             c = '\n';
3972         }
3973       else
3974         {
3975           struct charset *charset;
3976
3977           if (c < 0x80)
3978             charset = charset_roman;
3979           else
3980             {
3981               if (c >= 0xF0)
3982                 goto invalid_code;
3983               if (c < 0xA0 || c >= 0xE0)
3984                 {
3985                   /* SJIS -> JISX0208 */
3986                   ONE_MORE_BYTE (c1);
3987                   if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3988                     goto invalid_code;
3989                   c = (c << 8) | c1;
3990                   SJIS_TO_JIS (c);
3991                   charset = charset_kanji;
3992                 }
3993               else if (c > 0xA0)
3994                 {
3995                   /* SJIS -> JISX0201-Kana */
3996                   c &= 0x7F;
3997                   charset = charset_kana;
3998                 }
3999               else
4000                 goto invalid_code;
4001             }
4002           if (charset->id != charset_ascii
4003               && last_id != charset->id)
4004             {
4005               if (last_id != charset_ascii)
4006                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4007               last_id = charset->id;
4008               last_offset = char_offset;
4009             }
4010           CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4011         }
4012       *charbuf++ = c;
4013       char_offset++;
4014       continue;
4015
4016     invalid_code:
4017       src = src_base;
4018       consumed_chars = consumed_chars_base;
4019       ONE_MORE_BYTE (c);
4020       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4021       char_offset++;
4022       coding->errors++;
4023     }
4024
4025  no_more_source:
4026   if (last_id != charset_ascii)
4027     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4028   coding->consumed_char += consumed_chars_base;
4029   coding->consumed = src_base - coding->source;
4030   coding->charbuf_used = charbuf - coding->charbuf;
4031 }
4032
4033 static void
4034 decode_coding_big5 (coding)
4035      struct coding_system *coding;
4036 {
4037   const unsigned char *src = coding->source + coding->consumed;
4038   const unsigned char *src_end = coding->source + coding->src_bytes;
4039   const unsigned char *src_base;
4040   int *charbuf = coding->charbuf;
4041   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4042   int consumed_chars = 0, consumed_chars_base;
4043   int multibytep = coding->src_multibyte;
4044   struct charset *charset_roman, *charset_big5;
4045   Lisp_Object attrs, eol_type, charset_list, val;
4046   int char_offset = coding->produced_char;
4047   int last_offset = char_offset;
4048   int last_id = charset_ascii;
4049
4050   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4051   val = charset_list;
4052   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4053   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4054
4055   while (1)
4056     {
4057       int c, c1;
4058
4059       src_base = src;
4060       consumed_chars_base = consumed_chars;
4061
4062       if (charbuf >= charbuf_end)
4063         break;
4064
4065       ONE_MORE_BYTE (c);
4066
4067       if (c == '\r')
4068         {
4069           if (EQ (eol_type, Qdos))
4070             {
4071               if (src == src_end)
4072                 {
4073                   coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4074                   goto no_more_source;
4075                 }
4076               if (*src == '\n')
4077                 ONE_MORE_BYTE (c);
4078             }
4079           else if (EQ (eol_type, Qmac))
4080             c = '\n';
4081         }
4082       else
4083         {
4084           struct charset *charset;
4085           if (c < 0x80)
4086             charset = charset_roman;
4087           else
4088             {
4089               /* BIG5 -> Big5 */
4090               if (c < 0xA1 || c > 0xFE)
4091                 goto invalid_code;
4092               ONE_MORE_BYTE (c1);
4093               if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4094                 goto invalid_code;
4095               c = c << 8 | c1;
4096               charset = charset_big5;
4097             }
4098           if (charset->id != charset_ascii
4099               && last_id != charset->id)
4100             {
4101               if (last_id != charset_ascii)
4102                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4103               last_id = charset->id;
4104               last_offset = char_offset;
4105             }
4106           CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4107         }
4108
4109       *charbuf++ = c;
4110       char_offset++;
4111       continue;
4112
4113     invalid_code:
4114       src = src_base;
4115       consumed_chars = consumed_chars_base;
4116       ONE_MORE_BYTE (c);
4117       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4118       char_offset++;
4119       coding->errors++;
4120     }
4121
4122  no_more_source:
4123   if (last_id != charset_ascii)
4124     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4125   coding->consumed_char += consumed_chars_base;
4126   coding->consumed = src_base - coding->source;
4127   coding->charbuf_used = charbuf - coding->charbuf;
4128 }
4129
4130 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4131    This function can encode charsets `ascii', `katakana-jisx0201',
4132    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4133    are sure that all these charsets are registered as official charset
4134    (i.e. do not have extended leading-codes).  Characters of other
4135    charsets are produced without any encoding.  If SJIS_P is 1, encode
4136    SJIS text, else encode BIG5 text.  */
4137
4138 static int
4139 encode_coding_sjis (coding)
4140      struct coding_system *coding;
4141 {
4142   int multibytep = coding->dst_multibyte;
4143   int *charbuf = coding->charbuf;
4144   int *charbuf_end = charbuf + coding->charbuf_used;
4145   unsigned char *dst = coding->destination + coding->produced;
4146   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4147   int safe_room = 4;
4148   int produced_chars = 0;
4149   Lisp_Object attrs, eol_type, charset_list, val;
4150   int ascii_compatible;
4151   struct charset *charset_roman, *charset_kanji, *charset_kana;
4152   int c;
4153
4154   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4155   val = charset_list;
4156   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4157   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4158   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
4159
4160   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4161
4162   while (charbuf < charbuf_end)
4163     {
4164       ASSURE_DESTINATION (safe_room);
4165       c = *charbuf++;
4166       /* Now encode the character C.  */
4167       if (ASCII_CHAR_P (c) && ascii_compatible)
4168         EMIT_ONE_ASCII_BYTE (c);
4169       else if (CHAR_BYTE8_P (c))
4170         {
4171           c = CHAR_TO_BYTE8 (c);
4172           EMIT_ONE_BYTE (c);
4173         }
4174       else
4175         {
4176           unsigned code;
4177           struct charset *charset = char_charset (c, charset_list, &code);
4178
4179           if (!charset)
4180             {
4181               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4182                 {
4183                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4184                   charset = CHARSET_FROM_ID (charset_ascii);
4185                 }
4186               else
4187                 {
4188                   c = coding->default_char;
4189                   charset = char_charset (c, charset_list, &code);
4190                 }
4191             }
4192           if (code == CHARSET_INVALID_CODE (charset))
4193             abort ();
4194           if (charset == charset_kanji)
4195             {
4196               int c1, c2;
4197               JIS_TO_SJIS (code);
4198               c1 = code >> 8, c2 = code & 0xFF;
4199               EMIT_TWO_BYTES (c1, c2);
4200             }
4201           else if (charset == charset_kana)
4202             EMIT_ONE_BYTE (code | 0x80);
4203           else
4204             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4205         }
4206     }
4207   coding->result = CODING_RESULT_SUCCESS;
4208   coding->produced_char += produced_chars;
4209   coding->produced = dst - coding->destination;
4210   return 0;
4211 }
4212
4213 static int
4214 encode_coding_big5 (coding)
4215      struct coding_system *coding;
4216 {
4217   int multibytep = coding->dst_multibyte;
4218   int *charbuf = coding->charbuf;
4219   int *charbuf_end = charbuf + coding->charbuf_used;
4220   unsigned char *dst = coding->destination + coding->produced;
4221   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4222   int safe_room = 4;
4223   int produced_chars = 0;
4224   Lisp_Object attrs, eol_type, charset_list, val;
4225   int ascii_compatible;
4226   struct charset *charset_roman, *charset_big5;
4227   int c;
4228
4229   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4230   val = charset_list;
4231   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4232   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4233   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4234
4235   while (charbuf < charbuf_end)
4236     {
4237       ASSURE_DESTINATION (safe_room);
4238       c = *charbuf++;
4239       /* Now encode the character C.  */
4240       if (ASCII_CHAR_P (c) && ascii_compatible)
4241         EMIT_ONE_ASCII_BYTE (c);
4242       else if (CHAR_BYTE8_P (c))
4243         {
4244           c = CHAR_TO_BYTE8 (c);
4245           EMIT_ONE_BYTE (c);
4246         }
4247       else
4248         {
4249           unsigned code;
4250           struct charset *charset = char_charset (c, charset_list, &code);
4251
4252           if (! charset)
4253             {
4254               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4255                 {
4256                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4257                   charset = CHARSET_FROM_ID (charset_ascii);
4258                 }
4259               else
4260                 {
4261                   c = coding->default_char;
4262                   charset = char_charset (c, charset_list, &code);
4263                 }
4264             }
4265           if (code == CHARSET_INVALID_CODE (charset))
4266             abort ();
4267           if (charset == charset_big5)
4268             {
4269               int c1, c2;
4270
4271               c1 = code >> 8, c2 = code & 0xFF;
4272               EMIT_TWO_BYTES (c1, c2);
4273             }
4274           else
4275             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4276         }
4277     }
4278   coding->result = CODING_RESULT_SUCCESS;
4279   coding->produced_char += produced_chars;
4280   coding->produced = dst - coding->destination;
4281   return 0;
4282 }
4283
4284 \f
4285 /*** 10. CCL handlers ***/
4286
4287 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4288    Check if a text is encoded in a coding system of which
4289    encoder/decoder are written in CCL program.  If it is, return
4290    CATEGORY_MASK_CCL, else return 0.  */
4291
4292 static int
4293 detect_coding_ccl (coding, detect_info)
4294      struct coding_system *coding;
4295      struct coding_detection_info *detect_info;
4296 {
4297   const unsigned char *src = coding->source, *src_base = src;
4298   const unsigned char *src_end = coding->source + coding->src_bytes;
4299   int multibytep = coding->src_multibyte;
4300   int consumed_chars = 0;
4301   int found = 0;
4302   unsigned char *valids = CODING_CCL_VALIDS (coding);
4303   int head_ascii = coding->head_ascii;
4304   Lisp_Object attrs;
4305
4306   detect_info->checked |= CATEGORY_MASK_CCL;
4307
4308   coding = &coding_categories[coding_category_ccl];
4309   attrs = CODING_ID_ATTRS (coding->id);
4310   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4311     src += head_ascii;
4312
4313   while (1)
4314     {
4315       int c;
4316       ONE_MORE_BYTE (c);
4317       if (! valids[c])
4318         break;
4319       if ((valids[c] > 1))
4320         found = CATEGORY_MASK_CCL;
4321     }
4322   detect_info->rejected |= CATEGORY_MASK_CCL;
4323   return 0;
4324
4325  no_more_source:
4326   detect_info->found |= found;
4327   return 1;
4328 }
4329
4330 static void
4331 decode_coding_ccl (coding)
4332      struct coding_system *coding;
4333 {
4334   const unsigned char *src = coding->source + coding->consumed;
4335   const unsigned char *src_end = coding->source + coding->src_bytes;
4336   int *charbuf = coding->charbuf;
4337   int *charbuf_end = charbuf + coding->charbuf_size;
4338   int consumed_chars = 0;
4339   int multibytep = coding->src_multibyte;
4340   struct ccl_program ccl;
4341   int source_charbuf[1024];
4342   int source_byteidx[1024];
4343   Lisp_Object attrs, eol_type, charset_list;
4344
4345   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4346   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4347
4348   while (src < src_end)
4349     {
4350       const unsigned char *p = src;
4351       int *source, *source_end;
4352       int i = 0;
4353
4354       if (multibytep)
4355         while (i < 1024 && p < src_end)
4356           {
4357             source_byteidx[i] = p - src;
4358             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4359           }
4360       else
4361         while (i < 1024 && p < src_end)
4362           source_charbuf[i++] = *p++;
4363
4364       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4365         ccl.last_block = 1;
4366
4367       source = source_charbuf;
4368       source_end = source + i;
4369       while (source < source_end)
4370         {
4371           ccl_driver (&ccl, source, charbuf,
4372                       source_end - source, charbuf_end - charbuf,
4373                       charset_list);
4374           source += ccl.consumed;
4375           charbuf += ccl.produced;
4376           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4377             break;
4378         }
4379       if (source < source_end)
4380         src += source_byteidx[source - source_charbuf];
4381       else
4382         src = p;
4383       consumed_chars += source - source_charbuf;
4384
4385       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4386           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4387         break;
4388     }
4389
4390   switch (ccl.status)
4391     {
4392     case CCL_STAT_SUSPEND_BY_SRC:
4393       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4394       break;
4395     case CCL_STAT_SUSPEND_BY_DST:
4396       break;
4397     case CCL_STAT_QUIT:
4398     case CCL_STAT_INVALID_CMD:
4399       coding->result = CODING_RESULT_INTERRUPT;
4400       break;
4401     default:
4402       coding->result = CODING_RESULT_SUCCESS;
4403       break;
4404     }
4405   coding->consumed_char += consumed_chars;
4406   coding->consumed = src - coding->source;
4407   coding->charbuf_used = charbuf - coding->charbuf;
4408 }
4409
4410 static int
4411 encode_coding_ccl (coding)
4412      struct coding_system *coding;
4413 {
4414   struct ccl_program ccl;
4415   int multibytep = coding->dst_multibyte;
4416   int *charbuf = coding->charbuf;
4417   int *charbuf_end = charbuf + coding->charbuf_used;
4418   unsigned char *dst = coding->destination + coding->produced;
4419   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4420   unsigned char *adjusted_dst_end = dst_end - 1;
4421   int destination_charbuf[1024];
4422   int i, produced_chars = 0;
4423   Lisp_Object attrs, eol_type, charset_list;
4424
4425   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4426   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4427
4428   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4429   ccl.dst_multibyte = coding->dst_multibyte;
4430
4431   while (charbuf < charbuf_end && dst < adjusted_dst_end)
4432     {
4433       int dst_bytes = dst_end - dst;
4434       if (dst_bytes > 1024)
4435         dst_bytes = 1024;
4436
4437       ccl_driver (&ccl, charbuf, destination_charbuf,
4438                   charbuf_end - charbuf, dst_bytes, charset_list);
4439       charbuf += ccl.consumed;
4440       if (multibytep)
4441         for (i = 0; i < ccl.produced; i++)
4442           EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4443       else
4444         {
4445           for (i = 0; i < ccl.produced; i++)
4446             *dst++ = destination_charbuf[i] & 0xFF;
4447           produced_chars += ccl.produced;
4448         }
4449     }
4450
4451   switch (ccl.status)
4452     {
4453     case CCL_STAT_SUSPEND_BY_SRC:
4454       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4455       break;
4456     case CCL_STAT_SUSPEND_BY_DST:
4457       coding->result = CODING_RESULT_INSUFFICIENT_DST;
4458       break;
4459     case CCL_STAT_QUIT:
4460     case CCL_STAT_INVALID_CMD:
4461       coding->result = CODING_RESULT_INTERRUPT;
4462       break;
4463     default:
4464       coding->result = CODING_RESULT_SUCCESS;
4465       break;
4466     }
4467
4468   coding->produced_char += produced_chars;
4469   coding->produced = dst - coding->destination;
4470   return 0;
4471 }
4472
4473
4474 \f
4475 /*** 10, 11. no-conversion handlers ***/
4476
4477 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
4478
4479 static void
4480 decode_coding_raw_text (coding)
4481      struct coding_system *coding;
4482 {
4483   coding->chars_at_source = 1;
4484   coding->consumed_char = 0;
4485   coding->consumed = 0;
4486   coding->result = CODING_RESULT_SUCCESS;
4487 }
4488
4489 static int
4490 encode_coding_raw_text (coding)
4491      struct coding_system *coding;
4492 {
4493   int multibytep = coding->dst_multibyte;
4494   int *charbuf = coding->charbuf;
4495   int *charbuf_end = coding->charbuf + coding->charbuf_used;
4496   unsigned char *dst = coding->destination + coding->produced;
4497   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4498   int produced_chars = 0;
4499   int c;
4500
4501   if (multibytep)
4502     {
4503       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4504
4505       if (coding->src_multibyte)
4506         while (charbuf < charbuf_end)
4507           {
4508             ASSURE_DESTINATION (safe_room);
4509             c = *charbuf++;
4510             if (ASCII_CHAR_P (c))
4511               EMIT_ONE_ASCII_BYTE (c);
4512             else if (CHAR_BYTE8_P (c))
4513               {
4514                 c = CHAR_TO_BYTE8 (c);
4515                 EMIT_ONE_BYTE (c);
4516               }
4517             else
4518               {
4519                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4520
4521                 CHAR_STRING_ADVANCE (c, p1);
4522                 while (p0 < p1)
4523                   {
4524                     EMIT_ONE_BYTE (*p0);
4525                     p0++;
4526                   }
4527               }
4528           }
4529       else
4530         while (charbuf < charbuf_end)
4531           {
4532             ASSURE_DESTINATION (safe_room);
4533             c = *charbuf++;
4534             EMIT_ONE_BYTE (c);
4535           }
4536     }
4537   else
4538     {
4539       if (coding->src_multibyte)
4540         {
4541           int safe_room = MAX_MULTIBYTE_LENGTH;
4542
4543           while (charbuf < charbuf_end)
4544             {
4545               ASSURE_DESTINATION (safe_room);
4546               c = *charbuf++;
4547               if (ASCII_CHAR_P (c))
4548                 *dst++ = c;
4549               else if (CHAR_BYTE8_P (c))
4550                 *dst++ = CHAR_TO_BYTE8 (c);
4551               else
4552                 CHAR_STRING_ADVANCE (c, dst);
4553               produced_chars++;
4554             }
4555         }
4556       else
4557         {
4558           ASSURE_DESTINATION (charbuf_end - charbuf);
4559           while (charbuf < charbuf_end && dst < dst_end)
4560             *dst++ = *charbuf++;
4561           produced_chars = dst - (coding->destination + coding->dst_bytes);
4562         }
4563     }
4564   coding->result = CODING_RESULT_SUCCESS;
4565   coding->produced_char += produced_chars;
4566   coding->produced = dst - coding->destination;
4567   return 0;
4568 }
4569
4570 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4571    Check if a text is encoded in a charset-based coding system.  If it
4572    is, return 1, else return 0.  */
4573
4574 static int
4575 detect_coding_charset (coding, detect_info)
4576      struct coding_system *coding;
4577      struct coding_detection_info *detect_info;
4578 {
4579   const unsigned char *src = coding->source, *src_base = src;
4580   const unsigned char *src_end = coding->source + coding->src_bytes;
4581   int multibytep = coding->src_multibyte;
4582   int consumed_chars = 0;
4583   Lisp_Object attrs, valids;
4584   int found = 0;
4585
4586   detect_info->checked |= CATEGORY_MASK_CHARSET;
4587
4588   coding = &coding_categories[coding_category_charset];
4589   attrs = CODING_ID_ATTRS (coding->id);
4590   valids = AREF (attrs, coding_attr_charset_valids);
4591
4592   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4593     src += coding->head_ascii;
4594
4595   while (1)
4596     {
4597       int c;
4598
4599       ONE_MORE_BYTE (c);
4600       if (NILP (AREF (valids, c)))
4601         break;
4602       if (c >= 0x80)
4603         found = CATEGORY_MASK_CHARSET;
4604     }
4605   detect_info->rejected |= CATEGORY_MASK_CHARSET;
4606   return 0;
4607
4608  no_more_source:
4609   detect_info->found |= found;
4610   return 1;
4611 }
4612
4613 static void
4614 decode_coding_charset (coding)
4615      struct coding_system *coding;
4616 {
4617   const unsigned char *src = coding->source + coding->consumed;
4618   const unsigned char *src_end = coding->source + coding->src_bytes;
4619   const unsigned char *src_base;
4620   int *charbuf = coding->charbuf;
4621   int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4622   int consumed_chars = 0, consumed_chars_base;
4623   int multibytep = coding->src_multibyte;
4624   Lisp_Object attrs, eol_type, charset_list, valids;
4625   int char_offset = coding->produced_char;
4626   int last_offset = char_offset;
4627   int last_id = charset_ascii;
4628
4629   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4630   valids = AREF (attrs, coding_attr_charset_valids);
4631
4632   while (1)
4633     {
4634       int c;
4635
4636       src_base = src;
4637       consumed_chars_base = consumed_chars;
4638
4639       if (charbuf >= charbuf_end)
4640         break;
4641
4642       ONE_MORE_BYTE (c);
4643       if (c == '\r')
4644         {
4645           /* Here we assume that no charset maps '\r' to something
4646              else.  */
4647           if (EQ (eol_type, Qdos))
4648             {
4649               if (src == src_end)
4650                 {
4651                   coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4652                   goto no_more_source;
4653                 }
4654               if (*src == '\n')
4655                 ONE_MORE_BYTE (c);
4656             }
4657           else if (EQ (eol_type, Qmac))
4658             c = '\n';
4659         }
4660       else
4661         {
4662           Lisp_Object val;
4663           struct charset *charset;
4664           int dim;
4665           int len = 1;
4666           unsigned code = c;
4667
4668           val = AREF (valids, c);
4669           if (NILP (val))
4670             goto invalid_code;
4671           if (INTEGERP (val))
4672             {
4673               charset = CHARSET_FROM_ID (XFASTINT (val));
4674               dim = CHARSET_DIMENSION (charset);
4675               while (len < dim)
4676                 {
4677                   ONE_MORE_BYTE (c);
4678                   code = (code << 8) | c;
4679                   len++;
4680                 }
4681               CODING_DECODE_CHAR (coding, src, src_base, src_end,
4682                                   charset, code, c);
4683             }
4684           else
4685             {
4686               /* VAL is a list of charset IDs.  It is assured that the
4687                  list is sorted by charset dimensions (smaller one
4688                  comes first).  */
4689               while (CONSP (val))
4690                 {
4691                   charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4692                   dim = CHARSET_DIMENSION (charset);
4693                   while (len < dim)
4694                     {
4695                       ONE_MORE_BYTE (c);
4696                       code = (code << 8) | c;
4697                       len++;
4698                     }
4699                   CODING_DECODE_CHAR (coding, src, src_base,
4700                                       src_end, charset, code, c);
4701                   if (c >= 0)
4702                     break;
4703                   val = XCDR (val);
4704                 }
4705             }
4706           if (c < 0)
4707             goto invalid_code;
4708           if (charset->id != charset_ascii
4709               && last_id != charset->id)
4710             {
4711               if (last_id != charset_ascii)
4712                 ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4713               last_id = charset->id;
4714               last_offset = char_offset;
4715             }
4716         }
4717       *charbuf++ = c;
4718       char_offset++;
4719       continue;
4720
4721     invalid_code:
4722       src = src_base;
4723       consumed_chars = consumed_chars_base;
4724       ONE_MORE_BYTE (c);
4725       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4726       char_offset++;
4727       coding->errors++;
4728     }
4729
4730  no_more_source:
4731   if (last_id != charset_ascii)
4732     ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
4733   coding->consumed_char += consumed_chars_base;
4734   coding->consumed = src_base - coding->source;
4735   coding->charbuf_used = charbuf - coding->charbuf;
4736 }
4737
4738 static int
4739 encode_coding_charset (coding)
4740      struct coding_system *coding;
4741 {
4742   int multibytep = coding->dst_multibyte;
4743   int *charbuf = coding->charbuf;
4744   int *charbuf_end = charbuf + coding->charbuf_used;
4745   unsigned char *dst = coding->destination + coding->produced;
4746   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4747   int safe_room = MAX_MULTIBYTE_LENGTH;
4748   int produced_chars = 0;
4749   Lisp_Object attrs, eol_type, charset_list;
4750   int ascii_compatible;
4751   int c;
4752
4753   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4754   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4755
4756   while (charbuf < charbuf_end)
4757     {
4758       struct charset *charset;
4759       unsigned code;
4760
4761       ASSURE_DESTINATION (safe_room);
4762       c = *charbuf++;
4763       if (ascii_compatible && ASCII_CHAR_P (c))
4764         EMIT_ONE_ASCII_BYTE (c);
4765       else if (CHAR_BYTE8_P (c))
4766         {
4767           c = CHAR_TO_BYTE8 (c);
4768           EMIT_ONE_BYTE (c);
4769         }
4770       else
4771         {
4772           charset = char_charset (c, charset_list, &code);
4773           if (charset)
4774             {
4775               if (CHARSET_DIMENSION (charset) == 1)
4776                 EMIT_ONE_BYTE (code);
4777               else if (CHARSET_DIMENSION (charset) == 2)
4778                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4779               else if (CHARSET_DIMENSION (charset) == 3)
4780                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4781               else
4782                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4783                                  (code >> 8) & 0xFF, code & 0xFF);
4784             }
4785           else
4786             {
4787               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4788                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4789               else
4790                 c = coding->default_char;
4791               EMIT_ONE_BYTE (c);
4792             }
4793         }
4794     }
4795
4796   coding->result = CODING_RESULT_SUCCESS;
4797   coding->produced_char += produced_chars;
4798   coding->produced = dst - coding->destination;
4799   return 0;
4800 }
4801
4802 \f
4803 /*** 7. C library functions ***/
4804
4805 /* Setup coding context CODING from information about CODING_SYSTEM.
4806    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
4807    CODING_SYSTEM is invalid, signal an error.  */
4808
4809 void
4810 setup_coding_system (coding_system, coding)
4811      Lisp_Object coding_system;
4812      struct coding_system *coding;
4813 {
4814   Lisp_Object attrs;
4815   Lisp_Object eol_type;
4816   Lisp_Object coding_type;
4817   Lisp_Object val;
4818
4819   if (NILP (coding_system))
4820     coding_system = Qno_conversion;
4821
4822   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4823
4824   attrs = CODING_ID_ATTRS (coding->id);
4825   eol_type = CODING_ID_EOL_TYPE (coding->id);
4826
4827   coding->mode = 0;
4828   coding->head_ascii = -1;
4829   coding->common_flags
4830     = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4831   if (! NILP (CODING_ATTR_POST_READ (attrs)))
4832     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
4833   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
4834     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
4835   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
4836     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
4837
4838   val = CODING_ATTR_SAFE_CHARSETS (attrs);
4839   coding->max_charset_id = SCHARS (val) - 1;
4840   coding->safe_charsets = (char *) SDATA (val);
4841   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4842
4843   coding_type = CODING_ATTR_TYPE (attrs);
4844   if (EQ (coding_type, Qundecided))
4845     {
4846       coding->detector = NULL;
4847       coding->decoder = decode_coding_raw_text;
4848       coding->encoder = encode_coding_raw_text;
4849       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4850     }
4851   else if (EQ (coding_type, Qiso_2022))
4852     {
4853       int i;
4854       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4855
4856       /* Invoke graphic register 0 to plane 0.  */
4857       CODING_ISO_INVOCATION (coding, 0) = 0;
4858       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
4859       CODING_ISO_INVOCATION (coding, 1)
4860         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4861       /* Setup the initial status of designation.  */
4862       for (i = 0; i < 4; i++)
4863         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4864       /* Not single shifting initially.  */
4865       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4866       /* Beginning of buffer should also be regarded as bol. */
4867       CODING_ISO_BOL (coding) = 1;
4868       coding->detector = detect_coding_iso_2022;
4869       coding->decoder = decode_coding_iso_2022;
4870       coding->encoder = encode_coding_iso_2022;
4871       if (flags & CODING_ISO_FLAG_SAFE)
4872         coding->mode |= CODING_MODE_SAFE_ENCODING;
4873       coding->common_flags
4874         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4875             | CODING_REQUIRE_FLUSHING_MASK);
4876       if (flags & CODING_ISO_FLAG_COMPOSITION)
4877         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4878       if (flags & CODING_ISO_FLAG_DESIGNATION)
4879         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
4880       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4881         {
4882           setup_iso_safe_charsets (attrs);
4883           val = CODING_ATTR_SAFE_CHARSETS (attrs);
4884           coding->max_charset_id = SCHARS (val) - 1;
4885           coding->safe_charsets = (char *) SDATA (val);
4886         }
4887       CODING_ISO_FLAGS (coding) = flags;
4888     }
4889   else if (EQ (coding_type, Qcharset))
4890     {
4891       coding->detector = detect_coding_charset;
4892       coding->decoder = decode_coding_charset;
4893       coding->encoder = encode_coding_charset;
4894       coding->common_flags
4895         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4896     }
4897   else if (EQ (coding_type, Qutf_8))
4898     {
4899       coding->detector = detect_coding_utf_8;
4900       coding->decoder = decode_coding_utf_8;
4901       coding->encoder = encode_coding_utf_8;
4902       coding->common_flags
4903         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4904     }
4905   else if (EQ (coding_type, Qutf_16))
4906     {
4907       val = AREF (attrs, coding_attr_utf_16_bom);
4908       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4909                                     : EQ (val, Qt) ? utf_16_with_bom
4910                                     : utf_16_without_bom);
4911       val = AREF (attrs, coding_attr_utf_16_endian);
4912       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
4913                                        : utf_16_little_endian);
4914       CODING_UTF_16_SURROGATE (coding) = 0;
4915       coding->detector = detect_coding_utf_16;
4916       coding->decoder = decode_coding_utf_16;
4917       coding->encoder = encode_coding_utf_16;
4918       coding->common_flags
4919         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4920       if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
4921         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4922     }
4923   else if (EQ (coding_type, Qccl))
4924     {
4925       coding->detector = detect_coding_ccl;
4926       coding->decoder = decode_coding_ccl;
4927       coding->encoder = encode_coding_ccl;
4928       coding->common_flags
4929         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4930             | CODING_REQUIRE_FLUSHING_MASK);
4931     }
4932   else if (EQ (coding_type, Qemacs_mule))
4933     {
4934       coding->detector = detect_coding_emacs_mule;
4935       coding->decoder = decode_coding_emacs_mule;
4936       coding->encoder = encode_coding_emacs_mule;
4937       coding->common_flags
4938         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4939       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4940           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4941         {
4942           Lisp_Object tail, safe_charsets;
4943           int max_charset_id = 0;
4944
4945           for (tail = Vemacs_mule_charset_list; CONSP (tail);
4946                tail = XCDR (tail))
4947             if (max_charset_id < XFASTINT (XCAR (tail)))
4948               max_charset_id = XFASTINT (XCAR (tail));
4949           safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4950                                         make_number (255));
4951           for (tail = Vemacs_mule_charset_list; CONSP (tail);
4952                tail = XCDR (tail))
4953             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
4954           coding->max_charset_id = max_charset_id;
4955           coding->safe_charsets = (char *) SDATA (safe_charsets);
4956         }
4957     }
4958   else if (EQ (coding_type, Qshift_jis))
4959     {
4960       coding->detector = detect_coding_sjis;
4961       coding->decoder = decode_coding_sjis;
4962       coding->encoder = encode_coding_sjis;
4963       coding->common_flags
4964         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4965     }
4966   else if (EQ (coding_type, Qbig5))
4967     {
4968       coding->detector = detect_coding_big5;
4969       coding->decoder = decode_coding_big5;
4970       coding->encoder = encode_coding_big5;
4971       coding->common_flags
4972         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4973     }
4974   else                          /* EQ (coding_type, Qraw_text) */
4975     {
4976       coding->detector = NULL;
4977       coding->decoder = decode_coding_raw_text;
4978       coding->encoder = encode_coding_raw_text;
4979     }
4980
4981   return;
4982 }
4983
4984 /* Return raw-text or one of its subsidiaries that has the same
4985    eol_type as CODING-SYSTEM.  */
4986
4987 Lisp_Object
4988 raw_text_coding_system (coding_system)
4989      Lisp_Object coding_system;
4990 {
4991   Lisp_Object spec, attrs;
4992   Lisp_Object eol_type, raw_text_eol_type;
4993
4994   if (NILP (coding_system))
4995     return Qraw_text;
4996   spec = CODING_SYSTEM_SPEC (coding_system);
4997   attrs = AREF (spec, 0);
4998
4999   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5000     return coding_system;
5001
5002   eol_type = AREF (spec, 2);
5003   if (VECTORP (eol_type))
5004     return Qraw_text;
5005   spec = CODING_SYSTEM_SPEC (Qraw_text);
5006   raw_text_eol_type = AREF (spec, 2);
5007   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5008           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5009           : AREF (raw_text_eol_type, 2));
5010 }
5011
5012
5013 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5014    does, return one of the subsidiary that has the same eol-spec as
5015    PARENT.  Otherwise, return CODING_SYSTEM.  */
5016
5017 Lisp_Object
5018 coding_inherit_eol_type (coding_system, parent)
5019      Lisp_Object coding_system, parent;
5020 {
5021   Lisp_Object spec, attrs, eol_type;
5022
5023   if (NILP (coding_system))
5024     coding_system = Qraw_text;
5025   spec = CODING_SYSTEM_SPEC (coding_system);
5026   attrs = AREF (spec, 0);
5027   eol_type = AREF (spec, 2);
5028   if (VECTORP (eol_type)
5029       && ! NILP (parent))
5030     {
5031       Lisp_Object parent_spec;
5032       Lisp_Object parent_eol_type;
5033
5034       parent_spec
5035         = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
5036       parent_eol_type = AREF (parent_spec, 2);
5037       if (EQ (parent_eol_type, Qunix))
5038         coding_system = AREF (eol_type, 0);
5039       else if (EQ (parent_eol_type, Qdos))
5040         coding_system = AREF (eol_type, 1);
5041       else if (EQ (parent_eol_type, Qmac))
5042         coding_system = AREF (eol_type, 2);
5043     }
5044   return coding_system;
5045 }
5046
5047 /* Emacs has a mechanism to automatically detect a coding system if it
5048    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
5049    it's impossible to distinguish some coding systems accurately
5050    because they use the same range of codes.  So, at first, coding
5051    systems are categorized into 7, those are:
5052
5053    o coding-category-emacs-mule
5054
5055         The category for a coding system which has the same code range
5056         as Emacs' internal format.  Assigned the coding-system (Lisp
5057         symbol) `emacs-mule' by default.
5058
5059    o coding-category-sjis
5060
5061         The category for a coding system which has the same code range
5062         as SJIS.  Assigned the coding-system (Lisp
5063         symbol) `japanese-shift-jis' by default.
5064
5065    o coding-category-iso-7
5066
5067         The category for a coding system which has the same code range
5068         as ISO2022 of 7-bit environment.  This doesn't use any locking
5069         shift and single shift functions.  This can encode/decode all
5070         charsets.  Assigned the coding-system (Lisp symbol)
5071         `iso-2022-7bit' by default.
5072
5073    o coding-category-iso-7-tight
5074
5075         Same as coding-category-iso-7 except that this can
5076         encode/decode only the specified charsets.
5077
5078    o coding-category-iso-8-1
5079
5080         The category for a coding system which has the same code range
5081         as ISO2022 of 8-bit environment and graphic plane 1 used only
5082         for DIMENSION1 charset.  This doesn't use any locking shift
5083         and single shift functions.  Assigned the coding-system (Lisp
5084         symbol) `iso-latin-1' by default.
5085
5086    o coding-category-iso-8-2
5087
5088         The category for a coding system which has the same code range
5089         as ISO2022 of 8-bit environment and graphic plane 1 used only
5090         for DIMENSION2 charset.  This doesn't use any locking shift
5091         and single shift functions.  Assigned the coding-system (Lisp
5092         symbol) `japanese-iso-8bit' by default.
5093
5094    o coding-category-iso-7-else
5095
5096         The category for a coding system which has the same code range
5097         as ISO2022 of 7-bit environemnt but uses locking shift or
5098         single shift functions.  Assigned the coding-system (Lisp
5099         symbol) `iso-2022-7bit-lock' by default.
5100
5101    o coding-category-iso-8-else
5102
5103         The category for a coding system which has the same code range
5104         as ISO2022 of 8-bit environemnt but uses locking shift or
5105         single shift functions.  Assigned the coding-system (Lisp
5106         symbol) `iso-2022-8bit-ss2' by default.
5107
5108    o coding-category-big5
5109
5110         The category for a coding system which has the same code range
5111         as BIG5.  Assigned the coding-system (Lisp symbol)
5112         `cn-big5' by default.
5113
5114    o coding-category-utf-8
5115
5116         The category for a coding system which has the same code range
5117         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
5118         symbol) `utf-8' by default.
5119
5120    o coding-category-utf-16-be
5121
5122         The category for a coding system in which a text has an
5123         Unicode signature (cf. Unicode Standard) in the order of BIG
5124         endian at the head.  Assigned the coding-system (Lisp symbol)
5125         `utf-16-be' by default.
5126
5127    o coding-category-utf-16-le
5128
5129         The category for a coding system in which a text has an
5130         Unicode signature (cf. Unicode Standard) in the order of
5131         LITTLE endian at the head.  Assigned the coding-system (Lisp
5132         symbol) `utf-16-le' by default.
5133
5134    o coding-category-ccl
5135
5136         The category for a coding system of which encoder/decoder is
5137         written in CCL programs.  The default value is nil, i.e., no
5138         coding system is assigned.
5139
5140    o coding-category-binary
5141
5142         The category for a coding system not categorized in any of the
5143         above.  Assigned the coding-system (Lisp symbol)
5144         `no-conversion' by default.
5145
5146    Each of them is a Lisp symbol and the value is an actual
5147    `coding-system's (this is also a Lisp symbol) assigned by a user.
5148    What Emacs does actually is to detect a category of coding system.
5149    Then, it uses a `coding-system' assigned to it.  If Emacs can't
5150    decide only one possible category, it selects a category of the
5151    highest priority.  Priorities of categories are also specified by a
5152    user in a Lisp variable `coding-category-list'.
5153
5154 */
5155
5156 #define EOL_SEEN_NONE   0
5157 #define EOL_SEEN_LF     1
5158 #define EOL_SEEN_CR     2
5159 #define EOL_SEEN_CRLF   4
5160
5161 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5162    SOURCE is encoded.  If CATEGORY is one of
5163    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5164    two-byte, else they are encoded by one-byte.
5165
5166    Return one of EOL_SEEN_XXX.  */
5167
5168 #define MAX_EOL_CHECK_COUNT 3
5169
5170 static int
5171 detect_eol (source, src_bytes, category)
5172      unsigned char *source;
5173      EMACS_INT src_bytes;
5174      enum coding_category category;
5175 {
5176   unsigned char *src = source, *src_end = src + src_bytes;
5177   unsigned char c;
5178   int total  = 0;
5179   int eol_seen = EOL_SEEN_NONE;
5180
5181   if ((1 << category) & CATEGORY_MASK_UTF_16)
5182     {
5183       int msb, lsb;
5184
5185       msb = category == (coding_category_utf_16_le
5186                          | coding_category_utf_16_le_nosig);
5187       lsb = 1 - msb;
5188
5189       while (src + 1 < src_end)
5190         {
5191           c = src[lsb];
5192           if (src[msb] == 0 && (c == '\n' || c == '\r'))
5193             {
5194               int this_eol;
5195
5196               if (c == '\n')
5197                 this_eol = EOL_SEEN_LF;
5198               else if (src + 3 >= src_end
5199                        || src[msb + 2] != 0
5200                        || src[lsb + 2] != '\n')
5201                 this_eol = EOL_SEEN_CR;
5202               else
5203                 this_eol = EOL_SEEN_CRLF;
5204
5205               if (eol_seen == EOL_SEEN_NONE)
5206                 /* This is the first end-of-line.  */
5207                 eol_seen = this_eol;
5208               else if (eol_seen != this_eol)
5209                 {
5210                   /* The found type is different from what found before.  */
5211                   eol_seen = EOL_SEEN_LF;
5212                   break;
5213                 }
5214               if (++total == MAX_EOL_CHECK_COUNT)
5215                 break;
5216             }
5217           src += 2;
5218         }
5219     }
5220   else
5221     {
5222       while (src < src_end)
5223         {
5224           c = *src++;
5225           if (c == '\n' || c == '\r')
5226             {
5227               int this_eol;
5228
5229               if (c == '\n')
5230                 this_eol = EOL_SEEN_LF;
5231               else if (src >= src_end || *src != '\n')
5232                 this_eol = EOL_SEEN_CR;
5233               else
5234                 this_eol = EOL_SEEN_CRLF, src++;
5235
5236               if (eol_seen == EOL_SEEN_NONE)
5237                 /* This is the first end-of-line.  */
5238                 eol_seen = this_eol;
5239               else if (eol_seen != this_eol)
5240                 {
5241                   /* The found type is different from what found before.  */
5242                   eol_seen = EOL_SEEN_LF;
5243                   break;
5244                 }
5245               if (++total == MAX_EOL_CHECK_COUNT)
5246                 break;
5247             }
5248         }
5249     }
5250   return eol_seen;
5251 }
5252
5253
5254 static void
5255 adjust_coding_eol_type (coding, eol_seen)
5256      struct coding_system *coding;
5257      int eol_seen;
5258 {
5259   Lisp_Object eol_type;
5260
5261   eol_type = CODING_ID_EOL_TYPE (coding->id);
5262   if (eol_seen & EOL_SEEN_LF)
5263     coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5264   else if (eol_seen & EOL_SEEN_CRLF)
5265     coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5266   else if (eol_seen & EOL_SEEN_CR)
5267     coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5268 }
5269
5270 /* Detect how a text specified in CODING is encoded.  If a coding
5271    system is detected, update fields of CODING by the detected coding
5272    system.  */
5273
5274 void
5275 detect_coding (coding)
5276      struct coding_system *coding;
5277 {
5278   const unsigned char *src, *src_end;
5279   Lisp_Object attrs, coding_type;
5280
5281   coding->consumed = coding->consumed_char = 0;
5282   coding->produced = coding->produced_char = 0;
5283   coding_set_source (coding);
5284
5285   src_end = coding->source + coding->src_bytes;
5286
5287   /* If we have not yet decided the text encoding type, detect it
5288      now.  */
5289   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5290     {
5291       int c, i;
5292
5293       for (src = coding->source; src < src_end; src++)
5294         {
5295           c = *src;
5296           if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
5297                                         || c == ISO_CODE_SI
5298                                         || c == ISO_CODE_SO)))
5299             break;
5300         }
5301       coding->head_ascii = src - (coding->source + coding->consumed);
5302
5303       if (coding->head_ascii < coding->src_bytes)
5304         {
5305           struct coding_detection_info detect_info;
5306           enum coding_category category;
5307           struct coding_system *this;
5308
5309           detect_info.checked = detect_info.found = detect_info.rejected = 0;
5310           for (i = 0; i < coding_category_raw_text; i++)
5311             {
5312               category = coding_priorities[i];
5313               this = coding_categories + category;
5314               if (this->id < 0)
5315                 {
5316                   /* No coding system of this category is defined.  */
5317                   detect_info.rejected |= (1 << category);
5318                 }
5319               else if (category >= coding_category_raw_text)
5320                 continue;
5321               else if (detect_info.checked & (1 << category))
5322                 {
5323                   if (detect_info.found & (1 << category))
5324                     break;
5325                 }
5326               else if ((*(this->detector)) (coding, &detect_info)
5327                        && detect_info.found & (1 << category))
5328                 break;
5329             }
5330           if (i < coding_category_raw_text)
5331             setup_coding_system (CODING_ID_NAME (this->id), coding);
5332           else if (detect_info.rejected == CATEGORY_MASK_ANY)
5333             setup_coding_system (Qraw_text, coding);
5334           else if (detect_info.rejected)
5335             for (i = 0; i < coding_category_raw_text; i++)
5336               if (! (detect_info.rejected & (1 << coding_priorities[i])))
5337                 {
5338                   this = coding_categories + coding_priorities[i];
5339                   setup_coding_system (CODING_ID_NAME (this->id), coding);
5340                   break;
5341                 }
5342         }
5343     }
5344   else if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qutf_16))
5345     {
5346       Lisp_Object coding_systems;
5347       struct coding_detection_info detect_info;
5348
5349       coding_systems
5350         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5351       detect_info.found = detect_info.rejected = 0;
5352       if (CONSP (coding_systems)
5353           && detect_coding_utf_16 (coding, &detect_info)
5354           && (detect_info.found & (CATEGORY_MASK_UTF_16_LE
5355                                    | CATEGORY_MASK_UTF_16_BE)))
5356         {
5357           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5358             setup_coding_system (XCAR (coding_systems), coding);
5359           else
5360             setup_coding_system (XCDR (coding_systems), coding);
5361         }
5362     }
5363
5364   attrs = CODING_ID_ATTRS (coding->id);
5365   coding_type = CODING_ATTR_TYPE (attrs);
5366
5367   /* If we have not yet decided the EOL type, detect it now.  But, the
5368      detection is impossible for a CCL based coding system, in which
5369      case, we detct the EOL type after decoding.  */
5370   if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
5371       && ! EQ (coding_type, Qccl))
5372     {
5373       int eol_seen = detect_eol (coding->source, coding->src_bytes,
5374                                  XINT (CODING_ATTR_CATEGORY (attrs)));
5375
5376       if (eol_seen != EOL_SEEN_NONE)
5377         adjust_coding_eol_type (coding, eol_seen);
5378     }
5379 }
5380
5381
5382 static void
5383 decode_eol (coding)
5384      struct coding_system *coding;
5385 {
5386   if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
5387     {
5388       unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5389       unsigned char *pend = p + coding->produced;
5390       int eol_seen = EOL_SEEN_NONE;
5391
5392       for (; p < pend; p++)
5393         {
5394           if (*p == '\n')
5395             eol_seen |= EOL_SEEN_LF;
5396           else if (*p == '\r')
5397             {
5398               if (p + 1 < pend && *(p + 1) == '\n')
5399                 {
5400                   eol_seen |= EOL_SEEN_CRLF;
5401                   p++;
5402                 }
5403               else
5404                 eol_seen |= EOL_SEEN_CR;
5405             }
5406         }
5407       if (eol_seen != EOL_SEEN_NONE)
5408         adjust_coding_eol_type (coding, eol_seen);
5409     }
5410
5411   if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
5412     {
5413       unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5414       unsigned char *pend = p + coding->produced;
5415
5416       for (; p < pend; p++)
5417         if (*p == '\r')
5418           *p = '\n';
5419     }
5420   else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
5421     {
5422       unsigned char *p, *pbeg, *pend;
5423       Lisp_Object undo_list;
5424
5425       move_gap_both (coding->dst_pos + coding->produced_char,
5426                      coding->dst_pos_byte + coding->produced);
5427       undo_list = current_buffer->undo_list;
5428       current_buffer->undo_list = Qt;
5429       del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, 0);
5430       current_buffer->undo_list = undo_list;
5431       pbeg = GPT_ADDR;
5432       pend = pbeg + coding->produced;
5433
5434       for (p = pend - 1; p >= pbeg; p--)
5435         if (*p == '\r')
5436           {
5437             safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
5438             pend--;
5439           }
5440       coding->produced_char -= coding->produced - (pend - pbeg);
5441       coding->produced = pend - pbeg;
5442       insert_from_gap (coding->produced_char, coding->produced);
5443     }
5444 }
5445
5446 static void
5447 translate_chars (coding, table)
5448      struct coding_system *coding;
5449      Lisp_Object table;
5450 {
5451   int *charbuf = coding->charbuf;
5452   int *charbuf_end = charbuf + coding->charbuf_used;
5453   int c;
5454
5455   if (coding->chars_at_source)
5456     return;
5457
5458   while (charbuf < charbuf_end)
5459     {
5460       c = *charbuf;
5461       if (c < 0)
5462         charbuf += c;
5463       else
5464         *charbuf++ = translate_char (table, c);
5465     }
5466 }
5467
5468 static int
5469 produce_chars (coding)
5470      struct coding_system *coding;
5471 {
5472   unsigned char *dst = coding->destination + coding->produced;
5473   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5474   int produced;
5475   int produced_chars = 0;
5476
5477   if (! coding->chars_at_source)
5478     {
5479       /* Characters are in coding->charbuf.  */
5480       int *buf = coding->charbuf;
5481       int *buf_end = buf + coding->charbuf_used;
5482       unsigned char *adjusted_dst_end;
5483
5484       if (BUFFERP (coding->src_object)
5485           && EQ (coding->src_object, coding->dst_object))
5486         dst_end = ((unsigned char *) coding->source) + coding->consumed;
5487       adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5488
5489       while (buf < buf_end)
5490         {
5491           int c = *buf++;
5492
5493           if (dst >= adjusted_dst_end)
5494             {
5495               dst = alloc_destination (coding,
5496                                        buf_end - buf + MAX_MULTIBYTE_LENGTH,
5497                                        dst);
5498               dst_end = coding->destination + coding->dst_bytes;
5499               adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5500             }
5501           if (c >= 0)
5502             {
5503               if (coding->dst_multibyte
5504                   || ! CHAR_BYTE8_P (c))
5505                 CHAR_STRING_ADVANCE (c, dst);
5506               else
5507                 *dst++ = CHAR_TO_BYTE8 (c);
5508               produced_chars++;
5509             }
5510           else
5511             /* This is an annotation datum.  (-C) is the length of
5512                it.  */
5513             buf += -c - 1;
5514         }
5515     }
5516   else
5517     {
5518       const unsigned char *src = coding->source;
5519       const unsigned char *src_end = src + coding->src_bytes;
5520       Lisp_Object eol_type;
5521
5522       eol_type = CODING_ID_EOL_TYPE (coding->id);
5523
5524       if (coding->src_multibyte != coding->dst_multibyte)
5525         {
5526           if (coding->src_multibyte)
5527             {
5528               int multibytep = 1;
5529               int consumed_chars;
5530
5531               while (1)
5532                 {
5533                   const unsigned char *src_base = src;
5534                   int c;
5535
5536                   ONE_MORE_BYTE (c);
5537                   if (c == '\r')
5538                     {
5539                       if (EQ (eol_type, Qdos))
5540                         {
5541                           if (src == src_end)
5542                             {
5543                               coding->result = CODING_RESULT_INSUFFICIENT_SRC;
5544                               goto no_more_source;
5545                             }
5546                           if (*src == '\n')
5547                             c = *src++;
5548                         }
5549                       else if (EQ (eol_type, Qmac))
5550                         c = '\n';
5551                     }
5552                   if (dst == dst_end)
5553                     {
5554                       coding->consumed = src - coding->source;
5555
5556                     if (EQ (coding->src_object, coding->dst_object))
5557                       dst_end = (unsigned char *) src;
5558                     if (dst == dst_end)
5559                       {
5560                         dst = alloc_destination (coding, src_end - src + 1,
5561                                                  dst);
5562                         dst_end = coding->destination + coding->dst_bytes;
5563                         coding_set_source (coding);
5564                         src = coding->source + coding->consumed;
5565                         src_end = coding->source + coding->src_bytes;
5566                       }
5567                     }
5568                   *dst++ = c;
5569                   produced_chars++;
5570                 }
5571             no_more_source:
5572               ;
5573             }
5574           else
5575             while (src < src_end)
5576               {
5577                 int multibytep = 1;
5578                 int c = *src++;
5579
5580                 if (c == '\r')
5581                   {
5582                     if (EQ (eol_type, Qdos))
5583                       {
5584                         if (src < src_end
5585                             && *src == '\n')
5586                           c = *src++;
5587                       }
5588                     else if (EQ (eol_type, Qmac))
5589                       c = '\n';
5590                   }
5591                 if (dst >= dst_end - 1)
5592                   {
5593                     coding->consumed = src - coding->source;
5594
5595                     if (EQ (coding->src_object, coding->dst_object))
5596                       dst_end = (unsigned char *) src;
5597                     if (dst >= dst_end - 1)
5598                       {
5599                         dst = alloc_destination (coding, src_end - src + 2,
5600                                                  dst);
5601                         dst_end = coding->destination + coding->dst_bytes;
5602                         coding_set_source (coding);
5603                         src = coding->source + coding->consumed;
5604                         src_end = coding->source + coding->src_bytes;
5605                       }
5606                   }
5607                 EMIT_ONE_BYTE (c);
5608               }
5609         }
5610       else
5611         {
5612           if (!EQ (coding->src_object, coding->dst_object))
5613             {
5614               int require = coding->src_bytes - coding->dst_bytes;
5615
5616               if (require > 0)
5617                 {
5618                   EMACS_INT offset = src - coding->source;
5619
5620                   dst = alloc_destination (coding, require, dst);
5621                   coding_set_source (coding);
5622                   src = coding->source + offset;
5623                   src_end = coding->source + coding->src_bytes;
5624                 }
5625             }
5626           produced_chars = coding->src_chars;
5627           while (src < src_end)
5628             {
5629               int c = *src++;
5630
5631               if (c == '\r')
5632                 {
5633                   if (EQ (eol_type, Qdos))
5634                     {
5635                       if (src < src_end
5636                           && *src == '\n')
5637                         c = *src++;
5638                       produced_chars--;
5639                     }
5640                   else if (EQ (eol_type, Qmac))
5641                     c = '\n';
5642                 }
5643               *dst++ = c;
5644             }
5645         }
5646       coding->consumed = coding->src_bytes;
5647       coding->consumed_char = coding->src_chars;
5648     }
5649
5650   produced = dst - (coding->destination + coding->produced);
5651   if (BUFFERP (coding->dst_object))
5652     insert_from_gap (produced_chars, produced);
5653   coding->produced += produced;
5654   coding->produced_char += produced_chars;
5655   return produced_chars;
5656 }
5657
5658 /* Compose text in CODING->object according to the annotation data at
5659    CHARBUF.  CHARBUF is an array:
5660      [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
5661  */
5662
5663 static INLINE void
5664 produce_composition (coding, charbuf)
5665      struct coding_system *coding;
5666      int *charbuf;
5667 {
5668   int len;
5669   EMACS_INT from, to;
5670   enum composition_method method;
5671   Lisp_Object components;
5672
5673   len = -charbuf[0];
5674   from = coding->dst_pos + charbuf[2];
5675   to = coding->dst_pos + charbuf[3];
5676   method = (enum composition_method) (charbuf[4]);
5677
5678   if (method == COMPOSITION_RELATIVE)
5679     components = Qnil;
5680   else
5681     {
5682       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5683       int i;
5684
5685       len -= 5;
5686       charbuf += 5;
5687       for (i = 0; i < len; i++)
5688         args[i] = make_number (charbuf[i]);
5689       components = (method == COMPOSITION_WITH_ALTCHARS
5690                     ? Fstring (len, args) : Fvector (len, args));
5691     }
5692   compose_text (from, to, components, Qnil, coding->dst_object);
5693 }
5694
5695
5696 /* Put `charset' property on text in CODING->object according to
5697    the annotation data at CHARBUF.  CHARBUF is an array:
5698      [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ]
5699  */
5700
5701 static INLINE void
5702 produce_charset (coding, charbuf)
5703      struct coding_system *coding;
5704      int *charbuf;
5705 {
5706   EMACS_INT from = coding->dst_pos + charbuf[2];
5707   EMACS_INT to = coding->dst_pos + charbuf[3];
5708   struct charset *charset = CHARSET_FROM_ID (charbuf[4]);
5709
5710   Fput_text_property (make_number (from), make_number (to),
5711                       Qcharset, CHARSET_NAME (charset),
5712                       coding->dst_object);
5713 }
5714
5715
5716 #define CHARBUF_SIZE 0x4000
5717
5718 #define ALLOC_CONVERSION_WORK_AREA(coding)                              \
5719   do {                                                                  \
5720     int size = CHARBUF_SIZE;;                                           \
5721                                                                         \
5722     coding->charbuf = NULL;                                             \
5723     while (size > 1024)                                                 \
5724       {                                                                 \
5725         coding->charbuf = (int *) alloca (sizeof (int) * size);         \
5726         if (coding->charbuf)                                            \
5727           break;                                                        \
5728         size >>= 1;                                                     \
5729       }                                                                 \
5730     if (! coding->charbuf)                                              \
5731       {                                                                 \
5732         coding->result = CODING_RESULT_INSUFFICIENT_MEM;                \
5733         return coding->result;                                          \
5734       }                                                                 \
5735     coding->charbuf_size = size;                                        \
5736   } while (0)
5737
5738
5739 static void
5740 produce_annotation (coding)
5741      struct coding_system *coding;
5742 {
5743   int *charbuf = coding->charbuf;
5744   int *charbuf_end = charbuf + coding->charbuf_used;
5745
5746   if (NILP (coding->dst_object))
5747     return;
5748
5749   while (charbuf < charbuf_end)
5750     {
5751       if (*charbuf >= 0)
5752         charbuf++;
5753       else
5754         {
5755           int len = -*charbuf;
5756           switch (charbuf[1])
5757             {
5758             case CODING_ANNOTATE_COMPOSITION_MASK:
5759               produce_composition (coding, charbuf);
5760               break;
5761             case CODING_ANNOTATE_CHARSET_MASK:
5762               produce_charset (coding, charbuf);
5763               break;
5764             default:
5765               abort ();
5766             }
5767           charbuf += len;
5768         }
5769     }
5770 }
5771
5772 /* Decode the data at CODING->src_object into CODING->dst_object.
5773    CODING->src_object is a buffer, a string, or nil.
5774    CODING->dst_object is a buffer.
5775
5776    If CODING->src_object is a buffer, it must be the current buffer.
5777    In this case, if CODING->src_pos is positive, it is a position of
5778    the source text in the buffer, otherwise, the source text is in the
5779    gap area of the buffer, and CODING->src_pos specifies the offset of
5780    the text from GPT (which must be the same as PT).  If this is the
5781    same buffer as CODING->dst_object, CODING->src_pos must be
5782    negative.
5783
5784    If CODING->src_object is a string, CODING->src_pos in an index to
5785    that string.
5786
5787    If CODING->src_object is nil, CODING->source must already point to
5788    the non-relocatable memory area.  In this case, CODING->src_pos is
5789    an offset from CODING->source.
5790
5791    The decoded data is inserted at the current point of the buffer
5792    CODING->dst_object.
5793 */
5794
5795 static int
5796 decode_coding (coding)
5797      struct coding_system *coding;
5798 {
5799   Lisp_Object attrs;
5800
5801   if (BUFFERP (coding->src_object)
5802       && coding->src_pos > 0
5803       && coding->src_pos < GPT
5804       && coding->src_pos + coding->src_chars > GPT)
5805     move_gap_both (coding->src_pos, coding->src_pos_byte);
5806
5807   if (BUFFERP (coding->dst_object))
5808     {
5809       if (current_buffer != XBUFFER (coding->dst_object))
5810         set_buffer_internal (XBUFFER (coding->dst_object));
5811       if (GPT != PT)
5812         move_gap_both (PT, PT_BYTE);
5813     }
5814
5815   coding->consumed = coding->consumed_char = 0;
5816   coding->produced = coding->produced_char = 0;
5817   coding->chars_at_source = 0;
5818   coding->result = CODING_RESULT_SUCCESS;
5819   coding->errors = 0;
5820
5821   ALLOC_CONVERSION_WORK_AREA (coding);
5822
5823   attrs = CODING_ID_ATTRS (coding->id);
5824
5825   do
5826     {
5827       coding_set_source (coding);
5828       coding->annotated = 0;
5829       (*(coding->decoder)) (coding);
5830       if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
5831         translate_chars (coding, CODING_ATTR_DECODE_TBL (attrs));
5832       else if (!NILP (Vstandard_translation_table_for_decode))
5833         translate_chars (coding, Vstandard_translation_table_for_decode);
5834       coding_set_destination (coding);
5835       produce_chars (coding);
5836       if (coding->annotated)
5837         produce_annotation (coding);
5838     }
5839   while (coding->consumed < coding->src_bytes
5840          && ! coding->result);
5841
5842   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5843       && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5844       && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5845     decode_eol (coding);
5846
5847   coding->carryover_bytes = 0;
5848   if (coding->consumed < coding->src_bytes)
5849     {
5850       int nbytes = coding->src_bytes - coding->consumed;
5851       const unsigned char *src;
5852
5853       coding_set_source (coding);
5854       coding_set_destination (coding);
5855       src = coding->source + coding->consumed;
5856
5857       if (coding->mode & CODING_MODE_LAST_BLOCK)
5858         {
5859           /* Flush out unprocessed data as binary chars.  We are sure
5860              that the number of data is less than the size of
5861              coding->charbuf.  */
5862           while (nbytes-- > 0)
5863             {
5864               int c = *src++;
5865
5866               coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c);
5867             }
5868           produce_chars (coding);
5869         }
5870       else
5871         {
5872           /* Record unprocessed bytes in coding->carryover.  We are
5873              sure that the number of data is less than the size of
5874              coding->carryover.  */
5875           unsigned char *p = coding->carryover;
5876
5877           coding->carryover_bytes = nbytes;
5878           while (nbytes-- > 0)
5879             *p++ = *src++;
5880         }
5881       coding->consumed = coding->src_bytes;
5882     }
5883
5884   return coding->result;
5885 }
5886
5887
5888 /* Extract an annotation datum from a composition starting at POS and
5889    ending before LIMIT of CODING->src_object (buffer or string), store
5890    the data in BUF, set *STOP to a starting position of the next
5891    composition (if any) or to LIMIT, and return the address of the
5892    next element of BUF.
5893
5894    If such an annotation is not found, set *STOP to a starting
5895    position of a composition after POS (if any) or to LIMIT, and
5896    return BUF.  */
5897
5898 static INLINE int *
5899 handle_composition_annotation (pos, limit, coding, buf, stop)
5900      EMACS_INT pos, limit;
5901      struct coding_system *coding;
5902      int *buf;
5903      EMACS_INT *stop;
5904 {
5905   EMACS_INT start, end;
5906   Lisp_Object prop;
5907
5908   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
5909       || end > limit)
5910     *stop = limit;
5911   else if (start > pos)
5912     *stop = start;
5913   else
5914     {
5915       if (start == pos)
5916         {
5917           /* We found a composition.  Store the corresponding
5918              annotation data in BUF.  */
5919           int *head = buf;
5920           enum composition_method method = COMPOSITION_METHOD (prop);
5921           int nchars = COMPOSITION_LENGTH (prop);
5922
5923           ADD_COMPOSITION_DATA (buf, 0, nchars, method);
5924           if (method != COMPOSITION_RELATIVE)
5925             {
5926               Lisp_Object components;
5927               int len, i, i_byte;
5928
5929               components = COMPOSITION_COMPONENTS (prop);
5930               if (VECTORP (components))
5931                 {
5932                   len = XVECTOR (components)->size;
5933                   for (i = 0; i < len; i++)
5934                     *buf++ = XINT (AREF (components, i));
5935                 }
5936               else if (STRINGP (components))
5937                 {
5938                   len = SCHARS (components);
5939                   i = i_byte = 0;
5940                   while (i < len)
5941                     {
5942                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
5943                       buf++;
5944                     }
5945                 }
5946               else if (INTEGERP (components))
5947                 {
5948                   len = 1;
5949                   *buf++ = XINT (components);
5950                 }
5951               else if (CONSP (components))
5952                 {
5953                   for (len = 0; CONSP (components);
5954                        len++, components = XCDR (components))
5955                     *buf++ = XINT (XCAR (components));
5956                 }
5957               else
5958                 abort ();
5959               *head -= len;
5960             }
5961         }
5962
5963       if (find_composition (end, limit, &start, &end, &prop,
5964                             coding->src_object)
5965           && end <= limit)
5966         *stop = start;
5967       else
5968         *stop = limit;
5969     }
5970   return buf;
5971 }
5972
5973
5974 /* Extract an annotation datum from a text property `charset' at POS of
5975    CODING->src_object (buffer of string), store the data in BUF, set
5976    *STOP to the position where the value of `charset' property changes
5977    (limiting by LIMIT), and return the address of the next element of
5978    BUF.
5979
5980    If the property value is nil, set *STOP to the position where the
5981    property value is non-nil (limiting by LIMIT), and return BUF.  */
5982
5983 static INLINE int *
5984 handle_charset_annotation (pos, limit, coding, buf, stop)
5985      EMACS_INT pos, limit;
5986      struct coding_system *coding;
5987      int *buf;
5988      EMACS_INT *stop;
5989 {
5990   Lisp_Object val, next;
5991   int id;
5992
5993   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
5994   if (! NILP (val) && CHARSETP (val))
5995     id = XINT (CHARSET_SYMBOL_ID (val));
5996   else
5997     id = -1;
5998   ADD_CHARSET_DATA (buf, 0, 0, id);
5999   next = Fnext_single_property_change (make_number (pos), Qcharset,
6000                                        coding->src_object,
6001                                        make_number (limit));
6002   *stop = XINT (next);
6003   return buf;
6004 }
6005
6006
6007 static void
6008 consume_chars (coding)
6009      struct coding_system *coding;
6010 {
6011   int *buf = coding->charbuf;
6012   int *buf_end = coding->charbuf + coding->charbuf_size;
6013   const unsigned char *src = coding->source + coding->consumed;
6014   const unsigned char *src_end = coding->source + coding->src_bytes;
6015   EMACS_INT pos = coding->src_pos + coding->consumed_char;
6016   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
6017   int multibytep = coding->src_multibyte;
6018   Lisp_Object eol_type;
6019   int c;
6020   EMACS_INT stop, stop_composition, stop_charset;
6021
6022   eol_type = CODING_ID_EOL_TYPE (coding->id);
6023   if (VECTORP (eol_type))
6024     eol_type = Qunix;
6025
6026   /* Note: composition handling is not yet implemented.  */
6027   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6028
6029   if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6030     stop = stop_composition = pos;
6031   else
6032     stop = stop_composition = end_pos;
6033   if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6034     stop = stop_charset = pos;
6035   else
6036     stop_charset = end_pos;
6037
6038   /* Compensate for CRLF and annotation.  */
6039   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
6040   while (buf < buf_end)
6041     {
6042       if (pos == stop)
6043         {
6044           if (pos == end_pos)
6045             break;
6046           if (pos == stop_composition)
6047             buf = handle_composition_annotation (pos, end_pos, coding,
6048                                                  buf, &stop_composition);
6049           if (pos == stop_charset)
6050             buf = handle_charset_annotation (pos, end_pos, coding,
6051                                              buf, &stop_charset);
6052           stop = (stop_composition < stop_charset
6053                   ? stop_composition : stop_charset);
6054         }
6055
6056       if (! multibytep)
6057         {
6058           EMACS_INT bytes;
6059
6060           if (! CODING_FOR_UNIBYTE (coding)
6061               && (bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
6062             c = STRING_CHAR_ADVANCE (src), pos += bytes;
6063           else
6064             c = *src++, pos++;
6065         }
6066       else
6067         c = STRING_CHAR_ADVANCE (src), pos++;
6068       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6069         c = '\n';
6070       if (! EQ (eol_type, Qunix))
6071         {
6072           if (c == '\n')
6073             {
6074               if (EQ (eol_type, Qdos))
6075                 *buf++ = '\r';
6076               else
6077                 c = '\r';
6078             }
6079         }
6080       *buf++ = c;
6081     }
6082
6083   coding->consumed = src - coding->source;
6084   coding->consumed_char = pos - coding->src_pos;
6085   coding->charbuf_used = buf - coding->charbuf;
6086   coding->chars_at_source = 0;
6087 }
6088
6089
6090 /* Encode the text at CODING->src_object into CODING->dst_object.
6091    CODING->src_object is a buffer or a string.
6092    CODING->dst_object is a buffer or nil.
6093
6094    If CODING->src_object is a buffer, it must be the current buffer.
6095    In this case, if CODING->src_pos is positive, it is a position of
6096    the source text in the buffer, otherwise. the source text is in the
6097    gap area of the buffer, and coding->src_pos specifies the offset of
6098    the text from GPT (which must be the same as PT).  If this is the
6099    same buffer as CODING->dst_object, CODING->src_pos must be
6100    negative and CODING should not have `pre-write-conversion'.
6101
6102    If CODING->src_object is a string, CODING should not have
6103    `pre-write-conversion'.
6104
6105    If CODING->dst_object is a buffer, the encoded data is inserted at
6106    the current point of that buffer.
6107
6108    If CODING->dst_object is nil, the encoded data is placed at the
6109    memory area specified by CODING->destination.  */
6110
6111 static int
6112 encode_coding (coding)
6113      struct coding_system *coding;
6114 {
6115   Lisp_Object attrs;
6116
6117   attrs = CODING_ID_ATTRS (coding->id);
6118
6119   if (BUFFERP (coding->dst_object))
6120     {
6121       set_buffer_internal (XBUFFER (coding->dst_object));
6122       coding->dst_multibyte
6123         = ! NILP (current_buffer->enable_multibyte_characters);
6124     }
6125
6126   coding->consumed = coding->consumed_char = 0;
6127   coding->produced = coding->produced_char = 0;
6128   coding->result = CODING_RESULT_SUCCESS;
6129   coding->errors = 0;
6130
6131   ALLOC_CONVERSION_WORK_AREA (coding);
6132
6133   do {
6134     coding_set_source (coding);
6135     consume_chars (coding);
6136
6137     if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
6138       translate_chars (coding, CODING_ATTR_ENCODE_TBL (attrs));
6139     else if (!NILP (Vstandard_translation_table_for_encode))
6140       translate_chars (coding, Vstandard_translation_table_for_encode);
6141
6142     coding_set_destination (coding);
6143     (*(coding->encoder)) (coding);
6144   } while (coding->consumed_char < coding->src_chars);
6145
6146   if (BUFFERP (coding->dst_object))
6147     insert_from_gap (coding->produced_char, coding->produced);
6148
6149   return (coding->result);
6150 }
6151
6152
6153 /* Stack of working buffers used in code conversion.  An nil element
6154    means that the code conversion of that level is not using a working
6155    buffer.  */
6156 Lisp_Object Vcode_conversion_work_buf_list;
6157
6158 /* A working buffer used by the top level conversion.  */
6159 Lisp_Object Vcode_conversion_reused_work_buf;
6160
6161
6162 /* Return a working buffer that can be freely used by the following
6163    code conversion.  MULTIBYTEP specifies the multibyteness of the
6164    buffer.  */
6165
6166 Lisp_Object
6167 make_conversion_work_buffer (multibytep, depth)
6168      int multibytep, depth;
6169 {
6170   struct buffer *current = current_buffer;
6171   Lisp_Object buf, name;
6172
6173   if (depth == 0)
6174     {
6175       if (NILP (Vcode_conversion_reused_work_buf))
6176         Vcode_conversion_reused_work_buf
6177           = Fget_buffer_create (build_string (" *code-converting-work<0>*"));
6178       buf = Vcode_conversion_reused_work_buf;
6179     }
6180   else
6181     {
6182       if (depth < 0)
6183         {
6184           name = build_string (" *code-converting-work*");
6185           name = Fgenerate_new_buffer_name (name, Qnil);
6186         }
6187       else
6188         {
6189           char str[128];
6190
6191           sprintf (str, " *code-converting-work*<%d>", depth);
6192           name = build_string (str);
6193         }
6194       buf = Fget_buffer_create (name);
6195     }
6196   set_buffer_internal (XBUFFER (buf));
6197   current_buffer->undo_list = Qt;
6198   Ferase_buffer ();
6199   Fset_buffer_multibyte (multibytep ? Qt : Qnil);
6200   set_buffer_internal (current);
6201   return buf;
6202 }
6203
6204 static Lisp_Object
6205 code_conversion_restore (buffer)
6206      Lisp_Object buffer;
6207 {
6208   Lisp_Object workbuf;
6209
6210   workbuf = XCAR (Vcode_conversion_work_buf_list);
6211   if (! NILP (workbuf)
6212       && ! EQ (workbuf, Vcode_conversion_reused_work_buf)
6213       && ! NILP (Fbuffer_live_p (workbuf)))
6214     Fkill_buffer (workbuf);
6215   Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
6216   set_buffer_internal (XBUFFER (buffer));
6217   return Qnil;
6218 }
6219
6220 static Lisp_Object
6221 code_conversion_save (buffer, with_work_buf, multibyte)
6222      Lisp_Object buffer;
6223      int with_work_buf, multibyte;
6224 {
6225   Lisp_Object workbuf;
6226
6227   if (with_work_buf)
6228     {
6229       int depth = XINT (Flength (Vcode_conversion_work_buf_list));
6230
6231       workbuf = make_conversion_work_buffer (multibyte, depth);
6232     }
6233   else
6234     workbuf = Qnil;
6235   Vcode_conversion_work_buf_list
6236     = Fcons (workbuf, Vcode_conversion_work_buf_list);
6237   record_unwind_protect (code_conversion_restore, buffer);
6238   return workbuf;
6239 }
6240
6241 int
6242 decode_coding_gap (coding, chars, bytes)
6243      struct coding_system *coding;
6244      EMACS_INT chars, bytes;
6245 {
6246   int count = specpdl_ptr - specpdl;
6247   Lisp_Object attrs;
6248   Lisp_Object buffer;
6249
6250   buffer = Fcurrent_buffer ();
6251   code_conversion_save (buffer, 0, 0);
6252
6253   coding->src_object = buffer;
6254   coding->src_chars = chars;
6255   coding->src_bytes = bytes;
6256   coding->src_pos = -chars;
6257   coding->src_pos_byte = -bytes;
6258   coding->src_multibyte = chars < bytes;
6259   coding->dst_object = buffer;
6260   coding->dst_pos = PT;
6261   coding->dst_pos_byte = PT_BYTE;
6262   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
6263   coding->mode |= CODING_MODE_LAST_BLOCK;
6264
6265   if (CODING_REQUIRE_DETECTION (coding))
6266     detect_coding (coding);
6267
6268   decode_coding (coding);
6269
6270   attrs = CODING_ID_ATTRS (coding->id);
6271   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6272     {
6273       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6274       Lisp_Object val;
6275
6276       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6277       val = call1 (CODING_ATTR_POST_READ (attrs),
6278                    make_number (coding->produced_char));
6279       CHECK_NATNUM (val);
6280       coding->produced_char += Z - prev_Z;
6281       coding->produced += Z_BYTE - prev_Z_BYTE;
6282     }
6283
6284   unbind_to (count, Qnil);
6285   return coding->result;
6286 }
6287
6288 int
6289 encode_coding_gap (coding, chars, bytes)
6290      struct coding_system *coding;
6291      EMACS_INT chars, bytes;
6292 {
6293   int count = specpdl_ptr - specpdl;
6294   Lisp_Object buffer;
6295
6296   buffer = Fcurrent_buffer ();
6297   code_conversion_save (buffer, 0, 0);
6298
6299   coding->src_object = buffer;
6300   coding->src_chars = chars;
6301   coding->src_bytes = bytes;
6302   coding->src_pos = -chars;
6303   coding->src_pos_byte = -bytes;
6304   coding->src_multibyte = chars < bytes;
6305   coding->dst_object = coding->src_object;
6306   coding->dst_pos = PT;
6307   coding->dst_pos_byte = PT_BYTE;
6308
6309   encode_coding (coding);
6310
6311   unbind_to (count, Qnil);
6312   return coding->result;
6313 }
6314
6315
6316 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6317    SRC_OBJECT into DST_OBJECT by coding context CODING.
6318
6319    SRC_OBJECT is a buffer, a string, or Qnil.
6320
6321    If it is a buffer, the text is at point of the buffer.  FROM and TO
6322    are positions in the buffer.
6323
6324    If it is a string, the text is at the beginning of the string.
6325    FROM and TO are indices to the string.
6326
6327    If it is nil, the text is at coding->source.  FROM and TO are
6328    indices to coding->source.
6329
6330    DST_OBJECT is a buffer, Qt, or Qnil.
6331
6332    If it is a buffer, the decoded text is inserted at point of the
6333    buffer.  If the buffer is the same as SRC_OBJECT, the source text
6334    is deleted.
6335
6336    If it is Qt, a string is made from the decoded text, and
6337    set in CODING->dst_object.
6338
6339    If it is Qnil, the decoded text is stored at CODING->destination.
6340    The caller must allocate CODING->dst_bytes bytes at
6341    CODING->destination by xmalloc.  If the decoded text is longer than
6342    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6343  */
6344
6345 void
6346 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6347                       dst_object)
6348      struct coding_system *coding;
6349      Lisp_Object src_object;
6350      EMACS_INT from, from_byte, to, to_byte;
6351      Lisp_Object dst_object;
6352 {
6353   int count = specpdl_ptr - specpdl;
6354   unsigned char *destination;
6355   EMACS_INT dst_bytes;
6356   EMACS_INT chars = to - from;
6357   EMACS_INT bytes = to_byte - from_byte;
6358   Lisp_Object attrs;
6359   Lisp_Object buffer;
6360   int saved_pt = -1, saved_pt_byte;
6361
6362   buffer = Fcurrent_buffer ();
6363
6364   if (NILP (dst_object))
6365     {
6366       destination = coding->destination;
6367       dst_bytes = coding->dst_bytes;
6368     }
6369
6370   coding->src_object = src_object;
6371   coding->src_chars = chars;
6372   coding->src_bytes = bytes;
6373   coding->src_multibyte = chars < bytes;
6374
6375   if (STRINGP (src_object))
6376     {
6377       coding->src_pos = from;
6378       coding->src_pos_byte = from_byte;
6379     }
6380   else if (BUFFERP (src_object))
6381     {
6382       set_buffer_internal (XBUFFER (src_object));
6383       if (from != GPT)
6384         move_gap_both (from, from_byte);
6385       if (EQ (src_object, dst_object))
6386         {
6387           saved_pt = PT, saved_pt_byte = PT_BYTE;
6388           TEMP_SET_PT_BOTH (from, from_byte);
6389           del_range_both (from, from_byte, to, to_byte, 1);
6390           coding->src_pos = -chars;
6391           coding->src_pos_byte = -bytes;
6392         }
6393       else
6394         {
6395           coding->src_pos = from;
6396           coding->src_pos_byte = from_byte;
6397         }
6398     }
6399
6400   if (CODING_REQUIRE_DETECTION (coding))
6401     detect_coding (coding);
6402   attrs = CODING_ID_ATTRS (coding->id);
6403
6404   if (EQ (dst_object, Qt)
6405       || (! NILP (CODING_ATTR_POST_READ (attrs))
6406           && NILP (dst_object)))
6407     {
6408       coding->dst_object = code_conversion_save (buffer, 1, 1);
6409       coding->dst_pos = BEG;
6410       coding->dst_pos_byte = BEG_BYTE;
6411       coding->dst_multibyte = 1;
6412     }
6413   else if (BUFFERP (dst_object))
6414     {
6415       code_conversion_save (buffer, 0, 0);
6416       coding->dst_object = dst_object;
6417       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6418       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6419       coding->dst_multibyte
6420         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6421     }
6422   else
6423     {
6424       code_conversion_save (buffer, 0, 0);
6425       coding->dst_object = Qnil;
6426       coding->dst_multibyte = 1;
6427     }
6428
6429   decode_coding (coding);
6430
6431   if (BUFFERP (coding->dst_object))
6432     set_buffer_internal (XBUFFER (coding->dst_object));
6433
6434   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6435     {
6436       struct gcpro gcpro1, gcpro2;
6437       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6438       Lisp_Object val;
6439
6440       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6441       GCPRO2 (coding->src_object, coding->dst_object);
6442       val = call1 (CODING_ATTR_POST_READ (attrs),
6443                    make_number (coding->produced_char));
6444       UNGCPRO;
6445       CHECK_NATNUM (val);
6446       coding->produced_char += Z - prev_Z;
6447       coding->produced += Z_BYTE - prev_Z_BYTE;
6448     }
6449
6450   if (EQ (dst_object, Qt))
6451     {
6452       coding->dst_object = Fbuffer_string ();
6453     }
6454   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6455     {
6456       set_buffer_internal (XBUFFER (coding->dst_object));
6457       if (dst_bytes < coding->produced)
6458         {
6459           destination
6460             = (unsigned char *) xrealloc (destination, coding->produced);
6461           if (! destination)
6462             {
6463               coding->result = CODING_RESULT_INSUFFICIENT_DST;
6464               unbind_to (count, Qnil);
6465               return;
6466             }
6467           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6468             move_gap_both (BEGV, BEGV_BYTE);
6469           bcopy (BEGV_ADDR, destination, coding->produced);
6470           coding->destination = destination;
6471         }
6472     }
6473
6474   if (saved_pt >= 0)
6475     {
6476       /* This is the case of:
6477          (BUFFERP (src_object) && EQ (src_object, dst_object))
6478          As we have moved PT while replacing the original buffer
6479          contents, we must recover it now.  */
6480       set_buffer_internal (XBUFFER (src_object));
6481       if (saved_pt < from)
6482         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6483       else if (saved_pt < from + chars)
6484         TEMP_SET_PT_BOTH (from, from_byte);
6485       else if (! NILP (current_buffer->enable_multibyte_characters))
6486         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6487                           saved_pt_byte + (coding->produced - bytes));
6488       else
6489         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6490                           saved_pt_byte + (coding->produced - bytes));
6491     }
6492
6493   unbind_to (count, Qnil);
6494 }
6495
6496
6497 void
6498 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6499                       dst_object)
6500      struct coding_system *coding;
6501      Lisp_Object src_object;
6502      EMACS_INT from, from_byte, to, to_byte;
6503      Lisp_Object dst_object;
6504 {
6505   int count = specpdl_ptr - specpdl;
6506   EMACS_INT chars = to - from;
6507   EMACS_INT bytes = to_byte - from_byte;
6508   Lisp_Object attrs;
6509   Lisp_Object buffer;
6510   int saved_pt = -1, saved_pt_byte;
6511
6512   buffer = Fcurrent_buffer ();
6513
6514   coding->src_object = src_object;
6515   coding->src_chars = chars;
6516   coding->src_bytes = bytes;
6517   coding->src_multibyte = chars < bytes;
6518
6519   attrs = CODING_ID_ATTRS (coding->id);
6520
6521   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6522     {
6523       coding->src_object = code_conversion_save (buffer, 1,
6524                                                  coding->src_multibyte);
6525       set_buffer_internal (XBUFFER (coding->src_object));
6526       if (STRINGP (src_object))
6527         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6528       else if (BUFFERP (src_object))
6529         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6530       else
6531         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6532
6533       if (EQ (src_object, dst_object))
6534         {
6535           set_buffer_internal (XBUFFER (src_object));
6536           saved_pt = PT, saved_pt_byte = PT_BYTE;
6537           del_range_both (from, from_byte, to, to_byte, 1);
6538           set_buffer_internal (XBUFFER (coding->src_object));
6539         }
6540
6541       call2 (CODING_ATTR_PRE_WRITE (attrs),
6542              make_number (BEG), make_number (Z));
6543       coding->src_object = Fcurrent_buffer ();
6544       if (BEG != GPT)
6545         move_gap_both (BEG, BEG_BYTE);
6546       coding->src_chars = Z - BEG;
6547       coding->src_bytes = Z_BYTE - BEG_BYTE;
6548       coding->src_pos = BEG;
6549       coding->src_pos_byte = BEG_BYTE;
6550       coding->src_multibyte = Z < Z_BYTE;
6551     }
6552   else if (STRINGP (src_object))
6553     {
6554       code_conversion_save (buffer, 0, 0);
6555       coding->src_pos = from;
6556       coding->src_pos_byte = from_byte;
6557     }
6558   else if (BUFFERP (src_object))
6559     {
6560       code_conversion_save (buffer, 0, 0);
6561       set_buffer_internal (XBUFFER (src_object));
6562       if (EQ (src_object, dst_object))
6563         {
6564           saved_pt = PT, saved_pt_byte = PT_BYTE;
6565           coding->src_object = del_range_1 (from, to, 1, 1);
6566           coding->src_pos = 0;
6567           coding->src_pos_byte = 0;
6568         }
6569       else
6570         {
6571           if (from < GPT && to >= GPT)
6572             move_gap_both (from, from_byte);
6573           coding->src_pos = from;
6574           coding->src_pos_byte = from_byte;
6575         }
6576     }
6577   else
6578     code_conversion_save (buffer, 0, 0);
6579
6580   if (BUFFERP (dst_object))
6581     {
6582       coding->dst_object = dst_object;
6583       if (EQ (src_object, dst_object))
6584         {
6585           coding->dst_pos = from;
6586           coding->dst_pos_byte = from_byte;
6587         }
6588       else
6589         {
6590           coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6591           coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6592         }
6593       coding->dst_multibyte
6594         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6595     }
6596   else if (EQ (dst_object, Qt))
6597     {
6598       coding->dst_object = Qnil;
6599       coding->dst_bytes = coding->src_chars;
6600       if (coding->dst_bytes == 0)
6601         coding->dst_bytes = 1;
6602       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
6603       coding->dst_multibyte = 0;
6604     }
6605   else
6606     {
6607       coding->dst_object = Qnil;
6608       coding->dst_multibyte = 0;
6609     }
6610
6611   encode_coding (coding);
6612
6613   if (EQ (dst_object, Qt))
6614     {
6615       if (BUFFERP (coding->dst_object))
6616         coding->dst_object = Fbuffer_string ();
6617       else
6618         {
6619           coding->dst_object
6620             = make_unibyte_string ((char *) coding->destination,
6621                                    coding->produced);
6622           xfree (coding->destination);
6623         }
6624     }
6625
6626   if (saved_pt >= 0)
6627     {
6628       /* This is the case of:
6629          (BUFFERP (src_object) && EQ (src_object, dst_object))
6630          As we have moved PT while replacing the original buffer
6631          contents, we must recover it now.  */
6632       set_buffer_internal (XBUFFER (src_object));
6633       if (saved_pt < from)
6634         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
6635       else if (saved_pt < from + chars)
6636         TEMP_SET_PT_BOTH (from, from_byte);
6637       else if (! NILP (current_buffer->enable_multibyte_characters))
6638         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
6639                           saved_pt_byte + (coding->produced - bytes));
6640       else
6641         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
6642                           saved_pt_byte + (coding->produced - bytes));
6643     }
6644
6645   unbind_to (count, Qnil);
6646 }
6647
6648
6649 Lisp_Object
6650 preferred_coding_system ()
6651 {
6652   int id = coding_categories[coding_priorities[0]].id;
6653
6654   return CODING_ID_NAME (id);
6655 }
6656
6657 \f
6658 #ifdef emacs
6659 /*** 8. Emacs Lisp library functions ***/
6660
6661 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6662        doc: /* Return t if OBJECT is nil or a coding-system.
6663 See the documentation of `define-coding-system' for information
6664 about coding-system objects.  */)
6665      (obj)
6666      Lisp_Object obj;
6667 {
6668   return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
6669 }
6670
6671 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6672        Sread_non_nil_coding_system, 1, 1, 0,
6673        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
6674      (prompt)
6675      Lisp_Object prompt;
6676 {
6677   Lisp_Object val;
6678   do
6679     {
6680       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6681                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6682     }
6683   while (SCHARS (val) == 0);
6684   return (Fintern (val, Qnil));
6685 }
6686
6687 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6688        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6689 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.  */)
6690      (prompt, default_coding_system)
6691      Lisp_Object prompt, default_coding_system;
6692 {
6693   Lisp_Object val;
6694   if (SYMBOLP (default_coding_system))
6695     XSETSTRING (default_coding_system, XPNTR (SYMBOL_NAME (default_coding_system)));
6696   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6697                           Qt, Qnil, Qcoding_system_history,
6698                           default_coding_system, Qnil);
6699   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
6700 }
6701
6702 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6703        1, 1, 0,
6704        doc: /* Check validity of CODING-SYSTEM.
6705 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.  */)
6706   (coding_system)
6707      Lisp_Object coding_system;
6708 {
6709   CHECK_SYMBOL (coding_system);
6710   if (!NILP (Fcoding_system_p (coding_system)))
6711     return coding_system;
6712   while (1)
6713     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6714 }
6715
6716 \f
6717 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
6718    HIGHEST is nonzero, return the coding system of the highest
6719    priority among the detected coding systems.  Otherwize return a
6720    list of detected coding systems sorted by their priorities.  If
6721    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
6722    multibyte form but contains only ASCII and eight-bit chars.
6723    Otherwise, the bytes are raw bytes.
6724
6725    CODING-SYSTEM controls the detection as below:
6726
6727    If it is nil, detect both text-format and eol-format.  If the
6728    text-format part of CODING-SYSTEM is already specified
6729    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
6730    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
6731    detect only text-format.  */
6732
6733 Lisp_Object
6734 detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
6735      const unsigned char *src;
6736      int src_bytes, highest;
6737      int multibytep;
6738      Lisp_Object coding_system;
6739 {
6740   const unsigned char *src_end = src + src_bytes;
6741   Lisp_Object attrs, eol_type;
6742   Lisp_Object val;
6743   struct coding_system coding;
6744   int id;
6745   struct coding_detection_info detect_info;
6746
6747   if (NILP (coding_system))
6748     coding_system = Qundecided;
6749   setup_coding_system (coding_system, &coding);
6750   attrs = CODING_ID_ATTRS (coding.id);
6751   eol_type = CODING_ID_EOL_TYPE (coding.id);
6752   coding_system = CODING_ATTR_BASE_NAME (attrs);
6753
6754   coding.source = src;
6755   coding.src_bytes = src_bytes;
6756   coding.src_multibyte = multibytep;
6757   coding.consumed = 0;
6758   coding.mode |= CODING_MODE_LAST_BLOCK;
6759
6760   detect_info.checked = detect_info.found = detect_info.rejected = 0;
6761
6762   /* At first, detect text-format if necessary.  */
6763   if (XINT (CODING_ATTR_CATEGORY (attrs)) == coding_category_undecided)
6764     {
6765       enum coding_category category;
6766       struct coding_system *this;
6767       int c, i;
6768
6769       for (; src < src_end; src++)
6770         {
6771           c = *src;
6772           if (c & 0x80
6773               || (c < 0x20 && (c == ISO_CODE_ESC
6774                                || c == ISO_CODE_SI
6775                                || c == ISO_CODE_SO)))
6776             break;
6777         }
6778       coding.head_ascii = src - coding.source;
6779
6780       if (src < src_end)
6781         for (i = 0; i < coding_category_raw_text; i++)
6782           {
6783             category = coding_priorities[i];
6784             this = coding_categories + category;
6785
6786             if (this->id < 0)
6787               {
6788                 /* No coding system of this category is defined.  */
6789                 detect_info.rejected |= (1 << category);
6790               }
6791             else if (category >= coding_category_raw_text)
6792               continue;
6793             else if (detect_info.checked & (1 << category))
6794               {
6795                 if (highest
6796                     && (detect_info.found & (1 << category)))
6797                   break;
6798               }
6799             else
6800               {
6801                 if ((*(this->detector)) (&coding, &detect_info)
6802                     && highest
6803                     && (detect_info.found & (1 << category)))
6804                   break;
6805               }
6806           }
6807
6808
6809       if (detect_info.rejected == CATEGORY_MASK_ANY)
6810         {
6811           detect_info.found = CATEGORY_MASK_RAW_TEXT;
6812           id = coding_categories[coding_category_raw_text].id;
6813           val = Fcons (make_number (id), Qnil);
6814         }
6815       else if (! detect_info.rejected && ! detect_info.found)
6816         {
6817           detect_info.found = CATEGORY_MASK_ANY;
6818           id = coding_categories[coding_category_undecided].id;
6819           val = Fcons (make_number (id), Qnil);
6820         }
6821       else if (highest)
6822         {
6823           if (detect_info.found)
6824             {
6825               detect_info.found = 1 << category;
6826               val = Fcons (make_number (this->id), Qnil);
6827             }
6828           else
6829             for (i = 0; i < coding_category_raw_text; i++)
6830               if (! (detect_info.rejected & (1 << coding_priorities[i])))
6831                 {
6832                   detect_info.found = 1 << coding_priorities[i];
6833                   id = coding_categories[coding_priorities[i]].id;
6834                   val = Fcons (make_number (id), Qnil);
6835                   break;
6836                 }
6837         }
6838       else
6839         {
6840           int mask = detect_info.rejected | detect_info.found;
6841           int found = 0;
6842           val = Qnil;
6843
6844           for (i = coding_category_raw_text - 1; i >= 0; i--)
6845             {
6846               category = coding_priorities[i];
6847               if (! (mask & (1 << category)))
6848                 {
6849                   found |= 1 << category;
6850                   id = coding_categories[category].id;
6851                   val = Fcons (make_number (id), val);
6852                 }
6853             }
6854           for (i = coding_category_raw_text - 1; i >= 0; i--)
6855             {
6856               category = coding_priorities[i];
6857               if (detect_info.found & (1 << category))
6858                 {
6859                   id = coding_categories[category].id;
6860                   val = Fcons (make_number (id), val);
6861                 }
6862             }
6863           detect_info.found |= found;
6864         }
6865     }
6866   else
6867     {
6868       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
6869       val = Fcons (make_number (coding.id), Qnil);
6870     }
6871
6872   /* Then, detect eol-format if necessary.  */
6873   {
6874     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
6875     Lisp_Object tail;
6876
6877     if (VECTORP (eol_type))
6878       {
6879         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
6880           normal_eol = detect_eol (coding.source, src_bytes,
6881                                    coding_category_raw_text);
6882         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
6883                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
6884           utf_16_be_eol = detect_eol (coding.source, src_bytes,
6885                                       coding_category_utf_16_be);
6886         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
6887                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
6888           utf_16_le_eol = detect_eol (coding.source, src_bytes,
6889                                       coding_category_utf_16_le);
6890       }
6891     else
6892       {
6893         if (EQ (eol_type, Qunix))
6894           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
6895         else if (EQ (eol_type, Qdos))
6896           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
6897         else
6898           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
6899       }
6900
6901     for (tail = val; CONSP (tail); tail = XCDR (tail))
6902       {
6903         enum coding_category category;
6904         int this_eol;
6905
6906         id = XINT (XCAR (tail));
6907         attrs = CODING_ID_ATTRS (id);
6908         category = XINT (CODING_ATTR_CATEGORY (attrs));
6909         eol_type = CODING_ID_EOL_TYPE (id);
6910         if (VECTORP (eol_type))
6911           {
6912             if (category == coding_category_utf_16_be
6913                 || category == coding_category_utf_16_be_nosig)
6914               this_eol = utf_16_be_eol;
6915             else if (category == coding_category_utf_16_le
6916                      || category == coding_category_utf_16_le_nosig)
6917               this_eol = utf_16_le_eol;
6918             else
6919               this_eol = normal_eol;
6920
6921             if (this_eol == EOL_SEEN_LF)
6922               XSETCAR (tail, AREF (eol_type, 0));
6923             else if (this_eol == EOL_SEEN_CRLF)
6924               XSETCAR (tail, AREF (eol_type, 1));
6925             else if (this_eol == EOL_SEEN_CR)
6926               XSETCAR (tail, AREF (eol_type, 2));
6927             else
6928               XSETCAR (tail, CODING_ID_NAME (id));
6929           }
6930         else
6931           XSETCAR (tail, CODING_ID_NAME (id));
6932       }
6933   }
6934
6935   return (highest ? XCAR (val) : val);
6936 }
6937
6938
6939 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6940        2, 3, 0,
6941        doc: /* Detect coding system of the text in the region between START and END.
6942 Return a list of possible coding systems ordered by priority.
6943
6944 If only ASCII characters are found, it returns a list of single element
6945 `undecided' or its subsidiary coding system according to a detected
6946 end-of-line format.
6947
6948 If optional argument HIGHEST is non-nil, return the coding system of
6949 highest priority.  */)
6950      (start, end, highest)
6951      Lisp_Object start, end, highest;
6952 {
6953   int from, to;
6954   int from_byte, to_byte;
6955
6956   CHECK_NUMBER_COERCE_MARKER (start);
6957   CHECK_NUMBER_COERCE_MARKER (end);
6958
6959   validate_region (&start, &end);
6960   from = XINT (start), to = XINT (end);
6961   from_byte = CHAR_TO_BYTE (from);
6962   to_byte = CHAR_TO_BYTE (to);
6963
6964   if (from < GPT && to >= GPT)
6965     move_gap_both (to, to_byte);
6966
6967   return detect_coding_system (BYTE_POS_ADDR (from_byte),
6968                                to_byte - from_byte,
6969                                !NILP (highest),
6970                                !NILP (current_buffer
6971                                       ->enable_multibyte_characters),
6972                                Qnil);
6973 }
6974
6975 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6976        1, 2, 0,
6977        doc: /* Detect coding system of the text in STRING.
6978 Return a list of possible coding systems ordered by priority.
6979
6980 If only ASCII characters are found, it returns a list of single element
6981 `undecided' or its subsidiary coding system according to a detected
6982 end-of-line format.
6983
6984 If optional argument HIGHEST is non-nil, return the coding system of
6985 highest priority.  */)
6986      (string, highest)
6987      Lisp_Object string, highest;
6988 {
6989   CHECK_STRING (string);
6990
6991   return detect_coding_system (SDATA (string), SBYTES (string),
6992                                !NILP (highest), STRING_MULTIBYTE (string),
6993                                Qnil);
6994 }
6995
6996
6997 static INLINE int
6998 char_encodable_p (c, attrs)
6999      int c;
7000      Lisp_Object attrs;
7001 {
7002   Lisp_Object tail;
7003   struct charset *charset;
7004
7005   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
7006        CONSP (tail); tail = XCDR (tail))
7007     {
7008       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7009       if (CHAR_CHARSET_P (c, charset))
7010         break;
7011     }
7012   return (! NILP (tail));
7013 }
7014
7015
7016 /* Return a list of coding systems that safely encode the text between
7017    START and END.  If EXCLUDE is non-nil, it is a list of coding
7018    systems not to check.  The returned list doesn't contain any such
7019    coding systems.  In any case, if the text contains only ASCII or is
7020    unibyte, return t.  */
7021
7022 DEFUN ("find-coding-systems-region-internal",
7023        Ffind_coding_systems_region_internal,
7024        Sfind_coding_systems_region_internal, 2, 3, 0,
7025        doc: /* Internal use only.  */)
7026      (start, end, exclude)
7027      Lisp_Object start, end, exclude;
7028 {
7029   Lisp_Object coding_attrs_list, safe_codings;
7030   EMACS_INT start_byte, end_byte;
7031   const unsigned char *p, *pbeg, *pend;
7032   int c;
7033   Lisp_Object tail, elt;
7034
7035   if (STRINGP (start))
7036     {
7037       if (!STRING_MULTIBYTE (start)
7038           || SCHARS (start) == SBYTES (start))
7039         return Qt;
7040       start_byte = 0;
7041       end_byte = SBYTES (start);
7042     }
7043   else
7044     {
7045       CHECK_NUMBER_COERCE_MARKER (start);
7046       CHECK_NUMBER_COERCE_MARKER (end);
7047       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7048         args_out_of_range (start, end);
7049       if (NILP (current_buffer->enable_multibyte_characters))
7050         return Qt;
7051       start_byte = CHAR_TO_BYTE (XINT (start));
7052       end_byte = CHAR_TO_BYTE (XINT (end));
7053       if (XINT (end) - XINT (start) == end_byte - start_byte)
7054         return Qt;
7055
7056       if (XINT (start) < GPT && XINT (end) > GPT)
7057         {
7058           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7059             move_gap_both (XINT (start), start_byte);
7060           else
7061             move_gap_both (XINT (end), end_byte);
7062         }
7063     }
7064
7065   coding_attrs_list = Qnil;
7066   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
7067     if (NILP (exclude)
7068         || NILP (Fmemq (XCAR (tail), exclude)))
7069       {
7070         Lisp_Object attrs;
7071
7072         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
7073         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
7074             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7075           coding_attrs_list = Fcons (attrs, coding_attrs_list);
7076       }
7077
7078   if (STRINGP (start))
7079     p = pbeg = SDATA (start);
7080   else
7081     p = pbeg = BYTE_POS_ADDR (start_byte);
7082   pend = p + (end_byte - start_byte);
7083
7084   while (p < pend && ASCII_BYTE_P (*p)) p++;
7085   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7086
7087   while (p < pend)
7088     {
7089       if (ASCII_BYTE_P (*p))
7090         p++;
7091       else
7092         {
7093           c = STRING_CHAR_ADVANCE (p);
7094
7095           charset_map_loaded = 0;
7096           for (tail = coding_attrs_list; CONSP (tail);)
7097             {
7098               elt = XCAR (tail);
7099               if (NILP (elt))
7100                 tail = XCDR (tail);
7101               else if (char_encodable_p (c, elt))
7102                 tail = XCDR (tail);
7103               else if (CONSP (XCDR (tail)))
7104                 {
7105                   XSETCAR (tail, XCAR (XCDR (tail)));
7106                   XSETCDR (tail, XCDR (XCDR (tail)));
7107                 }
7108               else
7109                 {
7110                   XSETCAR (tail, Qnil);
7111                   tail = XCDR (tail);
7112                 }
7113             }
7114           if (charset_map_loaded)
7115             {
7116               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7117
7118               if (STRINGP (start))
7119                 pbeg = SDATA (start);
7120               else
7121                 pbeg = BYTE_POS_ADDR (start_byte);
7122               p = pbeg + p_offset;
7123               pend = pbeg + pend_offset;
7124             }
7125         }
7126     }
7127
7128   safe_codings = Qnil;
7129   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
7130     if (! NILP (XCAR (tail)))
7131       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
7132
7133   return safe_codings;
7134 }
7135
7136
7137 DEFUN ("unencodable-char-position", Funencodable_char_position,
7138        Sunencodable_char_position, 3, 5, 0,
7139        doc: /*
7140 Return position of first un-encodable character in a region.
7141 START and END specfiy the region and CODING-SYSTEM specifies the
7142 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
7143
7144 If optional 4th argument COUNT is non-nil, it specifies at most how
7145 many un-encodable characters to search.  In this case, the value is a
7146 list of positions.
7147
7148 If optional 5th argument STRING is non-nil, it is a string to search
7149 for un-encodable characters.  In that case, START and END are indexes
7150 to the string.  */)
7151      (start, end, coding_system, count, string)
7152      Lisp_Object start, end, coding_system, count, string;
7153 {
7154   int n;
7155   struct coding_system coding;
7156   Lisp_Object attrs, charset_list;
7157   Lisp_Object positions;
7158   int from, to;
7159   const unsigned char *p, *stop, *pend;
7160   int ascii_compatible;
7161
7162   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
7163   attrs = CODING_ID_ATTRS (coding.id);
7164   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
7165     return Qnil;
7166   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
7167   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7168
7169   if (NILP (string))
7170     {
7171       validate_region (&start, &end);
7172       from = XINT (start);
7173       to = XINT (end);
7174       if (NILP (current_buffer->enable_multibyte_characters)
7175           || (ascii_compatible
7176               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
7177         return Qnil;
7178       p = CHAR_POS_ADDR (from);
7179       pend = CHAR_POS_ADDR (to);
7180       if (from < GPT && to >= GPT)
7181         stop = GPT_ADDR;
7182       else
7183         stop = pend;
7184     }
7185   else
7186     {
7187       CHECK_STRING (string);
7188       CHECK_NATNUM (start);
7189       CHECK_NATNUM (end);
7190       from = XINT (start);
7191       to = XINT (end);
7192       if (from > to
7193           || to > SCHARS (string))
7194         args_out_of_range_3 (string, start, end);
7195       if (! STRING_MULTIBYTE (string))
7196         return Qnil;
7197       p = SDATA (string) + string_char_to_byte (string, from);
7198       stop = pend = SDATA (string) + string_char_to_byte (string, to);
7199       if (ascii_compatible && (to - from) == (pend - p))
7200         return Qnil;
7201     }
7202
7203   if (NILP (count))
7204     n = 1;
7205   else
7206     {
7207       CHECK_NATNUM (count);
7208       n = XINT (count);
7209     }
7210
7211   positions = Qnil;
7212   while (1)
7213     {
7214       int c;
7215
7216       if (ascii_compatible)
7217         while (p < stop && ASCII_BYTE_P (*p))
7218           p++, from++;
7219       if (p >= stop)
7220         {
7221           if (p >= pend)
7222             break;
7223           stop = pend;
7224           p = GAP_END_ADDR;
7225         }
7226
7227       c = STRING_CHAR_ADVANCE (p);
7228       if (! (ASCII_CHAR_P (c) && ascii_compatible)
7229           && ! char_charset (c, charset_list, NULL))
7230         {
7231           positions = Fcons (make_number (from), positions);
7232           n--;
7233           if (n == 0)
7234             break;
7235         }
7236
7237       from++;
7238     }
7239
7240   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
7241 }
7242
7243
7244 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
7245        Scheck_coding_systems_region, 3, 3, 0,
7246        doc: /* Check if the region is encodable by coding systems.
7247
7248 START and END are buffer positions specifying the region.
7249 CODING-SYSTEM-LIST is a list of coding systems to check.
7250
7251 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
7252 CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
7253 whole region, POS0, POS1, ... are buffer positions where non-encodable
7254 characters are found.
7255
7256 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7257 value is nil.
7258
7259 START may be a string.  In that case, check if the string is
7260 encodable, and the value contains indices to the string instead of
7261 buffer positions.  END is ignored.  */)
7262      (start, end, coding_system_list)
7263      Lisp_Object start, end, coding_system_list;
7264 {
7265   Lisp_Object list;
7266   EMACS_INT start_byte, end_byte;
7267   int pos;
7268   const unsigned char *p, *pbeg, *pend;
7269   int c;
7270   Lisp_Object tail, elt;
7271
7272   if (STRINGP (start))
7273     {
7274       if (!STRING_MULTIBYTE (start)
7275           && SCHARS (start) != SBYTES (start))
7276         return Qnil;
7277       start_byte = 0;
7278       end_byte = SBYTES (start);
7279       pos = 0;
7280     }
7281   else
7282     {
7283       CHECK_NUMBER_COERCE_MARKER (start);
7284       CHECK_NUMBER_COERCE_MARKER (end);
7285       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7286         args_out_of_range (start, end);
7287       if (NILP (current_buffer->enable_multibyte_characters))
7288         return Qnil;
7289       start_byte = CHAR_TO_BYTE (XINT (start));
7290       end_byte = CHAR_TO_BYTE (XINT (end));
7291       if (XINT (end) - XINT (start) == end_byte - start_byte)
7292         return Qt;
7293
7294       if (XINT (start) < GPT && XINT (end) > GPT)
7295         {
7296           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7297             move_gap_both (XINT (start), start_byte);
7298           else
7299             move_gap_both (XINT (end), end_byte);
7300         }
7301       pos = XINT (start);
7302     }
7303
7304   list = Qnil;
7305   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
7306     {
7307       elt = XCAR (tail);
7308       list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
7309                                        Qnil)),
7310                     list);
7311     }
7312
7313   if (STRINGP (start))
7314     p = pbeg = SDATA (start);
7315   else
7316     p = pbeg = BYTE_POS_ADDR (start_byte);
7317   pend = p + (end_byte - start_byte);
7318
7319   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
7320   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7321
7322   while (p < pend)
7323     {
7324       if (ASCII_BYTE_P (*p))
7325         p++;
7326       else
7327         {
7328           c = STRING_CHAR_ADVANCE (p);
7329
7330           charset_map_loaded = 0;
7331           for (tail = list; CONSP (tail); tail = XCDR (tail))
7332             {
7333               elt = XCDR (XCAR (tail));
7334               if (! char_encodable_p (c, XCAR (elt)))
7335                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
7336             }
7337           if (charset_map_loaded)
7338             {
7339               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7340
7341               if (STRINGP (start))
7342                 pbeg = SDATA (start);
7343               else
7344                 pbeg = BYTE_POS_ADDR (start_byte);
7345               p = pbeg + p_offset;
7346               pend = pbeg + pend_offset;
7347             }
7348         }
7349       pos++;
7350     }
7351
7352   tail = list;
7353   list = Qnil;
7354   for (; CONSP (tail); tail = XCDR (tail))
7355     {
7356       elt = XCAR (tail);
7357       if (CONSP (XCDR (XCDR (elt))))
7358         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
7359                       list);
7360     }
7361
7362   return list;
7363 }
7364
7365
7366
7367 Lisp_Object
7368 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
7369      Lisp_Object start, end, coding_system, dst_object;
7370      int encodep, norecord;
7371 {
7372   struct coding_system coding;
7373   EMACS_INT from, from_byte, to, to_byte;
7374   Lisp_Object src_object;
7375
7376   CHECK_NUMBER_COERCE_MARKER (start);
7377   CHECK_NUMBER_COERCE_MARKER (end);
7378   if (NILP (coding_system))
7379     coding_system = Qno_conversion;
7380   else
7381     CHECK_CODING_SYSTEM (coding_system);
7382   src_object = Fcurrent_buffer ();
7383   if (NILP (dst_object))
7384     dst_object = src_object;
7385   else if (! EQ (dst_object, Qt))
7386     CHECK_BUFFER (dst_object);
7387
7388   validate_region (&start, &end);
7389   from = XFASTINT (start);
7390   from_byte = CHAR_TO_BYTE (from);
7391   to = XFASTINT (end);
7392   to_byte = CHAR_TO_BYTE (to);
7393
7394   setup_coding_system (coding_system, &coding);
7395   coding.mode |= CODING_MODE_LAST_BLOCK;
7396
7397   if (encodep)
7398     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7399                           dst_object);
7400   else
7401     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
7402                           dst_object);
7403   if (! norecord)
7404     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7405
7406   if (coding.result != CODING_RESULT_SUCCESS)
7407     error ("Code conversion error: %d", coding.result);
7408
7409   return (BUFFERP (dst_object)
7410           ? make_number (coding.produced_char)
7411           : coding.dst_object);
7412 }
7413
7414
7415 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
7416        3, 4, "r\nzCoding system: ",
7417        doc: /* Decode the current region from the specified coding system.
7418 When called from a program, takes four arguments:
7419         START, END, CODING-SYSTEM, and DESTINATION.
7420 START and END are buffer positions.
7421
7422 Optional 4th arguments DESTINATION specifies where the decoded text goes.
7423 If nil, the region between START and END is replace by the decoded text.
7424 If buffer, the decoded text is inserted in the buffer.
7425 If t, the decoded text is returned.
7426
7427 This function sets `last-coding-system-used' to the precise coding system
7428 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7429 not fully specified.)
7430 It returns the length of the decoded text.  */)
7431      (start, end, coding_system, destination)
7432      Lisp_Object start, end, coding_system, destination;
7433 {
7434   return code_convert_region (start, end, coding_system, destination, 0, 0);
7435 }
7436
7437 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
7438        3, 4, "r\nzCoding system: ",
7439        doc: /* Encode the current region by specified coding system.
7440 When called from a program, takes three arguments:
7441 START, END, and CODING-SYSTEM.  START and END are buffer positions.
7442
7443 Optional 4th arguments DESTINATION specifies where the encoded text goes.
7444 If nil, the region between START and END is replace by the encoded text.
7445 If buffer, the encoded text is inserted in the buffer.
7446 If t, the encoded text is returned.
7447
7448 This function sets `last-coding-system-used' to the precise coding system
7449 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7450 not fully specified.)
7451 It returns the length of the encoded text.  */)
7452   (start, end, coding_system, destination)
7453      Lisp_Object start, end, coding_system, destination;
7454 {
7455   return code_convert_region (start, end, coding_system, destination, 1, 0);
7456 }
7457
7458 Lisp_Object
7459 code_convert_string (string, coding_system, dst_object,
7460                      encodep, nocopy, norecord)
7461      Lisp_Object string, coding_system, dst_object;
7462      int encodep, nocopy, norecord;
7463 {
7464   struct coding_system coding;
7465   EMACS_INT chars, bytes;
7466
7467   CHECK_STRING (string);
7468   if (NILP (coding_system))
7469     {
7470       if (! norecord)
7471         Vlast_coding_system_used = Qno_conversion;
7472       if (NILP (dst_object))
7473         return (nocopy ? Fcopy_sequence (string) : string);
7474     }
7475
7476   if (NILP (coding_system))
7477     coding_system = Qno_conversion;
7478   else
7479     CHECK_CODING_SYSTEM (coding_system);
7480   if (NILP (dst_object))
7481     dst_object = Qt;
7482   else if (! EQ (dst_object, Qt))
7483     CHECK_BUFFER (dst_object);
7484
7485   setup_coding_system (coding_system, &coding);
7486   coding.mode |= CODING_MODE_LAST_BLOCK;
7487   chars = SCHARS (string);
7488   bytes = SBYTES (string);
7489   if (encodep)
7490     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7491   else
7492     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
7493   if (! norecord)
7494     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
7495
7496   if (coding.result != CODING_RESULT_SUCCESS)
7497     error ("Code conversion error: %d", coding.result);
7498
7499   return (BUFFERP (dst_object)
7500           ? make_number (coding.produced_char)
7501           : coding.dst_object);
7502 }
7503
7504
7505 /* Encode or decode STRING according to CODING_SYSTEM.
7506    Do not set Vlast_coding_system_used.
7507
7508    This function is called only from macros DECODE_FILE and
7509    ENCODE_FILE, thus we ignore character composition.  */
7510
7511 Lisp_Object
7512 code_convert_string_norecord (string, coding_system, encodep)
7513      Lisp_Object string, coding_system;
7514      int encodep;
7515 {
7516   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
7517 }
7518
7519
7520 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
7521        2, 4, 0,
7522        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
7523
7524 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
7525 if the decoding operation is trivial.
7526
7527 Optional fourth arg BUFFER non-nil meant that the decoded text is
7528 inserted in BUFFER instead of returned as a string.  In this case,
7529 the return value is BUFFER.
7530
7531 This function sets `last-coding-system-used' to the precise coding system
7532 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7533 not fully specified.  */)
7534   (string, coding_system, nocopy, buffer)
7535      Lisp_Object string, coding_system, nocopy, buffer;
7536 {
7537   return code_convert_string (string, coding_system, buffer,
7538                               0, ! NILP (nocopy), 0);
7539 }
7540
7541 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
7542        2, 4, 0,
7543        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
7544
7545 Optional third arg NOCOPY non-nil means it is OK to return STRING
7546 itself if the encoding operation is trivial.
7547
7548 Optional fourth arg BUFFER non-nil meant that the encoded text is
7549 inserted in BUFFER instead of returned as a string.  In this case,
7550 the return value is BUFFER.
7551
7552 This function sets `last-coding-system-used' to the precise coding system
7553 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7554 not fully specified.)  */)
7555      (string, coding_system, nocopy, buffer)
7556      Lisp_Object string, coding_system, nocopy, buffer;
7557 {
7558   return code_convert_string (string, coding_system, buffer,
7559                               1, ! NILP (nocopy), 1);
7560 }
7561
7562 \f
7563 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
7564        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7565 Return the corresponding character.  */)
7566      (code)
7567      Lisp_Object code;
7568 {
7569   Lisp_Object spec, attrs, val;
7570   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
7571   int c;
7572
7573   CHECK_NATNUM (code);
7574   c = XFASTINT (code);
7575   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7576   attrs = AREF (spec, 0);
7577
7578   if (ASCII_BYTE_P (c)
7579       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7580     return code;
7581
7582   val = CODING_ATTR_CHARSET_LIST (attrs);
7583   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7584   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7585   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
7586
7587   if (c <= 0x7F)
7588     charset = charset_roman;
7589   else if (c >= 0xA0 && c < 0xDF)
7590     {
7591       charset = charset_kana;
7592       c -= 0x80;
7593     }
7594   else
7595     {
7596       int s1 = c >> 8, s2 = c & 0xFF;
7597
7598       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
7599           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
7600         error ("Invalid code: %d", code);
7601       SJIS_TO_JIS (c);
7602       charset = charset_kanji;
7603     }
7604   c = DECODE_CHAR (charset, c);
7605   if (c < 0)
7606     error ("Invalid code: %d", code);
7607   return make_number (c);
7608 }
7609
7610
7611 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
7612        doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7613 Return the corresponding code in SJIS.  */)
7614      (ch)
7615     Lisp_Object ch;
7616 {
7617   Lisp_Object spec, attrs, charset_list;
7618   int c;
7619   struct charset *charset;
7620   unsigned code;
7621
7622   CHECK_CHARACTER (ch);
7623   c = XFASTINT (ch);
7624   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
7625   attrs = AREF (spec, 0);
7626
7627   if (ASCII_CHAR_P (c)
7628       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7629     return ch;
7630
7631   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7632   charset = char_charset (c, charset_list, &code);
7633   if (code == CHARSET_INVALID_CODE (charset))
7634     error ("Can't encode by shift_jis encoding: %d", c);
7635   JIS_TO_SJIS (code);
7636
7637   return make_number (code);
7638 }
7639
7640 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
7641        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7642 Return the corresponding character.  */)
7643      (code)
7644      Lisp_Object code;
7645 {
7646   Lisp_Object spec, attrs, val;
7647   struct charset *charset_roman, *charset_big5, *charset;
7648   int c;
7649
7650   CHECK_NATNUM (code);
7651   c = XFASTINT (code);
7652   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7653   attrs = AREF (spec, 0);
7654
7655   if (ASCII_BYTE_P (c)
7656       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7657     return code;
7658
7659   val = CODING_ATTR_CHARSET_LIST (attrs);
7660   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
7661   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
7662
7663   if (c <= 0x7F)
7664     charset = charset_roman;
7665   else
7666     {
7667       int b1 = c >> 8, b2 = c & 0x7F;
7668       if (b1 < 0xA1 || b1 > 0xFE
7669           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
7670         error ("Invalid code: %d", code);
7671       charset = charset_big5;
7672     }
7673   c = DECODE_CHAR (charset, (unsigned )c);
7674   if (c < 0)
7675     error ("Invalid code: %d", code);
7676   return make_number (c);
7677 }
7678
7679 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
7680        doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7681 Return the corresponding character code in Big5.  */)
7682      (ch)
7683      Lisp_Object ch;
7684 {
7685   Lisp_Object spec, attrs, charset_list;
7686   struct charset *charset;
7687   int c;
7688   unsigned code;
7689
7690   CHECK_CHARACTER (ch);
7691   c = XFASTINT (ch);
7692   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7693   attrs = AREF (spec, 0);
7694   if (ASCII_CHAR_P (c)
7695       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7696     return ch;
7697
7698   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7699   charset = char_charset (c, charset_list, &code);
7700   if (code == CHARSET_INVALID_CODE (charset))
7701     error ("Can't encode by Big5 encoding: %d", c);
7702
7703   return make_number (code);
7704 }
7705
7706 \f
7707 DEFUN ("set-terminal-coding-system-internal",
7708        Fset_terminal_coding_system_internal,
7709        Sset_terminal_coding_system_internal, 1, 1, 0,
7710        doc: /* Internal use only.  */)
7711      (coding_system)
7712      Lisp_Object coding_system;
7713 {
7714   CHECK_SYMBOL (coding_system);
7715   setup_coding_system (Fcheck_coding_system (coding_system),
7716                         &terminal_coding);
7717
7718   /* We had better not send unsafe characters to terminal.  */
7719   terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7720   /* Characer composition should be disabled.  */
7721   terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7722   terminal_coding.src_multibyte = 1;
7723   terminal_coding.dst_multibyte = 0;
7724   return Qnil;
7725 }
7726
7727 DEFUN ("set-safe-terminal-coding-system-internal",
7728        Fset_safe_terminal_coding_system_internal,
7729        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
7730        doc: /* Internal use only.  */)
7731      (coding_system)
7732      Lisp_Object coding_system;
7733 {
7734   CHECK_SYMBOL (coding_system);
7735   setup_coding_system (Fcheck_coding_system (coding_system),
7736                        &safe_terminal_coding);
7737   /* Characer composition should be disabled.  */
7738   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7739   safe_terminal_coding.src_multibyte = 1;
7740   safe_terminal_coding.dst_multibyte = 0;
7741   return Qnil;
7742 }
7743
7744 DEFUN ("terminal-coding-system",
7745        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
7746        doc: /* Return coding system specified for terminal output.  */)
7747      ()
7748 {
7749   return CODING_ID_NAME (terminal_coding.id);
7750 }
7751
7752 DEFUN ("set-keyboard-coding-system-internal",
7753        Fset_keyboard_coding_system_internal,
7754        Sset_keyboard_coding_system_internal, 1, 1, 0,
7755        doc: /* Internal use only.  */)
7756      (coding_system)
7757      Lisp_Object coding_system;
7758 {
7759   CHECK_SYMBOL (coding_system);
7760   setup_coding_system (Fcheck_coding_system (coding_system),
7761                        &keyboard_coding);
7762   /* Characer composition should be disabled.  */
7763   keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7764   return Qnil;
7765 }
7766
7767 DEFUN ("keyboard-coding-system",
7768        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
7769        doc: /* Return coding system specified for decoding keyboard input.  */)
7770      ()
7771 {
7772   return CODING_ID_NAME (keyboard_coding.id);
7773 }
7774
7775 \f
7776 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7777        Sfind_operation_coding_system,  1, MANY, 0,
7778        doc: /* Choose a coding system for an operation based on the target name.
7779 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7780 DECODING-SYSTEM is the coding system to use for decoding
7781 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7782 for encoding (in case OPERATION does encoding).
7783
7784 The first argument OPERATION specifies an I/O primitive:
7785   For file I/O, `insert-file-contents' or `write-region'.
7786   For process I/O, `call-process', `call-process-region', or `start-process'.
7787   For network I/O, `open-network-stream'.
7788
7789 The remaining arguments should be the same arguments that were passed
7790 to the primitive.  Depending on which primitive, one of those arguments
7791 is selected as the TARGET.  For example, if OPERATION does file I/O,
7792 whichever argument specifies the file name is TARGET.
7793
7794 TARGET has a meaning which depends on OPERATION:
7795   For file I/O, TARGET is a file name.
7796   For process I/O, TARGET is a process name.
7797   For network I/O, TARGET is a service name or a port number
7798
7799 This function looks up what specified for TARGET in,
7800 `file-coding-system-alist', `process-coding-system-alist',
7801 or `network-coding-system-alist' depending on OPERATION.
7802 They may specify a coding system, a cons of coding systems,
7803 or a function symbol to call.
7804 In the last case, we call the function with one argument,
7805 which is a list of all the arguments given to this function.
7806
7807 usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
7808      (nargs, args)
7809      int nargs;
7810      Lisp_Object *args;
7811 {
7812   Lisp_Object operation, target_idx, target, val;
7813   register Lisp_Object chain;
7814
7815   if (nargs < 2)
7816     error ("Too few arguments");
7817   operation = args[0];
7818   if (!SYMBOLP (operation)
7819       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
7820     error ("Invalid first arguement");
7821   if (nargs < 1 + XINT (target_idx))
7822     error ("Too few arguments for operation: %s",
7823            SDATA (SYMBOL_NAME (operation)));
7824   target = args[XINT (target_idx) + 1];
7825   if (!(STRINGP (target)
7826         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
7827     error ("Invalid %dth argument", XINT (target_idx) + 1);
7828
7829   chain = ((EQ (operation, Qinsert_file_contents)
7830             || EQ (operation, Qwrite_region))
7831            ? Vfile_coding_system_alist
7832            : (EQ (operation, Qopen_network_stream)
7833               ? Vnetwork_coding_system_alist
7834               : Vprocess_coding_system_alist));
7835   if (NILP (chain))
7836     return Qnil;
7837
7838   for (; CONSP (chain); chain = XCDR (chain))
7839     {
7840       Lisp_Object elt;
7841
7842       elt = XCAR (chain);
7843       if (CONSP (elt)
7844           && ((STRINGP (target)
7845                && STRINGP (XCAR (elt))
7846                && fast_string_match (XCAR (elt), target) >= 0)
7847               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
7848         {
7849           val = XCDR (elt);
7850           /* Here, if VAL is both a valid coding system and a valid
7851              function symbol, we return VAL as a coding system.  */
7852           if (CONSP (val))
7853             return val;
7854           if (! SYMBOLP (val))
7855             return Qnil;
7856           if (! NILP (Fcoding_system_p (val)))
7857             return Fcons (val, val);
7858           if (! NILP (Ffboundp (val)))
7859             {
7860               val = call1 (val, Flist (nargs, args));
7861               if (CONSP (val))
7862                 return val;
7863               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7864                 return Fcons (val, val);
7865             }
7866           return Qnil;
7867         }
7868     }
7869   return Qnil;
7870 }
7871
7872 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
7873        Sset_coding_system_priority, 0, MANY, 0,
7874        doc: /* Assign higher priority to the coding systems given as arguments.
7875 If multiple coding systems belongs to the same category,
7876 all but the first one are ignored.
7877
7878 usage: (set-coding-system-priority ...)  */)
7879      (nargs, args)
7880      int nargs;
7881      Lisp_Object *args;
7882 {
7883   int i, j;
7884   int changed[coding_category_max];
7885   enum coding_category priorities[coding_category_max];
7886
7887   bzero (changed, sizeof changed);
7888
7889   for (i = j = 0; i < nargs; i++)
7890     {
7891       enum coding_category category;
7892       Lisp_Object spec, attrs;
7893
7894       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7895       attrs = AREF (spec, 0);
7896       category = XINT (CODING_ATTR_CATEGORY (attrs));
7897       if (changed[category])
7898         /* Ignore this coding system because a coding system of the
7899            same category already had a higher priority.  */
7900         continue;
7901       changed[category] = 1;
7902       priorities[j++] = category;
7903       if (coding_categories[category].id >= 0
7904           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7905         setup_coding_system (args[i], &coding_categories[category]);
7906       Fset (AREF (Vcoding_category_table, category), args[i]);
7907     }
7908
7909   /* Now we have decided top J priorities.  Reflect the order of the
7910      original priorities to the remaining priorities.  */
7911
7912   for (i = j, j = 0; i < coding_category_max; i++, j++)
7913     {
7914       while (j < coding_category_max
7915              && changed[coding_priorities[j]])
7916         j++;
7917       if (j == coding_category_max)
7918         abort ();
7919       priorities[i] = coding_priorities[j];
7920     }
7921
7922   bcopy (priorities, coding_priorities, sizeof priorities);
7923
7924   /* Update `coding-category-list'.  */
7925   Vcoding_category_list = Qnil;
7926   for (i = coding_category_max - 1; i >= 0; i--)
7927     Vcoding_category_list
7928       = Fcons (AREF (Vcoding_category_table, priorities[i]),
7929                Vcoding_category_list);
7930
7931   return Qnil;
7932 }
7933
7934 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7935        Scoding_system_priority_list, 0, 1, 0,
7936        doc: /* Return a list of coding systems ordered by their priorities.
7937 HIGHESTP non-nil means just return the highest priority one.  */)
7938      (highestp)
7939      Lisp_Object highestp;
7940 {
7941   int i;
7942   Lisp_Object val;
7943
7944   for (i = 0, val = Qnil; i < coding_category_max; i++)
7945     {
7946       enum coding_category category = coding_priorities[i];
7947       int id = coding_categories[category].id;
7948       Lisp_Object attrs;
7949
7950       if (id < 0)
7951         continue;
7952       attrs = CODING_ID_ATTRS (id);
7953       if (! NILP (highestp))
7954         return CODING_ATTR_BASE_NAME (attrs);
7955       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
7956     }
7957   return Fnreverse (val);
7958 }
7959
7960 static char *suffixes[] = { "-unix", "-dos", "-mac" };
7961
7962 static Lisp_Object
7963 make_subsidiaries (base)
7964      Lisp_Object base;
7965 {
7966   Lisp_Object subsidiaries;
7967   int base_name_len = SBYTES (SYMBOL_NAME (base));
7968   char *buf = (char *) alloca (base_name_len + 6);
7969   int i;
7970
7971   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
7972   subsidiaries = Fmake_vector (make_number (3), Qnil);
7973   for (i = 0; i < 3; i++)
7974     {
7975       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
7976       ASET (subsidiaries, i, intern (buf));
7977     }
7978   return subsidiaries;
7979 }
7980
7981
7982 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7983        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
7984        doc: /* For internal use only.
7985 usage: (define-coding-system-internal ...)  */)
7986      (nargs, args)
7987      int nargs;
7988      Lisp_Object *args;
7989 {
7990   Lisp_Object name;
7991   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
7992   Lisp_Object attrs;            /* Vector of attributes.  */
7993   Lisp_Object eol_type;
7994   Lisp_Object aliases;
7995   Lisp_Object coding_type, charset_list, safe_charsets;
7996   enum coding_category category;
7997   Lisp_Object tail, val;
7998   int max_charset_id = 0;
7999   int i;
8000
8001   if (nargs < coding_arg_max)
8002     goto short_args;
8003
8004   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
8005
8006   name = args[coding_arg_name];
8007   CHECK_SYMBOL (name);
8008   CODING_ATTR_BASE_NAME (attrs) = name;
8009
8010   val = args[coding_arg_mnemonic];
8011   if (! STRINGP (val))
8012     CHECK_CHARACTER (val);
8013   CODING_ATTR_MNEMONIC (attrs) = val;
8014
8015   coding_type = args[coding_arg_coding_type];
8016   CHECK_SYMBOL (coding_type);
8017   CODING_ATTR_TYPE (attrs) = coding_type;
8018
8019   charset_list = args[coding_arg_charset_list];
8020   if (SYMBOLP (charset_list))
8021     {
8022       if (EQ (charset_list, Qiso_2022))
8023         {
8024           if (! EQ (coding_type, Qiso_2022))
8025             error ("Invalid charset-list");
8026           charset_list = Viso_2022_charset_list;
8027         }
8028       else if (EQ (charset_list, Qemacs_mule))
8029         {
8030           if (! EQ (coding_type, Qemacs_mule))
8031             error ("Invalid charset-list");
8032           charset_list = Vemacs_mule_charset_list;
8033         }
8034       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8035         if (max_charset_id < XFASTINT (XCAR (tail)))
8036           max_charset_id = XFASTINT (XCAR (tail));
8037     }
8038   else
8039     {
8040       charset_list = Fcopy_sequence (charset_list);
8041       for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
8042         {
8043           struct charset *charset;
8044
8045           val = Fcar (tail);
8046           CHECK_CHARSET_GET_CHARSET (val, charset);
8047           if (EQ (coding_type, Qiso_2022)
8048               ? CHARSET_ISO_FINAL (charset) < 0
8049               : EQ (coding_type, Qemacs_mule)
8050               ? CHARSET_EMACS_MULE_ID (charset) < 0
8051               : 0)
8052             error ("Can't handle charset `%s'",
8053                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8054
8055           XSETCAR (tail, make_number (charset->id));
8056           if (max_charset_id < charset->id)
8057             max_charset_id = charset->id;
8058         }
8059     }
8060   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
8061
8062   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
8063                                 make_number (255));
8064   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8065     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
8066   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
8067
8068   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
8069
8070   val = args[coding_arg_decode_translation_table];
8071   if (! NILP (val))
8072     CHECK_CHAR_TABLE (val);
8073   CODING_ATTR_DECODE_TBL (attrs) = val;
8074
8075   val = args[coding_arg_encode_translation_table];
8076   if (! NILP (val))
8077     CHECK_CHAR_TABLE (val);
8078   CODING_ATTR_ENCODE_TBL (attrs) = val;
8079
8080   val = args[coding_arg_post_read_conversion];
8081   CHECK_SYMBOL (val);
8082   CODING_ATTR_POST_READ (attrs) = val;
8083
8084   val = args[coding_arg_pre_write_conversion];
8085   CHECK_SYMBOL (val);
8086   CODING_ATTR_PRE_WRITE (attrs) = val;
8087
8088   val = args[coding_arg_default_char];
8089   if (NILP (val))
8090     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
8091   else
8092     {
8093       CHECK_CHARACTER (val);
8094       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8095     }
8096
8097   val = args[coding_arg_for_unibyte];
8098   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
8099
8100   val = args[coding_arg_plist];
8101   CHECK_LIST (val);
8102   CODING_ATTR_PLIST (attrs) = val;
8103
8104   if (EQ (coding_type, Qcharset))
8105     {
8106       Lisp_Object list;
8107       /* Generate a lisp vector of 256 elements.  Each element is nil,
8108          integer, or a list of charset IDs.
8109
8110          If Nth element is nil, the byte code N is invalid in this
8111          coding system.
8112
8113          If Nth element is a number NUM, N is the first byte of a
8114          charset whose ID is NUM.
8115
8116          If Nth element is a list of charset IDs, N is the first byte
8117          of one of them.  The list is sorted by dimensions of the
8118          charsets.  A charset of smaller dimension comes firtst.
8119       */
8120       for (list = Qnil, tail = charset_list; CONSP (tail); tail = XCDR (tail))
8121         {
8122           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
8123
8124           if (charset->method == CHARSET_METHOD_SUPERSET)
8125             {
8126               val = CHARSET_SUPERSET (charset);
8127               for (; CONSP (val); val = XCDR (val))
8128                 list = Fcons (XCAR (XCAR (val)), list);
8129             }
8130           else
8131             list = Fcons (XCAR (tail), list);
8132         }
8133
8134       val = Fmake_vector (make_number (256), Qnil);
8135
8136       for (tail = Fnreverse (list); CONSP (tail); tail = XCDR (tail))
8137         {
8138           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
8139           int dim = CHARSET_DIMENSION (charset);
8140           int idx = (dim - 1) * 4;
8141
8142           if (CHARSET_ASCII_COMPATIBLE_P (charset))
8143             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8144
8145           for (i = charset->code_space[idx];
8146                i <= charset->code_space[idx + 1]; i++)
8147             {
8148               Lisp_Object tmp, tmp2;
8149               int dim2;
8150
8151               tmp = AREF (val, i);
8152               if (NILP (tmp))
8153                 tmp = XCAR (tail);
8154               else if (NUMBERP (tmp))
8155                 {
8156                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
8157                   if (dim < dim2)
8158                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
8159                   else
8160                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
8161                 }
8162               else
8163                 {
8164                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
8165                     {
8166                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
8167                       if (dim < dim2)
8168                         break;
8169                     }
8170                   if (NILP (tmp2))
8171                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
8172                   else
8173                     {
8174                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
8175                       XSETCAR (tmp2, XCAR (tail));
8176                     }
8177                 }
8178               ASET (val, i, tmp);
8179             }
8180         }
8181       ASET (attrs, coding_attr_charset_valids, val);
8182       category = coding_category_charset;
8183     }
8184   else if (EQ (coding_type, Qccl))
8185     {
8186       Lisp_Object valids;
8187
8188       if (nargs < coding_arg_ccl_max)
8189         goto short_args;
8190
8191       val = args[coding_arg_ccl_decoder];
8192       CHECK_CCL_PROGRAM (val);
8193       if (VECTORP (val))
8194         val = Fcopy_sequence (val);
8195       ASET (attrs, coding_attr_ccl_decoder, val);
8196
8197       val = args[coding_arg_ccl_encoder];
8198       CHECK_CCL_PROGRAM (val);
8199       if (VECTORP (val))
8200         val = Fcopy_sequence (val);
8201       ASET (attrs, coding_attr_ccl_encoder, val);
8202
8203       val = args[coding_arg_ccl_valids];
8204       valids = Fmake_string (make_number (256), make_number (0));
8205       for (tail = val; !NILP (tail); tail = Fcdr (tail))
8206         {
8207           int from, to;
8208
8209           val = Fcar (tail);
8210           if (INTEGERP (val))
8211             {
8212               from = to = XINT (val);
8213               if (from < 0 || from > 255)
8214                 args_out_of_range_3 (val, make_number (0), make_number (255));
8215             }
8216           else
8217             {
8218               CHECK_CONS (val);
8219               CHECK_NATNUM_CAR (val);
8220               CHECK_NATNUM_CDR (val);
8221               from = XINT (XCAR (val));
8222               if (from > 255)
8223                 args_out_of_range_3 (XCAR (val),
8224                                      make_number (0), make_number (255));
8225               to = XINT (XCDR (val));
8226               if (to < from || to > 255)
8227                 args_out_of_range_3 (XCDR (val),
8228                                      XCAR (val), make_number (255));
8229             }
8230           for (i = from; i <= to; i++)
8231             SSET (valids, i, 1);
8232         }
8233       ASET (attrs, coding_attr_ccl_valids, valids);
8234
8235       category = coding_category_ccl;
8236     }
8237   else if (EQ (coding_type, Qutf_16))
8238     {
8239       Lisp_Object bom, endian;
8240
8241       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8242
8243       if (nargs < coding_arg_utf16_max)
8244         goto short_args;
8245
8246       bom = args[coding_arg_utf16_bom];
8247       if (! NILP (bom) && ! EQ (bom, Qt))
8248         {
8249           CHECK_CONS (bom);
8250           val = XCAR (bom);
8251           CHECK_CODING_SYSTEM (val);
8252           val = XCDR (bom);
8253           CHECK_CODING_SYSTEM (val);
8254         }
8255       ASET (attrs, coding_attr_utf_16_bom, bom);
8256
8257       endian = args[coding_arg_utf16_endian];
8258       CHECK_SYMBOL (endian);
8259       if (NILP (endian))
8260         endian = Qbig;
8261       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8262         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
8263       ASET (attrs, coding_attr_utf_16_endian, endian);
8264
8265       category = (CONSP (bom)
8266                   ? coding_category_utf_16_auto
8267                   : NILP (bom)
8268                   ? (EQ (endian, Qbig)
8269                      ? coding_category_utf_16_be_nosig
8270                      : coding_category_utf_16_le_nosig)
8271                   : (EQ (endian, Qbig)
8272                      ? coding_category_utf_16_be
8273                      : coding_category_utf_16_le));
8274     }
8275   else if (EQ (coding_type, Qiso_2022))
8276     {
8277       Lisp_Object initial, reg_usage, request, flags;
8278       int i;
8279
8280       if (nargs < coding_arg_iso2022_max)
8281         goto short_args;
8282
8283       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
8284       CHECK_VECTOR (initial);
8285       for (i = 0; i < 4; i++)
8286         {
8287           val = Faref (initial, make_number (i));
8288           if (! NILP (val))
8289             {
8290               struct charset *charset;
8291
8292               CHECK_CHARSET_GET_CHARSET (val, charset);
8293               ASET (initial, i, make_number (CHARSET_ID (charset)));
8294               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
8295                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8296             }
8297           else
8298             ASET (initial, i, make_number (-1));
8299         }
8300
8301       reg_usage = args[coding_arg_iso2022_reg_usage];
8302       CHECK_CONS (reg_usage);
8303       CHECK_NUMBER_CAR (reg_usage);
8304       CHECK_NUMBER_CDR (reg_usage);
8305
8306       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
8307       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
8308         {
8309           int id;
8310           Lisp_Object tmp;
8311
8312           val = Fcar (tail);
8313           CHECK_CONS (val);
8314           tmp = XCAR (val);
8315           CHECK_CHARSET_GET_ID (tmp, id);
8316           CHECK_NATNUM_CDR (val);
8317           if (XINT (XCDR (val)) >= 4)
8318             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
8319           XSETCAR (val, make_number (id));
8320         }
8321
8322       flags = args[coding_arg_iso2022_flags];
8323       CHECK_NATNUM (flags);
8324       i = XINT (flags);
8325       if (EQ (args[coding_arg_charset_list], Qiso_2022))
8326         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
8327
8328       ASET (attrs, coding_attr_iso_initial, initial);
8329       ASET (attrs, coding_attr_iso_usage, reg_usage);
8330       ASET (attrs, coding_attr_iso_request, request);
8331       ASET (attrs, coding_attr_iso_flags, flags);
8332       setup_iso_safe_charsets (attrs);
8333
8334       if (i & CODING_ISO_FLAG_SEVEN_BITS)
8335         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
8336                           | CODING_ISO_FLAG_SINGLE_SHIFT))
8337                     ? coding_category_iso_7_else
8338                     : EQ (args[coding_arg_charset_list], Qiso_2022)
8339                     ? coding_category_iso_7
8340                     : coding_category_iso_7_tight);
8341       else
8342         {
8343           int id = XINT (AREF (initial, 1));
8344
8345           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
8346                        || EQ (args[coding_arg_charset_list], Qiso_2022)
8347                        || id < 0)
8348                       ? coding_category_iso_8_else
8349                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
8350                       ? coding_category_iso_8_1
8351                       : coding_category_iso_8_2);
8352         }
8353       if (category != coding_category_iso_8_1
8354           && category != coding_category_iso_8_2)
8355         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8356     }
8357   else if (EQ (coding_type, Qemacs_mule))
8358     {
8359       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
8360         ASET (attrs, coding_attr_emacs_mule_full, Qt);
8361       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8362       category = coding_category_emacs_mule;
8363     }
8364   else if (EQ (coding_type, Qshift_jis))
8365     {
8366
8367       struct charset *charset;
8368
8369       if (XINT (Flength (charset_list)) != 3)
8370         error ("There should be just three charsets");
8371
8372       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8373       if (CHARSET_DIMENSION (charset) != 1)
8374         error ("Dimension of charset %s is not one",
8375                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8376       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8377         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8378
8379       charset_list = XCDR (charset_list);
8380       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8381       if (CHARSET_DIMENSION (charset) != 1)
8382         error ("Dimension of charset %s is not one",
8383                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8384
8385       charset_list = XCDR (charset_list);
8386       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8387       if (CHARSET_DIMENSION (charset) != 2)
8388         error ("Dimension of charset %s is not two",
8389                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8390
8391       category = coding_category_sjis;
8392       Vsjis_coding_system = name;
8393     }
8394   else if (EQ (coding_type, Qbig5))
8395     {
8396       struct charset *charset;
8397
8398       if (XINT (Flength (charset_list)) != 2)
8399         error ("There should be just two charsets");
8400
8401       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8402       if (CHARSET_DIMENSION (charset) != 1)
8403         error ("Dimension of charset %s is not one",
8404                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8405       if (CHARSET_ASCII_COMPATIBLE_P (charset))
8406         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8407
8408       charset_list = XCDR (charset_list);
8409       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8410       if (CHARSET_DIMENSION (charset) != 2)
8411         error ("Dimension of charset %s is not two",
8412                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8413
8414       category = coding_category_big5;
8415       Vbig5_coding_system = name;
8416     }
8417   else if (EQ (coding_type, Qraw_text))
8418     {
8419       category = coding_category_raw_text;
8420       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8421     }
8422   else if (EQ (coding_type, Qutf_8))
8423     {
8424       category = coding_category_utf_8;
8425       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8426     }
8427   else if (EQ (coding_type, Qundecided))
8428     category = coding_category_undecided;
8429   else
8430     error ("Invalid coding system type: %s",
8431            SDATA (SYMBOL_NAME (coding_type)));
8432
8433   CODING_ATTR_CATEGORY (attrs) = make_number (category);
8434   CODING_ATTR_PLIST (attrs)
8435     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
8436                                 CODING_ATTR_PLIST (attrs)));
8437
8438   eol_type = args[coding_arg_eol_type];
8439   if (! NILP (eol_type)
8440       && ! EQ (eol_type, Qunix)
8441       && ! EQ (eol_type, Qdos)
8442       && ! EQ (eol_type, Qmac))
8443     error ("Invalid eol-type");
8444
8445   aliases = Fcons (name, Qnil);
8446
8447   if (NILP (eol_type))
8448     {
8449       eol_type = make_subsidiaries (name);
8450       for (i = 0; i < 3; i++)
8451         {
8452           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
8453
8454           this_name = AREF (eol_type, i);
8455           this_aliases = Fcons (this_name, Qnil);
8456           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
8457           this_spec = Fmake_vector (make_number (3), attrs);
8458           ASET (this_spec, 1, this_aliases);
8459           ASET (this_spec, 2, this_eol_type);
8460           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
8461           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
8462           Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
8463                                         Vcoding_system_alist);
8464         }
8465     }
8466
8467   spec_vec = Fmake_vector (make_number (3), attrs);
8468   ASET (spec_vec, 1, aliases);
8469   ASET (spec_vec, 2, eol_type);
8470
8471   Fputhash (name, spec_vec, Vcoding_system_hash_table);
8472   Vcoding_system_list = Fcons (name, Vcoding_system_list);
8473   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
8474                                 Vcoding_system_alist);
8475
8476   {
8477     int id = coding_categories[category].id;
8478
8479     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
8480       setup_coding_system (name, &coding_categories[category]);
8481   }
8482
8483   return Qnil;
8484
8485  short_args:
8486   return Fsignal (Qwrong_number_of_arguments,
8487                   Fcons (intern ("define-coding-system-internal"),
8488                          make_number (nargs)));
8489 }
8490
8491 /* Fixme: should this record the alias relationships for
8492    diagnostics?  Should it update coding-system-list?  */
8493 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
8494        Sdefine_coding_system_alias, 2, 2, 0,
8495        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
8496      (alias, coding_system)
8497      Lisp_Object alias, coding_system;
8498 {
8499   Lisp_Object spec, aliases, eol_type;
8500
8501   CHECK_SYMBOL (alias);
8502   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8503   aliases = AREF (spec, 1);
8504   while (!NILP (XCDR (aliases)))
8505     aliases = XCDR (aliases);
8506   XSETCDR (aliases, Fcons (alias, Qnil));
8507
8508   eol_type = AREF (spec, 2);
8509   if (VECTORP (eol_type))
8510     {
8511       Lisp_Object subsidiaries;
8512       int i;
8513
8514       subsidiaries = make_subsidiaries (alias);
8515       for (i = 0; i < 3; i++)
8516         Fdefine_coding_system_alias (AREF (subsidiaries, i),
8517                                      AREF (eol_type, i));
8518
8519       ASET (spec, 2, subsidiaries);
8520     }
8521
8522   Fputhash (alias, spec, Vcoding_system_hash_table);
8523   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
8524                                 Vcoding_system_alist);
8525
8526   return Qnil;
8527 }
8528
8529 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
8530        1, 1, 0,
8531        doc: /* Return the base of CODING-SYSTEM.
8532 Any alias or subsidiary coding system is not a base coding system.  */)
8533   (coding_system)
8534      Lisp_Object coding_system;
8535 {
8536   Lisp_Object spec, attrs;
8537
8538   if (NILP (coding_system))
8539     return (Qno_conversion);
8540   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8541   attrs = AREF (spec, 0);
8542   return CODING_ATTR_BASE_NAME (attrs);
8543 }
8544
8545 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
8546        1, 1, 0,
8547        doc: "Return the property list of CODING-SYSTEM.")
8548      (coding_system)
8549      Lisp_Object coding_system;
8550 {
8551   Lisp_Object spec, attrs;
8552
8553   if (NILP (coding_system))
8554     coding_system = Qno_conversion;
8555   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8556   attrs = AREF (spec, 0);
8557   return CODING_ATTR_PLIST (attrs);
8558 }
8559
8560
8561 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
8562        1, 1, 0,
8563        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
8564      (coding_system)
8565      Lisp_Object coding_system;
8566 {
8567   Lisp_Object spec;
8568
8569   if (NILP (coding_system))
8570     coding_system = Qno_conversion;
8571   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
8572   return AREF (spec, 1);
8573 }
8574
8575 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
8576        Scoding_system_eol_type, 1, 1, 0,
8577        doc: /* Return eol-type of CODING-SYSTEM.
8578 An eol-type is integer 0, 1, 2, or a vector of coding systems.
8579
8580 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
8581 and CR respectively.
8582
8583 A vector value indicates that a format of end-of-line should be
8584 detected automatically.  Nth element of the vector is the subsidiary
8585 coding system whose eol-type is N.  */)
8586      (coding_system)
8587      Lisp_Object coding_system;
8588 {
8589   Lisp_Object spec, eol_type;
8590   int n;
8591
8592   if (NILP (coding_system))
8593     coding_system = Qno_conversion;
8594   if (! CODING_SYSTEM_P (coding_system))
8595     return Qnil;
8596   spec = CODING_SYSTEM_SPEC (coding_system);
8597   eol_type = AREF (spec, 2);
8598   if (VECTORP (eol_type))
8599     return Fcopy_sequence (eol_type);
8600   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
8601   return make_number (n);
8602 }
8603
8604 #endif /* emacs */
8605
8606 \f
8607 /*** 9. Post-amble ***/
8608
8609 void
8610 init_coding_once ()
8611 {
8612   int i;
8613
8614   for (i = 0; i < coding_category_max; i++)
8615     {
8616       coding_categories[i].id = -1;
8617       coding_priorities[i] = i;
8618     }
8619
8620   /* ISO2022 specific initialize routine.  */
8621   for (i = 0; i < 0x20; i++)
8622     iso_code_class[i] = ISO_control_0;
8623   for (i = 0x21; i < 0x7F; i++)
8624     iso_code_class[i] = ISO_graphic_plane_0;
8625   for (i = 0x80; i < 0xA0; i++)
8626     iso_code_class[i] = ISO_control_1;
8627   for (i = 0xA1; i < 0xFF; i++)
8628     iso_code_class[i] = ISO_graphic_plane_1;
8629   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
8630   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
8631   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
8632   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
8633   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
8634   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
8635   iso_code_class[ISO_CODE_ESC] = ISO_escape;
8636   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
8637   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
8638   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
8639
8640   inhibit_pre_post_conversion = 0;
8641
8642   for (i = 0; i < 256; i++)
8643     {
8644       emacs_mule_bytes[i] = 1;
8645     }
8646   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
8647   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
8648   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
8649   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
8650 }
8651
8652 #ifdef emacs
8653
8654 void
8655 syms_of_coding ()
8656 {
8657   staticpro (&Vcoding_system_hash_table);
8658   {
8659     Lisp_Object args[2];
8660     args[0] = QCtest;
8661     args[1] = Qeq;
8662     Vcoding_system_hash_table = Fmake_hash_table (2, args);
8663   }
8664
8665   staticpro (&Vsjis_coding_system);
8666   Vsjis_coding_system = Qnil;
8667
8668   staticpro (&Vbig5_coding_system);
8669   Vbig5_coding_system = Qnil;
8670
8671   staticpro (&Vcode_conversion_work_buf_list);
8672   Vcode_conversion_work_buf_list = Qnil;
8673
8674   staticpro (&Vcode_conversion_reused_work_buf);
8675   Vcode_conversion_reused_work_buf = Qnil;
8676
8677   DEFSYM (Qcharset, "charset");
8678   DEFSYM (Qtarget_idx, "target-idx");
8679   DEFSYM (Qcoding_system_history, "coding-system-history");
8680   Fset (Qcoding_system_history, Qnil);
8681
8682   /* Target FILENAME is the first argument.  */
8683   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
8684   /* Target FILENAME is the third argument.  */
8685   Fput (Qwrite_region, Qtarget_idx, make_number (2));
8686
8687   DEFSYM (Qcall_process, "call-process");
8688   /* Target PROGRAM is the first argument.  */
8689   Fput (Qcall_process, Qtarget_idx, make_number (0));
8690
8691   DEFSYM (Qcall_process_region, "call-process-region");
8692   /* Target PROGRAM is the third argument.  */
8693   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
8694
8695   DEFSYM (Qstart_process, "start-process");
8696   /* Target PROGRAM is the third argument.  */
8697   Fput (Qstart_process, Qtarget_idx, make_number (2));
8698
8699   DEFSYM (Qopen_network_stream, "open-network-stream");
8700   /* Target SERVICE is the fourth argument.  */
8701   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
8702
8703   DEFSYM (Qcoding_system, "coding-system");
8704   DEFSYM (Qcoding_aliases, "coding-aliases");
8705
8706   DEFSYM (Qeol_type, "eol-type");
8707   DEFSYM (Qunix, "unix");
8708   DEFSYM (Qdos, "dos");
8709
8710   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
8711   DEFSYM (Qpost_read_conversion, "post-read-conversion");
8712   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
8713   DEFSYM (Qdefault_char, "default-char");
8714   DEFSYM (Qundecided, "undecided");
8715   DEFSYM (Qno_conversion, "no-conversion");
8716   DEFSYM (Qraw_text, "raw-text");
8717
8718   DEFSYM (Qiso_2022, "iso-2022");
8719
8720   DEFSYM (Qutf_8, "utf-8");
8721   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
8722
8723   DEFSYM (Qutf_16, "utf-16");
8724   DEFSYM (Qbig, "big");
8725   DEFSYM (Qlittle, "little");
8726
8727   DEFSYM (Qshift_jis, "shift-jis");
8728   DEFSYM (Qbig5, "big5");
8729
8730   DEFSYM (Qcoding_system_p, "coding-system-p");
8731
8732   DEFSYM (Qcoding_system_error, "coding-system-error");
8733   Fput (Qcoding_system_error, Qerror_conditions,
8734         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
8735   Fput (Qcoding_system_error, Qerror_message,
8736         build_string ("Invalid coding system"));
8737
8738   /* Intern this now in case it isn't already done.
8739      Setting this variable twice is harmless.
8740      But don't staticpro it here--that is done in alloc.c.  */
8741   Qchar_table_extra_slots = intern ("char-table-extra-slots");
8742
8743   DEFSYM (Qtranslation_table, "translation-table");
8744   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
8745   DEFSYM (Qtranslation_table_id, "translation-table-id");
8746   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
8747   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
8748
8749   DEFSYM (Qvalid_codes, "valid-codes");
8750
8751   DEFSYM (Qemacs_mule, "emacs-mule");
8752
8753   DEFSYM (QCcategory, ":category");
8754
8755   Vcoding_category_table
8756     = Fmake_vector (make_number (coding_category_max), Qnil);
8757   staticpro (&Vcoding_category_table);
8758   /* Followings are target of code detection.  */
8759   ASET (Vcoding_category_table, coding_category_iso_7,
8760         intern ("coding-category-iso-7"));
8761   ASET (Vcoding_category_table, coding_category_iso_7_tight,
8762         intern ("coding-category-iso-7-tight"));
8763   ASET (Vcoding_category_table, coding_category_iso_8_1,
8764         intern ("coding-category-iso-8-1"));
8765   ASET (Vcoding_category_table, coding_category_iso_8_2,
8766         intern ("coding-category-iso-8-2"));
8767   ASET (Vcoding_category_table, coding_category_iso_7_else,
8768         intern ("coding-category-iso-7-else"));
8769   ASET (Vcoding_category_table, coding_category_iso_8_else,
8770         intern ("coding-category-iso-8-else"));
8771   ASET (Vcoding_category_table, coding_category_utf_8,
8772         intern ("coding-category-utf-8"));
8773   ASET (Vcoding_category_table, coding_category_utf_16_be,
8774         intern ("coding-category-utf-16-be"));
8775   ASET (Vcoding_category_table, coding_category_utf_16_auto,
8776         intern ("coding-category-utf-16-auto"));
8777   ASET (Vcoding_category_table, coding_category_utf_16_le,
8778         intern ("coding-category-utf-16-le"));
8779   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
8780         intern ("coding-category-utf-16-be-nosig"));
8781   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
8782         intern ("coding-category-utf-16-le-nosig"));
8783   ASET (Vcoding_category_table, coding_category_charset,
8784         intern ("coding-category-charset"));
8785   ASET (Vcoding_category_table, coding_category_sjis,
8786         intern ("coding-category-sjis"));
8787   ASET (Vcoding_category_table, coding_category_big5,
8788         intern ("coding-category-big5"));
8789   ASET (Vcoding_category_table, coding_category_ccl,
8790         intern ("coding-category-ccl"));
8791   ASET (Vcoding_category_table, coding_category_emacs_mule,
8792         intern ("coding-category-emacs-mule"));
8793   /* Followings are NOT target of code detection.  */
8794   ASET (Vcoding_category_table, coding_category_raw_text,
8795         intern ("coding-category-raw-text"));
8796   ASET (Vcoding_category_table, coding_category_undecided,
8797         intern ("coding-category-undecided"));
8798
8799   defsubr (&Scoding_system_p);
8800   defsubr (&Sread_coding_system);
8801   defsubr (&Sread_non_nil_coding_system);
8802   defsubr (&Scheck_coding_system);
8803   defsubr (&Sdetect_coding_region);
8804   defsubr (&Sdetect_coding_string);
8805   defsubr (&Sfind_coding_systems_region_internal);
8806   defsubr (&Sunencodable_char_position);
8807   defsubr (&Scheck_coding_systems_region);
8808   defsubr (&Sdecode_coding_region);
8809   defsubr (&Sencode_coding_region);
8810   defsubr (&Sdecode_coding_string);
8811   defsubr (&Sencode_coding_string);
8812   defsubr (&Sdecode_sjis_char);
8813   defsubr (&Sencode_sjis_char);
8814   defsubr (&Sdecode_big5_char);
8815   defsubr (&Sencode_big5_char);
8816   defsubr (&Sset_terminal_coding_system_internal);
8817   defsubr (&Sset_safe_terminal_coding_system_internal);
8818   defsubr (&Sterminal_coding_system);
8819   defsubr (&Sset_keyboard_coding_system_internal);
8820   defsubr (&Skeyboard_coding_system);
8821   defsubr (&Sfind_operation_coding_system);
8822   defsubr (&Sset_coding_system_priority);
8823   defsubr (&Sdefine_coding_system_internal);
8824   defsubr (&Sdefine_coding_system_alias);
8825   defsubr (&Scoding_system_base);
8826   defsubr (&Scoding_system_plist);
8827   defsubr (&Scoding_system_aliases);
8828   defsubr (&Scoding_system_eol_type);
8829   defsubr (&Scoding_system_priority_list);
8830
8831   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
8832                doc: /* List of coding systems.
8833
8834 Do not alter the value of this variable manually.  This variable should be
8835 updated by the functions `define-coding-system' and
8836 `define-coding-system-alias'.  */);
8837   Vcoding_system_list = Qnil;
8838
8839   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
8840                doc: /* Alist of coding system names.
8841 Each element is one element list of coding system name.
8842 This variable is given to `completing-read' as TABLE argument.
8843
8844 Do not alter the value of this variable manually.  This variable should be
8845 updated by the functions `make-coding-system' and
8846 `define-coding-system-alias'.  */);
8847   Vcoding_system_alist = Qnil;
8848
8849   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
8850                doc: /* List of coding-categories (symbols) ordered by priority.
8851
8852 On detecting a coding system, Emacs tries code detection algorithms
8853 associated with each coding-category one by one in this order.  When
8854 one algorithm agrees with a byte sequence of source text, the coding
8855 system bound to the corresponding coding-category is selected.  */);
8856   {
8857     int i;
8858
8859     Vcoding_category_list = Qnil;
8860     for (i = coding_category_max - 1; i >= 0; i--)
8861       Vcoding_category_list
8862         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
8863                  Vcoding_category_list);
8864   }
8865
8866   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
8867                doc: /* Specify the coding system for read operations.
8868 It is useful to bind this variable with `let', but do not set it globally.
8869 If the value is a coding system, it is used for decoding on read operation.
8870 If not, an appropriate element is used from one of the coding system alists:
8871 There are three such tables, `file-coding-system-alist',
8872 `process-coding-system-alist', and `network-coding-system-alist'.  */);
8873   Vcoding_system_for_read = Qnil;
8874
8875   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
8876                doc: /* Specify the coding system for write operations.
8877 Programs bind this variable with `let', but you should not set it globally.
8878 If the value is a coding system, it is used for encoding of output,
8879 when writing it to a file and when sending it to a file or subprocess.
8880
8881 If this does not specify a coding system, an appropriate element
8882 is used from one of the coding system alists:
8883 There are three such tables, `file-coding-system-alist',
8884 `process-coding-system-alist', and `network-coding-system-alist'.
8885 For output to files, if the above procedure does not specify a coding system,
8886 the value of `buffer-file-coding-system' is used.  */);
8887   Vcoding_system_for_write = Qnil;
8888
8889   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
8890                doc: /*
8891 Coding system used in the latest file or process I/O.  */);
8892   Vlast_coding_system_used = Qnil;
8893
8894   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
8895                doc: /*
8896 *Non-nil means always inhibit code conversion of end-of-line format.
8897 See info node `Coding Systems' and info node `Text and Binary' concerning
8898 such conversion.  */);
8899   inhibit_eol_conversion = 0;
8900
8901   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
8902                doc: /*
8903 Non-nil means process buffer inherits coding system of process output.
8904 Bind it to t if the process output is to be treated as if it were a file
8905 read from some filesystem.  */);
8906   inherit_process_coding_system = 0;
8907
8908   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
8909                doc: /*
8910 Alist to decide a coding system to use for a file I/O operation.
8911 The format is ((PATTERN . VAL) ...),
8912 where PATTERN is a regular expression matching a file name,
8913 VAL is a coding system, a cons of coding systems, or a function symbol.
8914 If VAL is a coding system, it is used for both decoding and encoding
8915 the file contents.
8916 If VAL is a cons of coding systems, the car part is used for decoding,
8917 and the cdr part is used for encoding.
8918 If VAL is a function symbol, the function must return a coding system
8919 or a cons of coding systems which are used as above.  The function gets
8920 the arguments with which `find-operation-coding-systems' was called.
8921
8922 See also the function `find-operation-coding-system'
8923 and the variable `auto-coding-alist'.  */);
8924   Vfile_coding_system_alist = Qnil;
8925
8926   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
8927                doc: /*
8928 Alist to decide a coding system to use for a process I/O operation.
8929 The format is ((PATTERN . VAL) ...),
8930 where PATTERN is a regular expression matching a program name,
8931 VAL is a coding system, a cons of coding systems, or a function symbol.
8932 If VAL is a coding system, it is used for both decoding what received
8933 from the program and encoding what sent to the program.
8934 If VAL is a cons of coding systems, the car part is used for decoding,
8935 and the cdr part is used for encoding.
8936 If VAL is a function symbol, the function must return a coding system
8937 or a cons of coding systems which are used as above.
8938
8939 See also the function `find-operation-coding-system'.  */);
8940   Vprocess_coding_system_alist = Qnil;
8941
8942   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
8943                doc: /*
8944 Alist to decide a coding system to use for a network I/O operation.
8945 The format is ((PATTERN . VAL) ...),
8946 where PATTERN is a regular expression matching a network service name
8947 or is a port number to connect to,
8948 VAL is a coding system, a cons of coding systems, or a function symbol.
8949 If VAL is a coding system, it is used for both decoding what received
8950 from the network stream and encoding what sent to the network stream.
8951 If VAL is a cons of coding systems, the car part is used for decoding,
8952 and the cdr part is used for encoding.
8953 If VAL is a function symbol, the function must return a coding system
8954 or a cons of coding systems which are used as above.
8955
8956 See also the function `find-operation-coding-system'.  */);
8957   Vnetwork_coding_system_alist = Qnil;
8958
8959   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
8960                doc: /* Coding system to use with system messages.
8961 Also used for decoding keyboard input on X Window system.  */);
8962   Vlocale_coding_system = Qnil;
8963
8964   /* The eol mnemonics are reset in startup.el system-dependently.  */
8965   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
8966                doc: /*
8967 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
8968   eol_mnemonic_unix = build_string (":");
8969
8970   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
8971                doc: /*
8972 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
8973   eol_mnemonic_dos = build_string ("\\");
8974
8975   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
8976                doc: /*
8977 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
8978   eol_mnemonic_mac = build_string ("/");
8979
8980   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
8981                doc: /*
8982 *String displayed in mode line when end-of-line format is not yet determined.  */);
8983   eol_mnemonic_undecided = build_string (":");
8984
8985   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
8986                doc: /*
8987 *Non-nil enables character translation while encoding and decoding.  */);
8988   Venable_character_translation = Qt;
8989
8990   DEFVAR_LISP ("standard-translation-table-for-decode",
8991                &Vstandard_translation_table_for_decode,
8992                doc: /* Table for translating characters while decoding.  */);
8993   Vstandard_translation_table_for_decode = Qnil;
8994
8995   DEFVAR_LISP ("standard-translation-table-for-encode",
8996                &Vstandard_translation_table_for_encode,
8997                doc: /* Table for translating characters while encoding.  */);
8998   Vstandard_translation_table_for_encode = Qnil;
8999
9000   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
9001                doc: /* Alist of charsets vs revision numbers.
9002 While encoding, if a charset (car part of an element) is found,
9003 designate it with the escape sequence identifying revision (cdr part
9004 of the element).  */);
9005   Vcharset_revision_table = Qnil;
9006
9007   DEFVAR_LISP ("default-process-coding-system",
9008                &Vdefault_process_coding_system,
9009                doc: /* Cons of coding systems used for process I/O by default.
9010 The car part is used for decoding a process output,
9011 the cdr part is used for encoding a text to be sent to a process.  */);
9012   Vdefault_process_coding_system = Qnil;
9013
9014   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
9015                doc: /*
9016 Table of extra Latin codes in the range 128..159 (inclusive).
9017 This is a vector of length 256.
9018 If Nth element is non-nil, the existence of code N in a file
9019 \(or output of subprocess) doesn't prevent it to be detected as
9020 a coding system of ISO 2022 variant which has a flag
9021 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9022 or reading output of a subprocess.
9023 Only 128th through 159th elements has a meaning.  */);
9024   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
9025
9026   DEFVAR_LISP ("select-safe-coding-system-function",
9027                &Vselect_safe_coding_system_function,
9028                doc: /*
9029 Function to call to select safe coding system for encoding a text.
9030
9031 If set, this function is called to force a user to select a proper
9032 coding system which can encode the text in the case that a default
9033 coding system used in each operation can't encode the text.
9034
9035 The default value is `select-safe-coding-system' (which see).  */);
9036   Vselect_safe_coding_system_function = Qnil;
9037
9038   DEFVAR_BOOL ("coding-system-require-warning",
9039                &coding_system_require_warning,
9040                doc: /* Internal use only.
9041 If non-nil, on writing a file, `select-safe-coding-system-function' is
9042 called even if `coding-system-for-write' is non-nil.  The command
9043 `universal-coding-system-argument' binds this variable to t temporarily.  */);
9044   coding_system_require_warning = 0;
9045
9046
9047   DEFVAR_BOOL ("inhibit-iso-escape-detection",
9048                &inhibit_iso_escape_detection,
9049                doc: /*
9050 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
9051
9052 By default, on reading a file, Emacs tries to detect how the text is
9053 encoded.  This code detection is sensitive to escape sequences.  If
9054 the sequence is valid as ISO2022, the code is determined as one of
9055 the ISO2022 encodings, and the file is decoded by the corresponding
9056 coding system (e.g. `iso-2022-7bit').
9057
9058 However, there may be a case that you want to read escape sequences in
9059 a file as is.  In such a case, you can set this variable to non-nil.
9060 Then, as the code detection ignores any escape sequences, no file is
9061 detected as encoded in some ISO2022 encoding.  The result is that all
9062 escape sequences become visible in a buffer.
9063
9064 The default value is nil, and it is strongly recommended not to change
9065 it.  That is because many Emacs Lisp source files that contain
9066 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9067 in Emacs's distribution, and they won't be decoded correctly on
9068 reading if you suppress escape sequence detection.
9069
9070 The other way to read escape sequences in a file without decoding is
9071 to explicitly specify some coding system that doesn't use ISO2022's
9072 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
9073   inhibit_iso_escape_detection = 0;
9074
9075   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
9076                doc: /* Char table for translating self-inserting characters.
9077 This is applied to the result of input methods, not their input.  See also
9078 `keyboard-translate-table'.  */);
9079     Vtranslation_table_for_input = Qnil;
9080
9081   {
9082     Lisp_Object args[coding_arg_max];
9083     Lisp_Object plist[16];
9084     int i;
9085
9086     for (i = 0; i < coding_arg_max; i++)
9087       args[i] = Qnil;
9088
9089     plist[0] = intern (":name");
9090     plist[1] = args[coding_arg_name] = Qno_conversion;
9091     plist[2] = intern (":mnemonic");
9092     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
9093     plist[4] = intern (":coding-type");
9094     plist[5] = args[coding_arg_coding_type] = Qraw_text;
9095     plist[6] = intern (":ascii-compatible-p");
9096     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
9097     plist[8] = intern (":default-char");
9098     plist[9] = args[coding_arg_default_char] = make_number (0);
9099     plist[10] = intern (":for-unibyte");
9100     plist[11] = args[coding_arg_for_unibyte] = Qt;
9101     plist[12] = intern (":docstring");
9102     plist[13] = build_string ("Do no conversion.\n\
9103 \n\
9104 When you visit a file with this coding, the file is read into a\n\
9105 unibyte buffer as is, thus each byte of a file is treated as a\n\
9106 character.");
9107     plist[14] = intern (":eol-type");
9108     plist[15] = args[coding_arg_eol_type] = Qunix;
9109     args[coding_arg_plist] = Flist (16, plist);
9110     Fdefine_coding_system_internal (coding_arg_max, args);
9111   }
9112
9113   setup_coding_system (Qno_conversion, &keyboard_coding);
9114   setup_coding_system (Qno_conversion, &terminal_coding);
9115   setup_coding_system (Qno_conversion, &safe_terminal_coding);
9116
9117   {
9118     int i;
9119
9120     for (i = 0; i < coding_category_max; i++)
9121       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
9122   }
9123 }
9124
9125 char *
9126 emacs_strerror (error_number)
9127      int error_number;
9128 {
9129   char *str;
9130
9131   synchronize_system_messages_locale ();
9132   str = strerror (error_number);
9133
9134   if (! NILP (Vlocale_coding_system))
9135     {
9136       Lisp_Object dec = code_convert_string_norecord (build_string (str),
9137                                                       Vlocale_coding_system,
9138                                                       0);
9139       str = (char *) SDATA (dec);
9140     }
9141
9142   return str;
9143 }
9144
9145 #endif /* emacs */