src/coding.c

   1 /* Coding system handler (conversion, detection, and etc).
   2    Copyright (C) 1995,97,1998,2002,2003  Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4    Copyright (C) 2001,2002,2003  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2, or (at your option)
  11 any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs; see the file COPYING.  If not, write to
  20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  21 Boston, MA 02111-1307, USA.  */
  22
  23 /*** TABLE OF CONTENTS ***
  24
  25   0. General comments
  26   1. Preamble
  27   2. Emacs' internal format (emacs-mule) handlers
  28   3. ISO2022 handlers
  29   4. Shift-JIS and BIG5 handlers
  30   5. CCL handlers
  31   6. End-of-line handlers
  32   7. C library functions
  33   8. Emacs Lisp library functions
  34   9. Post-amble
  35
  36 */
  37
  38 /*** 0. General comments ***/
  39
  40
  41 /*** GENERAL NOTE on CODING SYSTEMS ***
  42
  43   A coding system is an encoding mechanism for one or more character
  44   sets.  Here's a list of coding systems which Emacs can handle.  When
  45   we say "decode", it means converting some other coding system to
  46   Emacs' internal format (emacs-mule), and when we say "encode",
  47   it means converting the coding system emacs-mule to some other
  48   coding system.
  49
  50   0. Emacs' internal format (emacs-mule)
  51
  52   Emacs itself holds a multi-lingual character in buffers and strings
  53   in a special format.  Details are described in section 2.
  54
  55   1. ISO2022
  56
  57   The most famous coding system for multiple character sets.  X's
  58   Compound Text, various EUCs (Extended Unix Code), and coding
  59   systems used in Internet communication such as ISO-2022-JP are
  60   all variants of ISO2022.  Details are described in section 3.
  61
  62   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  63
  64   A coding system to encode character sets: ASCII, JISX0201, and
  65   JISX0208.  Widely used for PC's in Japan.  Details are described in
  66   section 4.
  67
  68   3. BIG5
  69
  70   A coding system to encode the character sets ASCII and Big5.  Widely
  71   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  72   described in section 4.  In this file, when we write "BIG5"
  73   (all uppercase), we mean the coding system, and when we write
  74   "Big5" (capitalized), we mean the character set.
  75
  76   4. Raw text
  77
  78   A coding system for text containing random 8-bit code.  Emacs does
  79   no code conversion on such text except for end-of-line format.
  80
  81   5. Other
  82
  83   If a user wants to read/write text encoded in a coding system not
  84   listed above, he can supply a decoder and an encoder for it as CCL
  85   (Code Conversion Language) programs.  Emacs executes the CCL program
  86   while reading/writing.
  87
  88   Emacs represents a coding system by a Lisp symbol that has a property
  89   `coding-system'.  But, before actually using the coding system, the
  90   information about it is set in a structure of type `struct
  91   coding_system' for rapid processing.  See section 6 for more details.
  92
  93 */
  94
  95 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  96
  97   How end-of-line of text is encoded depends on the operating system.
  98   For instance, Unix's format is just one byte of `line-feed' code,
  99   whereas DOS's format is two-byte sequence of `carriage-return' and
 100   `line-feed' codes.  MacOS's format is usually one byte of
 101   `carriage-return'.
 102
 103   Since text character encoding and end-of-line encoding are
 104   independent, any coding system described above can have any
 105   end-of-line format.  So Emacs has information about end-of-line
 106   format in each coding-system.  See section 6 for more details.
 107
 108 */
 109
 110 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 111
 112   These functions check if a text between SRC and SRC_END is encoded
 113   in the coding system category XXX.  Each returns an integer value in
 114   which appropriate flag bits for the category XXX are set.  The flag
 115   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 116   template for these functions.  If MULTIBYTEP is nonzero, 8-bit codes
 117   of the range 0x80..0x9F are in multibyte form.  */
 118 #if 0
 119 int
 120 detect_coding_emacs_mule (src, src_end, multibytep)
 121      unsigned char *src, *src_end;
 122      int multibytep;
 123 {
 124   ...
 125 }
 126 #endif
 127
 128 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 129
 130   These functions decode SRC_BYTES length of unibyte text at SOURCE
 131   encoded in CODING to Emacs' internal format.  The resulting
 132   multibyte text goes to a place pointed to by DESTINATION, the length
 133   of which should not exceed DST_BYTES.
 134
 135   These functions set the information about original and decoded texts
 136   in the members `produced', `produced_char', `consumed', and
 137   `consumed_char' of the structure *CODING.  They also set the member
 138   `result' to one of CODING_FINISH_XXX indicating how the decoding
 139   finished.
 140
 141   DST_BYTES zero means that the source area and destination area are
 142   overlapped, which means that we can produce a decoded text until it
 143   reaches the head of the not-yet-decoded source text.
 144
 145   Below is a template for these functions.  */
 146 #if 0
 147 static void
 148 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 149      struct coding_system *coding;
 150      unsigned char *source, *destination;
 151      int src_bytes, dst_bytes;
 152 {
 153   ...
 154 }
 155 #endif
 156
 157 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 158
 159   These functions encode SRC_BYTES length text at SOURCE from Emacs'
 160   internal multibyte format to CODING.  The resulting unibyte text
 161   goes to a place pointed to by DESTINATION, the length of which
 162   should not exceed DST_BYTES.
 163
 164   These functions set the information about original and encoded texts
 165   in the members `produced', `produced_char', `consumed', and
 166   `consumed_char' of the structure *CODING.  They also set the member
 167   `result' to one of CODING_FINISH_XXX indicating how the encoding
 168   finished.
 169
 170   DST_BYTES zero means that the source area and destination area are
 171   overlapped, which means that we can produce encoded text until it
  172   reaches the head of the not-yet-encoded source text.
 173
 174   Below is a template for these functions.  */
 175 #if 0
 176 static void
 177 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 178      struct coding_system *coding;
 179      unsigned char *source, *destination;
 180      int src_bytes, dst_bytes;
 181 {
 182   ...
 183 }
 184 #endif
 185
 186 /*** COMMONLY USED MACROS ***/
 187
 188 /* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
 189    get one, two, and three bytes from the source text respectively.
 190    If there are not enough bytes in the source, they jump to
 191    `label_end_of_loop'.  The caller should set variables `coding',
 192    `src' and `src_end' to appropriate pointer in advance.  These
 193    macros are called from decoding routines `decode_coding_XXX', thus
 194    it is assumed that the source text is unibyte.  */
 195
 196 #define ONE_MORE_BYTE(c1)                                       \
 197   do {                                                          \
 198     if (src >= src_end)                                         \
 199       {                                                         \
 200         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 201         goto label_end_of_loop;                                 \
 202       }                                                         \
 203     c1 = *src++;                                                \
 204   } while (0)
 205
 206 #define TWO_MORE_BYTES(c1, c2)                                  \
 207   do {                                                          \
 208     if (src + 1 >= src_end)                                     \
 209       {                                                         \
 210         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 211         goto label_end_of_loop;                                 \
 212       }                                                         \
 213     c1 = *src++;                                                \
 214     c2 = *src++;                                                \
 215   } while (0)
 216
 217
 218 /* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte
 219    form if MULTIBYTEP is nonzero.  */
 220
 221 #define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep)           \
 222   do {                                                          \
 223     if (src >= src_end)                                         \
 224       {                                                         \
 225         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 226         goto label_end_of_loop;                                 \
 227       }                                                         \
 228     c1 = *src++;                                                \
 229     if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL)         \
 230       c1 = *src++ - 0x20;                                       \
 231   } while (0)
 232
  233 /* Set C to the next character at the source text pointed by `src'
  234    and advance `src' past it.  If no bytes are left in the source,
  235    set `coding->result' to CODING_FINISH_INSUFFICIENT_SRC and jump to
  236    `label_end_of_loop'.  The caller should set variables `coding',
  237    `src', `src_end', and `translation_table' to appropriate values
  238    in advance.  This macro is used in encoding routines
  239    `encode_coding_XXX', thus it assumes that the source text is in
  240    multibyte form except for 8-bit characters.  8-bit characters are
  241    in multibyte form if coding->src_multibyte is nonzero, else they
        are represented by a single byte.

        If `translation_table' is non-nil, C is passed through
        translate_char before the macro finishes.

        The expansion declares the block-local variables `len' and
        `bytes', so the argument C must not be an expression that refers
        to caller variables with those names.  NOTE(review): `bytes' is
        presumably filled in by UNIBYTE_STR_AS_MULTIBYTE_P or
        STRING_CHAR_AND_LENGTH, which are defined elsewhere -- confirm.  */
  242
  243 #define ONE_MORE_CHAR(c)                                        \
  244   do {                                                          \
  245     int len = src_end - src;                                    \
  246     int bytes;                                                  \
  247     if (len <= 0)                                               \
  248       {                                                         \
  249         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
  250         goto label_end_of_loop;                                 \
  251       }                                                         \
  252     if (coding->src_multibyte                                   \
  253         || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes))        \
  254       c = STRING_CHAR_AND_LENGTH (src, len, bytes);             \
  255     else                                                        \
  256       c = *src, bytes = 1;                                      \
  257     if (!NILP (translation_table))                              \
  258       c = translate_char (translation_table, c, -1, 0, 0);      \
  259     src += bytes;                                               \
  260   } while (0)
 261
 262
  263 /* Produce a multibyte form of character C to `dst'.  Jump to
  264    `label_end_of_loop' (with `coding->result' set to
  265    CODING_FINISH_INSUFFICIENT_DST) if there's not enough space at
  266    `dst'.  The space limit is `dst_end' when `dst_bytes' is nonzero,
  267    otherwise `src'.  Each character written to `dst' also increments
  268    `coding->produced_char'.
  269
  270    The character is written to `dst' when no composition is in
  271    progress, or when the composition method is COMPOSITION_RELATIVE or
        COMPOSITION_WITH_RULE.

        If we are now in the middle of a composition sequence whose method
        is not COMPOSITION_RELATIVE, C is also recorded with
        CODING_ADD_COMPOSITION_COMPONENT, i.e. it goes to
        coding->cmp_data->data.  For the remaining methods (the decoded
        character is then an ALTCHAR for the current composition) it goes
        there instead of `dst'.  Afterwards
        `coding->composition_rule_follows' is set to whether
        `coding->composing' differs from COMPOSITION_WITH_ALTCHARS.
        NOTE(review): CODING_ADD_COMPOSITION_COMPONENT can reset
        `coding->composing' to COMPOSITION_NO, and that comparison is made
        after it -- confirm this is intended.

        C is expanded several times, so it should be free of side
        effects.  The caller provides `coding', `src', `dst', `dst_end',
        `dst_bytes' and the label.

        This macro is used in decoding routines.  */
  272
  273 #define EMIT_CHAR(c)                                                    \
  274   do {                                                                  \
  275     if (! COMPOSING_P (coding)                                          \
  276         || coding->composing == COMPOSITION_RELATIVE                    \
  277         || coding->composing == COMPOSITION_WITH_RULE)                  \
  278       {                                                                 \
  279         int bytes = CHAR_BYTES (c);                                     \
  280         if ((dst + bytes) > (dst_bytes ? dst_end : src))                \
  281           {                                                             \
  282             coding->result = CODING_FINISH_INSUFFICIENT_DST;            \
  283             goto label_end_of_loop;                                     \
  284           }                                                             \
  285         dst += CHAR_STRING (c, dst);                                    \
  286         coding->produced_char++;                                        \
  287       }                                                                 \
  288                                                                         \
  289     if (COMPOSING_P (coding)                                            \
  290         && coding->composing != COMPOSITION_RELATIVE)                   \
  291       {                                                                 \
  292         CODING_ADD_COMPOSITION_COMPONENT (coding, c);                   \
  293         coding->composition_rule_follows                                \
  294           = coding->composing != COMPOSITION_WITH_ALTCHARS;             \
  295       }                                                                 \
  296   } while (0)
 297
 298
 299 #define EMIT_ONE_BYTE(c)                                        \
 300   do {                                                          \
 301     if (dst >= (dst_bytes ? dst_end : src))                     \
 302       {                                                         \
 303         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 304         goto label_end_of_loop;                                 \
 305       }                                                         \
 306     *dst++ = c;                                                 \
 307   } while (0)
 308
 309 #define EMIT_TWO_BYTES(c1, c2)                                  \
 310   do {                                                          \
 311     if (dst + 2 > (dst_bytes ? dst_end : src))                  \
 312       {                                                         \
 313         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 314         goto label_end_of_loop;                                 \
 315       }                                                         \
 316     *dst++ = c1, *dst++ = c2;                                   \
 317   } while (0)
 318
 319 #define EMIT_BYTES(from, to)                                    \
 320   do {                                                          \
 321     if (dst + (to - from) > (dst_bytes ? dst_end : src))        \
 322       {                                                         \
 323         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 324         goto label_end_of_loop;                                 \
 325       }                                                         \
 326     while (from < to)                                           \
 327       *dst++ = *from++;                                         \
 328   } while (0)
 329
 330 \f
 331 /*** 1. Preamble ***/
 332
 333 #ifdef emacs
 334 #include <config.h>
 335 #endif
 336
 337 #include <stdio.h>
 338
 339 #ifdef emacs
 340
 341 #include "lisp.h"
 342 #include "buffer.h"
 343 #include "charset.h"
 344 #include "composite.h"
 345 #include "ccl.h"
 346 #include "coding.h"
 347 #include "window.h"
 348
 349 #else  /* not emacs */
 350
 351 #include "mulelib.h"
 352
 353 #endif /* not emacs */
  354
     /* Lisp objects naming coding-system related properties and
        operations.  NOTE(review): judging by the `Q' prefix these are
        presumably symbols such as `coding-system' and `eol-type'; they
        are only defined here, so confirm where they are initialized.  */
  355 Lisp_Object Qcoding_system, Qeol_type;
  356 Lisp_Object Qbuffer_file_coding_system;
  357 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
  358 Lisp_Object Qno_conversion, Qundecided;
  359 Lisp_Object Qcoding_system_history;
  360 Lisp_Object Qsafe_chars;
  361 Lisp_Object Qvalid_codes;
  362
     /* Qinsert_file_contents and Qwrite_region are only declared here
        (they are defined in another file); the objects that follow are
        defined in this file.  */
  363 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
  364 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
  365 Lisp_Object Qstart_process, Qopen_network_stream;
  366 Lisp_Object Qtarget_idx;
  367
     /* NOTE(review): the purpose of the next two variables is not
        documented in this part of the file -- see their users.  */
  368 Lisp_Object Vselect_safe_coding_system_function;
  369
  370 int coding_system_require_warning;
  371
  372 /* Mnemonic string for each format of end-of-line.  */
  373 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
  374 /* Mnemonic string to indicate format of end-of-line is not yet
  375    decided.  */
  376 Lisp_Object eol_mnemonic_undecided;
  377
  378 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
  379    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
  380 int system_eol_type;
 381
  382 #ifdef emacs
  383
     /* Everything down to the matching #endif is compiled only when
        `emacs' is defined.  */

  384 /* Information about which coding system is safe for which chars.
  385    The value has the form (GENERIC-LIST . NON-GENERIC-ALIST).
  386
  387    GENERIC-LIST is a list of generic coding systems which can encode
  388    any characters.
  389
  390    NON-GENERIC-ALIST is an alist of non generic coding systems vs the
  391    corresponding char table that contains safe chars.  */
  392 Lisp_Object Vcoding_system_safe_chars;
  393
     /* NOTE(review): presumably the list and the alist of all defined
        coding systems; their exact format is not visible here.  */
  394 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
  395
  396 Lisp_Object Qcoding_system_p, Qcoding_system_error;
  397
  398 /* Coding system emacs-mule and raw-text are for converting only
  399    end-of-line format.  */
  400 Lisp_Object Qemacs_mule, Qraw_text;
  401
  402 Lisp_Object Qutf_8;
  403
  404 /* Coding-systems are handed between Emacs Lisp programs and C internal
  405    routines by the following three variables.  */
  406 /* Coding-system for reading files and receiving data from process.  */
  407 Lisp_Object Vcoding_system_for_read;
  408 /* Coding-system for writing files and sending data to process.  */
  409 Lisp_Object Vcoding_system_for_write;
  410 /* Coding-system actually used in the latest I/O.  */
  411 Lisp_Object Vlast_coding_system_used;
  412
  413 /* A vector of length 256 which contains information about special
  414    Latin codes (especially for dealing with Microsoft codes).  */
  415 Lisp_Object Vlatin_extra_code_table;
  416
  417 /* Flag to inhibit code conversion of end-of-line format.  */
  418 int inhibit_eol_conversion;
  419
  420 /* Flag to inhibit ISO2022 escape sequence detection.  */
  421 int inhibit_iso_escape_detection;
  422
  423 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
  424 int inherit_process_coding_system;
  425
  426 /* Coding system to be used to encode text for terminal display.  */
  427 struct coding_system terminal_coding;
  428
  429 /* Coding system to be used to encode text for terminal display when
  430    terminal coding system is nil.  */
  431 struct coding_system safe_terminal_coding;
  432
  433 /* Coding system of what is sent from terminal keyboard.  */
  434 struct coding_system keyboard_coding;
  435
  436 /* Default coding system to be used to write a file.  */
  437 struct coding_system default_buffer_file_coding;
  438
     /* NOTE(review): presumably alists used to choose a coding system
        for file, process and network I/O respectively; the element
        format is not visible here.  */
  439 Lisp_Object Vfile_coding_system_alist;
  440 Lisp_Object Vprocess_coding_system_alist;
  441 Lisp_Object Vnetwork_coding_system_alist;
  442
  443 Lisp_Object Vlocale_coding_system;
  444
  445 #endif /* emacs */
  446
  447 Lisp_Object Qcoding_category, Qcoding_category_index;
  448
  449 /* List of symbols `coding-category-xxx' ordered by priority.  */
  450 Lisp_Object Vcoding_category_list;
  451
  452 /* Table of coding categories (Lisp symbols).  */
  453 Lisp_Object Vcoding_category_table;
  454
  455 /* Table of the symbol name of each coding category.  The array has
        CODING_CATEGORY_IDX_MAX elements and fifteen names are listed.
        NOTE(review): the order presumably has to match the
        CODING_CATEGORY_IDX_XXX index values defined elsewhere -- confirm
        before reordering or adding a category.  */
  456 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
  457   "coding-category-emacs-mule",
  458   "coding-category-sjis",
  459   "coding-category-iso-7",
  460   "coding-category-iso-7-tight",
  461   "coding-category-iso-8-1",
  462   "coding-category-iso-8-2",
  463   "coding-category-iso-7-else",
  464   "coding-category-iso-8-else",
  465   "coding-category-ccl",
  466   "coding-category-big5",
  467   "coding-category-utf-8",
  468   "coding-category-utf-16-be",
  469   "coding-category-utf-16-le",
  470   "coding-category-raw-text",
  471   "coding-category-binary"
  472 };
  473
  474 /* Table of pointers to the coding system corresponding to each
  475    coding category.  */
  476 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
  477
  478 /* Table of coding category masks.  The Nth element is the mask of
  479    the coding category whose priority is Nth.  Private to this file.  */
  480 static
  481 int coding_priorities[CODING_CATEGORY_IDX_MAX];
 482
  483 /* Flag to tell if we look up translation table on character code
  484    conversion.  Note that the flag is held as a Lisp object, not as
        a C int.  */
  485 Lisp_Object Venable_character_translation;
  486 /* Standard translation table to look up on decoding (reading).  */
  487 Lisp_Object Vstandard_translation_table_for_decode;
  488 /* Standard translation table to look up on encoding (writing).  */
  489 Lisp_Object Vstandard_translation_table_for_encode;
  490
     /* NOTE(review): presumably symbols related to translation tables;
        they are only defined here.  */
  491 Lisp_Object Qtranslation_table;
  492 Lisp_Object Qtranslation_table_id;
  493 Lisp_Object Qtranslation_table_for_decode;
  494 Lisp_Object Qtranslation_table_for_encode;
  495
  496 /* Alist of charsets vs revision number.  */
  497 Lisp_Object Vcharset_revision_alist;
  498
  499 /* Default coding systems used for process I/O.  */
  500 Lisp_Object Vdefault_process_coding_system;
  501
  502 /* Char table for translating Quail and self-inserting input.  */
  503 Lisp_Object Vtranslation_table_for_input;
  504
  505 /* Global flag to tell that we can't call post-read-conversion and
  506    pre-write-conversion functions.  Usually the value is zero, but it
  507    is set to 1 temporarily while such functions are running.  This is
  508    to avoid infinitely recursive calls.  Private to this file.  */
  509 static int inhibit_pre_post_conversion;
  510
  511 Lisp_Object Qchar_coding_system;
 512
 513 /* Return `safe-chars' property of CODING_SYSTEM (symbol).  Don't check
 514    its validity.  */
 515
 516 Lisp_Object
 517 coding_safe_chars (coding_system)
 518      Lisp_Object coding_system;
 519 {
 520   Lisp_Object coding_spec, plist, safe_chars;
 521
 522   coding_spec = Fget (coding_system, Qcoding_system);
 523   plist = XVECTOR (coding_spec)->contents[3];
 524   safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars);
 525   return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
 526 }
 527
 528 #define CODING_SAFE_CHAR_P(safe_chars, c) \
 529   (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c)))
 530
 531 \f
 532 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 533
 534 /* Emacs' internal format for representation of multiple character
 535    sets is a kind of multi-byte encoding, i.e. characters are
 536    represented by variable-length sequences of one-byte codes.
 537
 538    ASCII characters and control characters (e.g. `tab', `newline') are
 539    represented by one-byte sequences which are their ASCII codes, in
 540    the range 0x00 through 0x7F.
 541
 542    8-bit characters of the range 0x80..0x9F are represented by
 543    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
 544    code + 0x20).
 545
 546    8-bit characters of the range 0xA0..0xFF are represented by
 547    one-byte sequences which are their 8-bit code.
 548
 549    The other characters are represented by a sequence of `base
 550    leading-code', optional `extended leading-code', and one or two
 551    `position-code's.  The length of the sequence is determined by the
 552    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
 553    whereas extended leading-code and position-code take the range 0xA0
 554    through 0xFF.  See `charset.h' for more details about leading-code
 555    and position-code.
 556
 557    --- CODE RANGE of Emacs' internal format ---
 558    character set        range
 559    -------------        -----
 560    ascii                0x00..0x7F
 561    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
  562    eight-bit-graphic    0xA0..0xFF
 563    ELSE                 0x81..0x9D + [0xA0..0xFF]+
 564    ---------------------------------------------
 565
 566    As this is the internal character representation, the format is
 567    usually not used externally (i.e. in a file or in a data sent to a
 568    process).  But, it is possible to have a text externally in this
 569    format (i.e. by encoding by the coding system `emacs-mule').
 570
 571    In that case, a sequence of one-byte codes has a slightly different
 572    form.
 573
 574    Firstly, all characters in eight-bit-control are represented by
 575    one-byte sequences which are their 8-bit code.
 576
 577    Next, character composition data are represented by the byte
 578    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
 579    where,
 580         METHOD is 0xF0 plus one of composition method (enum
 581         composition_method),
 582
 583         BYTES is 0xA0 plus the byte length of these composition data,
 584
 585         CHARS is 0xA0 plus the number of characters composed by these
 586         data,
 587
 588         COMPONENTs are characters of multibyte form or composition
 589         rules encoded by two-byte of ASCII codes.
 590
 591    In addition, for backward compatibility, the following formats are
 592    also recognized as composition data on decoding.
 593
 594    0x80 MSEQ ...
 595    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
 596
 597    Here,
  598         MSEQ is a multibyte form but in this special format:
 599           ASCII: 0xA0 ASCII_CODE+0x80,
 600           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
 601         RULE is a one byte code of the range 0xA0..0xF0 that
 602         represents a composition rule.
 603   */
  604
     /* Table of 256 code-class entries.  NOTE(review): presumably one
        entry per byte value, giving the class of that byte in Emacs'
        internal format; nothing in this part of the file fills or reads
        it -- confirm against its users.  */
  605 enum emacs_code_class_type emacs_code_class[256];
 606
  607 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
  608    Check if a text is encoded in Emacs' internal format.  If it is,
  609    return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.

        The bytes from SRC up to SRC_END are scanned one at a time.  The
        scan fails (returns 0) on ESC, SI or SO, and on a byte in the
        range 0x81..0x9F for which UNIBYTE_STR_AS_MULTIBYTE_P reports no
        valid multibyte sequence.  If the end of the source is reached
        without such a failure, the text is accepted.  MULTIBYTEP is
        passed on to ONE_MORE_BYTE_CHECK_MULTIBYTE.  */
  610
  611 static int
  612 detect_coding_emacs_mule (src, src_end, multibytep)
  613       unsigned char *src, *src_end;
  614       int multibytep;
  615 {
  616   unsigned char c;
        /* Nonzero while scanning the components that follow an old-style
           composition leading code (0x80).  */
  617   int composing = 0;
  618   /* Dummy for ONE_MORE_BYTE: the byte-fetching macro stores its result
           code through `coding', so it needs somewhere to write.  */
  619   struct coding_system dummy_coding;
  620   struct coding_system *coding = &dummy_coding;
  621
        /* The loop is left only by `return 0' or by the jump to
           label_end_of_loop that the macro performs when the source is
           exhausted.  */
  622   while (1)
  623     {
  624       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
  625
  626       if (composing)
  627         {
              /* Map a component byte of the old composition format back
                 to the byte it stands for, so that the checks below apply
                 to it: a byte below 0xA0 ends the composition, 0xA0
                 introduces an ASCII code stored with its high bit set, and
                 any other byte is a code stored with 0x20 added.  */
  628           if (c < 0xA0)
  629             composing = 0;
  630           else if (c == 0xA0)
  631             {
  632               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
  633               c &= 0x7F;
  634             }
  635           else
  636             c -= 0x20;
  637         }
  638
  639       if (c < 0x20)
  640         {
              /* These control codes make the text not emacs-mule.  */
  641           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
  642             return 0;
  643         }
  644       else if (c >= 0x80 && c < 0xA0)
  645         {
  646           if (c == 0x80)
  647             /* Old leading code for a composite character.  */
  648             composing = 1;
  649           else
  650             {
                  /* Step back to the byte just fetched and check the
                     sequence starting there.  NOTE(review): BYTES is
                     presumably set by the macro to the length of that
                     sequence, which is then skipped -- confirm against
                     the macro definition.  */
  651               unsigned char *src_base = src - 1;
  652               int bytes;
  653
  654               if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base,
  655                                                bytes))
  656                 return 0;
  657               src = src_base + bytes;
  658             }
  659         }
  660     }
  661  label_end_of_loop:
  662   return CODING_CATEGORY_MASK_EMACS_MULE;
  663 }
 664
 665
 666 /* Record the starting position START and METHOD of one composition.  */
 667
 668 #define CODING_ADD_COMPOSITION_START(coding, start, method)     \
 669   do {                                                          \
 670     struct composition_data *cmp_data = coding->cmp_data;       \
 671     int *data = cmp_data->data + cmp_data->used;                \
 672     coding->cmp_data_start = cmp_data->used;                    \
 673     data[0] = -1;                                               \
 674     data[1] = cmp_data->char_offset + start;                    \
 675     data[3] = (int) method;                                     \
 676     cmp_data->used += 4;                                        \
 677   } while (0)
 678
 679 /* Record the ending position END of the current composition.  */
 680
 681 #define CODING_ADD_COMPOSITION_END(coding, end)                 \
 682   do {                                                          \
 683     struct composition_data *cmp_data = coding->cmp_data;       \
 684     int *data = cmp_data->data + coding->cmp_data_start;        \
 685     data[0] = cmp_data->used - coding->cmp_data_start;          \
 686     data[2] = cmp_data->char_offset + end;                      \
 687   } while (0)
 688
  689 /* Record one COMPONENT (alternate character or composition rule).

        COMPONENT is appended to CODING->cmp_data->data.  If the record of
        the current composition has then reached
        COMPOSITION_DATA_MAX_BUNCH_LENGTH ints, the composition is closed
        at once: its end position is recorded as CODING->produced_char and
        CODING->composing is reset to COMPOSITION_NO.

        CODING is expanded several times and should be free of side
        effects.  NOTE(review): nothing here checks that the data array
        has room for the new element -- presumably the caller guarantees
        it; confirm.  */
  690
  691 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component)             \
  692   do {                                                                  \
  693     coding->cmp_data->data[coding->cmp_data->used++] = component;       \
  694     if (coding->cmp_data->used - coding->cmp_data_start                 \
  695         == COMPOSITION_DATA_MAX_BUNCH_LENGTH)                           \
  696       {                                                                 \
  697         CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
  698         coding->composing = COMPOSITION_NO;                             \
  699       }                                                                 \
  700   } while (0)
 701
 702
 703 /* Get one byte from a data pointed by SRC and increment SRC.  If SRC
 704    is not less than SRC_END, return -1 without incrementing Src.  */
 705
 706 #define SAFE_ONE_MORE_BYTE() (src >= src_end ? -1 : *src++)
 707
 708
 709 /* Decode a character represented as a component of composition
 710    sequence of Emacs 20 style at SRC.  Set C to that character, store
 711    its multibyte form sequence at P, and set P to the end of that
 712    sequence.  If no valid character is found, set C to -1.

        The first byte read selects one of these cases:
          - the source is exhausted: C is left negative;
          - CHAR_HEAD_P holds for the byte: not a component, C is -1;
          - the byte is 0xA0: the next byte must be >= 0xA0, and the
            character is that byte minus 0xA0, stored as one byte at P;
          - the byte minus 0x20 is a base leading code: that leading code
            and the following bytes of the character are stored at P and
            then validated as a multibyte form;
          - anything else: C is -1.
        In the leading-code case P has already been advanced over the
        stored bytes even when C ends up as -1.

        SRC and SRC_END (through SAFE_ONE_MORE_BYTE) and CODING are
        taken from the scope in which the macro is expanded.  */
 713
 714 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p)                \
 715   do {                                                          \
 716     int bytes;                                                  \
 717                                                                 \
 718     c = SAFE_ONE_MORE_BYTE ();                                  \
 719     if (c < 0)                                                  \
 720       break;                                                    \
 721     if (CHAR_HEAD_P (c))                                        \
 722       c = -1;                                                   \
 723     else if (c == 0xA0)                                         \
 724       {                                                         \
 725         c = SAFE_ONE_MORE_BYTE ();                              \
 726         if (c < 0xA0)                                           \
 727           c = -1;                                               \
 728         else                                                    \
 729           {                                                     \
 730             c -= 0xA0;                                          \
 731             *p++ = c;                                           \
 732           }                                                     \
 733       }                                                         \
 734     else if (BASE_LEADING_CODE_P (c - 0x20))                    \
 735       {                                                         \
 736         unsigned char *p0 = p;                                  \
 737                                                                 \
 738         c -= 0x20;                                              \
 739         *p++ = c;                                               \
 740         bytes = BYTES_BY_CHAR_HEAD (c);                         \
 741         while (--bytes)                                         \
 742           {                                                     \
 743             c = SAFE_ONE_MORE_BYTE ();                          \
 744             if (c < 0)                                          \
 745               break;                                            \
 746             *p++ = c;                                           \
 747           }                                                     \
 748         if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes)      \
 749             || (coding->flags /* We are recovering a file.  */  \
 750                 && p0[0] == LEADING_CODE_8_BIT_CONTROL          \
 751                 && ! CHAR_HEAD_P (p0[1])))                      \
 752           c = STRING_CHAR (p0, bytes);                          \
 753         else                                                    \
 754           c = -1;                                               \
 755       }                                                         \
 756     else                                                        \
 757       c = -1;                                                   \
 758   } while (0)
 759
 760
 761 /* Decode a composition rule represented as a component of composition
 762    sequence of Emacs 20 style at SRC.  Set C to the rule.  If no
 763    valid rule is found, set C to -1.

        The byte read, minus 0xA0, must be in the range 0..80.  It is
        split into GREF (quotient by 9) and NREF (remainder by 9), which
        are combined with COMPOSITION_ENCODE_RULE.  GREF and NREF are
        variables of the expanding scope and are overwritten; SRC and
        SRC_END are used through SAFE_ONE_MORE_BYTE.  */
 764
 765 #define DECODE_EMACS_MULE_COMPOSITION_RULE(c)           \
 766   do {                                                  \
 767     c = SAFE_ONE_MORE_BYTE ();                          \
 768     c -= 0xA0;                                          \
 769     if (c < 0 || c >= 81)                               \
 770       c = -1;                                           \
 771     else                                                \
 772       {                                                 \
 773         gref = c / 9, nref = c % 9;                     \
 774         c = COMPOSITION_ENCODE_RULE (gref, nref);       \
 775       }                                                 \
 776   } while (0)
 777
 778
 779 /* Decode composition sequence encoded by `emacs-mule' at the source
 780    pointed by SRC.  SRC_END is the end of source.  Store information
 781    of the composition in CODING->cmp_data.
 782
 783    For backward compatibility, decode also a composition sequence of
 784    Emacs 20 style.  In that case, the composition sequence contains
 785    characters that should be extracted into a buffer or string.  Store
 786    those characters at *DESTINATION in multibyte form.
 787
 788    If we encounter an invalid byte sequence, return 0.
 789    If we encounter an insufficient source or destination, or
 790    insufficient space in CODING->cmp_data, return -1.
 791    Otherwise, return consumed bytes in the source.
 792
        When characters of an Emacs 20 style sequence are stored,
        *DESTINATION is advanced past them and CODING->produced_char is
        increased by the number of composed characters.  If DST_BYTES is
        zero, the limit for storing is the current source position
        instead of DST_END.
 793 */
 794 static INLINE int
 795 decode_composition_emacs_mule (coding, src, src_end,
 796                                destination, dst_end, dst_bytes)
 797      struct coding_system *coding;
 798      unsigned char *src, *src_end, **destination, *dst_end;
 799      int dst_bytes;
 800 {
 801   unsigned char *dst = *destination;
 802   int method, data_len, nchars;
       /* SRC_BASE stays on the leading byte of the sequence; SRC moves
          on to the byte after it.  */
 803   unsigned char *src_base = src++;
 804   /* Store components of composition.  */
 805   int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH];
 806   int ncomponent;
 807   /* Store multibyte form of characters to be composed.  This is for
 808      Emacs 20 style composition sequence.  */
 809   unsigned char buf[MAX_COMPOSITION_COMPONENTS * MAX_MULTIBYTE_LENGTH];
 810   unsigned char *bufp = buf;
 811   int c, i, gref, nref;
 812
       /* Give up early unless one more maximal bunch of composition
          data still fits in CODING->cmp_data.  */
 813   if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
 814       >= COMPOSITION_DATA_SIZE)
 815     {
 816       coding->result = CODING_FINISH_INSUFFICIENT_CMP;
 817       return -1;
 818     }
 819
 820   ONE_MORE_BYTE (c);
       /* A second byte of 0xF0 + METHOD introduces the current format:
          0xA0 + total byte length, 0xA0 + number of composed characters,
          then the components.  */
 821   if (c - 0xF0 >= COMPOSITION_RELATIVE
 822            && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS)
 823     {
 824       int with_rule;
 825
 826       method = c - 0xF0;
 827       with_rule = (method == COMPOSITION_WITH_RULE
 828                    || method == COMPOSITION_WITH_RULE_ALTCHARS);
 829       ONE_MORE_BYTE (c);
 830       data_len = c - 0xA0;
 831       if (data_len < 4
 832           || src_base + data_len > src_end)
 833         return 0;
 834       ONE_MORE_BYTE (c);
 835       nchars = c - 0xA0;
           /* Validate the decoded count, not the raw byte: any byte
              below 0xA1 gives a zero or negative NCHARS, which must not
              reach CODING_ADD_COMPOSITION_END below.  */
 836       if (nchars < 1)
 837         return 0;
 838       for (ncomponent = 0; src < src_base + data_len; ncomponent++)
 839         {
 840           /* If it is longer than this, it can't be valid.  */
 841           if (ncomponent >= COMPOSITION_DATA_MAX_BUNCH_LENGTH)
 842             return 0;
 843
               /* With rules, odd-numbered components are rules, each
                  written as two bytes (GREF + 32, NREF + 32).  */
 844           if (ncomponent % 2 && with_rule)
 845             {
 846               ONE_MORE_BYTE (gref);
 847               gref -= 32;
 848               ONE_MORE_BYTE (nref);
 849               nref -= 32;
 850               c = COMPOSITION_ENCODE_RULE (gref, nref);
 851             }
 852           else
 853             {
 854               int bytes;
 855               if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
 856                   || (coding->flags /* We are recovering a file.  */
 857                       && src[0] == LEADING_CODE_8_BIT_CONTROL
 858                       && ! CHAR_HEAD_P (src[1])))
 859                 c = STRING_CHAR (src, bytes);
 860               else
 861                 c = *src, bytes = 1;
 862               src += bytes;
 863             }
 864           component[ncomponent] = c;
 865         }
 866     }
 867   else
 868     {
 869       /* This may be an old Emacs 20 style format.  See the comment at
 870          the section 2 of this file.  */
 871       while (src < src_end && !CHAR_HEAD_P (*src)) src++;
           /* The sequence may continue in the next block of source.  */
 872       if (src == src_end
 873           && !(coding->mode & CODING_MODE_LAST_BLOCK))
 874         goto label_end_of_loop;
 875
           /* From here on SRC_END is the end of this sequence only, and
              SRC is rewound to the byte after the leading one.  */
 876       src_end = src;
 877       src = src_base + 1;
 878       if (c < 0xC0)
 879         {
 880           method = COMPOSITION_RELATIVE;
 881           for (ncomponent = 0; ncomponent < MAX_COMPOSITION_COMPONENTS;)
 882             {
 883               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 884               if (c < 0)
 885                 break;
 886               component[ncomponent++] = c;
 887             }
 888           if (ncomponent < 2)
 889             return 0;
 890           nchars = ncomponent;
 891         }
 892       else if (c == 0xFF)
 893         {
 894           method = COMPOSITION_WITH_RULE;
 895           src++;
 896           DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 897           if (c < 0)
 898             return 0;
 899           component[0] = c;
 900           for (ncomponent = 1;
 901                ncomponent < MAX_COMPOSITION_COMPONENTS * 2 - 1;)
 902             {
 903               DECODE_EMACS_MULE_COMPOSITION_RULE (c);
 904               if (c < 0)
 905                 break;
 906               component[ncomponent++] = c;
 907               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 908               if (c < 0)
 909                 break;
 910               component[ncomponent++] = c;
 911             }
 912           if (ncomponent < 3)
 913             return 0;
 914           nchars = (ncomponent + 1) / 2;
 915         }
 916       else
 917         return 0;
 918     }
 919
       /* Record the composition only if there are no characters to
          store (current format) or they fit in the destination.  */
 920   if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src))
 921     {
 922       CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method);
 923       for (i = 0; i < ncomponent; i++)
 924         CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]);
 925       CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars);
 926       if (buf < bufp)
 927         {
 928           unsigned char *p = buf;
 929           EMIT_BYTES (p, bufp);
 930           *destination += bufp - buf;
 931           coding->produced_char += nchars;
 932         }
 933       return (src - src_base);
 934     }
 935  label_end_of_loop:
 936   return -1;
 937 }
 938
 939 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".

        Decode SRC_BYTES bytes at SOURCE into DESTINATION, handling each
        source position as follows:
          - '\r' becomes '\n' when CODING->eol_type is CODING_EOL_CR; when
            it is CODING_EOL_CRLF, "\r\n" becomes '\n' and a '\r' not
            followed by '\n' is kept;
          - '\n' is copied, except that decoding stops with
            CODING_FINISH_INCONSISTENT_EOL when eol_type is CR or CRLF and
            CODING_MODE_INHIBIT_INCONSISTENT_EOL is set in CODING->mode;
          - 0x80 is handed to decode_composition_emacs_mule when
            CODING->cmp_data is non-null;
          - a sequence accepted by UNIBYTE_STR_AS_MULTIBYTE_P (or, when
            CODING->flags is set, one starting with
            LEADING_CODE_8_BIT_CONTROL) is copied unchanged;
          - any other byte is converted with CHAR_STRING.
        On return CODING->consumed, CODING->consumed_char,
        CODING->produced and CODING->produced_char are set.  */
 940
 941 static void
 942 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
 943      struct coding_system *coding;
 944      unsigned char *source, *destination;
 945      int src_bytes, dst_bytes;
 946 {
 947   unsigned char *src = source;
 948   unsigned char *src_end = source + src_bytes;
 949   unsigned char *dst = destination;
 950   unsigned char *dst_end = destination + dst_bytes;
 951   /* SRC_BASE remembers the start position in source in each loop.
 952      The loop will be exited when there's not enough source code, or
 953      when there's not enough destination area to produce a
 954      character.  */
 955   unsigned char *src_base;
 956
 957   coding->produced_char = 0;
 958   while ((src_base = src) < src_end)
 959     {
 960       unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
 961       int bytes;
 962
 963       if (*src == '\r')
 964         {
 965           int c = *src++;
 966
 967           if (coding->eol_type == CODING_EOL_CR)
 968             c = '\n';
 969           else if (coding->eol_type == CODING_EOL_CRLF)
 970             {
 971               ONE_MORE_BYTE (c);
 972               if (c != '\n')
 973                 {
                       /* Lone CR: keep it and re-read the byte that
                          followed it in the next iteration.  */
 974                   src--;
 975                   c = '\r';
 976                 }
 977             }
 978           *dst++ = c;
 979           coding->produced_char++;
 980           continue;
 981         }
 982       else if (*src == '\n')
 983         {
 984           if ((coding->eol_type == CODING_EOL_CR
 985                || coding->eol_type == CODING_EOL_CRLF)
 986               && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
 987             {
 988               coding->result = CODING_FINISH_INCONSISTENT_EOL;
 989               goto label_end_of_loop;
 990             }
 991           *dst++ = *src++;
 992           coding->produced_char++;
 993           continue;
 994         }
 995       else if (*src == 0x80 && coding->cmp_data)
 996         {
 997           /* Start of composition data.  */
 998           int consumed  = decode_composition_emacs_mule (coding, src, src_end,
 999                                                          &dst, dst_end,
1000                                                          dst_bytes);
               /* Negative: stop here.  Positive: that many source bytes
                  were handled.  Zero: invalid sequence, so emit the 0x80
                  byte as a character of its own.  */
1001           if (consumed < 0)
1002             goto label_end_of_loop;
1003           else if (consumed > 0)
1004             {
1005               src += consumed;
1006               continue;
1007             }
1008           bytes = CHAR_STRING (*src, tmp);
1009           p = tmp;
1010           src++;
1011         }
1012       else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
1013                || (coding->flags /* We are recovering a file.  */
1014                    && src[0] == LEADING_CODE_8_BIT_CONTROL
1015                    && ! CHAR_HEAD_P (src[1])))
1016         {
1017           p = src;
1018           src += bytes;
1019         }
1020       else
1021         {
1022           bytes = CHAR_STRING (*src, tmp);
1023           p = tmp;
1024           src++;
1025         }
           /* With DST_BYTES zero the write limit is the source position
              -- presumably the in-place conversion case; confirm with
              the callers.  */
1026       if (dst + bytes >= (dst_bytes ? dst_end : src))
1027         {
1028           coding->result = CODING_FINISH_INSUFFICIENT_DST;
1029           break;
1030         }
1031       while (bytes--) *dst++ = *p++;
1032       coding->produced_char++;
1033     }
1034  label_end_of_loop:
       /* SRC_BASE, not SRC, is reported: a partly handled character is
          not counted as consumed.  */
1035   coding->consumed = coding->consumed_char = src_base - source;
1036   coding->produced = dst - destination;
1037 }
1038
1039
1040 /* Encode composition data stored at DATA into a special byte sequence
1041    starting by 0x80.  Update CODING->cmp_data_start and maybe
1042    CODING->cmp_data for the next call.

        The sequence produced is:
          0x80, 0xF0 + DATA[3] (the method), 0xA0 + total byte length,
          0xA0 + (DATA[2] - DATA[1]) (the number of composed characters),
        followed by the components DATA[4] .. DATA[DATA[0] - 1].  Each
        character component is written with CHAR_STRING; for the two
        with-rule methods every other component is a rule, written as
        the two bytes 0x20 + GREF and 0x20 + NREF.

        The macro uses DST, DST_END, DST_BYTES and SRC of the expanding
        scope, and jumps to `label_end_of_loop' there with CODING->result
        set to CODING_FINISH_INSUFFICIENT_DST when the destination is too
        small.  */
1043
1044 #define ENCODE_COMPOSITION_EMACS_MULE(coding, data)                     \
1045   do {                                                                  \
1046     unsigned char buf[1024], *p0 = buf, *p;                             \
1047     int len = data[0];                                                  \
1048     int i;                                                              \
1049                                                                         \
1050     buf[0] = 0x80;                                                      \
1051     buf[1] = 0xF0 + data[3];    /* METHOD */                            \
1052     buf[3] = 0xA0 + (data[2] - data[1]); /* COMPOSED-CHARS */           \
1053     p = buf + 4;                                                        \
1054     if (data[3] == COMPOSITION_WITH_RULE                                \
1055         || data[3] == COMPOSITION_WITH_RULE_ALTCHARS)                   \
1056       {                                                                 \
1057         p += CHAR_STRING (data[4], p);                                  \
1058         for (i = 5; i < len; i += 2)                                    \
1059           {                                                             \
1060             int gref, nref;                                             \
1061              COMPOSITION_DECODE_RULE (data[i], gref, nref);             \
1062             *p++ = 0x20 + gref;                                         \
1063             *p++ = 0x20 + nref;                                         \
1064             p += CHAR_STRING (data[i + 1], p);                          \
1065           }                                                             \
1066       }                                                                 \
1067     else                                                                \
1068       {                                                                 \
1069         for (i = 4; i < len; i++)                                       \
1070           p += CHAR_STRING (data[i], p);                                \
1071       }                                                                 \
1072     buf[2] = 0xA0 + (p - buf);  /* COMPONENTS-BYTES */                  \
1073                                                                         \
1074     if (dst + (p - buf) + 4 > (dst_bytes ? dst_end : src))              \
1075       {                                                                 \
1076         coding->result = CODING_FINISH_INSUFFICIENT_DST;                \
1077         goto label_end_of_loop;                                         \
1078       }                                                                 \
1079     while (p0 < p)                                                      \
1080       *dst++ = *p0++;                                                   \
1081     coding->cmp_data_start += data[0];                                  \
1082     if (coding->cmp_data_start == coding->cmp_data->used                \
1083         && coding->cmp_data->next)                                      \
1084       {                                                                 \
1085         coding->cmp_data = coding->cmp_data->next;                      \
1086         coding->cmp_data_start = 0;                                     \
1087       }                                                                 \
1088   } while (0)
1089
1090
1091 static void encode_eol P_ ((struct coding_system *, const unsigned char *,
1092                             unsigned char *, int, int));
1093
     /* Encode SRC_BYTES bytes at SOURCE into DESTINATION in the
        `emacs-mule' format.

        When CODING has no composition data the work is handed to
        encode_eol.  Otherwise the source is processed one character at a
        time: composition data is emitted with
        ENCODE_COMPOSITION_EMACS_MULE just before the character at which
        the composition starts, '\n' is written according to
        CODING->eol_type, and other characters are written out.

        On return CODING->consumed, CODING->produced and
        CODING->produced_char are set.  */
1094 static void
1095 encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
1096      struct coding_system *coding;
1097      unsigned char *source, *destination;
1098      int src_bytes, dst_bytes;
1099 {
1100   unsigned char *src = source;
1101   unsigned char *src_end = source + src_bytes;
1102   unsigned char *dst = destination;
1103   unsigned char *dst_end = destination + dst_bytes;
1104   unsigned char *src_base;
1105   int c;
1106   int char_offset;
1107   int *data;
1108
       /* NOTE(review): not referenced directly below; presumably read by
          ONE_MORE_CHAR, whose definition is outside this chunk.  */
1109   Lisp_Object translation_table;
1110
1111   translation_table = Qnil;
1112
1113   /* Optimization for the case that there's no composition.  */
1114   if (!coding->cmp_data || coding->cmp_data->used == 0)
1115     {
1116       encode_eol (coding, source, destination, src_bytes, dst_bytes);
1117       return;
1118     }
1119
1120   char_offset = coding->cmp_data->char_offset;
1121   data = coding->cmp_data->data + coding->cmp_data_start;
       /* There is no explicit exit from this loop: it is left through
          `goto label_end_of_loop'.  ENCODE_COMPOSITION_EMACS_MULE does so
          when the destination is too small; ONE_MORE_CHAR and the EMIT_*
          macros presumably do the same (defined outside this chunk).  */
1122   while (1)
1123     {
1124       src_base = src;
1125
1126       /* If SRC starts a composition, encode the information about the
1127          composition in advance.  */
1128       if (coding->cmp_data_start < coding->cmp_data->used
1129           && char_offset + coding->consumed_char == data[1])
1130         {
1131           ENCODE_COMPOSITION_EMACS_MULE (coding, data);
               /* The macro may have moved on to the next cmp_data
                  block, so reload both values.  */
1132           char_offset = coding->cmp_data->char_offset;
1133           data = coding->cmp_data->data + coding->cmp_data_start;
1134         }
1135
1136       ONE_MORE_CHAR (c);
1137       if (c == '\n' && (coding->eol_type == CODING_EOL_CRLF
1138                         || coding->eol_type == CODING_EOL_CR))
1139         {
1140           if (coding->eol_type == CODING_EOL_CRLF)
1141             EMIT_TWO_BYTES ('\r', c);
1142           else
1143             EMIT_ONE_BYTE ('\r');
1144         }
1145       else if (SINGLE_BYTE_CHAR_P (c))
1146         {
1147           if (coding->flags && ! ASCII_BYTE_P (c))
1148             {
1149               /* As we are auto saving, retain the multibyte form for
1150                  8-bit chars.  */
1151               unsigned char buf[MAX_MULTIBYTE_LENGTH];
1152               int bytes = CHAR_STRING (c, buf);
1153
1154               if (bytes == 1)
1155                 EMIT_ONE_BYTE (buf[0]);
1156               else
1157                 EMIT_TWO_BYTES (buf[0], buf[1]);
1158             }
1159           else
1160             EMIT_ONE_BYTE (c);
1161         }
1162       else
             /* Multibyte character: copy its source bytes unchanged.  */
1163         EMIT_BYTES (src_base, src);
1164       coding->consumed_char++;
1165     }
1166  label_end_of_loop:
1167   coding->consumed = src_base - source;
1168   coding->produced = coding->produced_char = dst - destination;
1169   return;
1170 }
1171
1172 \f
1173 /*** 3. ISO2022 handlers ***/
1174
1175 /* The following note describes the coding system ISO2022 briefly.
1176    Since the intention of this note is to help understand the
1177    functions in this file, some parts are NOT ACCURATE or are OVERLY
1178    SIMPLIFIED.  For thorough understanding, please refer to the
1179    original document of ISO2022.  This is equivalent to the standard
1180    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
1181
1182    ISO2022 provides many mechanisms to encode several character sets
1183    in 7-bit and 8-bit environments.  For 7-bit environments, all text
1184    is encoded using bytes less than 128.  This may make the encoded
1185    text a little bit longer, but the text passes more easily through
1186    several types of gateway, some of which strip off the MSB (Most
1187    Significant Bit).
1188
1189    There are two kinds of character sets: control character sets and
1190    graphic character sets.  The former contain control characters such
1191    as `newline' and `escape' to provide control functions (control
1192    functions are also provided by escape sequences).  The latter
1193    contain graphic characters such as 'A' and '-'.  Emacs recognizes
1194    two control character sets and many graphic character sets.
1195
1196    Graphic character sets are classified into one of the following
1197    four classes, according to the number of bytes (DIMENSION) and
1198    number of characters in one dimension (CHARS) of the set:
1199    - DIMENSION1_CHARS94
1200    - DIMENSION1_CHARS96
1201    - DIMENSION2_CHARS94
1202    - DIMENSION2_CHARS96
1203
1204    In addition, each character set is assigned an identification tag,
1205    unique for each set, called the "final character" (denoted as <F>
1206    hereafter).  The <F> of each character set is decided by ECMA(*)
1207    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
1208    (0x30..0x3F are for private use only).
1209
1210    Note (*): ECMA = European Computer Manufacturers Association
1211
1212    Here are examples of graphic character sets [NAME(<F>)]:
1213         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
1214         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
1215         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
1216         o DIMENSION2_CHARS96 -- none for the moment
1217
1218    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
1219         C0 [0x00..0x1F] -- control character plane 0
1220         GL [0x20..0x7F] -- graphic character plane 0
1221         C1 [0x80..0x9F] -- control character plane 1
1222         GR [0xA0..0xFF] -- graphic character plane 1
1223
1224    A control character set is directly designated and invoked to C0 or
1225    C1 by an escape sequence.  The most common case is that:
1226    - ISO646's  control character set is designated/invoked to C0, and
1227    - ISO6429's control character set is designated/invoked to C1,
1228    and usually these designations/invocations are omitted in encoded
1229    text.  In a 7-bit environment, only C0 can be used, and a control
1230    character for C1 is encoded by an appropriate escape sequence to
1231    fit into the environment.  All control characters for C1 are
1232    defined to have corresponding escape sequences.
1233
1234    A graphic character set is at first designated to one of four
1235    graphic registers (G0 through G3), then these graphic registers are
1236    invoked to GL or GR.  These designations and invocations can be
1237    done independently.  The most common case is that G0 is invoked to
1238    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
1239    these invocations and designations are omitted in encoded text.
1240    In a 7-bit environment, only GL can be used.
1241
1242    When a graphic character set of CHARS94 is invoked to GL, codes
1243    0x20 and 0x7F of the GL area work as control characters SPACE and
1244    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
1245    be used.
1246
1247    There are two ways of invocation: locking-shift and single-shift.
1248    With locking-shift, the invocation lasts until the next different
1249    invocation, whereas with single-shift, the invocation affects the
1250    following character only and doesn't affect the locking-shift
1251    state.  Invocations are done by the following control characters or
1252    escape sequences:
1253
1254    ----------------------------------------------------------------------
1255    abbrev  function                  cntrl escape seq   description
1256    ----------------------------------------------------------------------
1257    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
1258    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
1259    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
1260    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
1261    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
1262    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
1263    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
1264    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
1265    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
1266    ----------------------------------------------------------------------
1267    (*) These are not used by any known coding system.
1268
1269    Control characters for these functions are defined by macros
1270    ISO_CODE_XXX in `coding.h'.
1271
1272    Designations are done by the following escape sequences:
1273    ----------------------------------------------------------------------
1274    escape sequence      description
1275    ----------------------------------------------------------------------
1276    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
1277    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
1278    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
1279    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
1280    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
1281    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
1282    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
1283    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
1284    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
1285    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
1286    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
1287    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
1288    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
1289    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
1290    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
1291    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
1292    ----------------------------------------------------------------------
1293
1294    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
1295    of dimension 1, chars 94, and final character <F>, etc...
1296
1297    Note (*): Although these designations are not allowed in ISO2022,
1298    Emacs accepts them on decoding, and produces them on encoding
1299    CHARS96 character sets in a coding system which is characterized as
1300    7-bit environment, non-locking-shift, and non-single-shift.
1301
1302    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
1303    '(' can be omitted.  We refer to this as "short-form" hereafter.
1304
1305    Now you may notice that there are a lot of ways of encoding the
1306    same multilingual text in ISO2022.  Actually, there exist many
1307    coding systems such as Compound Text (used in X11's inter-client
1308    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
1309    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
1310    localized platforms), and all of these are variants of ISO2022.
1311
1312    In addition to the above, Emacs handles two more kinds of escape
1313    sequences: ISO6429's direction specification and Emacs' private
1314    sequence for specifying character composition.
1315
1316    ISO6429's direction specification takes the following form:
1317         o CSI ']'      -- end of the current direction
1318         o CSI '0' ']'  -- end of the current direction
1319         o CSI '1' ']'  -- start of left-to-right text
1320         o CSI '2' ']'  -- start of right-to-left text
1321    The control character CSI (0x9B: control sequence introducer) is
1322    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
1323
1324    Character composition specification takes the following form:
1325         o ESC '0' -- start relative composition
1326         o ESC '1' -- end composition
1327         o ESC '2' -- start rule-base composition (*)
1328         o ESC '3' -- start relative composition with alternate chars  (**)
1329         o ESC '4' -- start rule-base composition with alternate chars  (**)
1330   Since these are not standard escape sequences of any ISO standard,
1331   the use of them with these meanings is restricted to Emacs only.
1332
1333   (*) This form is used only in Emacs 20.5 and older versions,
1334   but the newer versions can safely decode it.
1335   (**) This form is used only in Emacs 21.1 and newer versions,
1336   and the older versions can't decode it.
1337
1338   Here's a list of example usages of these composition escape
1339   sequences (categorized by `enum composition_method').
1340
1341   COMPOSITION_RELATIVE:
1342         ESC 0 CHAR [ CHAR ] ESC 1
1343   COMPOSITION_WITH_RULE:
1344         ESC 2 CHAR [ RULE CHAR ] ESC 1
1345   COMPOSITION_WITH_ALTCHARS:
1346         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
1347   COMPOSITION_WITH_RULE_ALTCHARS:
1348         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
1349
1350 enum iso_code_class_type iso_code_class[256];
     /* NOTE(review): 256 entries, presumably one ISO code class per
        possible byte value; the code that fills this table is outside
        this chunk -- confirm.  */
1351 /* Non-zero if coding_system_table[IDX] is non-null, and either
        CHARSET is CHARSET_ASCII or C satisfies CODING_SAFE_CHAR_P for
        the safe chars of that coding system, and the value of
        CODING_SPEC_ISO_REQUESTED_DESIGNATION for CHARSET in that coding
        system is not CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION.
        As a side effect the variable `safe_chars' of the expanding
        scope may be assigned.  */
1352 #define CHARSET_OK(idx, charset, c)                                     \
1353   (coding_system_table[idx]                                             \
1354    && (charset == CHARSET_ASCII                                         \
1355        || (safe_chars = coding_safe_chars (coding_system_table[idx]->symbol), \
1356            CODING_SAFE_CHAR_P (safe_chars, c)))                         \
1357    && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
1358                                               charset)                  \
1359        != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
1360 /* Non-zero if CODING_SPEC_ISO_INITIAL_DESIGNATION of
        coding_system_table[IDX] for its second argument 1 (presumably
        graphic register G1 -- confirm) is not negative.  Unlike
        CHARSET_OK, this macro does not itself check that
        coding_system_table[IDX] is non-null.  */
1361 #define SHIFT_OUT_OK(idx) \
1362   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
1363 /* Non-zero if the `composing' member of coding_system_table[IDX] is
        not COMPOSITION_DISABLED.  coding_system_table[IDX] is
        dereferenced without a null check.  */
1364 #define COMPOSITION_OK(idx)     \
1365   (coding_system_table[idx]->composing != COMPOSITION_DISABLED)
1366
1367 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1368    Check if the text in SRC..SRC_END is encoded in ISO2022.  If it is,
1369    return an integer in which the appropriate flag bits among:
1370         CODING_CATEGORY_MASK_ISO_7
1371         CODING_CATEGORY_MASK_ISO_7_TIGHT
1372         CODING_CATEGORY_MASK_ISO_8_1
1373         CODING_CATEGORY_MASK_ISO_8_2
1374         CODING_CATEGORY_MASK_ISO_7_ELSE
1375         CODING_CATEGORY_MASK_ISO_8_ELSE
1376    are set.  If a code which should never appear in ISO2022 is found,
1377    return 0.  MULTIBYTEP is handed to ONE_MORE_BYTE_CHECK_MULTIBYTE.  */
1378
1379 static int
1380 detect_coding_iso2022 (src, src_end, multibytep)
1381      unsigned char *src, *src_end;
1382      int multibytep;
1383 {
1384   int mask = CODING_CATEGORY_MASK_ISO;  /* Categories not yet ruled out.  */
1385   int mask_found = 0;           /* Categories with positive evidence.  */
1386   int reg[4], shift_out = 0, single_shifting = 0;
1387   int c, c1, charset;
1388   /* Dummy for ONE_MORE_BYTE.  */
1389   struct coding_system dummy_coding;
1390   struct coding_system *coding = &dummy_coding;
1391   Lisp_Object safe_chars;       /* Presumably scratch for CHARSET_OK.  */
1392
1393   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
1394   while (mask && src < src_end)
1395     {
1396       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1397     retry:
1398       switch (c)
1399         {
1400         case ISO_CODE_ESC:
1401           if (inhibit_iso_escape_detection)
1402             break;
1403           single_shifting = 0;
1404           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1405           if (c >= '(' && c <= '/')
1406             {
1407               /* Designation sequence for a charset of dimension 1.  */
1408               ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1409               if (c1 < ' ' || c1 >= 0x80
1410                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
1411                 /* Invalid designation sequence.  Just ignore.  */
1412                 break;
1413               reg[(c - '(') % 4] = charset;
1414             }
1415           else if (c == '$')
1416             {
1417               /* Designation sequence for a charset of dimension 2.  */
1418               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1419               if (c >= '@' && c <= 'B')
1420                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
1421                 reg[0] = charset = iso_charset_table[1][0][c];
1422               else if (c >= '(' && c <= '/')
1423                 {
1424                   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1425                   if (c1 < ' ' || c1 >= 0x80
1426                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
1427                     /* Invalid designation sequence.  Just ignore.  */
1428                     break;
1429                   reg[(c - '(') % 4] = charset;
1430                 }
1431               else
1432                 /* Invalid designation sequence.  Just ignore.  */
1433                 break;
1434             }
1435           else if (c == 'N' || c == 'O')
1436             {
1437               /* ESC <Fe> for SS2 or SS3.  */
1438               mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
1439               break;
1440             }
1441           else if (c >= '0' && c <= '4')
1442             {
1443               /* ESC <Fp> for start/end composition.  */
1444               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7))
1445                 mask_found |= CODING_CATEGORY_MASK_ISO_7;
1446               else
1447                 mask &= ~CODING_CATEGORY_MASK_ISO_7;
1448               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT))
1449                 mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
1450               else
1451                 mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
1452               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_1))
1453                 mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1454               else
1455                 mask &= ~CODING_CATEGORY_MASK_ISO_8_1;
1456               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_2))
1457                 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1458               else
1459                 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1460               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_ELSE))
1461                 mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
1462               else
1463                 mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
1464               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_ELSE))
1465                 mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
1466               else
1467                 mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
1468               break;
1469             }
1470           else
1471             /* Invalid escape sequence.  Just ignore.  */
1472             break;
1473
1474           /* We found a valid designation sequence for CHARSET.  */
1475           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
1476           c = MAKE_CHAR (charset, 0, 0);
1477           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
1478             mask_found |= CODING_CATEGORY_MASK_ISO_7;
1479           else
1480             mask &= ~CODING_CATEGORY_MASK_ISO_7;
1481           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
1482             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
1483           else
1484             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
1485           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
1486             mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
1487           else
1488             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
1489           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
1490             mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
1491           else
1492             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
1493           break;
1494
1495         case ISO_CODE_SO:
1496           if (inhibit_iso_escape_detection)
1497             break;
1498           single_shifting = 0;
1499           if (shift_out == 0
1500               && (reg[1] >= 0
1501                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
1502                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
1503             {
1504               /* Locking shift out.  */
1505               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1506               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1507             }
1508           break;
1509
1510         case ISO_CODE_SI:
1511           if (inhibit_iso_escape_detection)
1512             break;
1513           single_shifting = 0;
1514           if (shift_out == 1)   /* NOTE(review): never set to 1 in this function?  */
1515             {
1516               /* Locking shift in.  */
1517               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1518               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1519             }
1520           break;
1521
1522         case ISO_CODE_CSI:
1523           single_shifting = 0;  /* Fall through.  */
1524         case ISO_CODE_SS2:
1525         case ISO_CODE_SS3:
1526           {
1527             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
1528
1529             if (inhibit_iso_escape_detection)
1530               break;
1531             if (c != ISO_CODE_CSI)
1532               {
1533                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1534                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1535                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1536                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1537                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1538                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1539                 single_shifting = 1;
1540               }
1541             if (VECTORP (Vlatin_extra_code_table)
1542                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1543               {
1544                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1545                     & CODING_FLAG_ISO_LATIN_EXTRA)
1546                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1547                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1548                     & CODING_FLAG_ISO_LATIN_EXTRA)
1549                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1550               }
1551             mask &= newmask;
1552             mask_found |= newmask;
1553           }
1554           break;
1555
1556         default:
1557           if (c < 0x80)
1558             {
1559               single_shifting = 0;
1560               break;
1561             }
1562           else if (c < 0xA0)
1563             {
1564               single_shifting = 0;
1565               if (VECTORP (Vlatin_extra_code_table)
1566                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1567                 {
1568                   int newmask = 0;
1569
1570                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1571                       & CODING_FLAG_ISO_LATIN_EXTRA)
1572                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1573                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1574                       & CODING_FLAG_ISO_LATIN_EXTRA)
1575                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1576                   mask &= newmask;
1577                   mask_found |= newmask;
1578                 }
1579               else
1580                 return 0;       /* 0x80..0x9F byte that is not a Latin extra code.  */
1581             }
1582           else
1583             {
1584               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
1585                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
1586               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1587               /* Check the length of succeeding codes of the range
1588                  0xA0..0xFF.  If the byte length is odd, we exclude
1589                  CODING_CATEGORY_MASK_ISO_8_2.  We can check this only
1590                  when we are not single shifting.  */
1591               if (!single_shifting
1592                   && mask & CODING_CATEGORY_MASK_ISO_8_2)
1593                 {
1594                   int i = 1;
1595
1596                   c = -1;
1597                   while (src < src_end)
1598                     {
1599                       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1600                       if (c < 0xA0)
1601                         break;
1602                       i++;
1603                     }
1604
1605                   if (i & 1 && src < src_end)
1606                     mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1607                   else
1608                     mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1609                   if (c >= 0)
1610                     /* This means that we have read one extra byte.  */
1611                     goto retry;
1612                 }
1613             }
1614           break;
1615         }
1616     }
1617  label_end_of_loop:     /* Presumably the exit target of ONE_MORE_BYTE_CHECK_MULTIBYTE.  */
1618   return (mask & mask_found);   /* Only categories both possible and evidenced.  */
1619 }
1620
1621 /* Yield the character code for CHARSET whose 1st position code is C1
1622    and whose 2nd position code is C2.  When the variable
1623    `translation_table' of the enclosing scope is non-nil, the code is
1624    obtained from translate_char instead of MAKE_CHAR.  */
1625
1626 #define DECODE_ISO_CHARACTER(charset, c1, c2)                   \
1627   (!NILP (translation_table)                                    \
1628    ? translate_char (translation_table, -1, charset, c1, c2)    \
1629    : MAKE_CHAR (charset, c1, c2))
1630
1631 /* Set designation state into CODING, or goto label_invalid_code if rejected.  */
1632 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
1633   do {  /* Needs `coding', `safe_chars' and label_invalid_code.  */        \
1634     int charset, c;                                                        \
1635                                                                            \
1636     if (final_char < '0' || final_char >= 128)                             \
1637       goto label_invalid_code;                                             \
1638     charset = ISO_CHARSET_TABLE (make_number (dimension),                  \
1639                                  make_number (chars),                      \
1640                                  make_number (final_char));                \
1641     c = MAKE_CHAR (charset, 0, 0);  /* Computed before CHARSET is checked.  */ \
1642     if (charset >= 0                                                       \
1643         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
1644             || CODING_SAFE_CHAR_P (safe_chars, c)))                        \
1645       {                                                                    \
1646         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
1647             && reg == 0                                                    \
1648             && charset == CHARSET_ASCII)                                   \
1649           {                                                                \
1650             /* We should insert this designation sequence as is so         \
1651                that it is surely written back to a file.  */               \
1652             coding->spec.iso2022.last_invalid_designation_register = -1;   \
1653             goto label_invalid_code;                                       \
1654           }                                                                \
1655         coding->spec.iso2022.last_invalid_designation_register = -1;       \
1656         if ((coding->mode & CODING_MODE_DIRECTION)                         \
1657             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
1658           charset = CHARSET_REVERSE_CHARSET (charset);                     \
1659         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
1660       }                                                                    \
1661     else                                                                   \
1662       {  /* Rejected: remember REG as the last invalid register.  */       \
1663         coding->spec.iso2022.last_invalid_designation_register = reg;      \
1664         goto label_invalid_code;                                           \
1665       }                                                                    \
1666   } while (0)
1667
1668 /* Append a freshly allocated composition_data block to CODING's chain
1669    and make it current.  CHAR_OFFSET is recorded in the new block.  */
1670
1671 void
1672 coding_allocate_composition_data (coding, char_offset)
1673      struct coding_system *coding;
1674      int char_offset;
1675 {
1676   struct composition_data *block;
1677
1678   block = (struct composition_data *) xmalloc (sizeof *block);
1679   block->prev = coding->cmp_data;
1680   block->next = NULL;
1681   block->used = 0;
1682   block->char_offset = char_offset;
1683   if (block->prev != NULL)
1684     block->prev->next = block;  /* Link the old tail forward.  */
1685   coding->cmp_data = block;
1686   coding->cmp_data_start = 0;
1687 }
1688
1689 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
1690    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
1691    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
1692    ESC 3 : altchar composition :  ESC 3 ALT ... ESC 0 CHAR ... ESC 1
1693    ESC 4 : alt&rule composition : ESC 4 ALT RULE .. ALT ESC 0 CHAR ... ESC 1
1694    C1 is the byte after ESC.  Uses `coding', `dst' and label_end_of_loop.  */
1695
1696 #define DECODE_COMPOSITION_START(c1)                                       \
1697   do {                                                                     \
1698     if (coding->composing == COMPOSITION_DISABLED)                         \
1699       {  /* Composition disabled: emit ESC and C1 & 0x7f as text.  */      \
1700         *dst++ = ISO_CODE_ESC;                                             \
1701         *dst++ = c1 & 0x7f;                                                \
1702         coding->produced_char += 2;                                        \
1703       }                                                                    \
1704     else if (!COMPOSING_P (coding))                                        \
1705       {                                                                    \
1706         /* This is surely the start of a composition.  We must be sure     \
1707            that coding->cmp_data has enough space to store the             \
1708            information about the composition.  If not, terminate the       \
1709            current decoding loop, allocate one more memory block for       \
1710            coding->cmp_data in the caller, then start the decoding         \
1711            loop again.  We can't allocate memory here directly because     \
1712            it may cause buffer/string relocation.  */                      \
1713         if (!coding->cmp_data                                              \
1714             || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
1715                 >= COMPOSITION_DATA_SIZE))                                 \
1716           {                                                                \
1717             coding->result = CODING_FINISH_INSUFFICIENT_CMP;               \
1718             goto label_end_of_loop;                                        \
1719           }                                                                \
1720         coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE              \
1721                              : c1 == '2' ? COMPOSITION_WITH_RULE           \
1722                              : c1 == '3' ? COMPOSITION_WITH_ALTCHARS       \
1723                              : COMPOSITION_WITH_RULE_ALTCHARS);            \
1724         CODING_ADD_COMPOSITION_START (coding, coding->produced_char,       \
1725                                       coding->composing);                  \
1726         coding->composition_rule_follows = 0;                              \
1727       }                                                                    \
1728     else                                                                   \
1729       {                                                                    \
1730         /* We are already handling a composition.  If the method is        \
1731            the following two, the codes following the current escape       \
1732            sequence are actual characters stored in a buffer.  */          \
1733         if (coding->composing == COMPOSITION_WITH_ALTCHARS                 \
1734             || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)        \
1735           {                                                                \
1736             coding->composing = COMPOSITION_RELATIVE;                      \
1737             coding->composition_rule_follows = 0;                          \
1738           }                                                                \
1739       }                                                                    \
1740   } while (0)
1741
1742 /* Handle ESC 1: close the open composition, else emit ESC C1 as text.  */
1743
1744 #define DECODE_COMPOSITION_END(c1)                                      \
1745   do {                                                                  \
1746     if (COMPOSING_P (coding))                                           \
1747       {                                                                 \
1748         CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
1749         coding->composing = COMPOSITION_NO;                             \
1750       }                                                                 \
1751     else                                                                \
1752       {                                                                 \
1753         *dst++ = ISO_CODE_ESC;                                          \
1754         *dst++ = c1;                                                    \
1755         coding->produced_char += 2;                                     \
1756       }                                                                 \
1757   } while (0)
1758
1759 /* Decode a composition rule from the byte C1 (and maybe one more byte
1760    from SRC, read into `c2') and store one encoded composition rule in
1761    coding->cmp_data.  C1 must be an lvalue; it is modified.  */
1762
1763 #define DECODE_COMPOSITION_RULE(c1)                                     \
1764   do {                                                                  \
1765     int rule = 0;               /* kept if C1 fits neither format */    \
1766     (c1) -= 32;                 /* changes the caller's variable */     \
1767     if (c1 < 81)                /* old format (before ver.21) */        \
1768       {                                                                 \
1769         int gref = (c1) / 9;                                            \
1770         int nref = (c1) % 9;                                            \
1771         if (gref == 4) gref = 10;                                       \
1772         if (nref == 4) nref = 10;                                       \
1773         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
1774       }                                                                 \
1775     else if (c1 < 93)           /* new format (after ver.21) */         \
1776       {                                                                 \
1777         ONE_MORE_BYTE (c2);     /* `c2' comes from the caller's scope */ \
1778         rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);              \
1779       }                                                                 \
1780     CODING_ADD_COMPOSITION_COMPONENT (coding, rule);                    \
1781     coding->composition_rule_follows = 0;                               \
1782   } while (0)
1783
1784
1785 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1786
1787 static void
1788 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1789      struct coding_system *coding;
1790      unsigned char *source, *destination;
1791      int src_bytes, dst_bytes;
1792 {
1793   unsigned char *src = source;
1794   unsigned char *src_end = source + src_bytes;
1795   unsigned char *dst = destination;
1796   unsigned char *dst_end = destination + dst_bytes;
1797   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1798   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1799   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1800   /* SRC_BASE remembers the start position in source in each loop.
1801      The loop will be exited when there's not enough source code
1802      (within macro ONE_MORE_BYTE), or when there's not enough
1803      destination area to produce a character (within macro
1804      EMIT_CHAR).  */
1805   unsigned char *src_base;
1806   int c, charset;
1807   Lisp_Object translation_table;
1808   Lisp_Object safe_chars;
1809
1810   safe_chars = coding_safe_chars (coding->symbol);
1811
1812   if (NILP (Venable_character_translation))
1813     translation_table = Qnil;
1814   else
1815     {
1816       translation_table = coding->translation_table_for_decode;
1817       if (NILP (translation_table))
1818         translation_table = Vstandard_translation_table_for_decode;
1819     }
1820
1821   coding->result = CODING_FINISH_NORMAL;
1822
1823   while (1)
1824     {
1825       int c1, c2;
1826
1827       src_base = src;
1828       ONE_MORE_BYTE (c1);
1829
1830       /* We produce no character or one character.  */
1831       switch (iso_code_class [c1])
1832         {
1833         case ISO_0x20_or_0x7F:
1834           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1835             {
1836               DECODE_COMPOSITION_RULE (c1);
1837               continue;
1838             }
1839           if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
1840             {
1841               /* This is SPACE or DEL.  */
1842               charset = CHARSET_ASCII;
1843               break;
1844             }
1845           /* This is a graphic character, we fall down ...  */
1846
1847         case ISO_graphic_plane_0:
1848           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1849             {
1850               DECODE_COMPOSITION_RULE (c1);
1851               continue;
1852             }
1853           charset = charset0;
1854           break;
1855
1856         case ISO_0xA0_or_0xFF:
1857           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1858               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1859             goto label_invalid_code;
1860           /* This is a graphic character, we fall down ... */
1861
1862         case ISO_graphic_plane_1:
1863           if (charset1 < 0)
1864             goto label_invalid_code;
1865           charset = charset1;
1866           break;
1867
1868         case ISO_control_0:
1869           if (COMPOSING_P (coding))
1870             DECODE_COMPOSITION_END ('1');
1871
1872           /* All ISO2022 control characters in this class have the
1873              same representation in Emacs internal format.  */
1874           if (c1 == '\n'
1875               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1876               && (coding->eol_type == CODING_EOL_CR
1877                   || coding->eol_type == CODING_EOL_CRLF))
1878             {
1879               coding->result = CODING_FINISH_INCONSISTENT_EOL;
1880               goto label_end_of_loop;
1881             }
1882           charset = CHARSET_ASCII;
1883           break;
1884
1885         case ISO_control_1:
1886           if (COMPOSING_P (coding))
1887             DECODE_COMPOSITION_END ('1');
1888           goto label_invalid_code;
1889
1890         case ISO_carriage_return:
1891           if (COMPOSING_P (coding))
1892             DECODE_COMPOSITION_END ('1');
1893
1894           if (coding->eol_type == CODING_EOL_CR)
1895             c1 = '\n';
1896           else if (coding->eol_type == CODING_EOL_CRLF)
1897             {
1898               ONE_MORE_BYTE (c1);
1899               if (c1 != ISO_CODE_LF)
1900                 {
1901                   src--;
1902                   c1 = '\r';
1903                 }
1904             }
1905           charset = CHARSET_ASCII;
1906           break;
1907
1908         case ISO_shift_out:
1909           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1910               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1911             goto label_invalid_code;
1912           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1913           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1914           continue;
1915
1916         case ISO_shift_in:
1917           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1918             goto label_invalid_code;
1919           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1920           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1921           continue;
1922
1923         case ISO_single_shift_2_7:
1924         case ISO_single_shift_2:
1925           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1926             goto label_invalid_code;
1927           /* SS2 is handled as an escape sequence of ESC 'N' */
1928           c1 = 'N';
1929           goto label_escape_sequence;
1930
1931         case ISO_single_shift_3:
1932           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1933             goto label_invalid_code;
1934           /* SS3 is handled as an escape sequence of ESC 'O' */
1935           c1 = 'O';
1936           goto label_escape_sequence;
1937
1938         case ISO_control_sequence_introducer:
1939           /* CSI is handled as an escape sequence of ESC '[' ...  */
1940           c1 = '[';
1941           goto label_escape_sequence;
1942
1943         case ISO_escape:
1944           ONE_MORE_BYTE (c1);
1945         label_escape_sequence:
1946           /* Escape sequences handled by Emacs are invocation,
1947              designation, direction specification, and character
1948              composition specification.  */
1949           switch (c1)
1950             {
1951             case '&':           /* revision of following character set */
1952               ONE_MORE_BYTE (c1);
1953               if (!(c1 >= '@' && c1 <= '~'))
1954                 goto label_invalid_code;
1955               ONE_MORE_BYTE (c1);
1956               if (c1 != ISO_CODE_ESC)
1957                 goto label_invalid_code;
1958               ONE_MORE_BYTE (c1);
1959               goto label_escape_sequence;
1960
1961             case '$':           /* designation of 2-byte character set */
1962               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1963                 goto label_invalid_code;
1964               ONE_MORE_BYTE (c1);
1965               if (c1 >= '@' && c1 <= 'B')
1966                 {       /* designation of JISX0208.1978, GB2312.1980,
1967                            or JISX0208.1980 */
1968                   DECODE_DESIGNATION (0, 2, 94, c1);
1969                 }
1970               else if (c1 >= 0x28 && c1 <= 0x2B)
1971                 {       /* designation of DIMENSION2_CHARS94 character set */
1972                   ONE_MORE_BYTE (c2);
1973                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1974                 }
1975               else if (c1 >= 0x2C && c1 <= 0x2F)
1976                 {       /* designation of DIMENSION2_CHARS96 character set */
1977                   ONE_MORE_BYTE (c2);
1978                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1979                 }
1980               else
1981                 goto label_invalid_code;
1982               /* We must update these variables now.  */
1983               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1984               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1985               continue;
1986
1987             case 'n':           /* invocation of locking-shift-2 */
1988               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1989                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1990                 goto label_invalid_code;
1991               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1992               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1993               continue;
1994
1995             case 'o':           /* invocation of locking-shift-3 */
1996               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1997                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1998                 goto label_invalid_code;
1999               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
2000               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
2001               continue;
2002
2003             case 'N':           /* invocation of single-shift-2 */
2004               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2005                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
2006                 goto label_invalid_code;
2007               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
2008               ONE_MORE_BYTE (c1);
2009               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2010                 goto label_invalid_code;
2011               break;
2012
2013             case 'O':           /* invocation of single-shift-3 */
2014               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2015                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
2016                 goto label_invalid_code;
2017               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
2018               ONE_MORE_BYTE (c1);
2019               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2020                 goto label_invalid_code;
2021               break;
2022
2023             case '0': case '2': case '3': case '4': /* start composition */
2024               DECODE_COMPOSITION_START (c1);
2025               continue;
2026
2027             case '1':           /* end composition */
2028               DECODE_COMPOSITION_END (c1);
2029               continue;
2030
2031             case '[':           /* specification of direction */
2032               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
2033                 goto label_invalid_code;
2034               /* For the moment, nested direction is not supported.
2035                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
2036                  left-to-right, and nonzero means right-to-left.  */
2037               ONE_MORE_BYTE (c1);
2038               switch (c1)
2039                 {
2040                 case ']':       /* end of the current direction */
2041                   coding->mode &= ~CODING_MODE_DIRECTION;
2042
2043                 case '0':       /* end of the current direction */
2044                 case '1':       /* start of left-to-right direction */
2045                   ONE_MORE_BYTE (c1);
2046                   if (c1 == ']')
2047                     coding->mode &= ~CODING_MODE_DIRECTION;
2048                   else
2049                     goto label_invalid_code;
2050                   break;
2051
2052                 case '2':       /* start of right-to-left direction */
2053                   ONE_MORE_BYTE (c1);
2054                   if (c1 == ']')
2055                     coding->mode |= CODING_MODE_DIRECTION;
2056                   else
2057                     goto label_invalid_code;
2058                   break;
2059
2060                 default:
2061                   goto label_invalid_code;
2062                 }
2063               continue;
2064
2065             case '%':
2066               if (COMPOSING_P (coding))
2067                 DECODE_COMPOSITION_END ('1');
2068               ONE_MORE_BYTE (c1);
2069               if (c1 == '/')
2070                 {
2071                   /* CTEXT extended segment:
2072                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
2073                      We keep these bytes as is for the moment.
2074                      They may be decoded by post-read-conversion.  */
2075                   int dim, M, L;
2076                   int size, required;
2077                   int produced_chars;
2078
2079                   ONE_MORE_BYTE (dim);
2080                   ONE_MORE_BYTE (M);
2081                   ONE_MORE_BYTE (L);
2082                   size = ((M - 128) * 128) + (L - 128);
2083                   required = 8 + size * 2;
2084                   if (dst + required > (dst_bytes ? dst_end : src))
2085                     goto label_end_of_loop;
2086                   *dst++ = ISO_CODE_ESC;
2087                   *dst++ = '%';
2088                   *dst++ = '/';
2089                   *dst++ = dim;
2090                   produced_chars = 4;
2091                   dst += CHAR_STRING (M, dst), produced_chars++;
2092                   dst += CHAR_STRING (L, dst), produced_chars++;
2093                   while (size-- > 0)
2094                     {
2095                       ONE_MORE_BYTE (c1);
2096                       dst += CHAR_STRING (c1, dst), produced_chars++;
2097                     }
2098                   coding->produced_char += produced_chars;
2099                 }
2100               else if (c1 == 'G')
2101                 {
2102                   unsigned char *d = dst;
2103                   int produced_chars;
2104
2105                   /* XFree86 extension for embedding UTF-8 in CTEXT:
2106                      ESC % G --UTF-8-BYTES-- ESC % @
2107                      We keep these bytes as is for the moment.
2108                      They may be decoded by post-read-conversion.  */
2109                   if (d + 6 > (dst_bytes ? dst_end : src))
2110                     goto label_end_of_loop;
2111                   *d++ = ISO_CODE_ESC;
2112                   *d++ = '%';
2113                   *d++ = 'G';
2114                   produced_chars = 3;
2115                   while (d + 1 < (dst_bytes ? dst_end : src))
2116                     {
2117                       ONE_MORE_BYTE (c1);
2118                       if (c1 == ISO_CODE_ESC
2119                           && src + 1 < src_end
2120                           && src[0] == '%'
2121                           && src[1] == '@')
2122                         break;
2123                       d += CHAR_STRING (c1, d), produced_chars++;
2124                     }
2125                   if (d + 3 > (dst_bytes ? dst_end : src))
2126                     goto label_end_of_loop;
2127                   *d++ = ISO_CODE_ESC;
2128                   *d++ = '%';
2129                   *d++ = '@';
2130                   dst = d;
2131                   coding->produced_char += produced_chars + 3;
2132                 }
2133               else
2134                 goto label_invalid_code;
2135               continue;
2136
2137             default:
2138               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
2139                 goto label_invalid_code;
2140               if (c1 >= 0x28 && c1 <= 0x2B)
2141                 {       /* designation of DIMENSION1_CHARS94 character set */
2142                   ONE_MORE_BYTE (c2);
2143                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
2144                 }
2145               else if (c1 >= 0x2C && c1 <= 0x2F)
2146                 {       /* designation of DIMENSION1_CHARS96 character set */
2147                   ONE_MORE_BYTE (c2);
2148                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
2149                 }
2150               else
2151                 goto label_invalid_code;
2152               /* We must update these variables now.  */
2153               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
2154               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
2155               continue;
2156             }
2157         }
2158
2159       /* Now we know CHARSET and 1st position code C1 of a character.
2160          Produce a multibyte sequence for that character while getting
2161          2nd position code C2 if necessary.  */
2162       if (CHARSET_DIMENSION (charset) == 2)
2163         {
2164           ONE_MORE_BYTE (c2);
2165           if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0)
2166             /* C2 is not in a valid range.  */
2167             goto label_invalid_code;
2168         }
2169       c = DECODE_ISO_CHARACTER (charset, c1, c2);
2170       EMIT_CHAR (c);
2171       continue;
2172
2173     label_invalid_code:
2174       coding->errors++;
2175       if (COMPOSING_P (coding))
2176         DECODE_COMPOSITION_END ('1');
2177       src = src_base;
2178       c = *src++;
2179       EMIT_CHAR (c);
2180     }
2181
2182  label_end_of_loop:
2183   coding->consumed = coding->consumed_char = src_base - source;
2184   coding->produced = dst - destination;
2185   return;
2186 }
2187
2188
2189 /* ISO2022 encoding stuff.  */
2190
/*
   Just saying "ISO2022" is not enough when encoding; more details
   have to be specified.  In Emacs, each ISO2022 coding system
   variant has the following specifications:
        1. Initial designation to G0 through G3.
        2. Allows short-form designation?
        3. ASCII should be designated to G0 before control characters?
        4. ASCII should be designated to G0 at end of line?
        5. 7-bit environment or 8-bit environment?
        6. Use locking-shift?
        7. Use single-shift?
   And the following two are only for Japanese:
        8. Use JISX0201-Roman in place of ASCII?
        9. Use JISX0208-1978 in place of JISX0208-1983?
   These specifications are encoded in `coding->flags' as flag bits
   defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
   details.
*/
2209
/* Produce codes (escape sequence) for designating CHARSET to graphic
   register REG at DST, and increment DST.  If <final-char> of CHARSET
   is '@', 'A', or 'B', REG is 0, and the coding system CODING allows
   it (CODING_FLAG_ISO_SHORT_FORM), a 2-dimensional 94-character set
   is designated by the short form, i.e. without the intermediate
   character.

   When the revision number registered in CODING for CHARSET is less
   than 255, the designation is preceded by `ESC & <'@' + revision>'.

   Afterwards CHARSET is recorded as the charset currently designated
   to REG.  The macro writes through the variable `dst' of the
   expansion site.  CHARSET, REG, and CODING are evaluated more than
   once, so they must be free of side effects.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);        \
    const char *intermediate_char_94 = "()*+";                          \
    const char *intermediate_char_96 = ",-./";                          \
    int revision = CODING_SPEC_ISO_REVISION_NUMBER (coding, charset);   \
                                                                        \
    if (revision < 255)                                                 \
      {                                                                 \
        *dst++ = ISO_CODE_ESC;                                          \
        *dst++ = '&';                                                   \
        *dst++ = '@' + revision;                                        \
      }                                                                 \
    *dst++ = ISO_CODE_ESC;                                              \
    if (CHARSET_DIMENSION (charset) == 1)                               \
      {                                                                 \
        if (CHARSET_CHARS (charset) == 94)                              \
          *dst++ = (unsigned char) (intermediate_char_94[(reg)]);       \
        else                                                            \
          *dst++ = (unsigned char) (intermediate_char_96[(reg)]);       \
      }                                                                 \
    else                                                                \
      {                                                                 \
        *dst++ = '$';                                                   \
        if (CHARSET_CHARS (charset) == 94)                              \
          {                                                             \
            /* The intermediate character is omitted only in the       \
               short form.  */                                          \
            if (! ((coding)->flags & CODING_FLAG_ISO_SHORT_FORM)        \
                || (reg) != 0                                           \
                || final_char < '@' || final_char > 'B')                \
              *dst++ = (unsigned char) (intermediate_char_94[(reg)]);   \
          }                                                             \
        else                                                            \
          *dst++ = (unsigned char) (intermediate_char_96[(reg)]);       \
      }                                                                 \
    *dst++ = final_char;                                                \
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = (charset);              \
  } while (0)
2252
/* The following two macros produce codes for the ISO2022 single-shift
   functions single-shift-2 and single-shift-3.  When
   CODING_FLAG_ISO_SEVEN_BITS is set in `coding->flags' the escape
   sequence (ESC N or ESC O) is produced, otherwise the control
   character (ISO_CODE_SS2 or ISO_CODE_SS3).  In both cases the
   single-shifting state of `coding' is turned on.  The macros use
   the variables `coding' and `dst' of the expansion site.  */

#define ENCODE_SINGLE_SHIFT_2                                   \
  do {                                                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      *dst++ = ISO_CODE_SS2;                                    \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'N';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)

#define ENCODE_SINGLE_SHIFT_3                                   \
  do {                                                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      *dst++ = ISO_CODE_SS3;                                    \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'O';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)
2274
/* The following four macros produce codes (control character or
   escape sequence) for the ISO2022 locking-shift functions shift-in,
   shift-out, locking-shift-2, and locking-shift-3.  Each one records
   in `coding' that graphic register 0, 1, 2, or 3 respectively is now
   invoked to graphic plane 0, and writes the code through `dst'.  */

/* Shift-in: ISO_CODE_SI.  */
#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;         \
    *dst++ = ISO_CODE_SI;                               \
  } while (0)

/* Shift-out: ISO_CODE_SO.  */
#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;         \
    *dst++ = ISO_CODE_SO;                               \
  } while (0)

/* Locking-shift-2: ESC n.  */
#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;         \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'n';                                       \
  } while (0)

/* Locking-shift-3: ESC o.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;         \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'o';                                       \
  } while (0)
2302
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   The macro uses the variables `coding' and `dst' of the expansion
   site.  The byte is produced directly in three cases:
     - a single-shift is pending: C1 is written with the 8th bit
       cleared if CODING_FLAG_ISO_SEVEN_BITS is set, or with the 8th
       bit set otherwise, and the single-shifting state is reset;
     - CHARSET is invoked to graphic plane 0: C1 with the 8th bit
       cleared;
     - CHARSET is invoked to graphic plane 1: C1 with the 8th bit set.
   Otherwise encode_invocation_designation is called and the checks
   are repeated.  CHARSET and C1 may be evaluated more than once.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          *dst++ = (c1) & 0x7F;                                         \
        else                                                            \
          *dst++ = (c1) | 0x80;                                         \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))    \
      {                                                                 \
        *dst++ = (c1) & 0x7F;                                           \
        break;                                                          \
      }                                                                 \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))    \
      {                                                                 \
        *dst++ = (c1) | 0x80;                                           \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst);       \
  } while (1)
2335
/* Produce codes for a DIMENSION2 character whose character set is
   CHARSET and whose position-codes are C1 and C2.  Designation and
   invocation codes are also produced in advance if necessary.

   The macro uses the variables `coding' and `dst' of the expansion
   site.  The two bytes are produced directly in three cases:
     - a single-shift is pending: C1 and C2 are written with the 8th
       bit cleared if CODING_FLAG_ISO_SEVEN_BITS is set, or with the
       8th bit set otherwise, and the single-shifting state is reset;
     - CHARSET is invoked to graphic plane 0: both with the 8th bit
       cleared;
     - CHARSET is invoked to graphic plane 1: both with the 8th bit
       set.
   Otherwise encode_invocation_designation is called and the checks
   are repeated.  CHARSET, C1, and C2 may be evaluated more than
   once.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          *dst++ = (c1) & 0x7F, *dst++ = (c2) & 0x7F;                   \
        else                                                            \
          *dst++ = (c1) | 0x80, *dst++ = (c2) | 0x80;                   \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))    \
      {                                                                 \
        *dst++ = (c1) & 0x7F, *dst++ = (c2) & 0x7F;                     \
        break;                                                          \
      }                                                                 \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))    \
      {                                                                 \
        *dst++ = (c1) | 0x80, *dst++ = (c2) | 0x80;                     \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation (charset, coding, dst);       \
  } while (1)
2368
/* Produce codes for character C at DST.  C is split into its charset
   and position codes by SPLIT_CHAR.

   If the charset is not defined, the position codes are written as
   they are (the second one only when it is non-negative).  Otherwise
   the character is passed to ENCODE_ISO_CHARACTER_DIMENSION1 or
   ENCODE_ISO_CHARACTER_DIMENSION2 according to the dimension of the
   charset, after these substitutions:
     - ASCII is replaced by charset_latin_jisx0201 when
       CODING_FLAG_ISO_USE_ROMAN is set;
     - charset_jisx0208 is replaced by charset_jisx0208_1978 when
       CODING_FLAG_ISO_USE_OLDJIS is set.
   The macro uses the variables `coding' and `dst' of the expansion
   site.  */

#define ENCODE_ISO_CHARACTER(c)                                         \
  do {                                                                  \
    int charset, c1, c2;                                                \
                                                                        \
    SPLIT_CHAR (c, charset, c1, c2);                                    \
    if (! CHARSET_DEFINED_P (charset))                                  \
      {                                                                 \
        *dst++ = c1;                                                    \
        if (c2 >= 0)                                                    \
          *dst++ = c2;                                                  \
      }                                                                 \
    else if (CHARSET_DIMENSION (charset) == 1)                          \
      {                                                                 \
        if ((coding->flags & CODING_FLAG_ISO_USE_ROMAN)                 \
            && charset == CHARSET_ASCII)                                \
          charset = charset_latin_jisx0201;                             \
        ENCODE_ISO_CHARACTER_DIMENSION1 (charset, c1);                  \
      }                                                                 \
    else                                                                \
      {                                                                 \
        if ((coding->flags & CODING_FLAG_ISO_USE_OLDJIS)                \
            && charset == charset_jisx0208)                             \
          charset = charset_jisx0208_1978;                              \
        ENCODE_ISO_CHARACTER_DIMENSION2 (charset, c1, c2);              \
      }                                                                 \
  } while (0)
2398
2399
/* Instead of encoding character C, encode
   CODING_REPLACEMENT_CHARACTER in its place: twice when the width of
   C's charset is greater than 1, once otherwise.  */

#define ENCODE_UNSAFE_CHARACTER(c)                                      \
  do {                                                                  \
    int unsafe_char_repeat_                                             \
      = CHARSET_WIDTH (CHAR_CHARSET (c)) > 1 ? 2 : 1;                   \
                                                                        \
    while (unsafe_char_repeat_-- > 0)                                   \
      ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER);              \
  } while (0)
2408
2409
2410 /* Produce designation and invocation codes at a place pointed by DST
2411    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
2412    Return new DST.  */
2413
2414 unsigned char *
2415 encode_invocation_designation (charset, coding, dst)
2416      int charset;
2417      struct coding_system *coding;
2418      unsigned char *dst;
2419 {
2420   int reg;                      /* graphic register number */
2421
2422   /* At first, check designations.  */
2423   for (reg = 0; reg < 4; reg++)
2424     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
2425       break;
2426
2427   if (reg >= 4)
2428     {
2429       /* CHARSET is not yet designated to any graphic registers.  */
2430       /* At first check the requested designation.  */
2431       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2432       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
2433         /* Since CHARSET requests no special designation, designate it
2434            to graphic register 0.  */
2435         reg = 0;
2436
2437       ENCODE_DESIGNATION (charset, reg, coding);
2438     }
2439
2440   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
2441       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
2442     {
2443       /* Since the graphic register REG is not invoked to any graphic
2444          planes, invoke it to graphic plane 0.  */
2445       switch (reg)
2446         {
2447         case 0:                 /* graphic register 0 */
2448           ENCODE_SHIFT_IN;
2449           break;
2450
2451         case 1:                 /* graphic register 1 */
2452           ENCODE_SHIFT_OUT;
2453           break;
2454
2455         case 2:                 /* graphic register 2 */
2456           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2457             ENCODE_SINGLE_SHIFT_2;
2458           else
2459             ENCODE_LOCKING_SHIFT_2;
2460           break;
2461
2462         case 3:                 /* graphic register 3 */
2463           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2464             ENCODE_SINGLE_SHIFT_3;
2465           else
2466             ENCODE_LOCKING_SHIFT_3;
2467           break;
2468         }
2469     }
2470
2471   return dst;
2472 }
2473
/* Produce 2-byte codes for encoded composition rule RULE.  RULE is
   split into two values by COMPOSITION_DECODE_RULE; the first byte
   is 32 + 81 plus the first value, the second byte is 32 plus the
   second value.  DST is advanced by two.  */

#define ENCODE_COMPOSITION_RULE(rule)                   \
  do {                                                  \
    int gref, nref;                                     \
                                                        \
    COMPOSITION_DECODE_RULE (rule, gref, nref);         \
    dst[0] = 32 + 81 + gref;                            \
    dst[1] = 32 + nref;                                 \
    dst += 2;                                           \
  } while (0)
2483
/* Produce codes for indicating the start of a composition sequence
   (ESC 0, ESC 3, or ESC 4).  DATA points to an array of integers
   which specify information about the composition.  See the comment
   in coding.h for the format of DATA.

   DATA[3] is stored in CODING->composing.  ESC 0 is produced for
   COMPOSITION_RELATIVE, ESC 3 for COMPOSITION_WITH_ALTCHARS, and
   ESC 4 for anything else.  In the latter two cases
   CODING->cmp_data_index is set to CODING->cmp_data_start + 4 and
   CODING->composition_rule_follows is cleared.  The macro writes
   through the variable `dst' of the expansion site; CODING is
   evaluated more than once.  */

#define ENCODE_COMPOSITION_START(coding, data)                          \
  do {                                                                  \
    (coding)->composing = (data)[3];                                    \
    *dst++ = ISO_CODE_ESC;                                              \
    if ((coding)->composing == COMPOSITION_RELATIVE)                    \
      *dst++ = '0';                                                     \
    else                                                                \
      {                                                                 \
        *dst++ = ((coding)->composing == COMPOSITION_WITH_ALTCHARS      \
                  ? '3' : '4');                                         \
        (coding)->cmp_data_index = (coding)->cmp_data_start + 4;        \
        (coding)->composition_rule_follows = 0;                         \
      }                                                                 \
  } while (0)
2503
/* Produce codes for indicating the end of the current composition
   (ESC 1).  CODING->cmp_data_start is advanced by DATA[0] and
   CODING->composing is set to COMPOSITION_NO.  If that consumes all
   the data used in the current composition data block and a next
   block exists, CODING moves on to the start of the next block.  The
   macro writes through the variable `dst' of the expansion site;
   CODING is evaluated more than once.  */

#define ENCODE_COMPOSITION_END(coding, data)                            \
  do {                                                                  \
    *dst++ = ISO_CODE_ESC;                                              \
    *dst++ = '1';                                                       \
    (coding)->cmp_data_start += (data)[0];                              \
    (coding)->composing = COMPOSITION_NO;                               \
    if ((coding)->cmp_data_start == (coding)->cmp_data->used            \
        && (coding)->cmp_data->next)                                    \
      {                                                                 \
        (coding)->cmp_data = (coding)->cmp_data->next;                  \
        (coding)->cmp_data_start = 0;                                   \
      }                                                                 \
  } while (0)
2519
/* Produce composition start sequence ESC 0.  Here, this sequence
   doesn't mean the start of a new composition but means that we have
   just produced components (alternate chars and composition rules) of
   the composition and the actual text follows in SRC.
   CODING->composing is set to COMPOSITION_RELATIVE.  The macro writes
   through the variable `dst' of the expansion site.  */

#define ENCODE_COMPOSITION_FAKE_START(coding)           \
  do {                                                  \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = '0';                                       \
    (coding)->composing = COMPOSITION_RELATIVE;         \
  } while (0)
2531
/* The following three macros produce codes for indicating direction
   of text.  They use the variables `coding' and `dst' of the
   expansion site, and each of them expands to a single statement.  */

/* Produce a control sequence introducer: `ESC [' when
   CODING_FLAG_ISO_SEVEN_BITS is set in `coding->flags', the single
   byte ISO_CODE_CSI otherwise.  The flag is tested as a bit, because
   `coding->flags' may carry other flag bits as well.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
  do {                                                  \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
      *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
    else                                                \
      *dst++ = ISO_CODE_CSI;                            \
  } while (0)

/* Produce `CSI 2 ]', the start of right-to-left direction.  */
#define ENCODE_DIRECTION_R2L                            \
  do {                                                  \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;                 \
    *dst++ = '2';                                       \
    *dst++ = ']';                                       \
  } while (0)

/* Produce `CSI 0 ]', the end of the current direction.  */
#define ENCODE_DIRECTION_L2R                            \
  do {                                                  \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;                 \
    *dst++ = '0';                                       \
    *dst++ = ']';                                       \
  } while (0)
2547
/* Produce codes for designation and invocation to reset the graphic
   planes and registers to initial state: a shift-in if something
   other than graphic register 0 is invoked to graphic plane 0, then
   a designation for every graphic register whose initial designation
   is non-negative and differs from its current one.  The macro uses
   the variables `coding' and `dst' of the expansion site.  */
#define ENCODE_RESET_PLANE_AND_REGISTER                                     \
  do {                                                                      \
    int reg = 0;                                                            \
                                                                            \
    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
      ENCODE_SHIFT_IN;                                                      \
    while (reg < 4)                                                         \
      {                                                                     \
        if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0          \
            && (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)           \
                != CODING_SPEC_ISO_DESIGNATION (coding, reg)))              \
          {                                                                 \
            ENCODE_DESIGNATION                                              \
              (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg),           \
               reg, coding);                                                \
          }                                                                 \
        reg++;                                                              \
      }                                                                     \
  } while (0)
2562
2563 /* Produce designation sequences of charsets in the line started from
2564    SRC to a place pointed by DST, and return updated DST.
2565
2566    If the current block ends before any end-of-line, we may fail to
2567    find all the necessary designations.  */
2568
2569 static unsigned char *
2570 encode_designation_at_bol (coding, translation_table, src, src_end, dst)
2571      struct coding_system *coding;
2572      Lisp_Object translation_table;
2573      unsigned char *src, *src_end, *dst;
2574 {
2575   int charset, c, found = 0, reg;
2576   /* Table of charsets to be designated to each graphic register.  */
2577   int r[4];
2578
2579   for (reg = 0; reg < 4; reg++)
2580     r[reg] = -1;
2581
2582   while (found < 4)
2583     {
2584       ONE_MORE_CHAR (c);
2585       if (c == '\n')
2586         break;
2587
2588       charset = CHAR_CHARSET (c);
2589       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2590       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
2591         {
2592           found++;
2593           r[reg] = charset;
2594         }
2595     }
2596
2597  label_end_of_loop:
2598   if (found)
2599     {
2600       for (reg = 0; reg < 4; reg++)
2601         if (r[reg] >= 0
2602             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
2603           ENCODE_DESIGNATION (r[reg], reg, coding);
2604     }
2605
2606   return dst;
2607 }
2608
2609 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
2610
2611 static void
2612 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
2613      struct coding_system *coding;
2614      unsigned char *source, *destination;
2615      int src_bytes, dst_bytes;
2616 {
2617   unsigned char *src = source;
2618   unsigned char *src_end = source + src_bytes;
2619   unsigned char *dst = destination;
2620   unsigned char *dst_end = destination + dst_bytes;
2621   /* Since the maximum bytes produced by each loop is 20, we subtract 19
2622      from DST_END to assure overflow checking is necessary only at the
2623      head of loop.  */
2624   unsigned char *adjusted_dst_end = dst_end - 19;
2625   /* SRC_BASE remembers the start position in source in each loop.
2626      The loop will be exited when there's not enough source text to
2627      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2628      there's not enough destination area to produce encoded codes
2629      (within macro EMIT_BYTES).  */
2630   unsigned char *src_base;
2631   int c;
2632   Lisp_Object translation_table;
2633   Lisp_Object safe_chars;
2634
2635   if (coding->flags & CODING_FLAG_ISO_SAFE)
2636     coding->mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
2637
2638   safe_chars = coding_safe_chars (coding->symbol);
2639
2640   if (NILP (Venable_character_translation))
2641     translation_table = Qnil;
2642   else
2643     {
2644       translation_table = coding->translation_table_for_encode;
2645       if (NILP (translation_table))
2646         translation_table = Vstandard_translation_table_for_encode;
2647     }
2648
2649   coding->consumed_char = 0;
2650   coding->errors = 0;
2651   while (1)
2652     {
2653       src_base = src;
2654
2655       if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
2656         {
2657           coding->result = CODING_FINISH_INSUFFICIENT_DST;
2658           break;
2659         }
2660
2661       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
2662           && CODING_SPEC_ISO_BOL (coding))
2663         {
2664           /* We have to produce designation sequences if any now.  */
2665           dst = encode_designation_at_bol (coding, translation_table,
2666                                            src, src_end, dst);
2667           CODING_SPEC_ISO_BOL (coding) = 0;
2668         }
2669
2670       /* Check composition start and end.  */
2671       if (coding->composing != COMPOSITION_DISABLED
2672           && coding->cmp_data_start < coding->cmp_data->used)
2673         {
2674           struct composition_data *cmp_data = coding->cmp_data;
2675           int *data = cmp_data->data + coding->cmp_data_start;
2676           int this_pos = cmp_data->char_offset + coding->consumed_char;
2677
2678           if (coding->composing == COMPOSITION_RELATIVE)
2679             {
2680               if (this_pos == data[2])
2681                 {
2682                   ENCODE_COMPOSITION_END (coding, data);
2683                   cmp_data = coding->cmp_data;
2684                   data = cmp_data->data + coding->cmp_data_start;
2685                 }
2686             }
2687           else if (COMPOSING_P (coding))
2688             {
2689               /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHAR  */
2690               if (coding->cmp_data_index == coding->cmp_data_start + data[0])
2691                 /* We have consumed components of the composition.
2692                    What follows in SRC is the composition's base
2693                    text.  */
2694                 ENCODE_COMPOSITION_FAKE_START (coding);
2695               else
2696                 {
2697                   int c = cmp_data->data[coding->cmp_data_index++];
2698                   if (coding->composition_rule_follows)
2699                     {
2700                       ENCODE_COMPOSITION_RULE (c);
2701                       coding->composition_rule_follows = 0;
2702                     }
2703                   else
2704                     {
2705                       if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
2706                           && ! CODING_SAFE_CHAR_P (safe_chars, c))
2707                         ENCODE_UNSAFE_CHARACTER (c);
2708                       else
2709                         ENCODE_ISO_CHARACTER (c);
2710                       if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
2711                         coding->composition_rule_follows = 1;
2712                     }
2713                   continue;
2714                 }
2715             }
2716           if (!COMPOSING_P (coding))
2717             {
2718               if (this_pos == data[1])
2719                 {
2720                   ENCODE_COMPOSITION_START (coding, data);
2721                   continue;
2722                 }
2723             }
2724         }
2725
2726       ONE_MORE_CHAR (c);
2727
2728       /* Now encode the character C.  */
2729       if (c < 0x20 || c == 0x7F)
2730         {
2731           if (c == '\r')
2732             {
2733               if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2734                 {
2735                   if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2736                     ENCODE_RESET_PLANE_AND_REGISTER;
2737                   *dst++ = c;
2738                   continue;
2739                 }
2740               /* fall down to treat '\r' as '\n' ...  */
2741               c = '\n';
2742             }
2743           if (c == '\n')
2744             {
2745               if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
2746                 ENCODE_RESET_PLANE_AND_REGISTER;
2747               if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
2748                 bcopy (coding->spec.iso2022.initial_designation,
2749                        coding->spec.iso2022.current_designation,
2750                        sizeof coding->spec.iso2022.initial_designation);
2751               if (coding->eol_type == CODING_EOL_LF
2752                   || coding->eol_type == CODING_EOL_UNDECIDED)
2753                 *dst++ = ISO_CODE_LF;
2754               else if (coding->eol_type == CODING_EOL_CRLF)
2755                 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
2756               else
2757                 *dst++ = ISO_CODE_CR;
2758               CODING_SPEC_ISO_BOL (coding) = 1;
2759             }
2760           else
2761             {
2762               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2763                 ENCODE_RESET_PLANE_AND_REGISTER;
2764               *dst++ = c;
2765             }
2766         }
2767       else if (ASCII_BYTE_P (c))
2768         ENCODE_ISO_CHARACTER (c);
2769       else if (SINGLE_BYTE_CHAR_P (c))
2770         {
2771           *dst++ = c;
2772           coding->errors++;
2773         }
2774       else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
2775                && ! CODING_SAFE_CHAR_P (safe_chars, c))
2776         ENCODE_UNSAFE_CHARACTER (c);
2777       else
2778         ENCODE_ISO_CHARACTER (c);
2779
2780       coding->consumed_char++;
2781     }
2782
2783  label_end_of_loop:
2784   coding->consumed = src_base - source;
2785   coding->produced = coding->produced_char = dst - destination;
2786 }
2787
2788 \f
2789 /*** 4. SJIS and BIG5 handlers ***/
2790
2791 /* Although SJIS and BIG5 are not ISO coding systems, they are used
2792    quite widely.  So, for the moment, Emacs supports them in the bare
2793    C code.  But, in the future, they may be supported only by CCL.  */
2794
2795 /* SJIS is a coding system encoding three character sets: ASCII, right
2796    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
2797    as is.  A character of charset katakana-jisx0201 is encoded by
2798    "position-code + 0x80".  A character of charset japanese-jisx0208
2799    is encoded in 2-byte but two position-codes are divided and shifted
2800    so that it fits in the range below.
2801
2802    --- CODE RANGE of SJIS ---
2803    (character set)      (range)
2804    ASCII                0x00 .. 0x7F
2805    KATAKANA-JISX0201    0xA1 .. 0xDF
2806    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
2807             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
2808    -------------------------------
2809
2810 */
2811
2812 /* BIG5 is a coding system encoding two character sets: ASCII and
2813    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
2814    character set and is encoded in two bytes.
2815
2816    --- CODE RANGE of BIG5 ---
2817    (character set)      (range)
2818    ASCII                0x00 .. 0x7F
2819    Big5 (1st byte)      0xA1 .. 0xFE
2820         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2821    --------------------------
2822
   Since the number of characters in Big5 is larger than the maximum
   number of characters in one Emacs charset (96x96), it can't be
   handled as one charset.  So, in Emacs, Big5 is divided into two:
   `charset-big5-1' and `charset-big5-2'.  Both are DIMENSION2 and
   CHARS94.  The former contains frequently used characters and the
   latter contains less frequently used characters.  */
2829
/* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
   are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
   C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
   format.  CHARSET is `charset_big5_1' or `charset_big5_2'.

   Each input argument is evaluated exactly once, and all inputs are
   read before any output is stored.  So arbitrary expressions may be
   passed as inputs, and an output may name the same variable as an
   input (as in `DECODE_BIG5 (c1, c2, charset, c1, c2)').  */

/* Number of Big5 characters which have the same code in 1st byte:
   the 2nd byte takes 0xFF - 0xA1 values in 0xA1..0xFE and
   0x7F - 0x40 values in 0x40..0x7E.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* Decode the BIG5 byte pair B1, B2.  Store the charset in CHARSET
   and the two position-codes in C1 and C2.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    int big5_b1 = (b1);                                                 \
    int big5_b2 = (b2);                                                 \
    /* Linear index of the code, counted from B1 = 0xA1, B2 = 0x40.  */ \
    unsigned int temp                                                   \
      = ((big5_b1 - 0xA1) * BIG5_SAME_ROW                               \
         + big5_b2 - (big5_b2 < 0x7F ? 0x40 : 0x62));                   \
    if (big5_b1 < 0xC9)                                                 \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        /* Make the index relative to the first code of big5-2.  */     \
        temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
      }                                                                 \
    (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
    (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
  } while (0)

/* Encode the character of CHARSET whose position-codes are C1 and
   C2.  Store the BIG5 byte pair in B1 and B2.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);                  \
    unsigned int big5_low;                                              \
    if ((charset) == charset_big5_2)                                    \
      temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
    big5_low = temp % BIG5_SAME_ROW;                                    \
    (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
    /* The first 0x3F codes of a row map to 0x40..0x7E, the rest to     \
       0xA1..0xFE.  */                                                  \
    (b2) = big5_low + (big5_low < 0x3F ? 0x40 : 0x62);                  \
  } while (0)
2862
2863 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2864    Check if a text is encoded in SJIS.  If it is, return
2865    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2866
2867 static int
2868 detect_coding_sjis (src, src_end, multibytep)
2869      unsigned char *src, *src_end;
2870      int multibytep;
2871 {
2872   int c;
2873   /* Dummy for ONE_MORE_BYTE.  */
2874   struct coding_system dummy_coding;
2875   struct coding_system *coding = &dummy_coding;
2876
2877   while (1)
2878     {
2879       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2880       if (c < 0x80)
2881         continue;
2882       if (c == 0x80 || c == 0xA0 || c > 0xEF)
2883         return 0;
2884       if (c <= 0x9F || c >= 0xE0)
2885         {
2886           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2887           if (c < 0x40 || c == 0x7F || c > 0xFC)
2888             return 0;
2889         }
2890     }
2891  label_end_of_loop:
2892   return CODING_CATEGORY_MASK_SJIS;
2893 }
2894
2895 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2896    Check if a text is encoded in BIG5.  If it is, return
2897    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2898
2899 static int
2900 detect_coding_big5 (src, src_end, multibytep)
2901      unsigned char *src, *src_end;
2902      int multibytep;
2903 {
2904   int c;
2905   /* Dummy for ONE_MORE_BYTE.  */
2906   struct coding_system dummy_coding;
2907   struct coding_system *coding = &dummy_coding;
2908
2909   while (1)
2910     {
2911       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2912       if (c < 0x80)
2913         continue;
2914       if (c < 0xA1 || c > 0xFE)
2915         return 0;
2916       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2917       if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE)
2918         return 0;
2919     }
2920  label_end_of_loop:
2921   return CODING_CATEGORY_MASK_BIG5;
2922 }
2923
2924 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2925    Check if a text is encoded in UTF-8.  If it is, return
2926    CODING_CATEGORY_MASK_UTF_8, else return 0.  */
2927
/* Nonzero if the bits of C selected by MASK are equal to PREFIX.  */
#define UTF_8_PREFIX_P(c, mask, prefix) (((c) & (mask)) == (prefix))

/* Predicates classifying one octet C by its high-order bits.  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
#define UTF_8_EXTRA_OCTET_P(c)     UTF_8_PREFIX_P (c, 0xC0, 0x80)
#define UTF_8_2_OCTET_LEADING_P(c) UTF_8_PREFIX_P (c, 0xE0, 0xC0)
#define UTF_8_3_OCTET_LEADING_P(c) UTF_8_PREFIX_P (c, 0xF0, 0xE0)
#define UTF_8_4_OCTET_LEADING_P(c) UTF_8_PREFIX_P (c, 0xF8, 0xF0)
#define UTF_8_5_OCTET_LEADING_P(c) UTF_8_PREFIX_P (c, 0xFC, 0xF8)
#define UTF_8_6_OCTET_LEADING_P(c) UTF_8_PREFIX_P (c, 0xFE, 0xFC)
2935
2936 static int
2937 detect_coding_utf_8 (src, src_end, multibytep)
2938      unsigned char *src, *src_end;
2939      int multibytep;
2940 {
2941   unsigned char c;
2942   int seq_maybe_bytes;
2943   /* Dummy for ONE_MORE_BYTE.  */
2944   struct coding_system dummy_coding;
2945   struct coding_system *coding = &dummy_coding;
2946
2947   while (1)
2948     {
2949       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2950       if (UTF_8_1_OCTET_P (c))
2951         continue;
2952       else if (UTF_8_2_OCTET_LEADING_P (c))
2953         seq_maybe_bytes = 1;
2954       else if (UTF_8_3_OCTET_LEADING_P (c))
2955         seq_maybe_bytes = 2;
2956       else if (UTF_8_4_OCTET_LEADING_P (c))
2957         seq_maybe_bytes = 3;
2958       else if (UTF_8_5_OCTET_LEADING_P (c))
2959         seq_maybe_bytes = 4;
2960       else if (UTF_8_6_OCTET_LEADING_P (c))
2961         seq_maybe_bytes = 5;
2962       else
2963         return 0;
2964
2965       do
2966         {
2967           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2968           if (!UTF_8_EXTRA_OCTET_P (c))
2969             return 0;
2970           seq_maybe_bytes--;
2971         }
2972       while (seq_maybe_bytes > 0);
2973     }
2974
2975  label_end_of_loop:
2976   return CODING_CATEGORY_MASK_UTF_8;
2977 }
2978
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
   Check if a text is encoded in UTF-16 by looking at its first two
   bytes.  If they are the byte order mark of UTF-16 Big Endian (0xFE
   0xFF), return CODING_CATEGORY_MASK_UTF_16_BE; if they are that of
   UTF-16 Little Endian (0xFF 0xFE), return
   CODING_CATEGORY_MASK_UTF_16_LE; else return 0.  */
2984
/* Nonzero if the 16-bit value VAL is 0xFFFE or 0xFFFF.  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF))

/* Nonzero if the 16-bit value VAL is a high surrogate, i.e. is in
   0xD800..0xDBFF.  All of the top six bits must be compared; testing
   only the bits of 0xD800 would also accept low surrogates and
   values such as 0xF800.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if the 16-bit value VAL is a low surrogate, i.e. is in
   0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
2994
2995 static int
2996 detect_coding_utf_16 (src, src_end, multibytep)
2997      unsigned char *src, *src_end;
2998      int multibytep;
2999 {
3000   unsigned char c1, c2;
3001   /* Dummy for ONE_MORE_BYTE_CHECK_MULTIBYTE.  */
3002   struct coding_system dummy_coding;
3003   struct coding_system *coding = &dummy_coding;
3004
3005   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
3006   ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep);
3007
3008   if ((c1 == 0xFF) && (c2 == 0xFE))
3009     return CODING_CATEGORY_MASK_UTF_16_LE;
3010   else if ((c1 == 0xFE) && (c2 == 0xFF))
3011     return CODING_CATEGORY_MASK_UTF_16_BE;
3012
3013  label_end_of_loop:
3014   return 0;
3015 }
3016
3017 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3018    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
3019
3020 static void
3021 decode_coding_sjis_big5 (coding, source, destination,
3022                          src_bytes, dst_bytes, sjis_p)
3023      struct coding_system *coding;
3024      unsigned char *source, *destination;
3025      int src_bytes, dst_bytes;
3026      int sjis_p;
3027 {
3028   unsigned char *src = source;
3029   unsigned char *src_end = source + src_bytes;
3030   unsigned char *dst = destination;
3031   unsigned char *dst_end = destination + dst_bytes;
3032   /* SRC_BASE remembers the start position in source in each loop.
3033      The loop will be exited when there's not enough source code
3034      (within macro ONE_MORE_BYTE), or when there's not enough
3035      destination area to produce a character (within macro
3036      EMIT_CHAR).  */
3037   unsigned char *src_base;
3038   Lisp_Object translation_table;
3039
3040   if (NILP (Venable_character_translation))
3041     translation_table = Qnil;
3042   else
3043     {
3044       translation_table = coding->translation_table_for_decode;
3045       if (NILP (translation_table))
3046         translation_table = Vstandard_translation_table_for_decode;
3047     }
3048
3049   coding->produced_char = 0;
3050   while (1)
3051     {
3052       int c, charset, c1, c2;
3053
3054       src_base = src;
3055       ONE_MORE_BYTE (c1);
3056
3057       if (c1 < 0x80)
3058         {
3059           charset = CHARSET_ASCII;
3060           if (c1 < 0x20)
3061             {
3062               if (c1 == '\r')
3063                 {
3064                   if (coding->eol_type == CODING_EOL_CRLF)
3065                     {
3066                       ONE_MORE_BYTE (c2);
3067                       if (c2 == '\n')
3068                         c1 = c2;
3069                       else
3070                         /* To process C2 again, SRC is subtracted by 1.  */
3071                         src--;
3072                     }
3073                   else if (coding->eol_type == CODING_EOL_CR)
3074                     c1 = '\n';
3075                 }
3076               else if (c1 == '\n'
3077                        && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3078                        && (coding->eol_type == CODING_EOL_CR
3079                            || coding->eol_type == CODING_EOL_CRLF))
3080                 {
3081                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
3082                   goto label_end_of_loop;
3083                 }
3084             }
3085         }
3086       else
3087         {
3088           if (sjis_p)
3089             {
3090               if (c1 == 0x80 || c1 == 0xA0 || c1 > 0xEF)
3091                 goto label_invalid_code;
3092               if (c1 <= 0x9F || c1 >= 0xE0)
3093                 {
3094                   /* SJIS -> JISX0208 */
3095                   ONE_MORE_BYTE (c2);
3096                   if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
3097                     goto label_invalid_code;
3098                   DECODE_SJIS (c1, c2, c1, c2);
3099                   charset = charset_jisx0208;
3100                 }
3101               else
3102                 /* SJIS -> JISX0201-Kana */
3103                 charset = charset_katakana_jisx0201;
3104             }
3105           else
3106             {
3107               /* BIG5 -> Big5 */
3108               if (c1 < 0xA0 || c1 > 0xFE)
3109                 goto label_invalid_code;
3110               ONE_MORE_BYTE (c2);
3111               if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
3112                 goto label_invalid_code;
3113               DECODE_BIG5 (c1, c2, charset, c1, c2);
3114             }
3115         }
3116
3117       c = DECODE_ISO_CHARACTER (charset, c1, c2);
3118       EMIT_CHAR (c);
3119       continue;
3120
3121     label_invalid_code:
3122       coding->errors++;
3123       src = src_base;
3124       c = *src++;
3125       EMIT_CHAR (c);
3126     }
3127
3128  label_end_of_loop:
3129   coding->consumed = coding->consumed_char = src_base - source;
3130   coding->produced = dst - destination;
3131   return;
3132 }
3133
3134 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
3135    This function can encode charsets `ascii', `katakana-jisx0201',
3136    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
3137    are sure that all these charsets are registered as official charset
3138    (i.e. do not have extended leading-codes).  Characters of other
3139    charsets are produced without any encoding.  If SJIS_P is 1, encode
3140    SJIS text, else encode BIG5 text.  */
3141
3142 static void
3143 encode_coding_sjis_big5 (coding, source, destination,
3144                          src_bytes, dst_bytes, sjis_p)
3145      struct coding_system *coding;
3146      unsigned char *source, *destination;
3147      int src_bytes, dst_bytes;
3148      int sjis_p;
3149 {
3150   unsigned char *src = source;
3151   unsigned char *src_end = source + src_bytes;
3152   unsigned char *dst = destination;
3153   unsigned char *dst_end = destination + dst_bytes;
3154   /* SRC_BASE remembers the start position in source in each loop.
3155      The loop will be exited when there's not enough source text to
3156      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3157      there's not enough destination area to produce encoded codes
3158      (within macro EMIT_BYTES).  */
3159   unsigned char *src_base;
3160   Lisp_Object translation_table;
3161
3162   if (NILP (Venable_character_translation))
3163     translation_table = Qnil;
3164   else
3165     {
3166       translation_table = coding->translation_table_for_encode;
3167       if (NILP (translation_table))
3168         translation_table = Vstandard_translation_table_for_encode;
3169     }
3170
3171   while (1)
3172     {
3173       int c, charset, c1, c2;
3174
3175       src_base = src;
3176       ONE_MORE_CHAR (c);
3177
3178       /* Now encode the character C.  */
3179       if (SINGLE_BYTE_CHAR_P (c))
3180         {
3181           switch (c)
3182             {
3183             case '\r':
3184               if (!(coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
3185                 {
3186                   EMIT_ONE_BYTE (c);
3187                   break;
3188                 }
3189               c = '\n';
3190             case '\n':
3191               if (coding->eol_type == CODING_EOL_CRLF)
3192                 {
3193                   EMIT_TWO_BYTES ('\r', c);
3194                   break;
3195                 }
3196               else if (coding->eol_type == CODING_EOL_CR)
3197                 c = '\r';
3198             default:
3199               EMIT_ONE_BYTE (c);
3200             }
3201         }
3202       else
3203         {
3204           SPLIT_CHAR (c, charset, c1, c2);
3205           if (sjis_p)
3206             {
3207               if (charset == charset_jisx0208
3208                   || charset == charset_jisx0208_1978)
3209                 {
3210                   ENCODE_SJIS (c1, c2, c1, c2);
3211                   EMIT_TWO_BYTES (c1, c2);
3212                 }
3213               else if (charset == charset_katakana_jisx0201)
3214                 EMIT_ONE_BYTE (c1 | 0x80);
3215               else if (charset == charset_latin_jisx0201)
3216                 EMIT_ONE_BYTE (c1);
3217               else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
3218                 {
3219                   EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3220                   if (CHARSET_WIDTH (charset) > 1)
3221                     EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3222                 }
3223               else
3224                 /* There's no way other than producing the internal
3225                    codes as is.  */
3226                 EMIT_BYTES (src_base, src);
3227             }
3228           else
3229             {
3230               if (charset == charset_big5_1 || charset == charset_big5_2)
3231                 {
3232                   ENCODE_BIG5 (charset, c1, c2, c1, c2);
3233                   EMIT_TWO_BYTES (c1, c2);
3234                 }
3235               else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
3236                 {
3237                   EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3238                   if (CHARSET_WIDTH (charset) > 1)
3239                     EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3240                 }
3241               else
3242                 /* There's no way other than producing the internal
3243                    codes as is.  */
3244                 EMIT_BYTES (src_base, src);
3245             }
3246         }
3247       coding->consumed_char++;
3248     }
3249
3250  label_end_of_loop:
3251   coding->consumed = src_base - source;
3252   coding->produced = coding->produced_char = dst - destination;
3253 }
3254
3255 \f
3256 /*** 5. CCL handlers ***/
3257
3258 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3259    Check if a text is encoded in a coding system of which
3260    encoder/decoder are written in CCL program.  If it is, return
3261    CODING_CATEGORY_MASK_CCL, else return 0.  */
3262
3263 static int
3264 detect_coding_ccl (src, src_end, multibytep)
3265      unsigned char *src, *src_end;
3266      int multibytep;
3267 {
3268   unsigned char *valid;
3269   int c;
3270   /* Dummy for ONE_MORE_BYTE.  */
3271   struct coding_system dummy_coding;
3272   struct coding_system *coding = &dummy_coding;
3273
3274   /* No coding system is assigned to coding-category-ccl.  */
3275   if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
3276     return 0;
3277
3278   valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
3279   while (1)
3280     {
3281       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
3282       if (! valid[c])
3283         return 0;
3284     }
3285  label_end_of_loop:
3286   return CODING_CATEGORY_MASK_CCL;
3287 }
3288
3289 \f
3290 /*** 6. End-of-line handlers ***/
3291
3292 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3293
3294 static void
3295 decode_eol (coding, source, destination, src_bytes, dst_bytes)
3296      struct coding_system *coding;
3297      unsigned char *source, *destination;
3298      int src_bytes, dst_bytes;
3299 {
3300   unsigned char *src = source;
3301   unsigned char *dst = destination;
3302   unsigned char *src_end = src + src_bytes;
3303   unsigned char *dst_end = dst + dst_bytes;
3304   Lisp_Object translation_table;
3305   /* SRC_BASE remembers the start position in source in each loop.
3306      The loop will be exited when there's not enough source code
3307      (within macro ONE_MORE_BYTE), or when there's not enough
3308      destination area to produce a character (within macro
3309      EMIT_CHAR).  */
3310   unsigned char *src_base;
3311   int c;
3312
3313   translation_table = Qnil;
3314   switch (coding->eol_type)
3315     {
3316     case CODING_EOL_CRLF:
3317       while (1)
3318         {
3319           src_base = src;
3320           ONE_MORE_BYTE (c);
3321           if (c == '\r')
3322             {
3323               ONE_MORE_BYTE (c);
3324               if (c != '\n')
3325                 {
3326                   src--;
3327                   c = '\r';
3328                 }
3329             }
3330           else if (c == '\n'
3331                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
3332             {
3333               coding->result = CODING_FINISH_INCONSISTENT_EOL;
3334               goto label_end_of_loop;
3335             }
3336           EMIT_CHAR (c);
3337         }
3338       break;
3339
3340     case CODING_EOL_CR:
3341       while (1)
3342         {
3343           src_base = src;
3344           ONE_MORE_BYTE (c);
3345           if (c == '\n')
3346             {
3347               if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3348                 {
3349                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
3350                   goto label_end_of_loop;
3351                 }
3352             }
3353           else if (c == '\r')
3354             c = '\n';
3355           EMIT_CHAR (c);
3356         }
3357       break;
3358
3359     default:                    /* no need for EOL handling */
3360       while (1)
3361         {
3362           src_base = src;
3363           ONE_MORE_BYTE (c);
3364           EMIT_CHAR (c);
3365         }
3366     }
3367
3368  label_end_of_loop:
3369   coding->consumed = coding->consumed_char = src_base - source;
3370   coding->produced = dst - destination;
3371   return;
3372 }
3373
3374 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
3375    format of end-of-line according to `coding->eol_type'.  It also
3376    convert multibyte form 8-bit characters to unibyte if
3377    CODING->src_multibyte is nonzero.  If `coding->mode &
3378    CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
3379    also means end-of-line.  */
3380
3381 static void
3382 encode_eol (coding, source, destination, src_bytes, dst_bytes)
3383      struct coding_system *coding;
3384      const unsigned char *source;
3385      unsigned char *destination;
3386      int src_bytes, dst_bytes;
3387 {
3388   const unsigned char *src = source;
3389   unsigned char *dst = destination;
3390   const unsigned char *src_end = src + src_bytes;
3391   unsigned char *dst_end = dst + dst_bytes;
3392   Lisp_Object translation_table;
3393   /* SRC_BASE remembers the start position in source in each loop.
3394      The loop will be exited when there's not enough source text to
3395      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3396      there's not enough destination area to produce encoded codes
3397      (within macro EMIT_BYTES).  */
3398   const unsigned char *src_base;
3399   unsigned char *tmp;
3400   int c;
3401   int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
3402
3403   translation_table = Qnil;
3404   if (coding->src_multibyte
3405       && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL)
3406     {
3407       src_end--;
3408       src_bytes--;
3409       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
3410     }
3411
3412   if (coding->eol_type == CODING_EOL_CRLF)
3413     {
3414       while (src < src_end)
3415         {
3416           src_base = src;
3417           c = *src++;
3418           if (c >= 0x20)
3419             EMIT_ONE_BYTE (c);
3420           else if (c == '\n' || (c == '\r' && selective_display))
3421             EMIT_TWO_BYTES ('\r', '\n');
3422           else
3423             EMIT_ONE_BYTE (c);
3424         }
3425       src_base = src;
3426     label_end_of_loop:
3427       ;
3428     }
3429   else
3430     {
3431       if (!dst_bytes || src_bytes <= dst_bytes)
3432         {
3433           safe_bcopy (src, dst, src_bytes);
3434           src_base = src_end;
3435           dst += src_bytes;
3436         }
3437       else
3438         {
3439           if (coding->src_multibyte
3440               && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL)
3441             dst_bytes--;
3442           safe_bcopy (src, dst, dst_bytes);
3443           src_base = src + dst_bytes;
3444           dst = destination + dst_bytes;
3445           coding->result = CODING_FINISH_INSUFFICIENT_DST;
3446         }
3447       if (coding->eol_type == CODING_EOL_CR)
3448         {
3449           for (tmp = destination; tmp < dst; tmp++)
3450             if (*tmp == '\n') *tmp = '\r';
3451         }
3452       else if (selective_display)
3453         {
3454           for (tmp = destination; tmp < dst; tmp++)
3455             if (*tmp == '\r') *tmp = '\n';
3456         }
3457     }
3458   if (coding->src_multibyte)
3459     dst = destination + str_as_unibyte (destination, dst - destination);
3460
3461   coding->consumed = src_base - source;
3462   coding->produced = dst - destination;
3463   coding->produced_char = coding->produced;
3464 }
3465
3466 \f
3467 /*** 7. C library functions ***/
3468
3469 /* In Emacs Lisp, a coding system is represented by a Lisp symbol which
3470    has a property `coding-system'.  The value of this property is a
3471    vector of length 5 (called the coding-vector).  Among elements of
3472    this vector, the first (element[0]) and the fifth (element[4])
3473    carry important information for decoding/encoding.  Before
3474    decoding/encoding, this information should be set in fields of a
3475    structure of type `coding_system'.
3476
3477    The value of the property `coding-system' can be a symbol of another
3478    subsidiary coding-system.  In that case, Emacs gets coding-vector
3479    from that symbol.
3480
   `element[0]' contains information to be set in `coding->type'.  The
   possible values and their meanings are as follows:
3483
3484    0 -- coding_type_emacs_mule
3485    1 -- coding_type_sjis
3486    2 -- coding_type_iso2022
3487    3 -- coding_type_big5
3488    4 -- coding_type_ccl encoder/decoder written in CCL
3489    nil -- coding_type_no_conversion
3490    t -- coding_type_undecided (automatic conversion on decoding,
3491                                no-conversion on encoding)
3492
3493    `element[4]' contains information to be set in `coding->flags' and
3494    `coding->spec'.  The meaning varies by `coding->type'.
3495
3496    If `coding->type' is `coding_type_iso2022', element[4] is a vector
3497    of length 32 (of which the first 13 sub-elements are used now).
3498    Meanings of these sub-elements are:
3499
3500    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
3501         If the value is an integer of valid charset, the charset is
3502         assumed to be designated to graphic register N initially.
3503
        If the value is negative, its absolute value is a charset
        which reserves graphic register N, which means that the
        charset is not designated initially but should be designated
        to graphic register N just before encoding a character in
        that charset.
3508
3509         If the value is nil, graphic register N is never used on
3510         encoding.
3511
3512    sub-element[N] where N is 4 through 11: to be set in `coding->flags'
3513         Each value takes t or nil.  See the section ISO2022 of
3514         `coding.h' for more information.
3515
3516    If `coding->type' is `coding_type_big5', element[4] is t to denote
3517    BIG5-ETen or nil to denote BIG5-HKU.
3518
3519    If `coding->type' takes the other value, element[4] is ignored.
3520
3521    Emacs Lisp's coding systems also carry information about format of
3522    end-of-line in a value of property `eol-type'.  If the value is
3523    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
3524    means CODING_EOL_CR.  If it is not integer, it should be a vector
3525    of subsidiary coding systems of which property `eol-type' has one
3526    of the above values.
3527
3528 */
3529
3530 /* Extract information for decoding/encoding from the coding system
3531    symbol CODING_SYSTEM and set it in CODING.  If CODING_SYSTEM is
3532    invalid, CODING is set up so that no conversion is necessary and
3533    -1 is returned; otherwise 0 is returned.  */
3534
3535 int
3536 setup_coding_system (coding_system, coding)
3537      Lisp_Object coding_system;
3538      struct coding_system *coding;
3539 {
3540   Lisp_Object coding_spec, coding_type, eol_type, plist;
3541   Lisp_Object val;
3542
3543   /* At first, zero clear all members.  */
3544   bzero (coding, sizeof (struct coding_system));
3545
3546   /* Initialize some fields required for all kinds of coding systems.  */
3547   coding->symbol = coding_system;
3548   coding->heading_ascii = -1;
3549   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
3550   coding->composing = COMPOSITION_DISABLED;
3551   coding->cmp_data = NULL;
3552
3553   if (NILP (coding_system))
3554     goto label_invalid_coding_system;
3555
3556   coding_spec = Fget (coding_system, Qcoding_system);
3557   /* The spec must be a vector of 5 elements with a cons in element[3].  */
3558   if (!VECTORP (coding_spec)
3559       || XVECTOR (coding_spec)->size != 5
3560       || !CONSP (XVECTOR (coding_spec)->contents[3]))
3561     goto label_invalid_coding_system;
3562   /* A vector here means the end-of-line format must be detected.  */
3563   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
3564   if (VECTORP (eol_type))
3565     {
3566       coding->eol_type = CODING_EOL_UNDECIDED;
3567       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
3568     }
3569   else if (XFASTINT (eol_type) == 1)
3570     {
3571       coding->eol_type = CODING_EOL_CRLF;
3572       coding->common_flags
3573         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3574     }
3575   else if (XFASTINT (eol_type) == 2)
3576     {
3577       coding->eol_type = CODING_EOL_CR;
3578       coding->common_flags
3579         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3580     }
3581   else
3582     coding->eol_type = CODING_EOL_LF;
3583
3584   coding_type = XVECTOR (coding_spec)->contents[0];
3585   /* Try short cut: a symbol as the type needs no further setup.  */
3586   if (SYMBOLP (coding_type))
3587     {
3588       if (EQ (coding_type, Qt))
3589         {
3590           coding->type = coding_type_undecided;
3591           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
3592         }
3593       else
3594         coding->type = coding_type_no_conversion;
3595       /* Initialize this member.  Any thing other than
3596          CODING_CATEGORY_IDX_UTF_16_BE and
3597          CODING_CATEGORY_IDX_UTF_16_LE are ok because they have
3598          special treatment in detect_eol.  */
3599       coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
3600
3601       return 0;
3602     }
3603
3604   /* Get values of coding system properties: `post-read-conversion',
3605      `pre-write-conversion', `translation-table-for-decode',
3606      `translation-table-for-encode', and the one named Qcoding_category.  */
3607   plist = XVECTOR (coding_spec)->contents[3];
3608   /* Pre & post conversion functions should be disabled if
3609      inhibit_pre_post_conversion is nonzero.  This is the case that a
3610      code conversion function is called while those functions are running.  */
3611   if (! inhibit_pre_post_conversion)
3612     {
3613       coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
3614       coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
3615     }
3616   val = Fplist_get (plist, Qtranslation_table_for_decode);
3617   if (SYMBOLP (val))
3618     val = Fget (val, Qtranslation_table_for_decode);
3619   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
3620   val = Fplist_get (plist, Qtranslation_table_for_encode);
3621   if (SYMBOLP (val))
3622     val = Fget (val, Qtranslation_table_for_encode);
3623   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
3624   val = Fplist_get (plist, Qcoding_category);
3625   if (!NILP (val))
3626     {
3627       val = Fget (val, Qcoding_category_index);
3628       if (INTEGERP (val))
3629         coding->category_idx = XINT (val);
3630       else
3631         goto label_invalid_coding_system;
3632     }
3633   else
3634     goto label_invalid_coding_system;
3635
3636   /* If the coding system has non-nil `composition' property, enable
3637      composition handling.  */
3638   val = Fplist_get (plist, Qcomposition);
3639   if (!NILP (val))
3640     coding->composing = COMPOSITION_NO;
3641
3642   switch (XFASTINT (coding_type))
3643     {
3644     case 0:
3645       coding->type = coding_type_emacs_mule;
3646       coding->common_flags
3647         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3648       if (!NILP (coding->post_read_conversion))  /* Both masks are set above.  */
3649         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
3650       if (!NILP (coding->pre_write_conversion))
3651         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
3652       break;
3653
3654     case 1:
3655       coding->type = coding_type_sjis;
3656       coding->common_flags
3657         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3658       break;
3659
3660     case 2:
3661       coding->type = coding_type_iso2022;
3662       coding->common_flags
3663         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3664       {
3665         Lisp_Object val, temp;  /* This VAL shadows the outer one.  */
3666         Lisp_Object *flags;
3667         int i, charset, reg_bits = 0;
3668         /* For ISO2022, element[4] must be a vector of 32 elements.  */
3669         val = XVECTOR (coding_spec)->contents[4];
3670
3671         if (!VECTORP (val) || XVECTOR (val)->size != 32)
3672           goto label_invalid_coding_system;
3673
3674         flags = XVECTOR (val)->contents;
3675         coding->flags
3676           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
3677              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
3678              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
3679              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
3680              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
3681              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
3682              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
3683              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
3684              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
3685              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
3686              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3687              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
3688              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
3689              );
3690
3691         /* Invoke graphic register 0 to plane 0.  */
3692         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
3693         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
3694         CODING_SPEC_ISO_INVOCATION (coding, 1)
3695           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
3696         /* Not single shifting at first.  */
3697         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
3698         /* Beginning of buffer should also be regarded as bol. */
3699         CODING_SPEC_ISO_BOL (coding) = 1;
3700         /* Default all revision numbers to 255, then apply the alist.  */
3701         for (charset = 0; charset <= MAX_CHARSET; charset++)
3702           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
3703         val = Vcharset_revision_alist;
3704         while (CONSP (val))
3705           {
3706             charset = get_charset_id (Fcar_safe (XCAR (val)));
3707             if (charset >= 0
3708                 && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
3709                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
3710               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
3711             val = XCDR (val);
3712           }
3713
3714         /* Check FLAGS[REG] (REG = 0, 1, 2, 3) and decide designations.
3715            FLAGS[REG] can be one of below:
3716                 integer CHARSET: CHARSET occupies register REG,
3717                 t: designate nothing to REG initially, but can be used
3718                   by any charsets,
3719                 list of integer, nil, or t: designate the first
3720                   element (if integer) to REG initially, the remaining
3721                   elements (if integer) are designated to REG on request,
3722                   if an element is t, REG can be used by any charsets,
3723                 nil: REG is never used.  */
3724         for (charset = 0; charset <= MAX_CHARSET; charset++)
3725           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3726             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
3727         for (i = 0; i < 4; i++)
3728           {
3729             if ((INTEGERP (flags[i])
3730                  && (charset = XINT (flags[i]), CHARSET_VALID_P (charset)))
3731                 || (charset = get_charset_id (flags[i])) >= 0)
3732               {
3733                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3734                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
3735               }
3736             else if (EQ (flags[i], Qt))
3737               {
3738                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3739                 reg_bits |= 1 << i;
3740                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3741               }
3742             else if (CONSP (flags[i]))
3743               {
3744                 Lisp_Object tail;
3745                 tail = flags[i];
3746
3747                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3748                 if ((INTEGERP (XCAR (tail))
3749                      && (charset = XINT (XCAR (tail)),
3750                          CHARSET_VALID_P (charset)))
3751                     || (charset = get_charset_id (XCAR (tail))) >= 0)
3752                   {
3753                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3754                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
3755                   }
3756                 else
3757                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3758                 tail = XCDR (tail);
3759                 while (CONSP (tail))
3760                   {
3761                     if ((INTEGERP (XCAR (tail))
3762                          && (charset = XINT (XCAR (tail)),
3763                              CHARSET_VALID_P (charset)))
3764                         || (charset = get_charset_id (XCAR (tail))) >= 0)
3765                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3766                         = i;
3767                     else if (EQ (XCAR (tail), Qt))
3768                       reg_bits |= 1 << i;
3769                     tail = XCDR (tail);
3770                   }
3771               }
3772             else
3773               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3774
3775             CODING_SPEC_ISO_DESIGNATION (coding, i)
3776               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3777           }
3778
3779         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3780           {
3781             /* REG 1 can be used only by locking shift in 7-bit env.  */
3782             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3783               reg_bits &= ~2;
3784             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3785               /* Without any shifting, only REG 0 and 1 can be used.  */
3786               reg_bits &= 3;
3787           }
3788
3789         if (reg_bits)
3790           for (charset = 0; charset <= MAX_CHARSET; charset++)
3791             {
3792               if (CHARSET_DEFINED_P (charset)
3793                   && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3794                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
3795                 {
3796                   /* There exist some default graphic registers to be
3797                      used by CHARSET.  */
3798
3799                   /* We had better avoid designating a charset of
3800                      CHARS96 to REG 0 as far as possible.  */
3801                   if (CHARSET_CHARS (charset) == 96)
3802                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3803                       = (reg_bits & 2
3804                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3805                   else
3806                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3807                       = (reg_bits & 1
3808                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3809                 }
3810             }
3811       }
3812       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3813       coding->spec.iso2022.last_invalid_designation_register = -1;
3814       break;
3815
3816     case 3:
3817       coding->type = coding_type_big5;
3818       coding->common_flags
3819         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3820       coding->flags
3821         = (NILP (XVECTOR (coding_spec)->contents[4])
3822            ? CODING_FLAG_BIG5_HKU
3823            : CODING_FLAG_BIG5_ETEN);
3824       break;
3825
3826     case 4:
3827       coding->type = coding_type_ccl;
3828       coding->common_flags
3829         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3830       {
3831         val = XVECTOR (coding_spec)->contents[4];
3832         if (! CONSP (val)
3833             || setup_ccl_program (&(coding->spec.ccl.decoder),
3834                                   XCAR (val)) < 0
3835             || setup_ccl_program (&(coding->spec.ccl.encoder),
3836                                   XCDR (val)) < 0)
3837           goto label_invalid_coding_system;
3838         /* Mark the codes listed in the Qvalid_codes property.  */
3839         bzero (coding->spec.ccl.valid_codes, 256);
3840         val = Fplist_get (plist, Qvalid_codes);
3841         if (CONSP (val))
3842           {
3843             Lisp_Object this;
3844
3845             for (; CONSP (val); val = XCDR (val))
3846               {
3847                 this = XCAR (val);
3848                 if (INTEGERP (this)
3849                     && XINT (this) >= 0 && XINT (this) < 256)
3850                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
3851                 else if (CONSP (this)
3852                          && INTEGERP (XCAR (this))
3853                          && INTEGERP (XCDR (this)))
3854                   {
3855                     int start = XINT (XCAR (this));
3856                     int end = XINT (XCDR (this));
3857
3858                     if (start >= 0 && start <= end && end < 256)
3859                       while (start <= end)
3860                         coding->spec.ccl.valid_codes[start++] = 1;
3861                   }
3862               }
3863           }
3864       }
3865       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3866       coding->spec.ccl.cr_carryover = 0;
3867       coding->spec.ccl.eight_bit_carryover[0] = 0;
3868       break;
3869
3870     case 5:
3871       coding->type = coding_type_raw_text;
3872       break;
3873
3874     default:
3875       goto label_invalid_coding_system;
3876     }
3877   return 0;
3878   /* Fall back to a setup which requires no conversion at all.  */
3879  label_invalid_coding_system:
3880   coding->type = coding_type_no_conversion;
3881   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3882   coding->common_flags = 0;
3883   coding->eol_type = CODING_EOL_LF;
3884   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3885   return -1;
3886 }
3887
3888 /* Release every block of composition data attached to CODING.  */
3889
3890 void
3891 coding_free_composition_data (coding)
3892      struct coding_system *coding;
3893 {
3894   struct composition_data *block, *head = coding->cmp_data;
3895
3896   if (head == NULL)
3897     return;
3898   /* The blocks are chained in both directions.  Walk back to the
3899      first block of the chain before releasing anything.  */
3900   for (; head->prev != NULL; head = head->prev)
3901     ;
3902   while ((block = head) != NULL)
3903     {
3904       /* Fetch the successor before the block is released.  */
3905       head = block->next;
3906       xfree (block);
3907     }
3908   coding->cmp_data = NULL;
3909 }
3910
3911 /* Store POS in the `char_offset' member of every memory block that
3912    is reachable from coding->cmp_data through the `next' links.  */
3913
3914 void
3915 coding_adjust_composition_offset (coding, pos)
3916      struct coding_system *coding;
3917      int pos;
3918 {
3919   struct composition_data *block = coding->cmp_data;
3920
3921   while (block != NULL)
3922     block->char_offset = pos, block = block->next;
3923 }
3924
3925 /* Turn CODING, which must already be set up for some coding system,
3926    into raw-text.  When the end-of-line format of CODING is already
3927    decided, the matching subsidiary of raw-text is used instead.
3928    Nothing is done when CODING is of type raw-text already.  */
3929
3930 void
3931 setup_raw_text_coding_system (coding)
3932      struct coding_system *coding;
3933 {
3934   if (coding->type == coding_type_raw_text)
3935     return;
3936
3937   coding->symbol = Qraw_text;
3938   coding->type = coding_type_raw_text;
3939   if (coding->eol_type != CODING_EOL_UNDECIDED)
3940     {
3941       /* Only a 3-element vector in the `eol-type' property of
3942          raw-text is accepted; it is indexed by the eol type.  */
3943       Lisp_Object eol_variants = Fget (Qraw_text, Qeol_type);
3944
3945       if (VECTORP (eol_variants) && XVECTOR (eol_variants)->size == 3)
3946         coding->symbol = XVECTOR (eol_variants)->contents[coding->eol_type];
3947     }
3948
3949   /* Set CODING up again from the symbol chosen above.  */
3950   setup_coding_system (coding->symbol, coding);
3951 }
3952
3953 /* Emacs has a mechanism to automatically detect a coding system if it
3954    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3955    it's impossible to distinguish some coding systems accurately
3956    because they use the same range of codes.  So, at first, coding
3957    systems are categorized into the categories listed below:
3958
3959    o coding-category-emacs-mule
3960
3961         The category for a coding system which has the same code range
3962         as Emacs' internal format.  Assigned the coding-system (Lisp
3963         symbol) `emacs-mule' by default.
3964
3965    o coding-category-sjis
3966
3967         The category for a coding system which has the same code range
3968         as SJIS.  Assigned the coding-system (Lisp
3969         symbol) `japanese-shift-jis' by default.
3970
3971    o coding-category-iso-7
3972
3973         The category for a coding system which has the same code range
3974         as ISO2022 of 7-bit environment.  This doesn't use any locking
3975         shift and single shift functions.  This can encode/decode all
3976         charsets.  Assigned the coding-system (Lisp symbol)
3977         `iso-2022-7bit' by default.
3978
3979    o coding-category-iso-7-tight
3980
3981         Same as coding-category-iso-7 except that this can
3982         encode/decode only the specified charsets.
3983
3984    o coding-category-iso-8-1
3985
3986         The category for a coding system which has the same code range
3987         as ISO2022 of 8-bit environment and graphic plane 1 used only
3988         for DIMENSION1 charset.  This doesn't use any locking shift
3989         and single shift functions.  Assigned the coding-system (Lisp
3990         symbol) `iso-latin-1' by default.
3991
3992    o coding-category-iso-8-2
3993
3994         The category for a coding system which has the same code range
3995         as ISO2022 of 8-bit environment and graphic plane 1 used only
3996         for DIMENSION2 charset.  This doesn't use any locking shift
3997         and single shift functions.  Assigned the coding-system (Lisp
3998         symbol) `japanese-iso-8bit' by default.
3999
4000    o coding-category-iso-7-else
4001
4002         The category for a coding system which has the same code range
4003         as ISO2022 of 7-bit environment but uses locking shift or
4004         single shift functions.  Assigned the coding-system (Lisp
4005         symbol) `iso-2022-7bit-lock' by default.
4006
4007    o coding-category-iso-8-else
4008
4009         The category for a coding system which has the same code range
4010         as ISO2022 of 8-bit environment but uses locking shift or
4011         single shift functions.  Assigned the coding-system (Lisp
4012         symbol) `iso-2022-8bit-ss2' by default.
4013
4014    o coding-category-big5
4015
4016         The category for a coding system which has the same code range
4017         as BIG5.  Assigned the coding-system (Lisp symbol)
4018         `cn-big5' by default.
4019
4020    o coding-category-utf-8
4021
4022         The category for a coding system which has the same code range
4023         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
4024         symbol) `utf-8' by default.
4025
4026    o coding-category-utf-16-be
4027
4028         The category for a coding system in which a text has a
4029         Unicode signature (cf. Unicode Standard) in the order of BIG
4030         endian at the head.  Assigned the coding-system (Lisp symbol)
4031         `utf-16-be' by default.
4032
4033    o coding-category-utf-16-le
4034
4035         The category for a coding system in which a text has a
4036         Unicode signature (cf. Unicode Standard) in the order of
4037         LITTLE endian at the head.  Assigned the coding-system (Lisp
4038         symbol) `utf-16-le' by default.
4039
4040    o coding-category-ccl
4041
4042         The category for a coding system of which encoder/decoder is
4043         written in CCL programs.  The default value is nil, i.e., no
4044         coding system is assigned.
4045
4046    o coding-category-binary
4047
4048         The category for a coding system not categorized in any of the
4049         above.  Assigned the coding-system (Lisp symbol)
4050         `no-conversion' by default.
4051
4052    Each of them is a Lisp symbol and the value is an actual
4053    `coding-system' (this is also a Lisp symbol) assigned by a user.
4054    What Emacs does actually is to detect a category of coding system.
4055    Then, it uses a `coding-system' assigned to it.  If Emacs can't
4056    decide a single possible category, it selects a category of the
4057    highest priority.  Priorities of categories are also specified by a
4058    user in a Lisp variable `coding-category-list'.
4059
4060 */
4061
4062 /* Nonzero at index B makes detect_coding_mask skip the byte B.  */
4063 static int ascii_skip_code[256];
4064
4065 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
4066    Return an integer in which the flag bits (CODING_CATEGORY_MASK_XXX,
4067    see `coding.h') of the possible coding categories are set, or 0 if
4068    nothing other than ASCII is found.  If PRIORITIES is non-NULL, it
4069    should point to the table `coding_priorities'.  In that case, only
4070    the flag bit for a coding system of the highest priority is set in
4071    the returned value.  If MULTIBYTEP is nonzero, 8-bit codes of the
4072    range 0x80..0x9F are in multibyte form.
4073
4074    How many ASCII characters are at the head is returned as *SKIP.  */
4075
4076 static int
4077 detect_coding_mask (source, src_bytes, priorities, skip, multibytep)
4078      unsigned char *source;
4079      int src_bytes, *priorities, *skip;
4080      int multibytep;
4081 {
4082   register unsigned char c;
4083   unsigned char *src = source, *src_end = source + src_bytes;
4084   unsigned int mask, utf16_examined_p, iso2022_examined_p;
4085   int i;
4086
4087   /* At first, skip all ASCII characters and control characters except
4088      for three ISO2022 specific control characters.  */
4089   ascii_skip_code[ISO_CODE_SO] = 0;
4090   ascii_skip_code[ISO_CODE_SI] = 0;
4091   ascii_skip_code[ISO_CODE_ESC] = 0;
4092
4093  label_loop_detect_coding:
4094   while (src < src_end && ascii_skip_code[*src]) src++;
4095   *skip = src - source;
4096
4097   if (src >= src_end)
4098     /* We found nothing other than ASCII.  There's nothing to do.  */
4099     return 0;
4100
4101   c = *src;
4102   /* The text seems to be encoded in some multilingual coding system.
4103      Now, try to find in which coding system the text is encoded.  */
4104   if (c < 0x80)
4105     {
4106       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
4107       /* C is an ISO2022 specific control code of C0.  */
4108       mask = detect_coding_iso2022 (src, src_end, multibytep);
4109       if (mask == 0)
4110         {
4111           /* No valid ISO2022 code follows C.  Skip such codes and retry.  */
4112           src++;
4113           if (c == ISO_CODE_ESC)
4114             ascii_skip_code[ISO_CODE_ESC] = 1;
4115           else
4116             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
4117           goto label_loop_detect_coding;
4118         }
4119       if (priorities)
4120         {
4121           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
4122             {
4123               if (mask & priorities[i])
4124                 return priorities[i];
4125             }
4126           return CODING_CATEGORY_MASK_RAW_TEXT;  /* No priority matched.  */
4127         }
4128     }
4129   else
4130     {
4131       int try;
4132       /* NOTE(review): src[1] is read below without a src_end check.  */
4133       if (multibytep && c == LEADING_CODE_8_BIT_CONTROL)
4134         c = src[1] - 0x20;
4135
4136       if (c < 0xA0)
4137         {
4138           /* C is the first byte of SJIS character code,
4139              or a leading-code of Emacs' internal format (emacs-mule),
4140              or the first byte of UTF-16.  */
4141           try = (CODING_CATEGORY_MASK_SJIS
4142                   | CODING_CATEGORY_MASK_EMACS_MULE
4143                   | CODING_CATEGORY_MASK_UTF_16_BE
4144                   | CODING_CATEGORY_MASK_UTF_16_LE);
4145
4146           /* Or, if C is a special latin extra code, or an ISO2022
4147              control code of C1 (SS2 or SS3), or a CSI, ISO2022 codings
4148              are possible too.  NOTE(review): in the CSI test SRC still
4149              points at C's own byte, so `]' etc. look unmatchable.  */
4150           if ((VECTORP (Vlatin_extra_code_table)
4151                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
4152               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
4153               || (c == ISO_CODE_CSI
4154                   && (src < src_end
4155                       && (*src == ']'
4156                           || ((*src == '0' || *src == '1' || *src == '2')
4157                               && src + 1 < src_end
4158                               && src[1] == ']')))))
4159             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
4160                      | CODING_CATEGORY_MASK_ISO_8BIT);
4161         }
4162       else
4163         /* C is a character of ISO2022 in graphic plane right,
4164            or a SJIS's 1-byte character code (i.e. JISX0201),
4165            or the first byte of BIG5's 2-byte code,
4166            or the first byte of UTF-8/16.  */
4167         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
4168                 | CODING_CATEGORY_MASK_ISO_8BIT
4169                 | CODING_CATEGORY_MASK_SJIS
4170                 | CODING_CATEGORY_MASK_BIG5
4171                 | CODING_CATEGORY_MASK_UTF_8
4172                 | CODING_CATEGORY_MASK_UTF_16_BE
4173                 | CODING_CATEGORY_MASK_UTF_16_LE);
4174
4175       /* Or, we may have to consider the possibility of CCL.  */
4176       if (coding_system_table[CODING_CATEGORY_IDX_CCL]
4177           && (coding_system_table[CODING_CATEGORY_IDX_CCL]
4178               ->spec.ccl.valid_codes)[c])
4179         try |= CODING_CATEGORY_MASK_CCL;
4180
4181       mask = 0;
4182       utf16_examined_p = iso2022_examined_p = 0;  /* Run those only once.  */
4183       if (priorities)
4184         {
4185           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
4186             {
4187               if (!iso2022_examined_p
4188                   && (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
4189                 {
4190                   mask |= detect_coding_iso2022 (src, src_end, multibytep);
4191                   iso2022_examined_p = 1;
4192                 }
4193               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
4194                 mask |= detect_coding_sjis (src, src_end, multibytep);
4195               else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8)
4196                 mask |= detect_coding_utf_8 (src, src_end, multibytep);
4197               else if (!utf16_examined_p
4198                        && (priorities[i] & try &
4199                            CODING_CATEGORY_MASK_UTF_16_BE_LE))
4200                 {
4201                   mask |= detect_coding_utf_16 (src, src_end, multibytep);
4202                   utf16_examined_p = 1;
4203                 }
4204               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
4205                 mask |= detect_coding_big5 (src, src_end, multibytep);
4206               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
4207                 mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4208               else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
4209                 mask |= detect_coding_ccl (src, src_end, multibytep);
4210               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
4211                 mask |= CODING_CATEGORY_MASK_RAW_TEXT;
4212               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
4213                 mask |= CODING_CATEGORY_MASK_BINARY;
4214               if (mask & priorities[i])
4215                 return priorities[i];
4216             }
4217           return CODING_CATEGORY_MASK_RAW_TEXT;  /* No priority matched.  */
4218         }
4219       if (try & CODING_CATEGORY_MASK_ISO)
4220         mask |= detect_coding_iso2022 (src, src_end, multibytep);
4221       if (try & CODING_CATEGORY_MASK_SJIS)
4222         mask |= detect_coding_sjis (src, src_end, multibytep);
4223       if (try & CODING_CATEGORY_MASK_BIG5)
4224         mask |= detect_coding_big5 (src, src_end, multibytep);
4225       if (try & CODING_CATEGORY_MASK_UTF_8)
4226         mask |= detect_coding_utf_8 (src, src_end, multibytep);
4227       if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE)
4228         mask |= detect_coding_utf_16 (src, src_end, multibytep);
4229       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
4230         mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4231       if (try & CODING_CATEGORY_MASK_CCL)
4232         mask |= detect_coding_ccl (src, src_end, multibytep);
4233     }
4234   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
4235 }
4236
4237 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded
4238    and set up CODING for the coding system that is detected.  */
4239
4240 void
4241 detect_coding (coding, src, src_bytes)
4242      struct coding_system *coding;
4243      const unsigned char *src;
4244      int src_bytes;
4245 {
4246   unsigned int idx;
4247   int skip, mask;
4248   Lisp_Object val;
4249
4250   /* As a priority table is passed, only the bit of the category of
4251      the highest priority is set in MASK.  */
4252   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip,
4253                              coding->src_multibyte);
4254   coding->heading_ascii = skip;
4255
4256   /* Nothing other than ASCII was found; leave CODING as it is.  */
4257   if (!mask)
4258     return;
4259
4260   /* MASK is nonzero here, so this loop stops with IDX being the
4261      index of the lowest bit set in MASK.  */
4262   for (idx = 0; ! (mask & 1); idx++)
4263     mask >>= 1;
4264   val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[idx]);
4265
4266   /* If the end-of-line format is already decided, use the matching
4267      subsidiary of the detected coding system.  */
4268   if (coding->eol_type != CODING_EOL_UNDECIDED)
4269     {
4270       Lisp_Object tmp = Fget (val, Qeol_type);
4271       if (VECTORP (tmp))
4272         val = XVECTOR (tmp)->contents[coding->eol_type];
4273     }
4274
4275   /* Setup this new coding system while preserving some slots.  */
4276   {
4277     int src_multibyte = coding->src_multibyte;
4278     int dst_multibyte = coding->dst_multibyte;
4279     setup_coding_system (val, coding);
4280     coding->src_multibyte = src_multibyte;
4281     coding->dst_multibyte = dst_multibyte;
4282     coding->heading_ascii = skip;
4283   }
4284 }
4285
4286 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
4287    SOURCE is encoded, examining at most MAX_EOL_CHECK_COUNT
4288    end-of-lines.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
4289    CODING_EOL_CR, CODING_EOL_INCONSISTENT, and CODING_EOL_UNDECIDED.
4290    How many non-eol characters are at the head is returned as *SKIP.  */
4291
4292 #define MAX_EOL_CHECK_COUNT 3
4293
4294 static int
4295 detect_eol_type (source, src_bytes, skip)
4296      unsigned char *source;
4297      int src_bytes, *skip;
4298 {
4299   unsigned char *src = source, *src_end = src + src_bytes;
4300   unsigned char c;
4301   int total = 0;                /* How many end-of-lines are found so far.  */
4302   int eol_type = CODING_EOL_UNDECIDED;
4303   int this_eol_type;
4304
4305   *skip = 0;
4306
4307   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
4308     {
4309       c = *src++;
4310       if (c == '\n' || c == '\r')
4311         {
4312           /* Only the first end-of-line sets *SKIP; it may be at offset 0.  */
4313           if (total++ == 0)
4314             *skip = src - 1 - source;
4315           if (c == '\n')
4316             this_eol_type = CODING_EOL_LF;
4317           else if (src >= src_end || *src != '\n')
4318             this_eol_type = CODING_EOL_CR;
4319           else
4320             this_eol_type = CODING_EOL_CRLF, src++;
4321
4322           if (eol_type == CODING_EOL_UNDECIDED)
4323             /* This is the first end-of-line.  */
4324             eol_type = this_eol_type;
4325           else if (eol_type != this_eol_type)
4326             {
4327               /* The found type is different from what found before.  */
4328               eol_type = CODING_EOL_INCONSISTENT;
4329               break;
4330             }
4331         }
4332     }
4333   /* Without any end-of-line, the whole text is the head.  */
4334   if (total == 0)
4335     *skip = src_end - source;
4336   return eol_type;
4337 }
4338
4339 /* Like detect_eol_type, but detect EOL type in 2-octet
4340    big-endian/little-endian format for coding systems utf-16-be and
4341    utf-16-le.  */
4342
4343 static int
4344 detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
4345      unsigned char *source;
4346      int src_bytes, *skip, big_endian_p;
4347 {
4348   unsigned char *src = source, *src_end = src + src_bytes;
4349   unsigned int c1, c2;
4350   int total = 0;                /* How many end-of-lines are found so far.  */
4351   int eol_type = CODING_EOL_UNDECIDED;
4352   int this_eol_type;
4353   int msb, lsb;
4354
4355   if (big_endian_p)
4356     msb = 0, lsb = 1;
4357   else
4358     msb = 1, lsb = 0;
4359
4360   *skip = 0;
4361
4362   while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT)
4363     {
4364       c1 = (src[msb] << 8) | (src[lsb]);
4365       src += 2;
4366
4367       if (c1 == '\n' || c1 == '\r')
4368         {
4369           if (*skip == 0)
4370             *skip = src - 2 - source;
4371           total++;
4372           if (c1 == '\n')
4373             {
4374               this_eol_type = CODING_EOL_LF;
4375             }
4376           else
4377             {
4378               if ((src + 1) >= src_end)
4379                 {
4380                   this_eol_type = CODING_EOL_CR;
4381                 }
4382               else
4383                 {
4384                   c2 = (src[msb] << 8) | (src[lsb]);
4385                   if (c2 == '\n')
4386                     this_eol_type = CODING_EOL_CRLF, src += 2;
4387                   else
4388                     this_eol_type = CODING_EOL_CR;
4389                 }
4390             }
4391
4392           if (eol_type == CODING_EOL_UNDECIDED)
4393             /* This is the first end-of-line.  */
4394             eol_type = this_eol_type;
4395           else if (eol_type != this_eol_type)
4396             {
4397               /* The found type is different from what found before.  */
4398               eol_type = CODING_EOL_INCONSISTENT;
4399               break;
4400             }
4401         }
4402     }
4403
4404   if (*skip == 0)
4405     *skip = src_end - source;
4406   return eol_type;
4407 }
4408
4409 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
4410    is encoded.  If it detects an appropriate format of end-of-line, it
4411    sets the information in *CODING.  */
4412
4413 void
4414 detect_eol (coding, src, src_bytes)
4415      struct coding_system *coding;
4416      const unsigned char *src;
4417      int src_bytes;
4418 {
4419   Lisp_Object val;
4420   int skip;
4421   int eol_type;
4422
4423   switch (coding->category_idx)
4424     {
4425     case CODING_CATEGORY_IDX_UTF_16_BE:
4426       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 1);
4427       break;
4428     case CODING_CATEGORY_IDX_UTF_16_LE:
4429       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 0);
4430       break;
4431     default:
4432       eol_type = detect_eol_type (src, src_bytes, &skip);
4433       break;
4434     }
4435
4436   if (coding->heading_ascii > skip)
4437     coding->heading_ascii = skip;
4438   else
4439     skip = coding->heading_ascii;
4440
4441   if (eol_type == CODING_EOL_UNDECIDED)
4442     return;
4443   if (eol_type == CODING_EOL_INCONSISTENT)
4444     {
4445 #if 0
4446       /* This code is suppressed until we find a better way to
4447          distinguish raw text file and binary file.  */
4448
4449       /* If we have already detected that the coding is raw-text, the
4450          coding should actually be no-conversion.  */
4451       if (coding->type == coding_type_raw_text)
4452         {
4453           setup_coding_system (Qno_conversion, coding);
4454           return;
4455         }
4456       /* Else, let's decode only text code anyway.  */
4457 #endif /* 0 */
4458       eol_type = CODING_EOL_LF;
4459     }
4460
4461   val = Fget (coding->symbol, Qeol_type);
4462   if (VECTORP (val) && XVECTOR (val)->size == 3)
4463     {
4464       int src_multibyte = coding->src_multibyte;
4465       int dst_multibyte = coding->dst_multibyte;
4466       struct composition_data *cmp_data = coding->cmp_data;
4467
4468       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
4469       coding->src_multibyte = src_multibyte;
4470       coding->dst_multibyte = dst_multibyte;
4471       coding->heading_ascii = skip;
4472       coding->cmp_data = cmp_data;
4473     }
4474 }
4475
/* Number of bytes added on top of the estimated size of a conversion
   buffer.  */
#define CONVERSION_BUFFER_EXTRA_ROOM 256

/* Factor by which decoding may enlarge a text encoded in CODING (a
   pointer to struct coding_system): 3 for ISO2022, the CCL decoder's
   own buf_magnification for CCL, and 2 otherwise.  CODING is
   evaluated more than once.  */
#define DECODING_BUFFER_MAG(coding)                     \
  ((coding)->type == coding_type_iso2022                \
   ? 3                                                  \
   : ((coding)->type == coding_type_ccl                 \
      ? (coding)->spec.ccl.decoder.buf_magnification    \
      : 2))
4484
4485 /* Return maximum size (bytes) of a buffer enough for decoding
4486    SRC_BYTES of text encoded in CODING.  */
4487
4488 int
4489 decoding_buffer_size (coding, src_bytes)
4490      struct coding_system *coding;
4491      int src_bytes;
4492 {
4493   return (src_bytes * DECODING_BUFFER_MAG (coding)
4494           + CONVERSION_BUFFER_EXTRA_ROOM);
4495 }
4496
4497 /* Return maximum size (bytes) of a buffer enough for encoding
4498    SRC_BYTES of text to CODING.  */
4499
4500 int
4501 encoding_buffer_size (coding, src_bytes)
4502      struct coding_system *coding;
4503      int src_bytes;
4504 {
4505   int magnification;
4506
4507   if (coding->type == coding_type_ccl)
4508     magnification = coding->spec.ccl.encoder.buf_magnification;
4509   else if (CODING_REQUIRE_ENCODING (coding))
4510     magnification = 3;
4511   else
4512     magnification = 1;
4513
4514   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
4515 }
4516
/* Working buffer for code conversion.  */
struct conversion_buffer
{
  int size;                     /* size of data.  */
  int on_stack;                 /* 1 if allocated by alloca.  */
  unsigned char *data;          /* the allocated memory.  */
};

/* Don't use alloca for allocating memory space larger than this, lest
   we overflow their stack.  */
#define MAX_ALLOCA 16*1024

/* Allocate LEN bytes of memory for BUF (struct conversion_buffer, the
   object itself, not a pointer to it).  Requests smaller than
   MAX_ALLOCA are served by alloca and marked on_stack; larger ones by
   xmalloc.  LEN is evaluated more than once, so it must not have side
   effects.  MAX_ALLOCA is parenthesized at the point of use because
   its definition is a bare expression.  */
#define allocate_conversion_buffer(buf, len)            \
  do {                                                  \
    if ((len) < (MAX_ALLOCA))                           \
      {                                                 \
        (buf).data = (unsigned char *) alloca (len);    \
        (buf).on_stack = 1;                             \
      }                                                 \
    else                                                \
      {                                                 \
        (buf).data = (unsigned char *) xmalloc (len);   \
        (buf).on_stack = 0;                             \
      }                                                 \
    (buf).size = (len);                                 \
  } while (0)
4544
4545 /* Double the allocated memory for *BUF.  */
4546 static void
4547 extend_conversion_buffer (buf)
4548      struct conversion_buffer *buf;
4549 {
4550   if (buf->on_stack)
4551     {
4552       unsigned char *save = buf->data;
4553       buf->data = (unsigned char *) xmalloc (buf->size * 2);
4554       bcopy (save, buf->data, buf->size);
4555       buf->on_stack = 0;
4556     }
4557   else
4558     {
4559       buf->data = (unsigned char *) xrealloc (buf->data, buf->size * 2);
4560     }
4561   buf->size *= 2;
4562 }
4563
4564 /* Free the allocated memory for BUF if it is not on stack.  */
4565 static void
4566 free_conversion_buffer (buf)
4567      struct conversion_buffer *buf;
4568 {
4569   if (!buf->on_stack)
4570     xfree (buf->data);
4571 }
4572
4573 int
4574 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
4575      struct coding_system *coding;
4576      unsigned char *source, *destination;
4577      int src_bytes, dst_bytes, encodep;
4578 {
4579   struct ccl_program *ccl
4580     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
4581   unsigned char *dst = destination;
4582
4583   ccl->suppress_error = coding->suppress_error;
4584   ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4585   if (encodep)
4586     {
4587       /* On encoding, EOL format is converted within ccl_driver.  For
4588          that, setup proper information in the structure CCL.  */
4589       ccl->eol_type = coding->eol_type;
4590       if (ccl->eol_type ==CODING_EOL_UNDECIDED)
4591         ccl->eol_type = CODING_EOL_LF;
4592       ccl->cr_consumed = coding->spec.ccl.cr_carryover;
4593       ccl->eight_bit_control = coding->dst_multibyte;
4594     }
4595   else
4596     ccl->eight_bit_control = 1;
4597   ccl->multibyte = coding->src_multibyte;
4598   if (coding->spec.ccl.eight_bit_carryover[0] != 0)
4599     {
4600       /* Move carryover bytes to DESTINATION.  */
4601       unsigned char *p = coding->spec.ccl.eight_bit_carryover;
4602       while (*p)
4603         *dst++ = *p++;
4604       coding->spec.ccl.eight_bit_carryover[0] = 0;
4605       if (dst_bytes)
4606         dst_bytes -= dst - destination;
4607     }
4608
4609   coding->produced = (ccl_driver (ccl, source, dst, src_bytes, dst_bytes,
4610                                   &(coding->consumed))
4611                       + dst - destination);
4612
4613   if (encodep)
4614     {
4615       coding->produced_char = coding->produced;
4616       coding->spec.ccl.cr_carryover = ccl->cr_consumed;
4617     }
4618   else if (!ccl->eight_bit_control)
4619     {
4620       /* The produced bytes forms a valid multibyte sequence. */
4621       coding->produced_char
4622         = multibyte_chars_in_text (destination, coding->produced);
4623       coding->spec.ccl.eight_bit_carryover[0] = 0;
4624     }
4625   else
4626     {
4627       /* On decoding, the destination should always multibyte.  But,
4628          CCL program might have been generated an invalid multibyte
4629          sequence.  Here we make such a sequence valid as
4630          multibyte.  */
4631       int bytes
4632         = dst_bytes ? dst_bytes : source + coding->consumed - destination;
4633
4634       if ((coding->consumed < src_bytes
4635            || !ccl->last_block)
4636           && coding->produced >= 1
4637           && destination[coding->produced - 1] >= 0x80)
4638         {
4639           /* We should not convert the tailing 8-bit codes to
4640              multibyte form even if they doesn't form a valid
4641              multibyte sequence.  They may form a valid sequence in
4642              the next call.  */
4643           int carryover = 0;
4644
4645           if (destination[coding->produced - 1] < 0xA0)
4646             carryover = 1;
4647           else if (coding->produced >= 2)
4648             {
4649               if (destination[coding->produced - 2] >= 0x80)
4650                 {
4651                   if (destination[coding->produced - 2] < 0xA0)
4652                     carryover = 2;
4653                   else if (coding->produced >= 3
4654                            && destination[coding->produced - 3] >= 0x80
4655                            && destination[coding->produced - 3] < 0xA0)
4656                     carryover = 3;
4657                 }
4658             }
4659           if (carryover > 0)
4660             {
4661               BCOPY_SHORT (destination + coding->produced - carryover,
4662                            coding->spec.ccl.eight_bit_carryover,
4663                            carryover);
4664               coding->spec.ccl.eight_bit_carryover[carryover] = 0;
4665               coding->produced -= carryover;
4666             }
4667         }
4668       coding->produced = str_as_multibyte (destination, bytes,
4669                                            coding->produced,
4670                                            &(coding->produced_char));
4671     }
4672
4673   switch (ccl->status)
4674     {
4675     case CCL_STAT_SUSPEND_BY_SRC:
4676       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
4677       break;
4678     case CCL_STAT_SUSPEND_BY_DST:
4679       coding->result = CODING_FINISH_INSUFFICIENT_DST;
4680       break;
4681     case CCL_STAT_QUIT:
4682     case CCL_STAT_INVALID_CMD:
4683       coding->result = CODING_FINISH_INTERRUPT;
4684       break;
4685     default:
4686       coding->result = CODING_FINISH_NORMAL;
4687       break;
4688     }
4689   return coding->result;
4690 }
4691
4692 /* Decode EOL format of the text at PTR of BYTES length destructively
4693    according to CODING->eol_type.  This is called after the CCL
4694    program produced a decoded text at PTR.  If we do CRLF->LF
4695    conversion, update CODING->produced and CODING->produced_char.  */
4696
4697 static void
4698 decode_eol_post_ccl (coding, ptr, bytes)
4699      struct coding_system *coding;
4700      unsigned char *ptr;
4701      int bytes;
4702 {
4703   Lisp_Object val, saved_coding_symbol;
4704   unsigned char *pend = ptr + bytes;
4705   int dummy;
4706
4707   /* Remember the current coding system symbol.  We set it back when
4708      an inconsistent EOL is found so that `last-coding-system-used' is
4709      set to the coding system that doesn't specify EOL conversion.  */
4710   saved_coding_symbol = coding->symbol;
4711
4712   coding->spec.ccl.cr_carryover = 0;
4713   if (coding->eol_type == CODING_EOL_UNDECIDED)
4714     {
4715       /* Here, to avoid the call of setup_coding_system, we directly
4716          call detect_eol_type.  */
4717       coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
4718       if (coding->eol_type == CODING_EOL_INCONSISTENT)
4719         coding->eol_type = CODING_EOL_LF;
4720       if (coding->eol_type != CODING_EOL_UNDECIDED)
4721         {
4722           val = Fget (coding->symbol, Qeol_type);
4723           if (VECTORP (val) && XVECTOR (val)->size == 3)
4724             coding->symbol = XVECTOR (val)->contents[coding->eol_type];
4725         }
4726       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4727     }
4728
4729   if (coding->eol_type == CODING_EOL_LF
4730       || coding->eol_type == CODING_EOL_UNDECIDED)
4731     {
4732       /* We have nothing to do.  */
4733       ptr = pend;
4734     }
4735   else if (coding->eol_type == CODING_EOL_CRLF)
4736     {
4737       unsigned char *pstart = ptr, *p = ptr;
4738
4739       if (! (coding->mode & CODING_MODE_LAST_BLOCK)
4740           && *(pend - 1) == '\r')
4741         {
4742           /* If the last character is CR, we can't handle it here
4743              because LF will be in the not-yet-decoded source text.
4744              Record that the CR is not yet processed.  */
4745           coding->spec.ccl.cr_carryover = 1;
4746           coding->produced--;
4747           coding->produced_char--;
4748           pend--;
4749         }
4750       while (ptr < pend)
4751         {
4752           if (*ptr == '\r')
4753             {
4754               if (ptr + 1 < pend && *(ptr + 1) == '\n')
4755                 {
4756                   *p++ = '\n';
4757                   ptr += 2;
4758                 }
4759               else
4760                 {
4761                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4762                     goto undo_eol_conversion;
4763                   *p++ = *ptr++;
4764                 }
4765             }
4766           else if (*ptr == '\n'
4767                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4768             goto undo_eol_conversion;
4769           else
4770             *p++ = *ptr++;
4771           continue;
4772
4773         undo_eol_conversion:
4774           /* We have faced with inconsistent EOL format at PTR.
4775              Convert all LFs before PTR back to CRLFs.  */
4776           for (p--, ptr--; p >= pstart; p--)
4777             {
4778               if (*p == '\n')
4779                 *ptr-- = '\n', *ptr-- = '\r';
4780               else
4781                 *ptr-- = *p;
4782             }
4783           /*  If carryover is recorded, cancel it because we don't
4784               convert CRLF anymore.  */
4785           if (coding->spec.ccl.cr_carryover)
4786             {
4787               coding->spec.ccl.cr_carryover = 0;
4788               coding->produced++;
4789               coding->produced_char++;
4790               pend++;
4791             }
4792           p = ptr = pend;
4793           coding->eol_type = CODING_EOL_LF;
4794           coding->symbol = saved_coding_symbol;
4795         }
4796       if (p < pend)
4797         {
4798           /* As each two-byte sequence CRLF was converted to LF, (PEND
4799              - P) is the number of deleted characters.  */
4800           coding->produced -= pend - p;
4801           coding->produced_char -= pend - p;
4802         }
4803     }
4804   else                  /* i.e. coding->eol_type == CODING_EOL_CR */
4805     {
4806       unsigned char *p = ptr;
4807
4808       for (; ptr < pend; ptr++)
4809         {
4810           if (*ptr == '\r')
4811             *ptr = '\n';
4812           else if (*ptr == '\n'
4813                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4814             {
4815               for (; p < ptr; p++)
4816                 {
4817                   if (*p == '\n')
4818                     *p = '\r';
4819                 }
4820               ptr = pend;
4821               coding->eol_type = CODING_EOL_LF;
4822               coding->symbol = saved_coding_symbol;
4823             }
4824         }
4825     }
4826 }
4827
4828 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
4829    decoding, it may detect coding system and format of end-of-line if
4830    those are not yet decided.  The source should be unibyte, the
4831    result is multibyte if CODING->dst_multibyte is nonzero, else
4832    unibyte.  */
4833
4834 int
4835 decode_coding (coding, source, destination, src_bytes, dst_bytes)
4836      struct coding_system *coding;
4837      const unsigned char *source;
4838      unsigned char *destination;
4839      int src_bytes, dst_bytes;
4840 {
4841   int extra = 0;
4842
4843   if (coding->type == coding_type_undecided)
4844     detect_coding (coding, source, src_bytes);
4845
4846   if (coding->eol_type == CODING_EOL_UNDECIDED
4847       && coding->type != coding_type_ccl)
4848     {
4849       detect_eol (coding, source, src_bytes);
4850       /* We had better recover the original eol format if we
4851          encounter an inconsistent eol format while decoding.  */
4852       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4853     }
4854
4855   coding->produced = coding->produced_char = 0;
4856   coding->consumed = coding->consumed_char = 0;
4857   coding->errors = 0;
4858   coding->result = CODING_FINISH_NORMAL;
4859
4860   switch (coding->type)
4861     {
4862     case coding_type_sjis:
4863       decode_coding_sjis_big5 (coding, source, destination,
4864                                src_bytes, dst_bytes, 1);
4865       break;
4866
4867     case coding_type_iso2022:
4868       decode_coding_iso2022 (coding, source, destination,
4869                              src_bytes, dst_bytes);
4870       break;
4871
4872     case coding_type_big5:
4873       decode_coding_sjis_big5 (coding, source, destination,
4874                                src_bytes, dst_bytes, 0);
4875       break;
4876
4877     case coding_type_emacs_mule:
4878       decode_coding_emacs_mule (coding, source, destination,
4879                                 src_bytes, dst_bytes);
4880       break;
4881
4882     case coding_type_ccl:
4883       if (coding->spec.ccl.cr_carryover)
4884         {
4885           /* Put the CR which was not processed by the previous call
4886              of decode_eol_post_ccl in DESTINATION.  It will be
4887              decoded together with the following LF by the call to
4888              decode_eol_post_ccl below.  */
4889           *destination = '\r';
4890           coding->produced++;
4891           coding->produced_char++;
4892           dst_bytes--;
4893           extra = coding->spec.ccl.cr_carryover;
4894         }
4895       ccl_coding_driver (coding, source, destination + extra,
4896                          src_bytes, dst_bytes, 0);
4897       if (coding->eol_type != CODING_EOL_LF)
4898         {
4899           coding->produced += extra;
4900           coding->produced_char += extra;
4901           decode_eol_post_ccl (coding, destination, coding->produced);
4902         }
4903       break;
4904
4905     default:
4906       decode_eol (coding, source, destination, src_bytes, dst_bytes);
4907     }
4908
4909   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4910       && coding->mode & CODING_MODE_LAST_BLOCK
4911       && coding->consumed == src_bytes)
4912     coding->result = CODING_FINISH_NORMAL;
4913
4914   if (coding->mode & CODING_MODE_LAST_BLOCK
4915       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4916     {
4917       const unsigned char *src = source + coding->consumed;
4918       unsigned char *dst = destination + coding->produced;
4919
4920       src_bytes -= coding->consumed;
4921       coding->errors++;
4922       if (COMPOSING_P (coding))
4923         DECODE_COMPOSITION_END ('1');
4924       while (src_bytes--)
4925         {
4926           int c = *src++;
4927           dst += CHAR_STRING (c, dst);
4928           coding->produced_char++;
4929         }
4930       coding->consumed = coding->consumed_char = src - source;
4931       coding->produced = dst - destination;
4932       coding->result = CODING_FINISH_NORMAL;
4933     }
4934
4935   if (!coding->dst_multibyte)
4936     {
4937       coding->produced = str_as_unibyte (destination, coding->produced);
4938       coding->produced_char = coding->produced;
4939     }
4940
4941   return coding->result;
4942 }
4943
4944 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  The
4945    multibyteness of the source is CODING->src_multibyte, the
4946    multibyteness of the result is always unibyte.  */
4947
4948 int
4949 encode_coding (coding, source, destination, src_bytes, dst_bytes)
4950      struct coding_system *coding;
4951      const unsigned char *source;
4952      unsigned char *destination;
4953      int src_bytes, dst_bytes;
4954 {
4955   coding->produced = coding->produced_char = 0;
4956   coding->consumed = coding->consumed_char = 0;
4957   coding->errors = 0;
4958   coding->result = CODING_FINISH_NORMAL;
4959
4960   switch (coding->type)
4961     {
4962     case coding_type_sjis:
4963       encode_coding_sjis_big5 (coding, source, destination,
4964                                src_bytes, dst_bytes, 1);
4965       break;
4966
4967     case coding_type_iso2022:
4968       encode_coding_iso2022 (coding, source, destination,
4969                              src_bytes, dst_bytes);
4970       break;
4971
4972     case coding_type_big5:
4973       encode_coding_sjis_big5 (coding, source, destination,
4974                                src_bytes, dst_bytes, 0);
4975       break;
4976
4977     case coding_type_emacs_mule:
4978       encode_coding_emacs_mule (coding, source, destination,
4979                                 src_bytes, dst_bytes);
4980       break;
4981
4982     case coding_type_ccl:
4983       ccl_coding_driver (coding, source, destination,
4984                          src_bytes, dst_bytes, 1);
4985       break;
4986
4987     default:
4988       encode_eol (coding, source, destination, src_bytes, dst_bytes);
4989     }
4990
4991   if (coding->mode & CODING_MODE_LAST_BLOCK
4992       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4993     {
4994       const unsigned char *src = source + coding->consumed;
4995       unsigned char *dst = destination + coding->produced;
4996
4997       if (coding->type == coding_type_iso2022)
4998         ENCODE_RESET_PLANE_AND_REGISTER;
4999       if (COMPOSING_P (coding))
5000         *dst++ = ISO_CODE_ESC, *dst++ = '1';
5001       if (coding->consumed < src_bytes)
5002         {
5003           int len = src_bytes - coding->consumed;
5004
5005           BCOPY_SHORT (src, dst, len);
5006           if (coding->src_multibyte)
5007             len = str_as_unibyte (dst, len);
5008           dst += len;
5009           coding->consumed = src_bytes;
5010         }
5011       coding->produced = coding->produced_char = dst - destination;
5012       coding->result = CODING_FINISH_NORMAL;
5013     }
5014
5015   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
5016       && coding->consumed == src_bytes)
5017     coding->result = CODING_FINISH_NORMAL;
5018
5019   return coding->result;
5020 }
5021
5022 /* Scan text in the region between *BEG and *END (byte positions),
5023    skip characters which we don't have to decode by coding system
5024    CODING at the head and tail, then set *BEG and *END to the region
5025    of the text we actually have to convert.  The caller should move
5026    the gap out of the region in advance if the region is from a
5027    buffer.
5028
5029    If STR is not NULL, *BEG and *END are indices into STR.  */
5030
5031 static void
5032 shrink_decoding_region (beg, end, coding, str)
5033      int *beg, *end;
5034      struct coding_system *coding;
5035      unsigned char *str;
5036 {
5037   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
5038   int eol_conversion;
5039   Lisp_Object translation_table;
5040
5041   if (coding->type == coding_type_ccl
5042       || coding->type == coding_type_undecided
5043       || coding->eol_type != CODING_EOL_LF
5044       || !NILP (coding->post_read_conversion)
5045       || coding->composing != COMPOSITION_DISABLED)
5046     {
5047       /* We can't skip any data.  */
5048       return;
5049     }
5050   if (coding->type == coding_type_no_conversion
5051       || coding->type == coding_type_raw_text
5052       || coding->type == coding_type_emacs_mule)
5053     {
5054       /* We need no conversion, but don't have to skip any data here.
5055          Decoding routine handles them effectively anyway.  */
5056       return;
5057     }
5058
5059   translation_table = coding->translation_table_for_decode;
5060   if (NILP (translation_table) && !NILP (Venable_character_translation))
5061     translation_table = Vstandard_translation_table_for_decode;
5062   if (CHAR_TABLE_P (translation_table))
5063     {
5064       int i;
5065       for (i = 0; i < 128; i++)
5066         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
5067           break;
5068       if (i < 128)
5069         /* Some ASCII character should be translated.  We give up
5070            shrinking.  */
5071         return;
5072     }
5073
5074   if (coding->heading_ascii >= 0)
5075     /* Detection routine has already found how much we can skip at the
5076        head.  */
5077     *beg += coding->heading_ascii;
5078
5079   if (str)
5080     {
5081       begp_orig = begp = str + *beg;
5082       endp_orig = endp = str + *end;
5083     }
5084   else
5085     {
5086       begp_orig = begp = BYTE_POS_ADDR (*beg);
5087       endp_orig = endp = begp + *end - *beg;
5088     }
5089
5090   eol_conversion = (coding->eol_type == CODING_EOL_CR
5091                     || coding->eol_type == CODING_EOL_CRLF);
5092
5093   switch (coding->type)
5094     {
5095     case coding_type_sjis:
5096     case coding_type_big5:
5097       /* We can skip all ASCII characters at the head.  */
5098       if (coding->heading_ascii < 0)
5099         {
5100           if (eol_conversion)
5101             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
5102           else
5103             while (begp < endp && *begp < 0x80) begp++;
5104         }
5105       /* We can skip all ASCII characters at the tail except for the
5106          second byte of SJIS or BIG5 code.  */
5107       if (eol_conversion)
5108         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
5109       else
5110         while (begp < endp && endp[-1] < 0x80) endp--;
5111       /* Do not consider LF as ascii if preceded by CR, since that
5112          confuses eol decoding. */
5113       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
5114         endp++;
5115       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
5116         endp++;
5117       break;
5118
5119     case coding_type_iso2022:
5120       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
5121         /* We can't skip any data.  */
5122         break;
5123       if (coding->heading_ascii < 0)
5124         {
5125           /* We can skip all ASCII characters at the head except for a
5126              few control codes.  */
5127           while (begp < endp && (c = *begp) < 0x80
5128                  && c != ISO_CODE_CR && c != ISO_CODE_SO
5129                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
5130                  && (!eol_conversion || c != ISO_CODE_LF))
5131             begp++;
5132         }
5133       switch (coding->category_idx)
5134         {
5135         case CODING_CATEGORY_IDX_ISO_8_1:
5136         case CODING_CATEGORY_IDX_ISO_8_2:
5137           /* We can skip all ASCII characters at the tail.  */
5138           if (eol_conversion)
5139             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
5140           else
5141             while (begp < endp && endp[-1] < 0x80) endp--;
5142           /* Do not consider LF as ascii if preceded by CR, since that
5143              confuses eol decoding. */
5144           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
5145             endp++;
5146           break;
5147
5148         case CODING_CATEGORY_IDX_ISO_7:
5149         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
5150           {
5151             /* We can skip all characters at the tail except for 8-bit
5152                codes and ESC and the following 2-byte at the tail.  */
5153             unsigned char *eight_bit = NULL;
5154
5155             if (eol_conversion)
5156               while (begp < endp
5157                      && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
5158                 {
5159                   if (!eight_bit && c & 0x80) eight_bit = endp;
5160                   endp--;
5161                 }
5162             else
5163               while (begp < endp
5164                      && (c = endp[-1]) != ISO_CODE_ESC)
5165                 {
5166                   if (!eight_bit && c & 0x80) eight_bit = endp;
5167                   endp--;
5168                 }
5169             /* Do not consider LF as ascii if preceded by CR, since that
5170                confuses eol decoding. */
5171             if (begp < endp && endp < endp_orig
5172                 && endp[-1] == '\r' && endp[0] == '\n')
5173               endp++;
5174             if (begp < endp && endp[-1] == ISO_CODE_ESC)
5175               {
5176                 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
5177                   /* This is an ASCII designation sequence.  We can
5178                      surely skip the tail.  But, if we have
5179                      encountered an 8-bit code, skip only the codes
5180                      after that.  */
5181                   endp = eight_bit ? eight_bit : endp + 2;
5182                 else
5183                   /* Hmmm, we can't skip the tail.  */
5184                   endp = endp_orig;
5185               }
5186             else if (eight_bit)
5187               endp = eight_bit;
5188           }
5189         }
5190       break;
5191
5192     default:
5193       abort ();
5194     }
5195   *beg += begp - begp_orig;
5196   *end += endp - endp_orig;
5197   return;
5198 }
5199
5200 /* Like shrink_decoding_region but for encoding.  */
5201
5202 static void
5203 shrink_encoding_region (beg, end, coding, str)
5204      int *beg, *end;
5205      struct coding_system *coding;
5206      unsigned char *str;
5207 {
5208   unsigned char *begp_orig, *begp, *endp_orig, *endp;
5209   int eol_conversion;
5210   Lisp_Object translation_table;
5211
5212   if (coding->type == coding_type_ccl
5213       || coding->eol_type == CODING_EOL_CRLF
5214       || coding->eol_type == CODING_EOL_CR
5215       || (coding->cmp_data && coding->cmp_data->used > 0))
5216     {
5217       /* We can't skip any data.  */
5218       return;
5219     }
5220   if (coding->type == coding_type_no_conversion
5221       || coding->type == coding_type_raw_text
5222       || coding->type == coding_type_emacs_mule
5223       || coding->type == coding_type_undecided)
5224     {
5225       /* We need no conversion, but don't have to skip any data here.
5226          Encoding routine handles them effectively anyway.  */
5227       return;
5228     }
5229
5230   translation_table = coding->translation_table_for_encode;
5231   if (NILP (translation_table) && !NILP (Venable_character_translation))
5232     translation_table = Vstandard_translation_table_for_encode;
5233   if (CHAR_TABLE_P (translation_table))
5234     {
5235       int i;
5236       for (i = 0; i < 128; i++)
5237         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
5238           break;
5239       if (i < 128)
5240         /* Some ASCII character should be translated.  We give up
5241            shrinking.  */
5242         return;
5243     }
5244
5245   if (str)
5246     {
5247       begp_orig = begp = str + *beg;
5248       endp_orig = endp = str + *end;
5249     }
5250   else
5251     {
5252       begp_orig = begp = BYTE_POS_ADDR (*beg);
5253       endp_orig = endp = begp + *end - *beg;
5254     }
5255
5256   eol_conversion = (coding->eol_type == CODING_EOL_CR
5257                     || coding->eol_type == CODING_EOL_CRLF);
5258
5259   /* Here, we don't have to check coding->pre_write_conversion because
5260      the caller is expected to have handled it already.  */
5261   switch (coding->type)
5262     {
5263     case coding_type_iso2022:
5264       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
5265         /* We can't skip any data.  */
5266         break;
5267       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
5268         {
5269           unsigned char *bol = begp;
5270           while (begp < endp && *begp < 0x80)
5271             {
5272               begp++;
5273               if (begp[-1] == '\n')
5274                 bol = begp;
5275             }
5276           begp = bol;
5277           goto label_skip_tail;
5278         }
5279       /* fall down ... */
5280
5281     case coding_type_sjis:
5282     case coding_type_big5:
5283       /* We can skip all ASCII characters at the head and tail.  */
5284       if (eol_conversion)
5285         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
5286       else
5287         while (begp < endp && *begp < 0x80) begp++;
5288     label_skip_tail:
5289       if (eol_conversion)
5290         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
5291       else
5292         while (begp < endp && *(endp - 1) < 0x80) endp--;
5293       break;
5294
5295     default:
5296       abort ();
5297     }
5298
5299   *beg += begp - begp_orig;
5300   *end += endp - endp_orig;
5301   return;
5302 }
5303
/* Shrinking a conversion region has some overhead of its own, so a
   region is only shrunk when its length exceeds this value.  */
static int shrink_conversion_region_threshhold = 1024;

/* If the range *BEG..*END is longer than the threshold above, narrow
   it with shrink_encoding_region (ENCODEP nonzero) or
   shrink_decoding_region (ENCODEP zero).  BEG and END are pointers
   to int; CODING and STR are passed through unchanged.  */
#define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep)        \
  do {                                                                  \
    if (shrink_conversion_region_threshhold < *(end) - *(beg))          \
      {                                                                 \
        if (!(encodep))                                                 \
          shrink_decoding_region (beg, end, coding, str);               \
        else                                                            \
          shrink_encoding_region (beg, end, coding, str);               \
      }                                                                 \
  } while (0)
5317
5318 static Lisp_Object
5319 code_convert_region_unwind (arg)
5320      Lisp_Object arg;
5321 {
5322   inhibit_pre_post_conversion = 0;
5323   Vlast_coding_system_used = arg;
5324   return Qnil;
5325 }
5326
5327 /* Store information about all compositions in the range FROM and TO
5328    of OBJ in memory blocks pointed by CODING->cmp_data.  OBJ is a
5329    buffer or a string, defaults to the current buffer.  */
5330
5331 void
5332 coding_save_composition (coding, from, to, obj)
5333      struct coding_system *coding;
5334      int from, to;
5335      Lisp_Object obj;
5336 {
5337   Lisp_Object prop;
5338   int start, end;
5339
5340   if (coding->composing == COMPOSITION_DISABLED)
5341     return;
5342   if (!coding->cmp_data)
5343     coding_allocate_composition_data (coding, from);
5344   if (!find_composition (from, to, &start, &end, &prop, obj)
5345       || end > to)
5346     return;
5347   if (start < from
5348       && (!find_composition (end, to, &start, &end, &prop, obj)
5349           || end > to))
5350     return;
5351   coding->composing = COMPOSITION_NO;
5352   do
5353     {
5354       if (COMPOSITION_VALID_P (start, end, prop))
5355         {
5356           enum composition_method method = COMPOSITION_METHOD (prop);
5357           if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
5358               >= COMPOSITION_DATA_SIZE)
5359             coding_allocate_composition_data (coding, from);
5360           /* For relative composition, we remember start and end
5361              positions, for the other compositions, we also remember
5362              components.  */
5363           CODING_ADD_COMPOSITION_START (coding, start - from, method);
5364           if (method != COMPOSITION_RELATIVE)
5365             {
5366               /* We must store a*/
5367               Lisp_Object val, ch;
5368
5369               val = COMPOSITION_COMPONENTS (prop);
5370               if (CONSP (val))
5371                 while (CONSP (val))
5372                   {
5373                     ch = XCAR (val), val = XCDR (val);
5374                     CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5375                   }
5376               else if (VECTORP (val) || STRINGP (val))
5377                 {
5378                   int len = (VECTORP (val)
5379                              ? XVECTOR (val)->size : SCHARS (val));
5380                   int i;
5381                   for (i = 0; i < len; i++)
5382                     {
5383                       ch = (STRINGP (val)
5384                             ? Faref (val, make_number (i))
5385                             : XVECTOR (val)->contents[i]);
5386                       CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5387                     }
5388                 }
5389               else              /* INTEGERP (val) */
5390                 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val));
5391             }
5392           CODING_ADD_COMPOSITION_END (coding, end - from);
5393         }
5394       start = end;
5395     }
5396   while (start < to
5397          && find_composition (start, to, &start, &end, &prop, obj)
5398          && end <= to);
5399
5400   /* Make coding->cmp_data point to the first memory block.  */
5401   while (coding->cmp_data->prev)
5402     coding->cmp_data = coding->cmp_data->prev;
5403   coding->cmp_data_start = 0;
5404 }
5405
5406 /* Reflect the saved information about compositions to OBJ.
5407    CODING->cmp_data points to a memory block for the information.  OBJ
5408    is a buffer or a string, defaults to the current buffer.  */
5409
5410 void
5411 coding_restore_composition (coding, obj)
5412      struct coding_system *coding;
5413      Lisp_Object obj;
5414 {
5415   struct composition_data *cmp_data = coding->cmp_data;
5416
5417   if (!cmp_data)
5418     return;
5419
5420   while (cmp_data->prev)
5421     cmp_data = cmp_data->prev;
5422
5423   while (cmp_data)
5424     {
5425       int i;
5426
5427       for (i = 0; i < cmp_data->used && cmp_data->data[i] > 0;
5428            i += cmp_data->data[i])
5429         {
5430           int *data = cmp_data->data + i;
5431           enum composition_method method = (enum composition_method) data[3];
5432           Lisp_Object components;
5433
5434           if (method == COMPOSITION_RELATIVE)
5435             components = Qnil;
5436           else
5437             {
5438               int len = data[0] - 4, j;
5439               Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5440
5441               if (method == COMPOSITION_WITH_RULE_ALTCHARS
5442                   && len % 2 == 0)
5443                 len --;
5444               for (j = 0; j < len; j++)
5445                 args[j] = make_number (data[4 + j]);
5446               components = (method == COMPOSITION_WITH_ALTCHARS
5447                             ? Fstring (len, args) : Fvector (len, args));
5448             }
5449           compose_text (data[1], data[2], components, Qnil, obj);
5450         }
5451       cmp_data = cmp_data->next;
5452     }
5453 }
5454
5455 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
5456    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
5457    coding system CODING, and return the status code of code conversion
5458    (currently, this value has no meaning).
5459
5460    How many characters (and bytes) are converted to how many
5461    characters (and bytes) are recorded in members of the structure
5462    CODING.
5463
5464    If REPLACE is nonzero, we do various things as if the original text
5465    is deleted and a new text is inserted.  See the comments in
5466    replace_range (insdel.c) to know what we are doing.
5467
5468    If REPLACE is zero, it is assumed that the source text is unibyte.
5469    Otherwise, it is assumed that the source text is multibyte.  */
5470
5471 int
5472 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
5473      int from, from_byte, to, to_byte, encodep, replace;
5474      struct coding_system *coding;
5475 {
5476   int len = to - from, len_byte = to_byte - from_byte;
5477   int nchars_del = 0, nbytes_del = 0;
5478   int require, inserted, inserted_byte;
5479   int head_skip, tail_skip, total_skip = 0;
5480   Lisp_Object saved_coding_symbol;
5481   int first = 1;
5482   unsigned char *src, *dst;
5483   Lisp_Object deletion;
5484   int orig_point = PT, orig_len = len;
5485   int prev_Z;
5486   int multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
5487
5488   deletion = Qnil;
5489   saved_coding_symbol = coding->symbol;
5490
5491   if (from < PT && PT < to)
5492     {
5493       TEMP_SET_PT_BOTH (from, from_byte);
5494       orig_point = from;
5495     }
5496
5497   if (replace)
5498     {
5499       int saved_from = from;
5500       int saved_inhibit_modification_hooks;
5501
5502       prepare_to_modify_buffer (from, to, &from);
5503       if (saved_from != from)
5504         {
5505           to = from + len;
5506           from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
5507           len_byte = to_byte - from_byte;
5508         }
5509
5510       /* The code conversion routine can not preserve text properties
5511          for now.  So, we must remove all text properties in the
5512          region.  Here, we must suppress all modification hooks.  */
5513       saved_inhibit_modification_hooks = inhibit_modification_hooks;
5514       inhibit_modification_hooks = 1;
5515       Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
5516       inhibit_modification_hooks = saved_inhibit_modification_hooks;
5517     }
5518
5519   if (! encodep && CODING_REQUIRE_DETECTION (coding))
5520     {
5521       /* We must detect encoding of text and eol format.  */
5522
5523       if (from < GPT && to > GPT)
5524         move_gap_both (from, from_byte);
5525       if (coding->type == coding_type_undecided)
5526         {
5527           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
5528           if (coding->type == coding_type_undecided)
5529             {
5530               /* It seems that the text contains only ASCII, but we
5531                  should not leave it undecided because the deeper
5532                  decoding routine (decode_coding) tries to detect the
5533                  encodings again in vain.  */
5534               coding->type = coding_type_emacs_mule;
5535               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
5536               /* As emacs-mule decoder will handle composition, we
5537                  need this setting to allocate coding->cmp_data
5538                  later.  */
5539               coding->composing = COMPOSITION_NO;
5540             }
5541         }
5542       if (coding->eol_type == CODING_EOL_UNDECIDED
5543           && coding->type != coding_type_ccl)
5544         {
5545           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
5546           if (coding->eol_type == CODING_EOL_UNDECIDED)
5547             coding->eol_type = CODING_EOL_LF;
5548           /* We had better recover the original eol format if we
5549              encounter an inconsistent eol format while decoding.  */
5550           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5551         }
5552     }
5553
5554   /* Now we convert the text.  */
5555
5556   /* For encoding, we must process pre-write-conversion in advance.  */
5557   if (! inhibit_pre_post_conversion
5558       && encodep
5559       && SYMBOLP (coding->pre_write_conversion)
5560       && ! NILP (Ffboundp (coding->pre_write_conversion)))
5561     {
5562       /* The function in pre-write-conversion may put a new text in a
5563          new buffer.  */
5564       struct buffer *prev = current_buffer;
5565       Lisp_Object new;
5566
5567       record_unwind_protect (code_convert_region_unwind,
5568                              Vlast_coding_system_used);
5569       /* We should not call any more pre-write/post-read-conversion
5570          functions while this pre-write-conversion is running.  */
5571       inhibit_pre_post_conversion = 1;
5572       call2 (coding->pre_write_conversion,
5573              make_number (from), make_number (to));
5574       inhibit_pre_post_conversion = 0;
5575       /* Discard the unwind protect.  */
5576       specpdl_ptr--;
5577
5578       if (current_buffer != prev)
5579         {
5580           len = ZV - BEGV;
5581           new = Fcurrent_buffer ();
5582           set_buffer_internal_1 (prev);
5583           del_range_2 (from, from_byte, to, to_byte, 0);
5584           TEMP_SET_PT_BOTH (from, from_byte);
5585           insert_from_buffer (XBUFFER (new), 1, len, 0);
5586           Fkill_buffer (new);
5587           if (orig_point >= to)
5588             orig_point += len - orig_len;
5589           else if (orig_point > from)
5590             orig_point = from;
5591           orig_len = len;
5592           to = from + len;
5593           from_byte = CHAR_TO_BYTE (from);
5594           to_byte = CHAR_TO_BYTE (to);
5595           len_byte = to_byte - from_byte;
5596           TEMP_SET_PT_BOTH (from, from_byte);
5597         }
5598     }
5599
5600   if (replace)
5601     {
5602       if (! EQ (current_buffer->undo_list, Qt))
5603         deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
5604       else
5605         {
5606           nchars_del = to - from;
5607           nbytes_del = to_byte - from_byte;
5608         }
5609     }
5610
5611   if (coding->composing != COMPOSITION_DISABLED)
5612     {
5613       if (encodep)
5614         coding_save_composition (coding, from, to, Fcurrent_buffer ());
5615       else
5616         coding_allocate_composition_data (coding, from);
5617     }
5618
5619   /* Try to skip the heading and tailing ASCIIs.  */
5620   if (coding->type != coding_type_ccl)
5621     {
5622       int from_byte_orig = from_byte, to_byte_orig = to_byte;
5623
5624       if (from < GPT && GPT < to)
5625         move_gap_both (from, from_byte);
5626       SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
5627       if (from_byte == to_byte
5628           && (encodep || NILP (coding->post_read_conversion))
5629           && ! CODING_REQUIRE_FLUSHING (coding))
5630         {
5631           coding->produced = len_byte;
5632           coding->produced_char = len;
5633           if (!replace)
5634             /* We must record and adjust for this new text now.  */
5635             adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
5636           return 0;
5637         }
5638
5639       head_skip = from_byte - from_byte_orig;
5640       tail_skip = to_byte_orig - to_byte;
5641       total_skip = head_skip + tail_skip;
5642       from += head_skip;
5643       to -= tail_skip;
5644       len -= total_skip; len_byte -= total_skip;
5645     }
5646
5647   /* For conversion, we must put the gap before the text in addition to
5648      making the gap larger for efficient decoding.  The required gap
5649      size starts from 2000 which is the magic number used in make_gap.
5650      But, after one batch of conversion, it will be incremented if we
5651      find that it is not enough .  */
5652   require = 2000;
5653
5654   if (GAP_SIZE  < require)
5655     make_gap (require - GAP_SIZE);
5656   move_gap_both (from, from_byte);
5657
5658   inserted = inserted_byte = 0;
5659
5660   GAP_SIZE += len_byte;
5661   ZV -= len;
5662   Z -= len;
5663   ZV_BYTE -= len_byte;
5664   Z_BYTE -= len_byte;
5665
5666   if (GPT - BEG < BEG_UNCHANGED)
5667     BEG_UNCHANGED = GPT - BEG;
5668   if (Z - GPT < END_UNCHANGED)
5669     END_UNCHANGED = Z - GPT;
5670
5671   if (!encodep && coding->src_multibyte)
5672     {
5673       /* Decoding routines expects that the source text is unibyte.
5674          We must convert 8-bit characters of multibyte form to
5675          unibyte.  */
5676       int len_byte_orig = len_byte;
5677       len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte);
5678       if (len_byte < len_byte_orig)
5679         safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte,
5680                     len_byte);
5681       coding->src_multibyte = 0;
5682     }
5683
5684   for (;;)
5685     {
5686       int result;
5687
5688       /* The buffer memory is now:
5689          +--------+converted-text+---------+-------original-text-------+---+
5690          |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
5691                   |<---------------------- GAP ----------------------->|  */
5692       src = GAP_END_ADDR - len_byte;
5693       dst = GPT_ADDR + inserted_byte;
5694
5695       if (encodep)
5696         result = encode_coding (coding, src, dst, len_byte, 0);
5697       else
5698         {
5699           if (coding->composing != COMPOSITION_DISABLED)
5700             coding->cmp_data->char_offset = from + inserted;
5701           result = decode_coding (coding, src, dst, len_byte, 0);
5702         }
5703
5704       /* The buffer memory is now:
5705          +--------+-------converted-text----+--+------original-text----+---+
5706          |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
5707                   |<---------------------- GAP ----------------------->|  */
5708
5709       inserted += coding->produced_char;
5710       inserted_byte += coding->produced;
5711       len_byte -= coding->consumed;
5712
5713       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5714         {
5715           coding_allocate_composition_data (coding, from + inserted);
5716           continue;
5717         }
5718
5719       src += coding->consumed;
5720       dst += coding->produced;
5721
5722       if (result == CODING_FINISH_NORMAL)
5723         {
5724           src += len_byte;
5725           break;
5726         }
5727       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
5728         {
5729           unsigned char *pend = dst, *p = pend - inserted_byte;
5730           Lisp_Object eol_type;
5731
5732           /* Encode LFs back to the original eol format (CR or CRLF).  */
5733           if (coding->eol_type == CODING_EOL_CR)
5734             {
5735               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
5736             }
5737           else
5738             {
5739               int count = 0;
5740
5741               while (p < pend) if (*p++ == '\n') count++;
5742               if (src - dst < count)
5743                 {
5744                   /* We don't have sufficient room for encoding LFs
5745                      back to CRLF.  We must record converted and
5746                      not-yet-converted text back to the buffer
5747                      content, enlarge the gap, then record them out of
5748                      the buffer contents again.  */
5749                   int add = len_byte + inserted_byte;
5750
5751                   GAP_SIZE -= add;
5752                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5753                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
5754                   make_gap (count - GAP_SIZE);
5755                   GAP_SIZE += add;
5756                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5757                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5758                   /* Don't forget to update SRC, DST, and PEND.  */
5759                   src = GAP_END_ADDR - len_byte;
5760                   dst = GPT_ADDR + inserted_byte;
5761                   pend = dst;
5762                 }
5763               inserted += count;
5764               inserted_byte += count;
5765               coding->produced += count;
5766               p = dst = pend + count;
5767               while (count)
5768                 {
5769                   *--p = *--pend;
5770                   if (*p == '\n') count--, *--p = '\r';
5771                 }
5772             }
5773
5774           /* Suppress eol-format conversion in the further conversion.  */
5775           coding->eol_type = CODING_EOL_LF;
5776
5777           /* Set the coding system symbol to that for Unix-like EOL.  */
5778           eol_type = Fget (saved_coding_symbol, Qeol_type);
5779           if (VECTORP (eol_type)
5780               && XVECTOR (eol_type)->size == 3
5781               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
5782             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
5783           else
5784             coding->symbol = saved_coding_symbol;
5785
5786           continue;
5787         }
5788       if (len_byte <= 0)
5789         {
5790           if (coding->type != coding_type_ccl
5791               || coding->mode & CODING_MODE_LAST_BLOCK)
5792             break;
5793           coding->mode |= CODING_MODE_LAST_BLOCK;
5794           continue;
5795         }
5796       if (result == CODING_FINISH_INSUFFICIENT_SRC)
5797         {
5798           /* The source text ends in invalid codes.  Let's just
5799              make them valid buffer contents, and finish conversion.  */
5800           if (multibyte_p)
5801             {
5802               unsigned char *start = dst;
5803
5804               inserted += len_byte;
5805               while (len_byte--)
5806                 {
5807                   int c = *src++;
5808                   dst += CHAR_STRING (c, dst);
5809                 }
5810
5811               inserted_byte += dst - start;
5812             }
5813           else
5814             {
5815               inserted += len_byte;
5816               inserted_byte += len_byte;
5817               while (len_byte--)
5818                 *dst++ = *src++;
5819             }
5820           break;
5821         }
5822       if (result == CODING_FINISH_INTERRUPT)
5823         {
5824           /* The conversion procedure was interrupted by a user.  */
5825           break;
5826         }
5827       /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST  */
5828       if (coding->consumed < 1)
5829         {
5830           /* It's quite strange to require more memory without
5831              consuming any bytes.  Perhaps CCL program bug.  */
5832           break;
5833         }
5834       if (first)
5835         {
5836           /* We have just done the first batch of conversion which was
5837              stopped because of insufficient gap.  Let's reconsider the
5838              required gap size (i.e. SRT - DST) now.
5839
5840              We have converted ORIG bytes (== coding->consumed) into
5841              NEW bytes (coding->produced).  To convert the remaining
5842              LEN bytes, we may need REQUIRE bytes of gap, where:
5843                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
5844                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
5845              Here, we are sure that NEW >= ORIG.  */
5846           float ratio;
5847
5848           if (coding->produced <= coding->consumed)
5849             {
5850               /* This happens because of CCL-based coding system with
5851                  eol-type CRLF.  */
5852               require = 0;
5853             }
5854           else
5855             {
5856               ratio = (coding->produced - coding->consumed) / coding->consumed;
5857               require = len_byte * ratio;
5858             }
5859           first = 0;
5860         }
5861       if ((src - dst) < (require + 2000))
5862         {
5863           /* See the comment above the previous call of make_gap.  */
5864           int add = len_byte + inserted_byte;
5865
5866           GAP_SIZE -= add;
5867           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5868           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5869           make_gap (require + 2000);
5870           GAP_SIZE += add;
5871           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5872           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5873         }
5874     }
5875   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
5876
5877   if (encodep && coding->dst_multibyte)
5878     {
5879       /* The output is unibyte.  We must convert 8-bit characters to
5880          multibyte form.  */
5881       if (inserted_byte * 2 > GAP_SIZE)
5882         {
5883           GAP_SIZE -= inserted_byte;
5884           ZV += inserted_byte; Z += inserted_byte;
5885           ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
5886           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5887           make_gap (inserted_byte - GAP_SIZE);
5888           GAP_SIZE += inserted_byte;
5889           ZV -= inserted_byte; Z -= inserted_byte;
5890           ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte;
5891           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5892         }
5893       inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
5894     }
5895
5896   /* If we shrank the conversion area, adjust it now.  */
5897   if (total_skip > 0)
5898     {
5899       if (tail_skip > 0)
5900         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
5901       inserted += total_skip; inserted_byte += total_skip;
5902       GAP_SIZE += total_skip;
5903       GPT -= head_skip; GPT_BYTE -= head_skip;
5904       ZV -= total_skip; ZV_BYTE -= total_skip;
5905       Z -= total_skip; Z_BYTE -= total_skip;
5906       from -= head_skip; from_byte -= head_skip;
5907       to += tail_skip; to_byte += tail_skip;
5908     }
5909
5910   prev_Z = Z;
5911   if (! EQ (current_buffer->undo_list, Qt))
5912     adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
5913   else
5914     adjust_after_replace_noundo (from, from_byte, nchars_del, nbytes_del,
5915                                  inserted, inserted_byte);
5916   inserted = Z - prev_Z;
5917
5918   if (!encodep && coding->cmp_data && coding->cmp_data->used)
5919     coding_restore_composition (coding, Fcurrent_buffer ());
5920   coding_free_composition_data (coding);
5921
5922   if (! inhibit_pre_post_conversion
5923       && ! encodep && ! NILP (coding->post_read_conversion))
5924     {
5925       Lisp_Object val;
5926       Lisp_Object saved_coding_system;
5927
5928       if (from != PT)
5929         TEMP_SET_PT_BOTH (from, from_byte);
5930       prev_Z = Z;
5931       record_unwind_protect (code_convert_region_unwind,
5932                              Vlast_coding_system_used);
5933       saved_coding_system = Vlast_coding_system_used;
5934       Vlast_coding_system_used = coding->symbol;
5935       /* We should not call any more pre-write/post-read-conversion
5936          functions while this post-read-conversion is running.  */
5937       inhibit_pre_post_conversion = 1;
5938       val = call1 (coding->post_read_conversion, make_number (inserted));
5939       inhibit_pre_post_conversion = 0;
5940       coding->symbol = Vlast_coding_system_used;
5941       Vlast_coding_system_used = saved_coding_system;
5942       /* Discard the unwind protect.  */
5943       specpdl_ptr--;
5944       CHECK_NUMBER (val);
5945       inserted += Z - prev_Z;
5946     }
5947
5948   if (orig_point >= from)
5949     {
5950       if (orig_point >= from + orig_len)
5951         orig_point += inserted - orig_len;
5952       else
5953         orig_point = from;
5954       TEMP_SET_PT (orig_point);
5955     }
5956
5957   if (replace)
5958     {
5959       signal_after_change (from, to - from, inserted);
5960       update_compositions (from, from + inserted, CHECK_BORDER);
5961     }
5962
5963   {
5964     coding->consumed = to_byte - from_byte;
5965     coding->consumed_char = to - from;
5966     coding->produced = inserted_byte;
5967     coding->produced_char = inserted;
5968   }
5969
5970   return 0;
5971 }
5972
5973 Lisp_Object
5974 run_pre_post_conversion_on_str (str, coding, encodep)
5975      Lisp_Object str;
5976      struct coding_system *coding;
5977      int encodep;
5978 {
5979   int count = SPECPDL_INDEX ();
5980   struct gcpro gcpro1, gcpro2;
5981   int multibyte = STRING_MULTIBYTE (str);
5982   Lisp_Object buffer;
5983   struct buffer *buf;
5984   Lisp_Object old_deactivate_mark;
5985
5986   record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
5987   record_unwind_protect (code_convert_region_unwind,
5988                          Vlast_coding_system_used);
5989   /* It is not crucial to specbind this.  */
5990   old_deactivate_mark = Vdeactivate_mark;
5991   GCPRO2 (str, old_deactivate_mark);
5992
5993   buffer = Fget_buffer_create (build_string (" *code-converting-work*"));
5994   buf = XBUFFER (buffer);
5995
5996   buf->directory = current_buffer->directory;
5997   buf->read_only = Qnil;
5998   buf->filename = Qnil;
5999   buf->undo_list = Qt;
6000   buf->overlays_before = Qnil;
6001   buf->overlays_after = Qnil;
6002
6003   set_buffer_internal (buf);
6004   /* We must insert the contents of STR as is without
6005      unibyte<->multibyte conversion.  For that, we adjust the
6006      multibyteness of the working buffer to that of STR.  */
6007   Ferase_buffer ();
6008   buf->enable_multibyte_characters = multibyte ? Qt : Qnil;
6009
6010   insert_from_string (str, 0, 0,
6011                       SCHARS (str), SBYTES (str), 0);
6012   UNGCPRO;
6013   inhibit_pre_post_conversion = 1;
6014   if (encodep)
6015     call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
6016   else
6017     {
6018       Vlast_coding_system_used = coding->symbol;
6019       TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
6020       call1 (coding->post_read_conversion, make_number (Z - BEG));
6021       coding->symbol = Vlast_coding_system_used;
6022     }
6023   inhibit_pre_post_conversion = 0;
6024   Vdeactivate_mark = old_deactivate_mark;
6025   str = make_buffer_string (BEG, Z, 1);
6026   return unbind_to (count, str);
6027 }
6028
6029 Lisp_Object
6030 decode_coding_string (str, coding, nocopy)
6031      Lisp_Object str;
6032      struct coding_system *coding;
6033      int nocopy;
6034 {
6035   int len;
6036   struct conversion_buffer buf;
6037   int from, to_byte;
6038   Lisp_Object saved_coding_symbol;
6039   int result;
6040   int require_decoding;
6041   int shrinked_bytes = 0;
6042   Lisp_Object newstr;
6043   int consumed, consumed_char, produced, produced_char;
6044
6045   from = 0;
6046   to_byte = SBYTES (str);
6047
6048   saved_coding_symbol = coding->symbol;
6049   coding->src_multibyte = STRING_MULTIBYTE (str);
6050   coding->dst_multibyte = 1;
6051   if (CODING_REQUIRE_DETECTION (coding))
6052     {
6053       /* See the comments in code_convert_region.  */
6054       if (coding->type == coding_type_undecided)
6055         {
6056           detect_coding (coding, SDATA (str), to_byte);
6057           if (coding->type == coding_type_undecided)
6058             {
6059               coding->type = coding_type_emacs_mule;
6060               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
6061               /* As emacs-mule decoder will handle composition, we
6062                  need this setting to allocate coding->cmp_data
6063                  later.  */
6064               coding->composing = COMPOSITION_NO;
6065             }
6066         }
6067       if (coding->eol_type == CODING_EOL_UNDECIDED
6068           && coding->type != coding_type_ccl)
6069         {
6070           saved_coding_symbol = coding->symbol;
6071           detect_eol (coding, SDATA (str), to_byte);
6072           if (coding->eol_type == CODING_EOL_UNDECIDED)
6073             coding->eol_type = CODING_EOL_LF;
6074           /* We had better recover the original eol format if we
6075              encounter an inconsistent eol format while decoding.  */
6076           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
6077         }
6078     }
6079
6080   if (coding->type == coding_type_no_conversion
6081       || coding->type == coding_type_raw_text)
6082     coding->dst_multibyte = 0;
6083
6084   require_decoding = CODING_REQUIRE_DECODING (coding);
6085
6086   if (STRING_MULTIBYTE (str))
6087     {
6088       /* Decoding routines expect the source text to be unibyte.  */
6089       str = Fstring_as_unibyte (str);
6090       to_byte = SBYTES (str);
6091       nocopy = 1;
6092       coding->src_multibyte = 0;
6093     }
6094
6095   /* Try to skip the heading and tailing ASCIIs.  */
6096   if (require_decoding && coding->type != coding_type_ccl)
6097     {
6098       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
6099                                 0);
6100       if (from == to_byte)
6101         require_decoding = 0;
6102       shrinked_bytes = from + (SBYTES (str) - to_byte);
6103     }
6104
6105   if (!require_decoding
6106       && !(SYMBOLP (coding->post_read_conversion)
6107            && !NILP (Ffboundp (coding->post_read_conversion))))
6108     {
6109       coding->consumed = SBYTES (str);
6110       coding->consumed_char = SCHARS (str);
6111       if (coding->dst_multibyte)
6112         {
6113           str = Fstring_as_multibyte (str);
6114           nocopy = 1;
6115         }
6116       coding->produced = SBYTES (str);
6117       coding->produced_char = SCHARS (str);
6118       return (nocopy ? str : Fcopy_sequence (str));
6119     }
6120
6121   if (coding->composing != COMPOSITION_DISABLED)
6122     coding_allocate_composition_data (coding, from);
6123   len = decoding_buffer_size (coding, to_byte - from);
6124   allocate_conversion_buffer (buf, len);
6125
6126   consumed = consumed_char = produced = produced_char = 0;
6127   while (1)
6128     {
6129       result = decode_coding (coding, SDATA (str) + from + consumed,
6130                               buf.data + produced, to_byte - from - consumed,
6131                               buf.size - produced);
6132       consumed += coding->consumed;
6133       consumed_char += coding->consumed_char;
6134       produced += coding->produced;
6135       produced_char += coding->produced_char;
6136       if (result == CODING_FINISH_NORMAL
6137           || (result == CODING_FINISH_INSUFFICIENT_SRC
6138               && coding->consumed == 0))
6139         break;
6140       if (result == CODING_FINISH_INSUFFICIENT_CMP)
6141         coding_allocate_composition_data (coding, from + produced_char);
6142       else if (result == CODING_FINISH_INSUFFICIENT_DST)
6143         extend_conversion_buffer (&buf);
6144       else if (result == CODING_FINISH_INCONSISTENT_EOL)
6145         {
6146           Lisp_Object eol_type;
6147
6148           /* Recover the original EOL format.  */
6149           if (coding->eol_type == CODING_EOL_CR)
6150             {
6151               unsigned char *p;
6152               for (p = buf.data; p < buf.data + produced; p++)
6153                 if (*p == '\n') *p = '\r';
6154             }
6155           else if (coding->eol_type == CODING_EOL_CRLF)
6156             {
6157               int num_eol = 0;
6158               unsigned char *p0, *p1;
6159               for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++)
6160                 if (*p0 == '\n') num_eol++;
6161               if (produced + num_eol >= buf.size)
6162                 extend_conversion_buffer (&buf);
6163               for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;)
6164                 {
6165                   *--p1 = *--p0;
6166                   if (*p0 == '\n') *--p1 = '\r';
6167                 }
6168               produced += num_eol;
6169               produced_char += num_eol;
6170             }
6171           /* Suppress eol-format conversion in the further conversion.  */
6172           coding->eol_type = CODING_EOL_LF;
6173
6174           /* Set the coding system symbol to that for Unix-like EOL.  */
6175           eol_type = Fget (saved_coding_symbol, Qeol_type);
6176           if (VECTORP (eol_type)
6177               && XVECTOR (eol_type)->size == 3
6178               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
6179             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
6180           else
6181             coding->symbol = saved_coding_symbol;
6182
6183
6184         }
6185     }
6186
6187   coding->consumed = consumed;
6188   coding->consumed_char = consumed_char;
6189   coding->produced = produced;
6190   coding->produced_char = produced_char;
6191
6192   if (coding->dst_multibyte)
6193     newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes,
6194                                            produced + shrinked_bytes);
6195   else
6196     newstr = make_uninit_string (produced + shrinked_bytes);
6197   if (from > 0)
6198     STRING_COPYIN (newstr, 0, SDATA (str), from);
6199   STRING_COPYIN (newstr, from, buf.data, produced);
6200   if (shrinked_bytes > from)
6201     STRING_COPYIN (newstr, from + produced,
6202                    SDATA (str) + to_byte,
6203                    shrinked_bytes - from);
6204   free_conversion_buffer (&buf);
6205
6206   if (coding->cmp_data && coding->cmp_data->used)
6207     coding_restore_composition (coding, newstr);
6208   coding_free_composition_data (coding);
6209
6210   if (SYMBOLP (coding->post_read_conversion)
6211       && !NILP (Ffboundp (coding->post_read_conversion)))
6212     newstr = run_pre_post_conversion_on_str (newstr, coding, 0);
6213
6214   return newstr;
6215 }
6216
6217 Lisp_Object
6218 encode_coding_string (str, coding, nocopy)
6219      Lisp_Object str;
6220      struct coding_system *coding;
6221      int nocopy;
6222 {
6223   int len;
6224   struct conversion_buffer buf;
6225   int from, to, to_byte;
6226   int result;
6227   int shrinked_bytes = 0;
6228   Lisp_Object newstr;
6229   int consumed, consumed_char, produced, produced_char;
6230
6231   if (SYMBOLP (coding->pre_write_conversion)
6232       && !NILP (Ffboundp (coding->pre_write_conversion)))
6233     str = run_pre_post_conversion_on_str (str, coding, 1);
6234
6235   from = 0;
6236   to = SCHARS (str);
6237   to_byte = SBYTES (str);
6238
6239   /* Encoding routines determine the multibyteness of the source text
6240      by coding->src_multibyte.  */
6241   coding->src_multibyte = STRING_MULTIBYTE (str);
6242   coding->dst_multibyte = 0;
6243   if (! CODING_REQUIRE_ENCODING (coding))
6244     {
6245       coding->consumed = SBYTES (str);
6246       coding->consumed_char = SCHARS (str);
6247       if (STRING_MULTIBYTE (str))
6248         {
6249           str = Fstring_as_unibyte (str);
6250           nocopy = 1;
6251         }
6252       coding->produced = SBYTES (str);
6253       coding->produced_char = SCHARS (str);
6254       return (nocopy ? str : Fcopy_sequence (str));
6255     }
6256
6257   if (coding->composing != COMPOSITION_DISABLED)
6258     coding_save_composition (coding, from, to, str);
6259
6260   /* Try to skip the heading and tailing ASCIIs.  */
6261   if (coding->type != coding_type_ccl)
6262     {
6263       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
6264                                 1);
6265       if (from == to_byte)
6266         return (nocopy ? str : Fcopy_sequence (str));
6267       shrinked_bytes = from + (SBYTES (str) - to_byte);
6268     }
6269
6270   len = encoding_buffer_size (coding, to_byte - from);
6271   allocate_conversion_buffer (buf, len);
6272
6273   consumed = consumed_char = produced = produced_char = 0;
6274   while (1)
6275     {
6276       result = encode_coding (coding, SDATA (str) + from + consumed,
6277                               buf.data + produced, to_byte - from - consumed,
6278                               buf.size - produced);
6279       consumed += coding->consumed;
6280       consumed_char += coding->consumed_char;
6281       produced += coding->produced;
6282       produced_char += coding->produced_char;
6283       if (result == CODING_FINISH_NORMAL
6284           || (result == CODING_FINISH_INSUFFICIENT_SRC
6285               && coding->consumed == 0))
6286         break;
6287       /* Now result should be CODING_FINISH_INSUFFICIENT_DST.  */
6288       extend_conversion_buffer (&buf);
6289     }
6290
6291   coding->consumed = consumed;
6292   coding->consumed_char = consumed_char;
6293   coding->produced = produced;
6294   coding->produced_char = produced_char;
6295
6296   newstr = make_uninit_string (produced + shrinked_bytes);
6297   if (from > 0)
6298     STRING_COPYIN (newstr, 0, SDATA (str), from);
6299   STRING_COPYIN (newstr, from, buf.data, produced);
6300   if (shrinked_bytes > from)
6301     STRING_COPYIN (newstr, from + produced,
6302                    SDATA (str) + to_byte,
6303                    shrinked_bytes - from);
6304
6305   free_conversion_buffer (&buf);
6306   coding_free_composition_data (coding);
6307
6308   return newstr;
6309 }
6310
6311 \f
6312 #ifdef emacs
6313 /*** 8. Emacs Lisp library functions ***/
6314
6315 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6316        doc: /* Return t if OBJECT is nil or a coding-system.
6317 See the documentation of `make-coding-system' for information
6318 about coding-system objects.  */)
6319      (obj)
6320      Lisp_Object obj;
6321 {
6322   if (NILP (obj))
6323     return Qt;
6324   if (!SYMBOLP (obj))
6325     return Qnil;
6326   /* Get coding-spec vector for OBJ.  */
6327   obj = Fget (obj, Qcoding_system);
6328   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
6329           ? Qt : Qnil);
6330 }
6331
6332 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6333        Sread_non_nil_coding_system, 1, 1, 0,
6334        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
6335      (prompt)
6336      Lisp_Object prompt;
6337 {
6338   Lisp_Object val;
6339   do
6340     {
6341       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6342                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6343     }
6344   while (SCHARS (val) == 0);
6345   return (Fintern (val, Qnil));
6346 }
6347
6348 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6349        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6350 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.  */)
6351      (prompt, default_coding_system)
6352      Lisp_Object prompt, default_coding_system;
6353 {
6354   Lisp_Object val;
6355   if (SYMBOLP (default_coding_system))
6356     default_coding_system = SYMBOL_NAME (default_coding_system);
6357   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6358                           Qt, Qnil, Qcoding_system_history,
6359                           default_coding_system, Qnil);
6360   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
6361 }
6362
6363 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6364        1, 1, 0,
6365        doc: /* Check validity of CODING-SYSTEM.
6366 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
6367 It is valid if it is a symbol with a non-nil `coding-system' property.
6368 The value of property should be a vector of length 5.  */)
6369      (coding_system)
6370      Lisp_Object coding_system;
6371 {
6372   CHECK_SYMBOL (coding_system);
6373   if (!NILP (Fcoding_system_p (coding_system)))
6374     return coding_system;
6375   while (1)
6376     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6377 }
6378 \f
6379 Lisp_Object
6380 detect_coding_system (src, src_bytes, highest, multibytep)
6381      const unsigned char *src;
6382      int src_bytes, highest;
6383      int multibytep;
6384 {
6385   int coding_mask, eol_type;
6386   Lisp_Object val, tmp;
6387   int dummy;
6388
6389   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy, multibytep);
6390   eol_type  = detect_eol_type (src, src_bytes, &dummy);
6391   if (eol_type == CODING_EOL_INCONSISTENT)
6392     eol_type = CODING_EOL_UNDECIDED;
6393
6394   if (!coding_mask)
6395     {
6396       val = Qundecided;
6397       if (eol_type != CODING_EOL_UNDECIDED)
6398         {
6399           Lisp_Object val2;
6400           val2 = Fget (Qundecided, Qeol_type);
6401           if (VECTORP (val2))
6402             val = XVECTOR (val2)->contents[eol_type];
6403         }
6404       return (highest ? val : Fcons (val, Qnil));
6405     }
6406
6407   /* At first, gather possible coding systems in VAL.  */
6408   val = Qnil;
6409   for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp))
6410     {
6411       Lisp_Object category_val, category_index;
6412
6413       category_index = Fget (XCAR (tmp), Qcoding_category_index);
6414       category_val = Fsymbol_value (XCAR (tmp));
6415       if (!NILP (category_val)
6416           && NATNUMP (category_index)
6417           && (coding_mask & (1 << XFASTINT (category_index))))
6418         {
6419           val = Fcons (category_val, val);
6420           if (highest)
6421             break;
6422         }
6423     }
6424   if (!highest)
6425     val = Fnreverse (val);
6426
6427   /* Then, replace the elements with subsidiary coding systems.  */
6428   for (tmp = val; CONSP (tmp); tmp = XCDR (tmp))
6429     {
6430       if (eol_type != CODING_EOL_UNDECIDED
6431           && eol_type != CODING_EOL_INCONSISTENT)
6432         {
6433           Lisp_Object eol;
6434           eol = Fget (XCAR (tmp), Qeol_type);
6435           if (VECTORP (eol))
6436             XSETCAR (tmp, XVECTOR (eol)->contents[eol_type]);
6437         }
6438     }
6439   return (highest ? XCAR (val) : val);
6440 }
6441
6442 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6443        2, 3, 0,
6444        doc: /* Detect how the byte sequence in the region is encoded.
6445 Return a list of possible coding systems used on decoding a byte
6446 sequence containing the bytes in the region between START and END when
6447 the coding system `undecided' is specified.  The list is ordered by
6448 priority decided in the current language environment.
6449
6450 If only ASCII characters are found, it returns a list of single element
6451 `undecided' or its subsidiary coding system according to a detected
6452 end-of-line format.
6453
6454 If optional argument HIGHEST is non-nil, return the coding system of
6455 highest priority.  */)
6456      (start, end, highest)
6457      Lisp_Object start, end, highest;
6458 {
6459   int from, to;
6460   int from_byte, to_byte;
6461   int include_anchor_byte = 0;
6462
6463   CHECK_NUMBER_COERCE_MARKER (start);
6464   CHECK_NUMBER_COERCE_MARKER (end);
6465
6466   validate_region (&start, &end);
6467   from = XINT (start), to = XINT (end);
6468   from_byte = CHAR_TO_BYTE (from);
6469   to_byte = CHAR_TO_BYTE (to);
6470
6471   if (from < GPT && to >= GPT)
6472     move_gap_both (to, to_byte);
6473   /* If we an anchor byte `\0' follows the region, we include it in
6474      the detecting source.  Then code detectors can handle the tailing
6475      byte sequence more accurately.
6476
6477      Fix me: This is not a perfect solution.  It is better that we
6478      add one more argument, say LAST_BLOCK, to all detect_coding_XXX.
6479   */
6480   if (to == Z || (to == GPT && GAP_SIZE > 0))
6481     include_anchor_byte = 1;
6482   return detect_coding_system (BYTE_POS_ADDR (from_byte),
6483                                to_byte - from_byte + include_anchor_byte,
6484                                !NILP (highest),
6485                                !NILP (current_buffer
6486                                       ->enable_multibyte_characters));
6487 }
6488
6489 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6490        1, 2, 0,
6491        doc: /* Detect how the byte sequence in STRING is encoded.
6492 Return a list of possible coding systems used on decoding a byte
6493 sequence containing the bytes in STRING when the coding system
6494 `undecided' is specified.  The list is ordered by priority decided in
6495 the current language environment.
6496
6497 If only ASCII characters are found, it returns a list of single element
6498 `undecided' or its subsidiary coding system according to a detected
6499 end-of-line format.
6500
6501 If optional argument HIGHEST is non-nil, return the coding system of
6502 highest priority.  */)
6503      (string, highest)
6504      Lisp_Object string, highest;
6505 {
6506   CHECK_STRING (string);
6507
6508   return detect_coding_system (SDATA (string),
6509                                /* "+ 1" is to include the anchor byte
6510                                   `\0'.  With this, code detectors can
6511                                   handle the tailing bytes more
6512                                   accurately.  */
6513                                SBYTES (string) + 1,
6514                                !NILP (highest),
6515                                STRING_MULTIBYTE (string));
6516 }
6517
6518 /*  Subroutine for Fsafe_coding_systems_region_internal.
6519
6520     Return a list of coding systems that safely encode the multibyte
6521     text between P and PEND.  SAFE_CODINGS, if non-nil, is an alist of
6522     possible coding systems.  If it is nil, it means that we have not
6523     yet found any coding systems.
6524
6525     WORK_TABLE is a copy of the char-table Vchar_coding_system_table.  An
6526     element of WORK_TABLE is set to t once the element is looked up.
6527
6528     If a non-ASCII single byte char is found, set
6529     *single_byte_char_found to 1.  */
6530
6531 static Lisp_Object
6532 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
6533      unsigned char *p, *pend;
6534      Lisp_Object safe_codings, work_table;
6535      int *single_byte_char_found;
6536 {
6537   int c, len;
6538   Lisp_Object val, ch;
6539   Lisp_Object prev, tail;
6540
6541   while (p < pend)
6542     {
6543       c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
6544       p += len;
6545       if (ASCII_BYTE_P (c))
6546         /* We can ignore ASCII characters here.  */
6547         continue;
6548       if (SINGLE_BYTE_CHAR_P (c))
6549         *single_byte_char_found = 1;
6550       if (NILP (safe_codings))
6551         /* Already all coding systems are excluded.  But, we can't
6552            terminate the loop here because non-ASCII single-byte char
6553            must be found.  */
6554         continue;
6555       /* Check the safe coding systems for C.  */
6556       ch = make_number (c);
6557       val = Faref (work_table, ch);
6558       if (EQ (val, Qt))
6559         /* This element was already checked.  Ignore it.  */
6560         continue;
6561       /* Remember that we checked this element.  */
6562       Faset (work_table, ch, Qt);
6563
6564       for (prev = tail = safe_codings; CONSP (tail); tail = XCDR (tail))
6565         {
6566           Lisp_Object elt, translation_table, hash_table, accept_latin_extra;
6567           int encodable;
6568
6569           elt = XCAR (tail);
6570           if (CONSP (XCDR (elt)))
6571             {
6572               /* This entry has this format now:
6573                  ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
6574                           ACCEPT-LATIN-EXTRA ) */
6575               val = XCDR (elt);
6576               encodable = ! NILP (Faref (XCAR (val), ch));
6577               if (! encodable)
6578                 {
6579                   val = XCDR (val);
6580                   translation_table = XCAR (val);
6581                   hash_table = XCAR (XCDR (val));
6582                   accept_latin_extra = XCAR (XCDR (XCDR (val)));
6583                 }
6584             }
6585           else
6586             {
6587               /* This entry has this format now: ( CODING . SAFE-CHARS) */
6588               encodable = ! NILP (Faref (XCDR (elt), ch));
6589               if (! encodable)
6590                 {
6591                   /* Transform the format to:
6592                      ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
6593                        ACCEPT-LATIN-EXTRA )  */
6594                   val = Fget (XCAR (elt), Qcoding_system);
6595                   translation_table
6596                     = Fplist_get (AREF (val, 3),
6597                                   Qtranslation_table_for_encode);
6598                   if (SYMBOLP (translation_table))
6599                     translation_table = Fget (translation_table,
6600                                               Qtranslation_table);
6601                   hash_table
6602                     = (CHAR_TABLE_P (translation_table)
6603                        ? XCHAR_TABLE (translation_table)->extras[1]
6604                        : Qnil);
6605                   accept_latin_extra
6606                     = ((EQ (AREF (val, 0), make_number (2))
6607                         && VECTORP (AREF (val, 4)))
6608                        ? AREF (AREF (val, 4), 16)
6609                        : Qnil);
6610                   XSETCAR (tail, list5 (XCAR (elt), XCDR (elt),
6611                                         translation_table, hash_table,
6612                                         accept_latin_extra));
6613                 }
6614             }
6615
6616           if (! encodable
6617               && ((CHAR_TABLE_P (translation_table)
6618                    && ! NILP (Faref (translation_table, ch)))
6619                   || (HASH_TABLE_P (hash_table)
6620                       && ! NILP (Fgethash (ch, hash_table, Qnil)))
6621                   || (SINGLE_BYTE_CHAR_P (c)
6622                       && ! NILP (accept_latin_extra)
6623                       && VECTORP (Vlatin_extra_code_table)
6624                       && ! NILP (AREF (Vlatin_extra_code_table, c)))))
6625             encodable = 1;
6626           if (encodable)
6627             prev = tail;
6628           else
6629             {
6630               /* Exclude this coding system from SAFE_CODINGS.  */
6631               if (EQ (tail, safe_codings))
6632                 safe_codings = XCDR (safe_codings);
6633               else
6634                 XSETCDR (prev, XCDR (tail));
6635             }
6636         }
6637     }
6638   return safe_codings;
6639 }
6640
6641 DEFUN ("find-coding-systems-region-internal",
6642        Ffind_coding_systems_region_internal,
6643        Sfind_coding_systems_region_internal, 2, 2, 0,
6644        doc: /* Internal use only.  */)
6645      (start, end)
6646      Lisp_Object start, end;
6647 {
6648   Lisp_Object work_table, safe_codings;
6649   int non_ascii_p = 0;
6650   int single_byte_char_found = 0;
6651   const unsigned char *p1, *p1end, *p2, *p2end, *p;
6652
6653   if (STRINGP (start))
6654     {
6655       if (!STRING_MULTIBYTE (start))
6656         return Qt;
6657       p1 = SDATA (start), p1end = p1 + SBYTES (start);
6658       p2 = p2end = p1end;
6659       if (SCHARS (start) != SBYTES (start))
6660         non_ascii_p = 1;
6661     }
6662   else
6663     {
6664       int from, to, stop;
6665
6666       CHECK_NUMBER_COERCE_MARKER (start);
6667       CHECK_NUMBER_COERCE_MARKER (end);
6668       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6669         args_out_of_range (start, end);
6670       if (NILP (current_buffer->enable_multibyte_characters))
6671         return Qt;
6672       from = CHAR_TO_BYTE (XINT (start));
6673       to = CHAR_TO_BYTE (XINT (end));
6674       stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
6675       p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
6676       if (stop == to)
6677         p2 = p2end = p1end;
6678       else
6679         p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
6680       if (XINT (end) - XINT (start) != to - from)
6681         non_ascii_p = 1;
6682     }
6683
6684   if (!non_ascii_p)
6685     {
6686       /* We are sure that the text contains no multibyte character.
6687          Check if it contains eight-bit-graphic.  */
6688       p = p1;
6689       for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
6690       if (p == p1end)
6691         {
6692           for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
6693           if (p == p2end)
6694             return Qt;
6695         }
6696     }
6697
6698   /* The text contains non-ASCII characters.  */
6699
6700   work_table = Fmake_char_table (Qchar_coding_system, Qnil);
6701   safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars));
6702
6703   safe_codings = find_safe_codings (p1, p1end, safe_codings, work_table,
6704                                     &single_byte_char_found);
6705   if (p2 < p2end)
6706     safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
6707                                       &single_byte_char_found);
6708   if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars)))
6709     safe_codings = Qt;
6710   else
6711     {
6712       /* Turn safe_codings to a list of coding systems... */
6713       Lisp_Object val;
6714
6715       if (single_byte_char_found)
6716         /* ... and append these for eight-bit chars.  */
6717         val = Fcons (Qraw_text,
6718                      Fcons (Qemacs_mule, Fcons (Qno_conversion, Qnil)));
6719       else
6720         /* ... and append generic coding systems.  */
6721         val = Fcopy_sequence (XCAR (Vcoding_system_safe_chars));
6722
6723       for (; CONSP (safe_codings); safe_codings = XCDR (safe_codings))
6724         val = Fcons (XCAR (XCAR (safe_codings)), val);
6725       safe_codings = val;
6726     }
6727
6728   return safe_codings;
6729 }
6730
6731
6732 /* Search from position POS for such characters that are unencodable
6733    accoding to SAFE_CHARS, and return a list of their positions.  P
6734    points where in the memory the character at POS exists.  Limit the
6735    search at PEND or when Nth unencodable characters are found.
6736
6737    If SAFE_CHARS is a char table, an element for an unencodable
6738    character is nil.
6739
6740    If SAFE_CHARS is nil, all non-ASCII characters are unencodable.
6741
6742    Otherwise, SAFE_CHARS is t, and only eight-bit-contrl and
6743    eight-bit-graphic characters are unencodable.  */
6744
6745 static Lisp_Object
6746 unencodable_char_position (safe_chars, pos, p, pend, n)
6747      Lisp_Object safe_chars;
6748      int pos;
6749      unsigned char *p, *pend;
6750      int n;
6751 {
6752   Lisp_Object pos_list;
6753
6754   pos_list = Qnil;
6755   while (p < pend)
6756     {
6757       int len;
6758       int c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, len);
6759
6760       if (c >= 128
6761           && (CHAR_TABLE_P (safe_chars)
6762               ? NILP (CHAR_TABLE_REF (safe_chars, c))
6763               : (NILP (safe_chars) || c < 256)))
6764         {
6765           pos_list = Fcons (make_number (pos), pos_list);
6766           if (--n <= 0)
6767             break;
6768         }
6769       pos++;
6770       p += len;
6771     }
6772   return Fnreverse (pos_list);
6773 }
6774
6775
6776 DEFUN ("unencodable-char-position", Funencodable_char_position,
6777        Sunencodable_char_position, 3, 5, 0,
6778        doc: /*
6779 Return position of first un-encodable character in a region.
6780 START and END specfiy the region and CODING-SYSTEM specifies the
6781 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
6782
6783 If optional 4th argument COUNT is non-nil, it specifies at most how
6784 many un-encodable characters to search.  In this case, the value is a
6785 list of positions.
6786
6787 If optional 5th argument STRING is non-nil, it is a string to search
6788 for un-encodable characters.  In that case, START and END are indexes
6789 to the string.  */)
6790      (start, end, coding_system, count, string)
6791      Lisp_Object start, end, coding_system, count, string;
6792 {
6793   int n;
6794   Lisp_Object safe_chars;
6795   struct coding_system coding;
6796   Lisp_Object positions;
6797   int from, to;
6798   unsigned char *p, *pend;
6799
6800   if (NILP (string))
6801     {
6802       validate_region (&start, &end);
6803       from = XINT (start);
6804       to = XINT (end);
6805       if (NILP (current_buffer->enable_multibyte_characters))
6806         return Qnil;
6807       p = CHAR_POS_ADDR (from);
6808       if (to == GPT)
6809         pend = GPT_ADDR;
6810       else
6811         pend = CHAR_POS_ADDR (to);
6812     }
6813   else
6814     {
6815       CHECK_STRING (string);
6816       CHECK_NATNUM (start);
6817       CHECK_NATNUM (end);
6818       from = XINT (start);
6819       to = XINT (end);
6820       if (from > to
6821           || to > SCHARS (string))
6822         args_out_of_range_3 (string, start, end);
6823       if (! STRING_MULTIBYTE (string))
6824         return Qnil;
6825       p = SDATA (string) + string_char_to_byte (string, from);
6826       pend = SDATA (string) + string_char_to_byte (string, to);
6827     }
6828
6829   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
6830
6831   if (NILP (count))
6832     n = 1;
6833   else
6834     {
6835       CHECK_NATNUM (count);
6836       n = XINT (count);
6837     }
6838
6839   if (coding.type == coding_type_no_conversion
6840       || coding.type == coding_type_raw_text)
6841     return Qnil;
6842
6843   if (coding.type == coding_type_undecided)
6844     safe_chars = Qnil;
6845   else
6846     safe_chars = coding_safe_chars (coding_system);
6847
6848   if (STRINGP (string)
6849       || from >= GPT || to <= GPT)
6850     positions = unencodable_char_position (safe_chars, from, p, pend, n);
6851   else
6852     {
6853       Lisp_Object args[2];
6854
6855       args[0] = unencodable_char_position (safe_chars, from, p, GPT_ADDR, n);
6856       n -= XINT (Flength (args[0]));
6857       if (n <= 0)
6858         positions = args[0];
6859       else
6860         {
6861           args[1] = unencodable_char_position (safe_chars, GPT, GAP_END_ADDR,
6862                                                pend, n);
6863           positions = Fappend (2, args);
6864         }
6865     }
6866
6867   return  (NILP (count) ? Fcar (positions) : positions);
6868 }
6869
6870
6871 Lisp_Object
6872 code_convert_region1 (start, end, coding_system, encodep)
6873      Lisp_Object start, end, coding_system;
6874      int encodep;
6875 {
6876   struct coding_system coding;
6877   int from, to;
6878
6879   CHECK_NUMBER_COERCE_MARKER (start);
6880   CHECK_NUMBER_COERCE_MARKER (end);
6881   CHECK_SYMBOL (coding_system);
6882
6883   validate_region (&start, &end);
6884   from = XFASTINT (start);
6885   to = XFASTINT (end);
6886
6887   if (NILP (coding_system))
6888     return make_number (to - from);
6889
6890   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6891     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
6892
6893   coding.mode |= CODING_MODE_LAST_BLOCK;
6894   coding.src_multibyte = coding.dst_multibyte
6895     = !NILP (current_buffer->enable_multibyte_characters);
6896   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
6897                        &coding, encodep, 1);
6898   Vlast_coding_system_used = coding.symbol;
6899   return make_number (coding.produced_char);
6900 }
6901
6902 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
6903        3, 3, "r\nzCoding system: ",
6904        doc: /* Decode the current region from the specified coding system.
6905 When called from a program, takes three arguments:
6906 START, END, and CODING-SYSTEM.  START and END are buffer positions.
6907 This function sets `last-coding-system-used' to the precise coding system
6908 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6909 not fully specified.)
6910 It returns the length of the decoded text.  */)
6911      (start, end, coding_system)
6912      Lisp_Object start, end, coding_system;
6913 {
6914   return code_convert_region1 (start, end, coding_system, 0);
6915 }
6916
6917 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
6918        3, 3, "r\nzCoding system: ",
6919        doc: /* Encode the current region into the specified coding system.
6920 When called from a program, takes three arguments:
6921 START, END, and CODING-SYSTEM.  START and END are buffer positions.
6922 This function sets `last-coding-system-used' to the precise coding system
6923 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6924 not fully specified.)
6925 It returns the length of the encoded text.  */)
6926      (start, end, coding_system)
6927      Lisp_Object start, end, coding_system;
6928 {
6929   return code_convert_region1 (start, end, coding_system, 1);
6930 }
6931
6932 Lisp_Object
6933 code_convert_string1 (string, coding_system, nocopy, encodep)
6934      Lisp_Object string, coding_system, nocopy;
6935      int encodep;
6936 {
6937   struct coding_system coding;
6938
6939   CHECK_STRING (string);
6940   CHECK_SYMBOL (coding_system);
6941
6942   if (NILP (coding_system))
6943     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
6944
6945   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6946     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
6947
6948   coding.mode |= CODING_MODE_LAST_BLOCK;
6949   string = (encodep
6950             ? encode_coding_string (string, &coding, !NILP (nocopy))
6951             : decode_coding_string (string, &coding, !NILP (nocopy)));
6952   Vlast_coding_system_used = coding.symbol;
6953
6954   return string;
6955 }
6956
6957 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
6958        2, 3, 0,
6959        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
6960 Optional arg NOCOPY non-nil means it is OK to return STRING itself
6961 if the decoding operation is trivial.
6962 This function sets `last-coding-system-used' to the precise coding system
6963 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6964 not fully specified.)  */)
6965      (string, coding_system, nocopy)
6966      Lisp_Object string, coding_system, nocopy;
6967 {
6968   return code_convert_string1 (string, coding_system, nocopy, 0);
6969 }
6970
6971 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
6972        2, 3, 0,
6973        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
6974 Optional arg NOCOPY non-nil means it is OK to return STRING itself
6975 if the encoding operation is trivial.
6976 This function sets `last-coding-system-used' to the precise coding system
6977 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6978 not fully specified.)  */)
6979      (string, coding_system, nocopy)
6980      Lisp_Object string, coding_system, nocopy;
6981 {
6982   return code_convert_string1 (string, coding_system, nocopy, 1);
6983 }
6984
6985 /* Encode or decode STRING according to CODING_SYSTEM.
6986    Do not set Vlast_coding_system_used.
6987
6988    This function is called only from macros DECODE_FILE and
6989    ENCODE_FILE, thus we ignore character composition.  */
6990
6991 Lisp_Object
6992 code_convert_string_norecord (string, coding_system, encodep)
6993      Lisp_Object string, coding_system;
6994      int encodep;
6995 {
6996   struct coding_system coding;
6997
6998   CHECK_STRING (string);
6999   CHECK_SYMBOL (coding_system);
7000
7001   if (NILP (coding_system))
7002     return string;
7003
7004   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
7005     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
7006
7007   coding.composing = COMPOSITION_DISABLED;
7008   coding.mode |= CODING_MODE_LAST_BLOCK;
7009   return (encodep
7010           ? encode_coding_string (string, &coding, 1)
7011           : decode_coding_string (string, &coding, 1));
7012 }
7013 \f
7014 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
7015        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7016 Return the corresponding character.  */)
7017      (code)
7018      Lisp_Object code;
7019 {
7020   unsigned char c1, c2, s1, s2;
7021   Lisp_Object val;
7022
7023   CHECK_NUMBER (code);
7024   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
7025   if (s1 == 0)
7026     {
7027       if (s2 < 0x80)
7028         XSETFASTINT (val, s2);
7029       else if (s2 >= 0xA0 || s2 <= 0xDF)
7030         XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0));
7031       else
7032         error ("Invalid Shift JIS code: %x", XFASTINT (code));
7033     }
7034   else
7035     {
7036       if ((s1 < 0x80 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF)
7037           || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
7038         error ("Invalid Shift JIS code: %x", XFASTINT (code));
7039       DECODE_SJIS (s1, s2, c1, c2);
7040       XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2));
7041     }
7042   return val;
7043 }
7044
7045 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
7046        doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7047 Return the corresponding code in SJIS.  */)
7048      (ch)
7049      Lisp_Object ch;
7050 {
7051   int charset, c1, c2, s1, s2;
7052   Lisp_Object val;
7053
7054   CHECK_NUMBER (ch);
7055   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
7056   if (charset == CHARSET_ASCII)
7057     {
7058       val = ch;
7059     }
7060   else if (charset == charset_jisx0208
7061            && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
7062     {
7063       ENCODE_SJIS (c1, c2, s1, s2);
7064       XSETFASTINT (val, (s1 << 8) | s2);
7065     }
7066   else if (charset == charset_katakana_jisx0201
7067            && c1 > 0x20 && c2 < 0xE0)
7068     {
7069       XSETFASTINT (val, c1 | 0x80);
7070     }
7071   else
7072     error ("Can't encode to shift_jis: %d", XFASTINT (ch));
7073   return val;
7074 }
7075
7076 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
7077        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7078 Return the corresponding character.  */)
7079      (code)
7080      Lisp_Object code;
7081 {
7082   int charset;
7083   unsigned char b1, b2, c1, c2;
7084   Lisp_Object val;
7085
7086   CHECK_NUMBER (code);
7087   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
7088   if (b1 == 0)
7089     {
7090       if (b2 >= 0x80)
7091         error ("Invalid BIG5 code: %x", XFASTINT (code));
7092       val = code;
7093     }
7094   else
7095     {
7096       if ((b1 < 0xA1 || b1 > 0xFE)
7097           || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
7098         error ("Invalid BIG5 code: %x", XFASTINT (code));
7099       DECODE_BIG5 (b1, b2, charset, c1, c2);
7100       XSETFASTINT (val, MAKE_CHAR (charset, c1, c2));
7101     }
7102   return val;
7103 }
7104
7105 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
7106        doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7107 Return the corresponding character code in Big5.  */)
7108      (ch)
7109      Lisp_Object ch;
7110 {
7111   int charset, c1, c2, b1, b2;
7112   Lisp_Object val;
7113
7114   CHECK_NUMBER (ch);
7115   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
7116   if (charset == CHARSET_ASCII)
7117     {
7118       val = ch;
7119     }
7120   else if ((charset == charset_big5_1
7121             && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
7122            || (charset == charset_big5_2
7123                && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
7124     {
7125       ENCODE_BIG5 (charset, c1, c2, b1, b2);
7126       XSETFASTINT (val, (b1 << 8) | b2);
7127     }
7128   else
7129     error ("Can't encode to Big5: %d", XFASTINT (ch));
7130   return val;
7131 }
7132 \f
7133 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
7134        Sset_terminal_coding_system_internal, 1, 1, 0,
7135        doc: /* Internal use only.  */)
7136      (coding_system)
7137      Lisp_Object coding_system;
7138 {
7139   CHECK_SYMBOL (coding_system);
7140   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
7141   /* We had better not send unsafe characters to terminal.  */
7142   terminal_coding.mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
7143   /* Character composition should be disabled.  */
7144   terminal_coding.composing = COMPOSITION_DISABLED;
7145   /* Error notification should be suppressed.  */
7146   terminal_coding.suppress_error = 1;
7147   terminal_coding.src_multibyte = 1;
7148   terminal_coding.dst_multibyte = 0;
7149   return Qnil;
7150 }
7151
7152 DEFUN ("set-safe-terminal-coding-system-internal", Fset_safe_terminal_coding_system_internal,
7153        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
7154        doc: /* Internal use only.  */)
7155      (coding_system)
7156      Lisp_Object coding_system;
7157 {
7158   CHECK_SYMBOL (coding_system);
7159   setup_coding_system (Fcheck_coding_system (coding_system),
7160                        &safe_terminal_coding);
7161   /* Character composition should be disabled.  */
7162   safe_terminal_coding.composing = COMPOSITION_DISABLED;
7163   /* Error notification should be suppressed.  */
7164   terminal_coding.suppress_error = 1;
7165   safe_terminal_coding.src_multibyte = 1;
7166   safe_terminal_coding.dst_multibyte = 0;
7167   return Qnil;
7168 }
7169
7170 DEFUN ("terminal-coding-system", Fterminal_coding_system,
7171        Sterminal_coding_system, 0, 0, 0,
7172        doc: /* Return coding system specified for terminal output.  */)
7173      ()
7174 {
7175   return terminal_coding.symbol;
7176 }
7177
7178 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
7179        Sset_keyboard_coding_system_internal, 1, 1, 0,
7180        doc: /* Internal use only.  */)
7181      (coding_system)
7182      Lisp_Object coding_system;
7183 {
7184   CHECK_SYMBOL (coding_system);
7185   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
7186   /* Character composition should be disabled.  */
7187   keyboard_coding.composing = COMPOSITION_DISABLED;
7188   return Qnil;
7189 }
7190
7191 DEFUN ("keyboard-coding-system", Fkeyboard_coding_system,
7192        Skeyboard_coding_system, 0, 0, 0,
7193        doc: /* Return coding system specified for decoding keyboard input.  */)
7194      ()
7195 {
7196   return keyboard_coding.symbol;
7197 }
7198
7199 \f
7200 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7201        Sfind_operation_coding_system,  1, MANY, 0,
7202        doc: /* Choose a coding system for an operation based on the target name.
7203 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7204 DECODING-SYSTEM is the coding system to use for decoding
7205 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7206 for encoding (in case OPERATION does encoding).
7207
7208 The first argument OPERATION specifies an I/O primitive:
7209   For file I/O, `insert-file-contents' or `write-region'.
7210   For process I/O, `call-process', `call-process-region', or `start-process'.
7211   For network I/O, `open-network-stream'.
7212
7213 The remaining arguments should be the same arguments that were passed
7214 to the primitive.  Depending on which primitive, one of those arguments
7215 is selected as the TARGET.  For example, if OPERATION does file I/O,
7216 whichever argument specifies the file name is TARGET.
7217
7218 TARGET has a meaning which depends on OPERATION:
7219   For file I/O, TARGET is a file name.
7220   For process I/O, TARGET is a process name.
7221   For network I/O, TARGET is a service name or a port number
7222
7223 This function looks up what specified for TARGET in,
7224 `file-coding-system-alist', `process-coding-system-alist',
7225 or `network-coding-system-alist' depending on OPERATION.
7226 They may specify a coding system, a cons of coding systems,
7227 or a function symbol to call.
7228 In the last case, we call the function with one argument,
7229 which is a list of all the arguments given to this function.
7230
7231 usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
7232      (nargs, args)
7233      int nargs;
7234      Lisp_Object *args;
7235 {
7236   Lisp_Object operation, target_idx, target, val;
7237   register Lisp_Object chain;
7238
7239   if (nargs < 2)
7240     error ("Too few arguments");
7241   operation = args[0];
7242   if (!SYMBOLP (operation)
7243       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
7244     error ("Invalid first argument");
7245   if (nargs < 1 + XINT (target_idx))
7246     error ("Too few arguments for operation: %s",
7247            SDATA (SYMBOL_NAME (operation)));
7248   /* For write-region, if the 6th argument (i.e. VISIT, the 5th
7249      argument to write-region) is string, it must be treated as a
7250      target file name.  */
7251   if (EQ (operation, Qwrite_region)
7252       && nargs > 5
7253       && STRINGP (args[5]))
7254     target_idx = make_number (4);
7255   target = args[XINT (target_idx) + 1];
7256   if (!(STRINGP (target)
7257         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
7258     error ("Invalid argument %d", XINT (target_idx) + 1);
7259
7260   chain = ((EQ (operation, Qinsert_file_contents)
7261             || EQ (operation, Qwrite_region))
7262            ? Vfile_coding_system_alist
7263            : (EQ (operation, Qopen_network_stream)
7264               ? Vnetwork_coding_system_alist
7265               : Vprocess_coding_system_alist));
7266   if (NILP (chain))
7267     return Qnil;
7268
7269   for (; CONSP (chain); chain = XCDR (chain))
7270     {
7271       Lisp_Object elt;
7272       elt = XCAR (chain);
7273
7274       if (CONSP (elt)
7275           && ((STRINGP (target)
7276                && STRINGP (XCAR (elt))
7277                && fast_string_match (XCAR (elt), target) >= 0)
7278               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
7279         {
7280           val = XCDR (elt);
7281           /* Here, if VAL is both a valid coding system and a valid
7282              function symbol, we return VAL as a coding system.  */
7283           if (CONSP (val))
7284             return val;
7285           if (! SYMBOLP (val))
7286             return Qnil;
7287           if (! NILP (Fcoding_system_p (val)))
7288             return Fcons (val, val);
7289           if (! NILP (Ffboundp (val)))
7290             {
7291               val = call1 (val, Flist (nargs, args));
7292               if (CONSP (val))
7293                 return val;
7294               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7295                 return Fcons (val, val);
7296             }
7297           return Qnil;
7298         }
7299     }
7300   return Qnil;
7301 }
7302
7303 DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
7304        Supdate_coding_systems_internal, 0, 0, 0,
7305        doc: /* Update internal database for ISO2022 and CCL based coding systems.
7306 When values of any coding categories are changed, you must
7307 call this function.  */)
7308      ()
7309 {
7310   int i;
7311
7312   for (i = CODING_CATEGORY_IDX_EMACS_MULE; i < CODING_CATEGORY_IDX_MAX; i++)
7313     {
7314       Lisp_Object val;
7315
7316       val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[i]);
7317       if (!NILP (val))
7318         {
7319           if (! coding_system_table[i])
7320             coding_system_table[i] = ((struct coding_system *)
7321                                       xmalloc (sizeof (struct coding_system)));
7322           setup_coding_system (val, coding_system_table[i]);
7323         }
7324       else if (coding_system_table[i])
7325         {
7326           xfree (coding_system_table[i]);
7327           coding_system_table[i] = NULL;
7328         }
7329     }
7330
7331   return Qnil;
7332 }
7333
7334 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
7335        Sset_coding_priority_internal, 0, 0, 0,
7336        doc: /* Update internal database for the current value of `coding-category-list'.
7337 This function is internal use only.  */)
7338      ()
7339 {
7340   int i = 0, idx;
7341   Lisp_Object val;
7342
7343   val = Vcoding_category_list;
7344
7345   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
7346     {
7347       if (! SYMBOLP (XCAR (val)))
7348         break;
7349       idx = XFASTINT (Fget (XCAR (val), Qcoding_category_index));
7350       if (idx >= CODING_CATEGORY_IDX_MAX)
7351         break;
7352       coding_priorities[i++] = (1 << idx);
7353       val = XCDR (val);
7354     }
7355   /* If coding-category-list is valid and contains all coding
7356      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
7357      the following code saves Emacs from crashing.  */
7358   while (i < CODING_CATEGORY_IDX_MAX)
7359     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
7360
7361   return Qnil;
7362 }
7363
7364 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7365        Sdefine_coding_system_internal, 1, 1, 0,
7366        doc: /* Register CODING-SYSTEM as a base coding system.
7367 This function is internal use only.  */)
7368      (coding_system)
7369      Lisp_Object coding_system;
7370 {
7371   Lisp_Object safe_chars, slot;
7372
7373   if (NILP (Fcheck_coding_system (coding_system)))
7374     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
7375   safe_chars = coding_safe_chars (coding_system);
7376   if (! EQ (safe_chars, Qt) && ! CHAR_TABLE_P (safe_chars))
7377     error ("No valid safe-chars property for %s",
7378            SDATA (SYMBOL_NAME (coding_system)));
7379   if (EQ (safe_chars, Qt))
7380     {
7381       if (NILP (Fmemq (coding_system, XCAR (Vcoding_system_safe_chars))))
7382         XSETCAR (Vcoding_system_safe_chars,
7383                  Fcons (coding_system, XCAR (Vcoding_system_safe_chars)));
7384     }
7385   else
7386     {
7387       slot = Fassq (coding_system, XCDR (Vcoding_system_safe_chars));
7388       if (NILP (slot))
7389         XSETCDR (Vcoding_system_safe_chars,
7390                  nconc2 (XCDR (Vcoding_system_safe_chars),
7391                          Fcons (Fcons (coding_system, safe_chars), Qnil)));
7392       else
7393         XSETCDR (slot, safe_chars);
7394     }
7395   return Qnil;
7396 }
7397
7398 #endif /* emacs */
7399
7400 \f
7401 /*** 9. Post-amble ***/
7402
7403 void
7404 init_coding_once ()
7405 {
7406   int i;
7407
7408   /* Emacs' internal format specific initialize routine.  */
7409   for (i = 0; i <= 0x20; i++)
7410     emacs_code_class[i] = EMACS_control_code;
7411   emacs_code_class[0x0A] = EMACS_linefeed_code;
7412   emacs_code_class[0x0D] = EMACS_carriage_return_code;
7413   for (i = 0x21 ; i < 0x7F; i++)
7414     emacs_code_class[i] = EMACS_ascii_code;
7415   emacs_code_class[0x7F] = EMACS_control_code;
7416   for (i = 0x80; i < 0xFF; i++)
7417     emacs_code_class[i] = EMACS_invalid_code;
7418   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
7419   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
7420   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
7421   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
7422
7423   /* ISO2022 specific initialize routine.  */
7424   for (i = 0; i < 0x20; i++)
7425     iso_code_class[i] = ISO_control_0;
7426   for (i = 0x21; i < 0x7F; i++)
7427     iso_code_class[i] = ISO_graphic_plane_0;
7428   for (i = 0x80; i < 0xA0; i++)
7429     iso_code_class[i] = ISO_control_1;
7430   for (i = 0xA1; i < 0xFF; i++)
7431     iso_code_class[i] = ISO_graphic_plane_1;
7432   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
7433   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
7434   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
7435   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
7436   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
7437   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
7438   iso_code_class[ISO_CODE_ESC] = ISO_escape;
7439   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
7440   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
7441   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
7442
7443   setup_coding_system (Qnil, &keyboard_coding);
7444   setup_coding_system (Qnil, &terminal_coding);
7445   setup_coding_system (Qnil, &safe_terminal_coding);
7446   setup_coding_system (Qnil, &default_buffer_file_coding);
7447
7448   bzero (coding_system_table, sizeof coding_system_table);
7449
7450   bzero (ascii_skip_code, sizeof ascii_skip_code);
7451   for (i = 0; i < 128; i++)
7452     ascii_skip_code[i] = 1;
7453
7454 #if defined (MSDOS) || defined (WINDOWSNT)
7455   system_eol_type = CODING_EOL_CRLF;
7456 #else
7457   system_eol_type = CODING_EOL_LF;
7458 #endif
7459
7460   inhibit_pre_post_conversion = 0;
7461 }
7462
7463 #ifdef emacs
7464
7465 void
7466 syms_of_coding ()
7467 {
7468   Qtarget_idx = intern ("target-idx");
7469   staticpro (&Qtarget_idx);
7470
7471   Qcoding_system_history = intern ("coding-system-history");
7472   staticpro (&Qcoding_system_history);
7473   Fset (Qcoding_system_history, Qnil);
7474
7475   /* Target FILENAME is the first argument.  */
7476   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
7477   /* Target FILENAME is the third argument.  */
7478   Fput (Qwrite_region, Qtarget_idx, make_number (2));
7479
7480   Qcall_process = intern ("call-process");
7481   staticpro (&Qcall_process);
7482   /* Target PROGRAM is the first argument.  */
7483   Fput (Qcall_process, Qtarget_idx, make_number (0));
7484
7485   Qcall_process_region = intern ("call-process-region");
7486   staticpro (&Qcall_process_region);
7487   /* Target PROGRAM is the third argument.  */
7488   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
7489
7490   Qstart_process = intern ("start-process");
7491   staticpro (&Qstart_process);
7492   /* Target PROGRAM is the third argument.  */
7493   Fput (Qstart_process, Qtarget_idx, make_number (2));
7494
7495   Qopen_network_stream = intern ("open-network-stream");
7496   staticpro (&Qopen_network_stream);
7497   /* Target SERVICE is the fourth argument.  */
7498   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
7499
7500   Qcoding_system = intern ("coding-system");
7501   staticpro (&Qcoding_system);
7502
7503   Qeol_type = intern ("eol-type");
7504   staticpro (&Qeol_type);
7505
7506   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
7507   staticpro (&Qbuffer_file_coding_system);
7508
7509   Qpost_read_conversion = intern ("post-read-conversion");
7510   staticpro (&Qpost_read_conversion);
7511
7512   Qpre_write_conversion = intern ("pre-write-conversion");
7513   staticpro (&Qpre_write_conversion);
7514
7515   Qno_conversion = intern ("no-conversion");
7516   staticpro (&Qno_conversion);
7517
7518   Qundecided = intern ("undecided");
7519   staticpro (&Qundecided);
7520
7521   Qcoding_system_p = intern ("coding-system-p");
7522   staticpro (&Qcoding_system_p);
7523
7524   Qcoding_system_error = intern ("coding-system-error");
7525   staticpro (&Qcoding_system_error);
7526
7527   Fput (Qcoding_system_error, Qerror_conditions,
7528         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
7529   Fput (Qcoding_system_error, Qerror_message,
7530         build_string ("Invalid coding system"));
7531
7532   Qcoding_category = intern ("coding-category");
7533   staticpro (&Qcoding_category);
7534   Qcoding_category_index = intern ("coding-category-index");
7535   staticpro (&Qcoding_category_index);
7536
7537   Vcoding_category_table
7538     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
7539   staticpro (&Vcoding_category_table);
7540   {
7541     int i;
7542     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
7543       {
7544         XVECTOR (Vcoding_category_table)->contents[i]
7545           = intern (coding_category_name[i]);
7546         Fput (XVECTOR (Vcoding_category_table)->contents[i],
7547               Qcoding_category_index, make_number (i));
7548       }
7549   }
7550
7551   Vcoding_system_safe_chars = Fcons (Qnil, Qnil);
7552   staticpro (&Vcoding_system_safe_chars);
7553
7554   Qtranslation_table = intern ("translation-table");
7555   staticpro (&Qtranslation_table);
7556   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
7557
7558   Qtranslation_table_id = intern ("translation-table-id");
7559   staticpro (&Qtranslation_table_id);
7560
7561   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
7562   staticpro (&Qtranslation_table_for_decode);
7563
7564   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
7565   staticpro (&Qtranslation_table_for_encode);
7566
7567   Qsafe_chars = intern ("safe-chars");
7568   staticpro (&Qsafe_chars);
7569
7570   Qchar_coding_system = intern ("char-coding-system");
7571   staticpro (&Qchar_coding_system);
7572
7573   /* Intern this now in case it isn't already done.
7574      Setting this variable twice is harmless.
7575      But don't staticpro it here--that is done in alloc.c.  */
7576   Qchar_table_extra_slots = intern ("char-table-extra-slots");
7577   Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
7578   Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (0));
7579
7580   Qvalid_codes = intern ("valid-codes");
7581   staticpro (&Qvalid_codes);
7582
7583   Qemacs_mule = intern ("emacs-mule");
7584   staticpro (&Qemacs_mule);
7585
7586   Qraw_text = intern ("raw-text");
7587   staticpro (&Qraw_text);
7588
7589   Qutf_8 = intern ("utf-8");
7590   staticpro (&Qutf_8);
7591
7592   defsubr (&Scoding_system_p);
7593   defsubr (&Sread_coding_system);
7594   defsubr (&Sread_non_nil_coding_system);
7595   defsubr (&Scheck_coding_system);
7596   defsubr (&Sdetect_coding_region);
7597   defsubr (&Sdetect_coding_string);
7598   defsubr (&Sfind_coding_systems_region_internal);
7599   defsubr (&Sunencodable_char_position);
7600   defsubr (&Sdecode_coding_region);
7601   defsubr (&Sencode_coding_region);
7602   defsubr (&Sdecode_coding_string);
7603   defsubr (&Sencode_coding_string);
7604   defsubr (&Sdecode_sjis_char);
7605   defsubr (&Sencode_sjis_char);
7606   defsubr (&Sdecode_big5_char);
7607   defsubr (&Sencode_big5_char);
7608   defsubr (&Sset_terminal_coding_system_internal);
7609   defsubr (&Sset_safe_terminal_coding_system_internal);
7610   defsubr (&Sterminal_coding_system);
7611   defsubr (&Sset_keyboard_coding_system_internal);
7612   defsubr (&Skeyboard_coding_system);
7613   defsubr (&Sfind_operation_coding_system);
7614   defsubr (&Supdate_coding_systems_internal);
7615   defsubr (&Sset_coding_priority_internal);
7616   defsubr (&Sdefine_coding_system_internal);
7617
7618   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
7619                doc: /* List of coding systems.
7620
7621 Do not alter the value of this variable manually.  This variable should be
7622 updated by the functions `make-coding-system' and
7623 `define-coding-system-alias'.  */);
7624   Vcoding_system_list = Qnil;
7625
7626   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
7627                doc: /* Alist of coding system names.
7628 Each element is one element list of coding system name.
7629 This variable is given to `completing-read' as TABLE argument.
7630
7631 Do not alter the value of this variable manually.  This variable should be
7632 updated by the functions `make-coding-system' and
7633 `define-coding-system-alias'.  */);
7634   Vcoding_system_alist = Qnil;
7635
7636   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
7637                doc: /* List of coding-categories (symbols) ordered by priority.
7638
7639 On detecting a coding system, Emacs tries code detection algorithms
7640 associated with each coding-category one by one in this order.  When
7641 one algorithm agrees with a byte sequence of source text, the coding
7642 system bound to the corresponding coding-category is selected.  */);
7643   {
7644     int i;
7645
7646     Vcoding_category_list = Qnil;
7647     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
7648       Vcoding_category_list
7649         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
7650                  Vcoding_category_list);
7651   }
7652
7653   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
7654                doc: /* Specify the coding system for read operations.
7655 It is useful to bind this variable with `let', but do not set it globally.
7656 If the value is a coding system, it is used for decoding on read operation.
7657 If not, an appropriate element is used from one of the coding system alists:
7658 There are three such tables, `file-coding-system-alist',
7659 `process-coding-system-alist', and `network-coding-system-alist'.  */);
7660   Vcoding_system_for_read = Qnil;
7661
7662   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
7663                doc: /* Specify the coding system for write operations.
7664 Programs bind this variable with `let', but you should not set it globally.
7665 If the value is a coding system, it is used for encoding of output,
7666 when writing it to a file and when sending it to a file or subprocess.
7667
7668 If this does not specify a coding system, an appropriate element
7669 is used from one of the coding system alists:
7670 There are three such tables, `file-coding-system-alist',
7671 `process-coding-system-alist', and `network-coding-system-alist'.
7672 For output to files, if the above procedure does not specify a coding system,
7673 the value of `buffer-file-coding-system' is used.  */);
7674   Vcoding_system_for_write = Qnil;
7675
7676   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
7677                doc: /* Coding system used in the latest file or process I/O.
7678 Also set by `encode-coding-region', `decode-coding-region',
7679 `encode-coding-string' and `decode-coding-string'.  */);
7680   Vlast_coding_system_used = Qnil;
7681
7682   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
7683                doc: /* *Non-nil means always inhibit code conversion of end-of-line format.
7684 See info node `Coding Systems' and info node `Text and Binary' concerning
7685 such conversion.  */);
7686   inhibit_eol_conversion = 0;
7687
7688   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
7689                doc: /* Non-nil means process buffer inherits coding system of process output.
7690 Bind it to t if the process output is to be treated as if it were a file
7691 read from some filesystem.  */);
7692   inherit_process_coding_system = 0;
7693
7694   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
7695                doc: /* Alist to decide a coding system to use for a file I/O operation.
7696 The format is ((PATTERN . VAL) ...),
7697 where PATTERN is a regular expression matching a file name,
7698 VAL is a coding system, a cons of coding systems, or a function symbol.
7699 If VAL is a coding system, it is used for both decoding and encoding
7700 the file contents.
7701 If VAL is a cons of coding systems, the car part is used for decoding,
7702 and the cdr part is used for encoding.
7703 If VAL is a function symbol, the function must return a coding system
7704 or a cons of coding systems which are used as above.  The function gets
7705 the arguments with which `find-operation-coding-system' was called.
7706
7707 See also the function `find-operation-coding-system'
7708 and the variable `auto-coding-alist'.  */);
7709   Vfile_coding_system_alist = Qnil;
7710
7711   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
7712     doc: /* Alist to decide a coding system to use for a process I/O operation.
7713 The format is ((PATTERN . VAL) ...),
7714 where PATTERN is a regular expression matching a program name,
7715 VAL is a coding system, a cons of coding systems, or a function symbol.
7716 If VAL is a coding system, it is used for both decoding what received
7717 from the program and encoding what sent to the program.
7718 If VAL is a cons of coding systems, the car part is used for decoding,
7719 and the cdr part is used for encoding.
7720 If VAL is a function symbol, the function must return a coding system
7721 or a cons of coding systems which are used as above.
7722
7723 See also the function `find-operation-coding-system'.  */);
7724   Vprocess_coding_system_alist = Qnil;
7725
7726   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
7727     doc: /* Alist to decide a coding system to use for a network I/O operation.
7728 The format is ((PATTERN . VAL) ...),
7729 where PATTERN is a regular expression matching a network service name
7730 or is a port number to connect to,
7731 VAL is a coding system, a cons of coding systems, or a function symbol.
7732 If VAL is a coding system, it is used for both decoding what received
7733 from the network stream and encoding what sent to the network stream.
7734 If VAL is a cons of coding systems, the car part is used for decoding,
7735 and the cdr part is used for encoding.
7736 If VAL is a function symbol, the function must return a coding system
7737 or a cons of coding systems which are used as above.
7738
7739 See also the function `find-operation-coding-system'.  */);
7740   Vnetwork_coding_system_alist = Qnil;
7741
7742   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
7743                doc: /* Coding system to use with system messages.
7744 Also used for decoding keyboard input on X Window system.  */);
7745   Vlocale_coding_system = Qnil;
7746
7747   /* The eol mnemonics are reset in startup.el system-dependently.  */
7748   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
7749                doc: /* *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
7750   eol_mnemonic_unix = build_string (":");
7751
7752   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
7753                doc: /* *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
7754   eol_mnemonic_dos = build_string ("\\");
7755
7756   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
7757                doc: /* *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
7758   eol_mnemonic_mac = build_string ("/");
7759
7760   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
7761                doc: /* *String displayed in mode line when end-of-line format is not yet determined.  */);
7762   eol_mnemonic_undecided = build_string (":");
7763
7764   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
7765                doc: /* *Non-nil enables character translation while encoding and decoding.  */);
7766   Venable_character_translation = Qt;
7767
7768   DEFVAR_LISP ("standard-translation-table-for-decode",
7769                &Vstandard_translation_table_for_decode,
7770                doc: /* Table for translating characters while decoding.  */);
7771   Vstandard_translation_table_for_decode = Qnil;
7772
7773   DEFVAR_LISP ("standard-translation-table-for-encode",
7774                &Vstandard_translation_table_for_encode,
7775                doc: /* Table for translating characters while encoding.  */);
7776   Vstandard_translation_table_for_encode = Qnil;
7777
7778   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
7779                doc: /* Alist of charsets vs revision numbers.
7780 While encoding, if a charset (car part of an element) is found,
7781 designate it with the escape sequence identifying revision (cdr part of the element).  */);
7782   Vcharset_revision_alist = Qnil;
7783
7784   DEFVAR_LISP ("default-process-coding-system",
7785                &Vdefault_process_coding_system,
7786                doc: /* Cons of coding systems used for process I/O by default.
7787 The car part is used for decoding a process output,
7788 the cdr part is used for encoding a text to be sent to a process.  */);
7789   Vdefault_process_coding_system = Qnil;
7790
7791   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
7792                doc: /* Table of extra Latin codes in the range 128..159 (inclusive).
7793 This is a vector of length 256.
7794 If Nth element is non-nil, the existence of code N in a file
7795 \(or output of subprocess) doesn't prevent it to be detected as
7796 a coding system of ISO 2022 variant which has a flag
7797 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
7798 or reading output of a subprocess.
7799 Only 128th through 159th elements has a meaning.  */);
7800   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
7801
7802   DEFVAR_LISP ("select-safe-coding-system-function",
7803                &Vselect_safe_coding_system_function,
7804                doc: /* Function to call to select safe coding system for encoding a text.
7805
7806 If set, this function is called to force a user to select a proper
7807 coding system which can encode the text in the case that a default
7808 coding system used in each operation can't encode the text.
7809
7810 The default value is `select-safe-coding-system' (which see).  */);
7811   Vselect_safe_coding_system_function = Qnil;
7812
7813   DEFVAR_BOOL ("coding-system-require-warning",
7814                &coding_system_require_warning,
7815                doc: /* Internal use only.
7816 If non-nil, on writing a file, `select-safe-coding-system-function' is
7817 called even if `coding-system-for-write' is non-nil.  The command
7818 `universal-coding-system-argument' binds this variable to t temporarily.  */);
7819   coding_system_require_warning = 0;
7820
7821
7822   DEFVAR_BOOL ("inhibit-iso-escape-detection",
7823                &inhibit_iso_escape_detection,
7824                doc: /* If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
7825
7826 By default, on reading a file, Emacs tries to detect how the text is
7827 encoded.  This code detection is sensitive to escape sequences.  If
7828 the sequence is valid as ISO2022, the code is determined as one of
7829 the ISO2022 encodings, and the file is decoded by the corresponding
7830 coding system (e.g. `iso-2022-7bit').
7831
7832 However, there may be a case that you want to read escape sequences in
7833 a file as is.  In such a case, you can set this variable to non-nil.
7834 Then, as the code detection ignores any escape sequences, no file is
7835 detected as encoded in some ISO2022 encoding.  The result is that all
7836 escape sequences become visible in a buffer.
7837
7838 The default value is nil, and it is strongly recommended not to change
7839 it.  That is because many Emacs Lisp source files that contain
7840 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
7841 in Emacs's distribution, and they won't be decoded correctly on
7842 reading if you suppress escape sequence detection.
7843
7844 The other way to read escape sequences in a file without decoding is
7845 to explicitly specify some coding system that doesn't use ISO2022's
7846 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
7847   inhibit_iso_escape_detection = 0;
7848
7849   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
7850                doc: /* Char table for translating self-inserting characters.
7851 This is applied to the result of input methods, not their input.  See also
7852 `keyboard-translate-table'.  */);
7853     Vtranslation_table_for_input = Qnil;
7854 }
7855
7856 char *
7857 emacs_strerror (error_number)
7858      int error_number;
7859 {
7860   char *str;
7861
7862   synchronize_system_messages_locale ();
7863   str = strerror (error_number);
7864
7865   if (! NILP (Vlocale_coding_system))
7866     {
7867       Lisp_Object dec = code_convert_string_norecord (build_string (str),
7868                                                       Vlocale_coding_system,
7869                                                       0);
7870       str = (char *) SDATA (dec);
7871     }
7872
7873   return str;
7874 }
7875
7876 #endif /* emacs */
7877