src/coding.c

    1 /* Coding system handler (conversion, detection, etc.).
   2    Copyright (C) 1995,97,1998,2002,2003  Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4    Copyright (C) 2001,2002,2003  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2, or (at your option)
  11 any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs; see the file COPYING.  If not, write to
  20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  21 Boston, MA 02111-1307, USA.  */
  22
  23 /*** TABLE OF CONTENTS ***
  24
  25   0. General comments
  26   1. Preamble
  27   2. Emacs' internal format (emacs-mule) handlers
  28   3. ISO2022 handlers
  29   4. Shift-JIS and BIG5 handlers
  30   5. CCL handlers
  31   6. End-of-line handlers
  32   7. C library functions
  33   8. Emacs Lisp library functions
  34   9. Post-amble
  35
  36 */
  37
  38 /*** 0. General comments ***/
  39
  40
  41 /*** GENERAL NOTE on CODING SYSTEMS ***
  42
  43   A coding system is an encoding mechanism for one or more character
  44   sets.  Here's a list of coding systems which Emacs can handle.  When
  45   we say "decode", it means converting some other coding system to
  46   Emacs' internal format (emacs-mule), and when we say "encode",
  47   it means converting the coding system emacs-mule to some other
  48   coding system.
  49
  50   0. Emacs' internal format (emacs-mule)
  51
  52   Emacs itself holds a multi-lingual character in buffers and strings
  53   in a special format.  Details are described in section 2.
  54
  55   1. ISO2022
  56
  57   The most famous coding system for multiple character sets.  X's
  58   Compound Text, various EUCs (Extended Unix Code), and coding
  59   systems used in Internet communication such as ISO-2022-JP are
  60   all variants of ISO2022.  Details are described in section 3.
  61
  62   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  63
  64   A coding system to encode character sets: ASCII, JISX0201, and
  65   JISX0208.  Widely used for PC's in Japan.  Details are described in
  66   section 4.
  67
  68   3. BIG5
  69
  70   A coding system to encode the character sets ASCII and Big5.  Widely
  71   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  72   described in section 4.  In this file, when we write "BIG5"
  73   (all uppercase), we mean the coding system, and when we write
  74   "Big5" (capitalized), we mean the character set.
  75
  76   4. Raw text
  77
  78   A coding system for text containing random 8-bit code.  Emacs does
  79   no code conversion on such text except for end-of-line format.
  80
  81   5. Other
  82
  83   If a user wants to read/write text encoded in a coding system not
  84   listed above, he can supply a decoder and an encoder for it as CCL
  85   (Code Conversion Language) programs.  Emacs executes the CCL program
  86   while reading/writing.
  87
  88   Emacs represents a coding system by a Lisp symbol that has a property
  89   `coding-system'.  But, before actually using the coding system, the
  90   information about it is set in a structure of type `struct
  91   coding_system' for rapid processing.  See section 6 for more details.
  92
  93 */
  94
  95 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  96
  97   How end-of-line of text is encoded depends on the operating system.
  98   For instance, Unix's format is just one byte of `line-feed' code,
  99   whereas DOS's format is two-byte sequence of `carriage-return' and
 100   `line-feed' codes.  MacOS's format is usually one byte of
 101   `carriage-return'.
 102
 103   Since text character encoding and end-of-line encoding are
 104   independent, any coding system described above can have any
 105   end-of-line format.  So Emacs has information about end-of-line
 106   format in each coding-system.  See section 6 for more details.
 107
 108 */
 109
 110 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 111
 112   These functions check if a text between SRC and SRC_END is encoded
 113   in the coding system category XXX.  Each returns an integer value in
 114   which appropriate flag bits for the category XXX are set.  The flag
 115   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 116   template for these functions.  If MULTIBYTEP is nonzero, 8-bit codes
 117   of the range 0x80..0x9F are in multibyte form.  */
 118 #if 0
 119 int
 120 detect_coding_emacs_mule (src, src_end, multibytep)
 121      unsigned char *src, *src_end;
 122      int multibytep;
 123 {
 124   ...
 125 }
 126 #endif
 127
 128 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 129
 130   These functions decode SRC_BYTES length of unibyte text at SOURCE
 131   encoded in CODING to Emacs' internal format.  The resulting
 132   multibyte text goes to a place pointed to by DESTINATION, the length
 133   of which should not exceed DST_BYTES.
 134
 135   These functions set the information about original and decoded texts
 136   in the members `produced', `produced_char', `consumed', and
 137   `consumed_char' of the structure *CODING.  They also set the member
 138   `result' to one of CODING_FINISH_XXX indicating how the decoding
 139   finished.
 140
 141   DST_BYTES zero means that the source area and destination area are
 142   overlapped, which means that we can produce a decoded text until it
 143   reaches the head of the not-yet-decoded source text.
 144
 145   Below is a template for these functions.  */
 146 #if 0
 147 static void
 148 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 149      struct coding_system *coding;
 150      unsigned char *source, *destination;
 151      int src_bytes, dst_bytes;
 152 {
 153   ...
 154 }
 155 #endif
 156
 157 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 158
 159   These functions encode SRC_BYTES length text at SOURCE from Emacs'
 160   internal multibyte format to CODING.  The resulting unibyte text
 161   goes to a place pointed to by DESTINATION, the length of which
 162   should not exceed DST_BYTES.
 163
 164   These functions set the information about original and encoded texts
 165   in the members `produced', `produced_char', `consumed', and
 166   `consumed_char' of the structure *CODING.  They also set the member
 167   `result' to one of CODING_FINISH_XXX indicating how the encoding
 168   finished.
 169
 170   DST_BYTES zero means that the source area and destination area are
 171   overlapped, which means that we can produce encoded text until it
  172   reaches the head of the not-yet-encoded source text.
 173
 174   Below is a template for these functions.  */
 175 #if 0
 176 static void
 177 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 178      struct coding_system *coding;
 179      unsigned char *source, *destination;
 180      int src_bytes, dst_bytes;
 181 {
 182   ...
 183 }
 184 #endif
 185
 186 /*** COMMONLY USED MACROS ***/
 187
  188 /* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
  189    get one and two bytes from the source text respectively.
 190    If there are not enough bytes in the source, they jump to
 191    `label_end_of_loop'.  The caller should set variables `coding',
 192    `src' and `src_end' to appropriate pointer in advance.  These
 193    macros are called from decoding routines `decode_coding_XXX', thus
 194    it is assumed that the source text is unibyte.  */
 195
 196 #define ONE_MORE_BYTE(c1)                                       \
 197   do {                                                          \
 198     if (src >= src_end)                                         \
 199       {                                                         \
 200         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 201         goto label_end_of_loop;                                 \
 202       }                                                         \
 203     c1 = *src++;                                                \
 204   } while (0)
 205
 206 #define TWO_MORE_BYTES(c1, c2)                                  \
 207   do {                                                          \
 208     if (src + 1 >= src_end)                                     \
 209       {                                                         \
 210         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 211         goto label_end_of_loop;                                 \
 212       }                                                         \
 213     c1 = *src++;                                                \
 214     c2 = *src++;                                                \
 215   } while (0)
 216
 217
 218 /* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte
 219    form if MULTIBYTEP is nonzero.  */
 220
 221 #define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep)           \
 222   do {                                                          \
 223     if (src >= src_end)                                         \
 224       {                                                         \
 225         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 226         goto label_end_of_loop;                                 \
 227       }                                                         \
 228     c1 = *src++;                                                \
 229     if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL)         \
 230       c1 = *src++ - 0x20;                                       \
 231   } while (0)
 232
 233 /* Set C to the next character at the source text pointed by `src'.
 234    If there are not enough characters in the source, jump to
 235    `label_end_of_loop'.  The caller should set variables `coding'
 236    `src', `src_end', and `translation_table' to appropriate pointers
 237    in advance.  This macro is used in encoding routines
 238    `encode_coding_XXX', thus it assumes that the source text is in
 239    multibyte form except for 8-bit characters.  8-bit characters are
 240    in multibyte form if coding->src_multibyte is nonzero, else they
 241    are represented by a single byte.  */
 242
 243 #define ONE_MORE_CHAR(c)                                        \
 244   do {                                                          \
 245     int len = src_end - src;                                    \
 246     int bytes;                                                  \
 247     if (len <= 0)                                               \
 248       {                                                         \
 249         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 250         goto label_end_of_loop;                                 \
 251       }                                                         \
 252     if (coding->src_multibyte                                   \
 253         || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes))        \
 254       c = STRING_CHAR_AND_LENGTH (src, len, bytes);             \
 255     else                                                        \
 256       c = *src, bytes = 1;                                      \
 257     if (!NILP (translation_table))                              \
 258       c = translate_char (translation_table, c, -1, 0, 0);      \
 259     src += bytes;                                               \
 260   } while (0)
 261
 262
 263 /* Produce a multibyte form of character C to `dst'.  Jump to
 264    `label_end_of_loop' if there's not enough space at `dst'.
 265
 266    If we are now in the middle of a composition sequence, the decoded
 267    character may be ALTCHAR (for the current composition).  In that
 268    case, the character goes to coding->cmp_data->data instead of
 269    `dst'.
 270
 271    This macro is used in decoding routines.  */
 272
 273 #define EMIT_CHAR(c)                                                    \
 274   do {                                                                  \
 275     if (! COMPOSING_P (coding)                                          \
 276         || coding->composing == COMPOSITION_RELATIVE                    \
 277         || coding->composing == COMPOSITION_WITH_RULE)                  \
 278       {                                                                 \
 279         int bytes = CHAR_BYTES (c);                                     \
 280         if ((dst + bytes) > (dst_bytes ? dst_end : src))                \
 281           {                                                             \
 282             coding->result = CODING_FINISH_INSUFFICIENT_DST;            \
 283             goto label_end_of_loop;                                     \
 284           }                                                             \
 285         dst += CHAR_STRING (c, dst);                                    \
 286         coding->produced_char++;                                        \
 287       }                                                                 \
 288                                                                         \
 289     if (COMPOSING_P (coding)                                            \
 290         && coding->composing != COMPOSITION_RELATIVE)                   \
 291       {                                                                 \
 292         CODING_ADD_COMPOSITION_COMPONENT (coding, c);                   \
 293         coding->composition_rule_follows                                \
 294           = coding->composing != COMPOSITION_WITH_ALTCHARS;             \
 295       }                                                                 \
 296   } while (0)
 297
 298
 299 #define EMIT_ONE_BYTE(c)                                        \
 300   do {                                                          \
 301     if (dst >= (dst_bytes ? dst_end : src))                     \
 302       {                                                         \
 303         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 304         goto label_end_of_loop;                                 \
 305       }                                                         \
 306     *dst++ = c;                                                 \
 307   } while (0)
 308
 309 #define EMIT_TWO_BYTES(c1, c2)                                  \
 310   do {                                                          \
 311     if (dst + 2 > (dst_bytes ? dst_end : src))                  \
 312       {                                                         \
 313         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 314         goto label_end_of_loop;                                 \
 315       }                                                         \
 316     *dst++ = c1, *dst++ = c2;                                   \
 317   } while (0)
 318
 319 #define EMIT_BYTES(from, to)                                    \
 320   do {                                                          \
 321     if (dst + (to - from) > (dst_bytes ? dst_end : src))        \
 322       {                                                         \
 323         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 324         goto label_end_of_loop;                                 \
 325       }                                                         \
 326     while (from < to)                                           \
 327       *dst++ = *from++;                                         \
 328   } while (0)
 329
 330 \f
 331 /*** 1. Preamble ***/
 332
 333 #ifdef emacs
 334 #include <config.h>
 335 #endif
 336
 337 #include <stdio.h>
 338
 339 #ifdef emacs
 340
 341 #include "lisp.h"
 342 #include "buffer.h"
 343 #include "charset.h"
 344 #include "composite.h"
 345 #include "ccl.h"
 346 #include "coding.h"
 347 #include "window.h"
 348 #include "intervals.h"
 349
 350 #else  /* not emacs */
 351
 352 #include "mulelib.h"
 353
 354 #endif /* not emacs */
 355
  356 Lisp_Object Qcoding_system, Qeol_type;  /* Qcoding_system is the property key coding_safe_chars passes to Fget.  */
  357 Lisp_Object Qbuffer_file_coding_system;  /* NOTE(review): the Q* objects here presumably hold Lisp symbols set up outside this chunk.  */
  358 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
  359 Lisp_Object Qno_conversion, Qundecided;
  360 Lisp_Object Qcoding_system_history;
  361 Lisp_Object Qsafe_chars;  /* Property-list key looked up by coding_safe_chars.  */
  362 Lisp_Object Qvalid_codes;
  363
  364 extern Lisp_Object Qinsert_file_contents, Qwrite_region;  /* Only declared here; defined in another file.  */
  365 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
  366 Lisp_Object Qstart_process, Qopen_network_stream;
  367 Lisp_Object Qtarget_idx;
  368
  369 /* If a symbol has this property, evaluate the value to define the
  370    symbol as a coding system.  */
  371 Lisp_Object Qcoding_system_define_form;
  372
  373 Lisp_Object Vselect_safe_coding_system_function;
  374
  375 int coding_system_require_warning;
  376
  377 /* Mnemonic strings, one for each end-of-line format.  */
  378 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
  379 /* Mnemonic string indicating that the end-of-line format is not yet
  380    decided.  */
  381 Lisp_Object eol_mnemonic_undecided;
  382
  383 /* End-of-line format decided by the system.  This is CODING_EOL_LF on
  384    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
  385 int system_eol_type;
 386
  387 #ifdef emacs
  388
  389 /* Information about which coding system is safe for which chars.
  390    The value has the form (GENERIC-LIST . NON-GENERIC-ALIST).
  391
  392    GENERIC-LIST is a list of generic coding systems, i.e. those which
  393    can encode any character.
  394
  395    NON-GENERIC-ALIST is an alist pairing each non-generic coding system
  396    with the char table that contains its safe chars.  */
  397 Lisp_Object Vcoding_system_safe_chars;
  398
  399 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
  400
  401 Lisp_Object Qcoding_system_p, Qcoding_system_error;
  402
  403 /* The coding systems emacs-mule and raw-text convert only the
  404    end-of-line format.  */
  405 Lisp_Object Qemacs_mule, Qraw_text;
  406
  407 Lisp_Object Qutf_8;
  408
  409 /* Coding systems are passed between Emacs Lisp programs and C internal
  410    routines through the following three variables.  */
  411 /* Coding system for reading files and receiving data from a process.  */
  412 Lisp_Object Vcoding_system_for_read;
  413 /* Coding system for writing files and sending data to a process.  */
  414 Lisp_Object Vcoding_system_for_write;
  415 /* Coding system actually used in the latest I/O.  */
  416 Lisp_Object Vlast_coding_system_used;
  417
  418 /* A vector of length 256 which contains information about special
  419    Latin codes (especially for dealing with Microsoft codes).  */
  420 Lisp_Object Vlatin_extra_code_table;
  421
  422 /* Flag to inhibit code conversion of end-of-line format.  */
  423 int inhibit_eol_conversion;
  424
  425 /* Flag to inhibit ISO2022 escape sequence detection.  */
  426 int inhibit_iso_escape_detection;
  427
  428 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
  429 int inherit_process_coding_system;
  430
  431 /* Coding system to be used to encode text for terminal display.  */
  432 struct coding_system terminal_coding;
  433
  434 /* Coding system to be used to encode text for terminal display when
  435    the terminal coding system is nil.  */
  436 struct coding_system safe_terminal_coding;
  437
  438 /* Coding system of what is sent from the terminal keyboard.  */
  439 struct coding_system keyboard_coding;
  440
  441 /* Default coding system to be used to write a file.  */
  442 struct coding_system default_buffer_file_coding;
  443
  444 Lisp_Object Vfile_coding_system_alist;  /* NOTE(review): the three *_coding_system_alist variables are presumably per-operation defaults; their use is outside this chunk.  */
  445 Lisp_Object Vprocess_coding_system_alist;
  446 Lisp_Object Vnetwork_coding_system_alist;
  447
  448 Lisp_Object Vlocale_coding_system;
  449
  450 #endif /* emacs */
 451
  452 Lisp_Object Qcoding_category, Qcoding_category_index;
  453
  454 /* List of symbols `coding-category-xxx' ordered by priority.  */
  455 Lisp_Object Vcoding_category_list;
  456
  457 /* Table of coding categories (Lisp symbols).  */
  458 Lisp_Object Vcoding_category_table;
  459
  460 /* Table of symbol names, one for each coding category.
         NOTE(review): the entry order presumably has to match the
         coding category indices bounded by CODING_CATEGORY_IDX_MAX;
         confirm against their declaration, which is not in this file
         chunk.  */
  461 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
  462   "coding-category-emacs-mule",
  463   "coding-category-sjis",
  464   "coding-category-iso-7",
  465   "coding-category-iso-7-tight",
  466   "coding-category-iso-8-1",
  467   "coding-category-iso-8-2",
  468   "coding-category-iso-7-else",
  469   "coding-category-iso-8-else",
  470   "coding-category-ccl",
  471   "coding-category-big5",
  472   "coding-category-utf-8",
  473   "coding-category-utf-16-be",
  474   "coding-category-utf-16-le",
  475   "coding-category-raw-text",
  476   "coding-category-binary"
  477 };
 478
 479 /* Table of pointers to coding systems corresponding to each coding
 480    categories.  */
  481 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];  /* One coding-system pointer per coding category.  */
  482
  483 /* Table of coding category masks.  The Nth element is the mask of the
  484    coding category whose priority is N.  */
  485 static
  486 int coding_priorities[CODING_CATEGORY_IDX_MAX];
  487
  488 /* Flag to tell whether we look up a translation table on character
  489    code conversion.  */
  490 Lisp_Object Venable_character_translation;
  491 /* Standard translation table to look up on decoding (reading).  */
  492 Lisp_Object Vstandard_translation_table_for_decode;
  493 /* Standard translation table to look up on encoding (writing).  */
  494 Lisp_Object Vstandard_translation_table_for_encode;
  495
  496 Lisp_Object Qtranslation_table;
  497 Lisp_Object Qtranslation_table_id;
  498 Lisp_Object Qtranslation_table_for_decode;
  499 Lisp_Object Qtranslation_table_for_encode;
  500
  501 /* Alist of charsets vs revision number.  */
  502 Lisp_Object Vcharset_revision_alist;
  503
  504 /* Default coding systems used for process I/O.  */
  505 Lisp_Object Vdefault_process_coding_system;
  506
  507 /* Char table for translating Quail and self-inserting input.  */
  508 Lisp_Object Vtranslation_table_for_input;
  509
  510 /* Global flag to tell that we can't call post-read-conversion and
  511    pre-write-conversion functions.  Usually the value is zero, but it
  512    is set to 1 temporarily while such functions are running.  This is
  513    to avoid infinitely recursive calls.  */
  514 static int inhibit_pre_post_conversion;
  515
  516 Lisp_Object Qchar_coding_system;
 517
 518 /* Return `safe-chars' property of CODING_SYSTEM (symbol).  Don't check
 519    its validity.  */
 520
 521 Lisp_Object
 522 coding_safe_chars (coding_system)
 523      Lisp_Object coding_system;
 524 {
 525   Lisp_Object coding_spec, plist, safe_chars;
 526
 527   coding_spec = Fget (coding_system, Qcoding_system);
 528   plist = XVECTOR (coding_spec)->contents[3];
 529   safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars);
 530   return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
 531 }
 532
 533 #define CODING_SAFE_CHAR_P(safe_chars, c) \
 534   (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c)))
 535
 536 \f
 537 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 538
 539 /* Emacs' internal format for representation of multiple character
 540    sets is a kind of multi-byte encoding, i.e. characters are
 541    represented by variable-length sequences of one-byte codes.
 542
 543    ASCII characters and control characters (e.g. `tab', `newline') are
 544    represented by one-byte sequences which are their ASCII codes, in
 545    the range 0x00 through 0x7F.
 546
 547    8-bit characters of the range 0x80..0x9F are represented by
 548    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
 549    code + 0x20).
 550
 551    8-bit characters of the range 0xA0..0xFF are represented by
 552    one-byte sequences which are their 8-bit code.
 553
 554    The other characters are represented by a sequence of `base
 555    leading-code', optional `extended leading-code', and one or two
 556    `position-code's.  The length of the sequence is determined by the
 557    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
 558    whereas extended leading-code and position-code take the range 0xA0
 559    through 0xFF.  See `charset.h' for more details about leading-code
 560    and position-code.
 561
 562    --- CODE RANGE of Emacs' internal format ---
 563    character set        range
 564    -------------        -----
 565    ascii                0x00..0x7F
 566    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
  567    eight-bit-graphic    0xA0..0xFF
 568    ELSE                 0x81..0x9D + [0xA0..0xFF]+
 569    ---------------------------------------------
 570
 571    As this is the internal character representation, the format is
 572    usually not used externally (i.e. in a file or in a data sent to a
 573    process).  But, it is possible to have a text externally in this
 574    format (i.e. by encoding by the coding system `emacs-mule').
 575
 576    In that case, a sequence of one-byte codes has a slightly different
 577    form.
 578
 579    Firstly, all characters in eight-bit-control are represented by
 580    one-byte sequences which are their 8-bit code.
 581
 582    Next, character composition data are represented by the byte
 583    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
 584    where,
 585         METHOD is 0xF0 plus one of composition method (enum
 586         composition_method),
 587
 588         BYTES is 0xA0 plus the byte length of these composition data,
 589
 590         CHARS is 0xA0 plus the number of characters composed by these
 591         data,
 592
 593         COMPONENTs are characters of multibyte form or composition
  594         rules encoded as two bytes of ASCII codes.
 595
 596    In addition, for backward compatibility, the following formats are
 597    also recognized as composition data on decoding.
 598
 599    0x80 MSEQ ...
 600    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
 601
 602    Here,
  603         MSEQ is a multibyte form but in this special format:
 604           ASCII: 0xA0 ASCII_CODE+0x80,
 605           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
 606         RULE is a one byte code of the range 0xA0..0xF0 that
 607         represents a composition rule.
 608   */
 609
  610 enum emacs_code_class_type emacs_code_class[256];  /* One class per byte value (256 entries).  NOTE(review): presumably filled in by initialization code outside this chunk.  */
 611
 612 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 613    Check if a text is encoded in Emacs' internal format.  If it is,
 614    return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.  */
 615
 616 static int
 617 detect_coding_emacs_mule (src, src_end, multibytep)
 618       unsigned char *src, *src_end;
 619       int multibytep;
 620 {
 621   unsigned char c;
 622   int composing = 0;
 623   /* Dummy for ONE_MORE_BYTE.  */
 624   struct coding_system dummy_coding;
 625   struct coding_system *coding = &dummy_coding;
 626
 627   while (1)
 628     {
 629       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
 630
 631       if (composing)
 632         {
 633           if (c < 0xA0)
 634             composing = 0;
 635           else if (c == 0xA0)
 636             {
 637               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
 638               c &= 0x7F;
 639             }
 640           else
 641             c -= 0x20;
 642         }
 643
 644       if (c < 0x20)
 645         {
 646           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 647             return 0;
 648         }
 649       else if (c >= 0x80 && c < 0xA0)
 650         {
 651           if (c == 0x80)
 652             /* Old leading code for a composite character.  */
 653             composing = 1;
 654           else
 655             {
 656               unsigned char *src_base = src - 1;
 657               int bytes;
 658
 659               if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base,
 660                                                bytes))
 661                 return 0;
 662               src = src_base + bytes;
 663             }
 664         }
 665     }
 666  label_end_of_loop:
 667   return CODING_CATEGORY_MASK_EMACS_MULE;
 668 }
 669
 670
 671 /* Record the starting position START and METHOD of one composition.  */
 672
 673 #define CODING_ADD_COMPOSITION_START(coding, start, method)     \
 674   do {                                                          \
 675     struct composition_data *cmp_data = coding->cmp_data;       \
 676     int *data = cmp_data->data + cmp_data->used;                \
 677     coding->cmp_data_start = cmp_data->used;                    \
 678     data[0] = -1;                                               \
 679     data[1] = cmp_data->char_offset + start;                    \
 680     data[3] = (int) method;                                     \
 681     cmp_data->used += 4;                                        \
 682   } while (0)
 683
 684 /* Record the ending position END of the current composition.  */
 685
 686 #define CODING_ADD_COMPOSITION_END(coding, end)                 \
 687   do {                                                          \
 688     struct composition_data *cmp_data = coding->cmp_data;       \
 689     int *data = cmp_data->data + coding->cmp_data_start;        \
 690     data[0] = cmp_data->used - coding->cmp_data_start;          \
 691     data[2] = cmp_data->char_offset + end;                      \
 692   } while (0)
 693
 694 /* Record one COMPONENT (alternate character or composition rule).  */
 695
 696 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component)             \
 697   do {                                                                  \
 698     coding->cmp_data->data[coding->cmp_data->used++] = component;       \
 699     if (coding->cmp_data->used - coding->cmp_data_start                 \
 700         == COMPOSITION_DATA_MAX_BUNCH_LENGTH)                           \
 701       {                                                                 \
 702         CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
 703         coding->composing = COMPOSITION_NO;                             \
 704       }                                                                 \
 705   } while (0)
 706
 707
 708 /* Get one byte from a data pointed by SRC and increment SRC.  If SRC
 709    is not less than SRC_END, return -1 without incrementing Src.  */
 710
 711 #define SAFE_ONE_MORE_BYTE() (src >= src_end ? -1 : *src++)
 712
 713
 714 /* Decode a character represented as a component of composition
 715    sequence of Emacs 20 style at SRC.  Set C to that character, store
 716    its multibyte form sequence at P, and set P to the end of that
 717    sequence.  If no valid character is found, set C to -1.  */
 718
 719 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p)                \
 720   do {                                                          \
 721     int bytes;                                                  \
 722                                                                 \
 723     c = SAFE_ONE_MORE_BYTE ();                                  \
 724     if (c < 0)                                                  \
 725       break;                                                    \
 726     if (CHAR_HEAD_P (c))                                        \
 727       c = -1;                                                   \
 728     else if (c == 0xA0)                                         \
 729       {                                                         \
 730         c = SAFE_ONE_MORE_BYTE ();                              \
 731         if (c < 0xA0)                                           \
 732           c = -1;                                               \
 733         else                                                    \
 734           {                                                     \
 735             c -= 0xA0;                                          \
 736             *p++ = c;                                           \
 737           }                                                     \
 738       }                                                         \
 739     else if (BASE_LEADING_CODE_P (c - 0x20))                    \
 740       {                                                         \
 741         unsigned char *p0 = p;                                  \
 742                                                                 \
 743         c -= 0x20;                                              \
 744         *p++ = c;                                               \
 745         bytes = BYTES_BY_CHAR_HEAD (c);                         \
 746         while (--bytes)                                         \
 747           {                                                     \
 748             c = SAFE_ONE_MORE_BYTE ();                          \
 749             if (c < 0)                                          \
 750               break;                                            \
 751             *p++ = c;                                           \
 752           }                                                     \
 753         if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes)      \
 754             || (coding->flags /* We are recovering a file.  */  \
 755                 && p0[0] == LEADING_CODE_8_BIT_CONTROL          \
 756                 && ! CHAR_HEAD_P (p0[1])))                      \
 757           c = STRING_CHAR (p0, bytes);                          \
 758         else                                                    \
 759           c = -1;                                               \
 760       }                                                         \
 761     else                                                        \
 762       c = -1;                                                   \
 763   } while (0)
 764
 765
 766 /* Decode a composition rule represented as a component of composition
 767    sequence of Emacs 20 style at SRC.  Set C to the rule.  If not
 768    valid rule is found, set C to -1.  */
 769
 770 #define DECODE_EMACS_MULE_COMPOSITION_RULE(c)           \
 771   do {                                                  \
 772     c = SAFE_ONE_MORE_BYTE ();                          \
 773     c -= 0xA0;                                          \
 774     if (c < 0 || c >= 81)                               \
 775       c = -1;                                           \
 776     else                                                \
 777       {                                                 \
 778         gref = c / 9, nref = c % 9;                     \
 779         c = COMPOSITION_ENCODE_RULE (gref, nref);       \
 780       }                                                 \
 781   } while (0)
 782
 783
 784 /* Decode composition sequence encoded by `emacs-mule' at the source
 785    pointed by SRC.  SRC_END is the end of source.  Store information
 786    of the composition in CODING->cmp_data.
 787
 788    For backward compatibility, decode also a composition sequence of
 789    Emacs 20 style.  In that case, the composition sequence contains
 790    characters that should be extracted into a buffer or string.  Store
 791    those characters at *DESTINATION in multibyte form.
 792
 793    If we encounter an invalid byte sequence, return 0.
 794    If we encounter an insufficient source or destination, or
 795    insufficient space in CODING->cmp_data, return 1.
 796    Otherwise, return consumed bytes in the source.
 797
 798 */
 799 static INLINE int
 800 decode_composition_emacs_mule (coding, src, src_end,
 801                                destination, dst_end, dst_bytes)
 802      struct coding_system *coding;
 803      unsigned char *src, *src_end, **destination, *dst_end;
 804      int dst_bytes;
 805 {
 806   unsigned char *dst = *destination;
 807   int method, data_len, nchars;
 808   unsigned char *src_base = src++;
 809   /* Store components of composition.  */
 810   int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH];
 811   int ncomponent;
 812   /* Store multibyte form of characters to be composed.  This is for
 813      Emacs 20 style composition sequence.  */
 814   unsigned char buf[MAX_COMPOSITION_COMPONENTS * MAX_MULTIBYTE_LENGTH];
 815   unsigned char *bufp = buf;
 816   int c, i, gref, nref;
 817
 818   if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
 819       >= COMPOSITION_DATA_SIZE)
 820     {
 821       coding->result = CODING_FINISH_INSUFFICIENT_CMP;
 822       return -1;
 823     }
 824
 825   ONE_MORE_BYTE (c);
 826   if (c - 0xF0 >= COMPOSITION_RELATIVE
 827            && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS)
 828     {
 829       int with_rule;
 830
 831       method = c - 0xF0;
 832       with_rule = (method == COMPOSITION_WITH_RULE
 833                    || method == COMPOSITION_WITH_RULE_ALTCHARS);
 834       ONE_MORE_BYTE (c);
 835       data_len = c - 0xA0;
 836       if (data_len < 4
 837           || src_base + data_len > src_end)
 838         return 0;
 839       ONE_MORE_BYTE (c);
 840       nchars = c - 0xA0;
 841       if (c < 1)
 842         return 0;
 843       for (ncomponent = 0; src < src_base + data_len; ncomponent++)
 844         {
 845           /* If it is longer than this, it can't be valid.  */
 846           if (ncomponent >= COMPOSITION_DATA_MAX_BUNCH_LENGTH)
 847             return 0;
 848
 849           if (ncomponent % 2 && with_rule)
 850             {
 851               ONE_MORE_BYTE (gref);
 852               gref -= 32;
 853               ONE_MORE_BYTE (nref);
 854               nref -= 32;
 855               c = COMPOSITION_ENCODE_RULE (gref, nref);
 856             }
 857           else
 858             {
 859               int bytes;
 860               if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
 861                   || (coding->flags /* We are recovering a file.  */
 862                       && src[0] == LEADING_CODE_8_BIT_CONTROL
 863                       && ! CHAR_HEAD_P (src[1])))
 864                 c = STRING_CHAR (src, bytes);
 865               else
 866                 c = *src, bytes = 1;
 867               src += bytes;
 868             }
 869           component[ncomponent] = c;
 870         }
 871     }
 872   else
 873     {
 874       /* This may be an old Emacs 20 style format.  See the comment at
 875          the section 2 of this file.  */
 876       while (src < src_end && !CHAR_HEAD_P (*src)) src++;
 877       if (src == src_end
 878           && !(coding->mode & CODING_MODE_LAST_BLOCK))
 879         goto label_end_of_loop;
 880
 881       src_end = src;
 882       src = src_base + 1;
 883       if (c < 0xC0)
 884         {
 885           method = COMPOSITION_RELATIVE;
 886           for (ncomponent = 0; ncomponent < MAX_COMPOSITION_COMPONENTS;)
 887             {
 888               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 889               if (c < 0)
 890                 break;
 891               component[ncomponent++] = c;
 892             }
 893           if (ncomponent < 2)
 894             return 0;
 895           nchars = ncomponent;
 896         }
 897       else if (c == 0xFF)
 898         {
 899           method = COMPOSITION_WITH_RULE;
 900           src++;
 901           DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 902           if (c < 0)
 903             return 0;
 904           component[0] = c;
 905           for (ncomponent = 1;
 906                ncomponent < MAX_COMPOSITION_COMPONENTS * 2 - 1;)
 907             {
 908               DECODE_EMACS_MULE_COMPOSITION_RULE (c);
 909               if (c < 0)
 910                 break;
 911               component[ncomponent++] = c;
 912               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 913               if (c < 0)
 914                 break;
 915               component[ncomponent++] = c;
 916             }
 917           if (ncomponent < 3)
 918             return 0;
 919           nchars = (ncomponent + 1) / 2;
 920         }
 921       else
 922         return 0;
 923     }
 924
 925   if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src))
 926     {
 927       CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method);
 928       for (i = 0; i < ncomponent; i++)
 929         CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]);
 930       CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars);
 931       if (buf < bufp)
 932         {
 933           unsigned char *p = buf;
 934           EMIT_BYTES (p, bufp);
 935           *destination += bufp - buf;
 936           coding->produced_char += nchars;
 937         }
 938       return (src - src_base);
 939     }
 940  label_end_of_loop:
 941   return -1;
 942 }
 943
 944 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
 945
 946 static void
 947 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
 948      struct coding_system *coding;
 949      unsigned char *source, *destination;
 950      int src_bytes, dst_bytes;
 951 {
 952   unsigned char *src = source;
 953   unsigned char *src_end = source + src_bytes;
 954   unsigned char *dst = destination;
 955   unsigned char *dst_end = destination + dst_bytes;
 956   /* SRC_BASE remembers the start position in source in each loop.
 957      The loop will be exited when there's not enough source code, or
 958      when there's not enough destination area to produce a
 959      character.  */
 960   unsigned char *src_base;
 961
 962   coding->produced_char = 0;
 963   while ((src_base = src) < src_end)
 964     {
 965       unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
 966       int bytes;
 967
 968       if (*src == '\r')
 969         {
 970           int c = *src++;
 971
 972           if (coding->eol_type == CODING_EOL_CR)
 973             c = '\n';
 974           else if (coding->eol_type == CODING_EOL_CRLF)
 975             {
 976               ONE_MORE_BYTE (c);
 977               if (c != '\n')
 978                 {
 979                   src--;
 980                   c = '\r';
 981                 }
 982             }
 983           *dst++ = c;
 984           coding->produced_char++;
 985           continue;
 986         }
 987       else if (*src == '\n')
 988         {
 989           if ((coding->eol_type == CODING_EOL_CR
 990                || coding->eol_type == CODING_EOL_CRLF)
 991               && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
 992             {
 993               coding->result = CODING_FINISH_INCONSISTENT_EOL;
 994               goto label_end_of_loop;
 995             }
 996           *dst++ = *src++;
 997           coding->produced_char++;
 998           continue;
 999         }
1000       else if (*src == 0x80 && coding->cmp_data)
1001         {
1002           /* Start of composition data.  */
1003           int consumed  = decode_composition_emacs_mule (coding, src, src_end,
1004                                                          &dst, dst_end,
1005                                                          dst_bytes);
1006           if (consumed < 0)
1007             goto label_end_of_loop;
1008           else if (consumed > 0)
1009             {
1010               src += consumed;
1011               continue;
1012             }
1013           bytes = CHAR_STRING (*src, tmp);
1014           p = tmp;
1015           src++;
1016         }
1017       else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
1018                || (coding->flags /* We are recovering a file.  */
1019                    && src[0] == LEADING_CODE_8_BIT_CONTROL
1020                    && ! CHAR_HEAD_P (src[1])))
1021         {
1022           p = src;
1023           src += bytes;
1024         }
1025       else
1026         {
1027           bytes = CHAR_STRING (*src, tmp);
1028           p = tmp;
1029           src++;
1030         }
1031       if (dst + bytes >= (dst_bytes ? dst_end : src))
1032         {
1033           coding->result = CODING_FINISH_INSUFFICIENT_DST;
1034           break;
1035         }
1036       while (bytes--) *dst++ = *p++;
1037       coding->produced_char++;
1038     }
1039  label_end_of_loop:
1040   coding->consumed = coding->consumed_char = src_base - source;
1041   coding->produced = dst - destination;
1042 }
1043
1044
/* Turn the composition data stored at DATA into the special byte
   sequence that starts with 0x80, and copy it to DST.  Afterwards
   advance CODING->cmp_data_start by the length found in DATA[0]; if
   that exhausts CODING->cmp_data and a next one exists, switch
   CODING->cmp_data to it.

   The sequence is built as
     0x80 0xF0+METHOD 0xA0+COMPONENTS-BYTES 0xA0+COMPOSED-CHARS
   followed by the components.  If it doesn't fit in the destination,
   set CODING->result to CODING_FINISH_INSUFFICIENT_DST and jump to
   label_end_of_loop.  The expansion refers to the variables `src',
   `dst', `dst_end' and `dst_bytes' of the calling context.  */

#define ENCODE_COMPOSITION_EMACS_MULE(coding, data)                     \
  do {                                                                  \
    unsigned char buf[1024], *p0, *p = buf + 4;                         \
    int len = data[0];                                                  \
    int i;                                                              \
                                                                        \
    /* Components first; the four header bytes are filled in once       \
       the total length is known.  */                                   \
    if (data[3] != COMPOSITION_WITH_RULE                                \
        && data[3] != COMPOSITION_WITH_RULE_ALTCHARS)                   \
      {                                                                 \
        for (i = 4; i < len; i++)                                       \
          p += CHAR_STRING (data[i], p);                                \
      }                                                                 \
    else                                                                \
      {                                                                 \
        /* CHAR, then pairs of RULE and CHAR.  */                       \
        p += CHAR_STRING (data[4], p);                                  \
        i = 5;                                                          \
        while (i < len)                                                 \
          {                                                             \
            int gref, nref;                                             \
            COMPOSITION_DECODE_RULE (data[i], gref, nref);              \
            *p++ = 0x20 + gref;                                         \
            *p++ = 0x20 + nref;                                         \
            p += CHAR_STRING (data[i + 1], p);                          \
            i += 2;                                                     \
          }                                                             \
      }                                                                 \
    buf[0] = 0x80;                                                      \
    buf[1] = 0xF0 + data[3];             /* METHOD */                   \
    buf[2] = 0xA0 + (p - buf);           /* COMPONENTS-BYTES */         \
    buf[3] = 0xA0 + (data[2] - data[1]); /* COMPOSED-CHARS */           \
                                                                        \
    if (dst + (p - buf) + 4 > (dst_bytes ? dst_end : src))              \
      {                                                                 \
        coding->result = CODING_FINISH_INSUFFICIENT_DST;                \
        goto label_end_of_loop;                                         \
      }                                                                 \
    for (p0 = buf; p0 < p; p0++)                                        \
      *dst++ = *p0;                                                     \
    coding->cmp_data_start += len;                                      \
    if (coding->cmp_data_start == coding->cmp_data->used                \
        && coding->cmp_data->next)                                      \
      {                                                                 \
        coding->cmp_data = coding->cmp_data->next;                      \
        coding->cmp_data_start = 0;                                     \
      }                                                                 \
  } while (0)
1094
1095
/* Prototype of encode_eol, declared here because
   encode_coding_emacs_mule below hands the whole conversion over to
   it when there is no composition data to encode.  */
static void encode_eol P_ ((struct coding_system *, const unsigned char *,
                            unsigned char *, int, int));
1098
1099 static void
1100 encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
1101      struct coding_system *coding;
1102      unsigned char *source, *destination;
1103      int src_bytes, dst_bytes;
1104 {
1105   unsigned char *src = source;
1106   unsigned char *src_end = source + src_bytes;
1107   unsigned char *dst = destination;
1108   unsigned char *dst_end = destination + dst_bytes;
1109   unsigned char *src_base;
1110   int c;
1111   int char_offset;
1112   int *data;
1113
1114   Lisp_Object translation_table;
1115
1116   translation_table = Qnil;
1117
1118   /* Optimization for the case that there's no composition.  */
1119   if (!coding->cmp_data || coding->cmp_data->used == 0)
1120     {
1121       encode_eol (coding, source, destination, src_bytes, dst_bytes);
1122       return;
1123     }
1124
1125   char_offset = coding->cmp_data->char_offset;
1126   data = coding->cmp_data->data + coding->cmp_data_start;
1127   while (1)
1128     {
1129       src_base = src;
1130
1131       /* If SRC starts a composition, encode the information about the
1132          composition in advance.  */
1133       if (coding->cmp_data_start < coding->cmp_data->used
1134           && char_offset + coding->consumed_char == data[1])
1135         {
1136           ENCODE_COMPOSITION_EMACS_MULE (coding, data);
1137           char_offset = coding->cmp_data->char_offset;
1138           data = coding->cmp_data->data + coding->cmp_data_start;
1139         }
1140
1141       ONE_MORE_CHAR (c);
1142       if (c == '\n' && (coding->eol_type == CODING_EOL_CRLF
1143                         || coding->eol_type == CODING_EOL_CR))
1144         {
1145           if (coding->eol_type == CODING_EOL_CRLF)
1146             EMIT_TWO_BYTES ('\r', c);
1147           else
1148             EMIT_ONE_BYTE ('\r');
1149         }
1150       else if (SINGLE_BYTE_CHAR_P (c))
1151         {
1152           if (coding->flags && ! ASCII_BYTE_P (c))
1153             {
1154               /* As we are auto saving, retain the multibyte form for
1155                  8-bit chars.  */
1156               unsigned char buf[MAX_MULTIBYTE_LENGTH];
1157               int bytes = CHAR_STRING (c, buf);
1158
1159               if (bytes == 1)
1160                 EMIT_ONE_BYTE (buf[0]);
1161               else
1162                 EMIT_TWO_BYTES (buf[0], buf[1]);
1163             }
1164           else
1165             EMIT_ONE_BYTE (c);
1166         }
1167       else
1168         EMIT_BYTES (src_base, src);
1169       coding->consumed_char++;
1170     }
1171  label_end_of_loop:
1172   coding->consumed = src_base - source;
1173   coding->produced = coding->produced_char = dst - destination;
1174   return;
1175 }
1176
1177 \f
1178 /*** 3. ISO2022 handlers ***/
1179
1180 /* The following note describes the coding system ISO2022 briefly.
1181    Since the intention of this note is to help understand the
1182    functions in this file, some parts are NOT ACCURATE or are OVERLY
1183    SIMPLIFIED.  For thorough understanding, please refer to the
1184    original document of ISO2022.  This is equivalent to the standard
1185    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
1186
1187    ISO2022 provides many mechanisms to encode several character sets
1188    in 7-bit and 8-bit environments.  For 7-bit environments, all text
1189    is encoded using bytes less than 128.  This may make the encoded
1190    text a little bit longer, but the text passes more easily through
1191    several types of gateway, some of which strip off the MSB (Most
1192    Significant Bit).
1193
1194    There are two kinds of character sets: control character sets and
1195    graphic character sets.  The former contain control characters such
1196    as `newline' and `escape' to provide control functions (control
1197    functions are also provided by escape sequences).  The latter
1198    contain graphic characters such as 'A' and '-'.  Emacs recognizes
1199    two control character sets and many graphic character sets.
1200
1201    Graphic character sets are classified into one of the following
1202    four classes, according to the number of bytes (DIMENSION) and
1203    number of characters in one dimension (CHARS) of the set:
1204    - DIMENSION1_CHARS94
1205    - DIMENSION1_CHARS96
1206    - DIMENSION2_CHARS94
1207    - DIMENSION2_CHARS96
1208
1209    In addition, each character set is assigned an identification tag,
1210    unique for each set, called the "final character" (denoted as <F>
1211    hereafter).  The <F> of each character set is decided by ECMA(*)
   when it is registered in ISO.  The code range of <F> is 0x30..0x7E
1213    (0x30..0x3F are for private use only).
1214
1215    Note (*): ECMA = European Computer Manufacturers Association
1216
1217    Here are examples of graphic character sets [NAME(<F>)]:
1218         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
1219         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
1220         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
1221         o DIMENSION2_CHARS96 -- none for the moment
1222
1223    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
1224         C0 [0x00..0x1F] -- control character plane 0
1225         GL [0x20..0x7F] -- graphic character plane 0
1226         C1 [0x80..0x9F] -- control character plane 1
1227         GR [0xA0..0xFF] -- graphic character plane 1
1228
1229    A control character set is directly designated and invoked to C0 or
1230    C1 by an escape sequence.  The most common case is that:
1231    - ISO646's  control character set is designated/invoked to C0, and
1232    - ISO6429's control character set is designated/invoked to C1,
1233    and usually these designations/invocations are omitted in encoded
1234    text.  In a 7-bit environment, only C0 can be used, and a control
1235    character for C1 is encoded by an appropriate escape sequence to
1236    fit into the environment.  All control characters for C1 are
1237    defined to have corresponding escape sequences.
1238
1239    A graphic character set is at first designated to one of four
1240    graphic registers (G0 through G3), then these graphic registers are
1241    invoked to GL or GR.  These designations and invocations can be
1242    done independently.  The most common case is that G0 is invoked to
1243    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
1244    these invocations and designations are omitted in encoded text.
1245    In a 7-bit environment, only GL can be used.
1246
1247    When a graphic character set of CHARS94 is invoked to GL, codes
1248    0x20 and 0x7F of the GL area work as control characters SPACE and
1249    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
1250    be used.
1251
1252    There are two ways of invocation: locking-shift and single-shift.
1253    With locking-shift, the invocation lasts until the next different
1254    invocation, whereas with single-shift, the invocation affects the
1255    following character only and doesn't affect the locking-shift
1256    state.  Invocations are done by the following control characters or
1257    escape sequences:
1258
1259    ----------------------------------------------------------------------
1260    abbrev  function                  cntrl escape seq   description
1261    ----------------------------------------------------------------------
1262    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
1263    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
1264    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
1265    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
1266    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
1267    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
1268    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
1269    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
1270    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
1271    ----------------------------------------------------------------------
1272    (*) These are not used by any known coding system.
1273
1274    Control characters for these functions are defined by macros
1275    ISO_CODE_XXX in `coding.h'.
1276
1277    Designations are done by the following escape sequences:
1278    ----------------------------------------------------------------------
1279    escape sequence      description
1280    ----------------------------------------------------------------------
1281    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
1282    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
1283    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
1284    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
1285    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
1286    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
1287    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
1288    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
1289    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
1290    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
1291    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
1292    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
1293    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
1294    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
1295    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
1296    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
1297    ----------------------------------------------------------------------
1298
1299    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
1300    of dimension 1, chars 94, and final character <F>, etc...
1301
1302    Note (*): Although these designations are not allowed in ISO2022,
1303    Emacs accepts them on decoding, and produces them on encoding
1304    CHARS96 character sets in a coding system which is characterized as
1305    7-bit environment, non-locking-shift, and non-single-shift.
1306
1307    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
1308    '(' can be omitted.  We refer to this as "short-form" hereafter.
1309
1310    Now you may notice that there are a lot of ways of encoding the
1311    same multilingual text in ISO2022.  Actually, there exist many
   coding systems such as Compound Text (used in X11's inter-client
   communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
1314    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
1315    localized platforms), and all of these are variants of ISO2022.
1316
1317    In addition to the above, Emacs handles two more kinds of escape
1318    sequences: ISO6429's direction specification and Emacs' private
1319    sequence for specifying character composition.
1320
1321    ISO6429's direction specification takes the following form:
1322         o CSI ']'      -- end of the current direction
1323         o CSI '0' ']'  -- end of the current direction
1324         o CSI '1' ']'  -- start of left-to-right text
1325         o CSI '2' ']'  -- start of right-to-left text
1326    The control character CSI (0x9B: control sequence introducer) is
1327    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
1328
1329    Character composition specification takes the following form:
1330         o ESC '0' -- start relative composition
1331         o ESC '1' -- end composition
1332         o ESC '2' -- start rule-base composition (*)
1333         o ESC '3' -- start relative composition with alternate chars  (**)
1334         o ESC '4' -- start rule-base composition with alternate chars  (**)
1335   Since these are not standard escape sequences of any ISO standard,
1336   the use of them with these meanings is restricted to Emacs only.
1337
1338   (*) This form is used only in Emacs 20.5 and older versions,
1339   but the newer versions can safely decode it.
1340   (**) This form is used only in Emacs 21.1 and newer versions,
1341   and the older versions can't decode it.
1342
1343   Here's a list of example usages of these composition escape
1344   sequences (categorized by `enum composition_method').
1345
1346   COMPOSITION_RELATIVE:
1347         ESC 0 CHAR [ CHAR ] ESC 1
1348   COMPOSITION_WITH_RULE:
1349         ESC 2 CHAR [ RULE CHAR ] ESC 1
1350   COMPOSITION_WITH_ALTCHARS:
1351         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
1352   COMPOSITION_WITH_RULE_ALTCHARS:
1353         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
1354
/* Table of 256 ISO code classes.  NOTE(review): presumably indexed
   by byte value, one class per possible byte -- confirm against the
   code that fills and reads it.  */
enum iso_code_class_type iso_code_class[256];
1356
/* Nonzero if all of the following hold for the coding system at
   coding_system_table[IDX]:
     - the table entry is non-null,
     - CHARSET is CHARSET_ASCII, or C satisfies CODING_SAFE_CHAR_P
       for the value coding_safe_chars returns for that coding
       system's symbol,
     - the designation requested for CHARSET in that coding system is
       not CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION.
   Unless CHARSET is CHARSET_ASCII, the expansion assigns to a
   variable named `safe_chars' that must exist in the calling
   context.  */
#define CHARSET_OK(idx, charset, c)                                     \
  (coding_system_table[idx]                                             \
   && (charset == CHARSET_ASCII                                         \
       || (safe_chars = coding_safe_chars (coding_system_table[idx]->symbol), \
           CODING_SAFE_CHAR_P (safe_chars, c)))                         \
   && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
                                              charset)                  \
       != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
1365
/* Nonzero unless the initial designation that
   coding_system_table[IDX] records for index 1 is negative.  */
#define SHIFT_OUT_OK(idx) \
  (! (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) < 0))
1368
/* Nonzero unless the `composing' member of coding_system_table[IDX]
   is COMPOSITION_DISABLED.  The table entry must be non-null.  */
#define COMPOSITION_OK(idx)     \
  (! (coding_system_table[idx]->composing == COMPOSITION_DISABLED))
1371
1372 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1373    Check if a text is encoded in ISO2022.  If it is, return an
1374    integer in which appropriate flag bits any of:
1375         CODING_CATEGORY_MASK_ISO_7
1376         CODING_CATEGORY_MASK_ISO_7_TIGHT
1377         CODING_CATEGORY_MASK_ISO_8_1
1378         CODING_CATEGORY_MASK_ISO_8_2
1379         CODING_CATEGORY_MASK_ISO_7_ELSE
1380         CODING_CATEGORY_MASK_ISO_8_ELSE
1381    are set.  If a code which should never appear in ISO2022 is found,
1382    returns 0.  */
1383
1384 static int
1385 detect_coding_iso2022 (src, src_end, multibytep)
1386      unsigned char *src, *src_end;
1387      int multibytep;
1388 {
1389   int mask = CODING_CATEGORY_MASK_ISO;
1390   int mask_found = 0;
1391   int reg[4], shift_out = 0, single_shifting = 0;
1392   int c, c1, charset;
1393   /* Dummy for ONE_MORE_BYTE.  */
1394   struct coding_system dummy_coding;
1395   struct coding_system *coding = &dummy_coding;
1396   Lisp_Object safe_chars;
1397
1398   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
1399   while (mask && src < src_end)
1400     {
1401       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1402     retry:
1403       switch (c)
1404         {
1405         case ISO_CODE_ESC:
1406           if (inhibit_iso_escape_detection)
1407             break;
1408           single_shifting = 0;
1409           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1410           if (c >= '(' && c <= '/')
1411             {
1412               /* Designation sequence for a charset of dimension 1.  */
1413               ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1414               if (c1 < ' ' || c1 >= 0x80
1415                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
1416                 /* Invalid designation sequence.  Just ignore.  */
1417                 break;
1418               reg[(c - '(') % 4] = charset;
1419             }
1420           else if (c == '$')
1421             {
1422               /* Designation sequence for a charset of dimension 2.  */
1423               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1424               if (c >= '@' && c <= 'B')
1425                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
1426                 reg[0] = charset = iso_charset_table[1][0][c];
1427               else if (c >= '(' && c <= '/')
1428                 {
1429                   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1430                   if (c1 < ' ' || c1 >= 0x80
1431                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
1432                     /* Invalid designation sequence.  Just ignore.  */
1433                     break;
1434                   reg[(c - '(') % 4] = charset;
1435                 }
1436               else
1437                 /* Invalid designation sequence.  Just ignore.  */
1438                 break;
1439             }
1440           else if (c == 'N' || c == 'O')
1441             {
1442               /* ESC <Fe> for SS2 or SS3.  */
1443               mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
1444               break;
1445             }
1446           else if (c >= '0' && c <= '4')
1447             {
1448               /* ESC <Fp> for start/end composition.  */
1449               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7))
1450                 mask_found |= CODING_CATEGORY_MASK_ISO_7;
1451               else
1452                 mask &= ~CODING_CATEGORY_MASK_ISO_7;
1453               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT))
1454                 mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
1455               else
1456                 mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
1457               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_1))
1458                 mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1459               else
1460                 mask &= ~CODING_CATEGORY_MASK_ISO_8_1;
1461               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_2))
1462                 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1463               else
1464                 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1465               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_ELSE))
1466                 mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
1467               else
1468                 mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
1469               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_ELSE))
1470                 mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
1471               else
1472                 mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
1473               break;
1474             }
1475           else
1476             /* Invalid escape sequence.  Just ignore.  */
1477             break;
1478
1479           /* We found a valid designation sequence for CHARSET.  */
1480           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
1481           c = MAKE_CHAR (charset, 0, 0);
1482           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
1483             mask_found |= CODING_CATEGORY_MASK_ISO_7;
1484           else
1485             mask &= ~CODING_CATEGORY_MASK_ISO_7;
1486           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
1487             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
1488           else
1489             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
1490           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
1491             mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
1492           else
1493             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
1494           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
1495             mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
1496           else
1497             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
1498           break;
1499
1500         case ISO_CODE_SO:
1501           if (inhibit_iso_escape_detection)
1502             break;
1503           single_shifting = 0;
1504           if (shift_out == 0
1505               && (reg[1] >= 0
1506                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
1507                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
1508             {
1509               /* Locking shift out.  */
1510               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1511               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1512             }
1513           break;
1514
1515         case ISO_CODE_SI:
1516           if (inhibit_iso_escape_detection)
1517             break;
1518           single_shifting = 0;
1519           if (shift_out == 1)
1520             {
1521               /* Locking shift in.  */
1522               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1523               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1524             }
1525           break;
1526
1527         case ISO_CODE_CSI:
1528           single_shifting = 0;
1529         case ISO_CODE_SS2:
1530         case ISO_CODE_SS3:
1531           {
1532             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
1533
1534             if (inhibit_iso_escape_detection)
1535               break;
1536             if (c != ISO_CODE_CSI)
1537               {
1538                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1539                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1540                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1541                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1542                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1543                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1544                 single_shifting = 1;
1545               }
1546             if (VECTORP (Vlatin_extra_code_table)
1547                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1548               {
1549                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1550                     & CODING_FLAG_ISO_LATIN_EXTRA)
1551                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1552                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1553                     & CODING_FLAG_ISO_LATIN_EXTRA)
1554                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1555               }
1556             mask &= newmask;
1557             mask_found |= newmask;
1558           }
1559           break;
1560
1561         default:
1562           if (c < 0x80)
1563             {
1564               single_shifting = 0;
1565               break;
1566             }
1567           else if (c < 0xA0)
1568             {
1569               single_shifting = 0;
1570               if (VECTORP (Vlatin_extra_code_table)
1571                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1572                 {
1573                   int newmask = 0;
1574
1575                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1576                       & CODING_FLAG_ISO_LATIN_EXTRA)
1577                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1578                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1579                       & CODING_FLAG_ISO_LATIN_EXTRA)
1580                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1581                   mask &= newmask;
1582                   mask_found |= newmask;
1583                 }
1584               else
1585                 return 0;
1586             }
1587           else
1588             {
1589               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
1590                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
1591               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1592               /* Check the length of succeeding codes of the range
1593                  0xA0..0FF.  If the byte length is odd, we exclude
1594                  CODING_CATEGORY_MASK_ISO_8_2.  We can check this only
1595                  when we are not single shifting.  */
1596               if (!single_shifting
1597                   && mask & CODING_CATEGORY_MASK_ISO_8_2)
1598                 {
1599                   int i = 1;
1600
1601                   c = -1;
1602                   while (src < src_end)
1603                     {
1604                       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1605                       if (c < 0xA0)
1606                         break;
1607                       i++;
1608                     }
1609
1610                   if (i & 1 && src < src_end)
1611                     mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1612                   else
1613                     mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1614                   if (c >= 0)
1615                     /* This means that we have read one extra byte.  */
1616                     goto retry;
1617                 }
1618             }
1619           break;
1620         }
1621     }
1622  label_end_of_loop:
1623   return (mask & mask_found);
1624 }
1625
/* Decode a character whose charset is CHARSET, whose 1st position
   code is C1, and whose 2nd position code is C2, and yield the
   decoded character code.  If the variable `translation_table' at
   the expansion site is non-nil, yield the code translated through
   that table instead.  */

#define DECODE_ISO_CHARACTER(charset, c1, c2)                   \
  (!NILP (translation_table)                                    \
   ? translate_char (translation_table, -1, charset, c1, c2)    \
   : MAKE_CHAR (charset, c1, c2))
1635
/* Set designation state into CODING.

   REG is the graphic register being designated, and DIMENSION, CHARS
   and FINAL_CHAR select the charset through ISO_CHARSET_TABLE.  The
   macro refers to the variables `coding' and `safe_chars' of the
   expansion site, and jumps to `label_invalid_code' there when the
   designation is rejected.

   A designation is accepted when the charset is known (>= 0) and
   either CODING requests that charset for REG or the charset's base
   character is safe according to SAFE_CHARS.  On rejection for those
   reasons, REG is recorded in last_invalid_designation_register.

   REG and FINAL_CHAR are parenthesized at every use because callers
   pass expressions such as `c1 - 0x28'.  They may be evaluated more
   than once, so they must be free of side effects.  */
#define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
  do {                                                                     \
    int charset;                                                           \
    int c = -1;                                                            \
                                                                           \
    if ((final_char) < '0' || (final_char) >= 128)                         \
      goto label_invalid_code;                                             \
    charset = ISO_CHARSET_TABLE (make_number (dimension),                  \
                                 make_number (chars),                      \
                                 make_number (final_char));                \
    /* Build the probe character only for a known charset; a negative     \
       table result is not handed to MAKE_CHAR.  */                        \
    if (charset >= 0)                                                      \
      c = MAKE_CHAR (charset, 0, 0);                                       \
    if (charset >= 0                                                       \
        && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)        \
            == (reg)                                                       \
            || CODING_SAFE_CHAR_P (safe_chars, c)))                        \
      {                                                                    \
        if (coding->spec.iso2022.last_invalid_designation_register == 0    \
            && (reg) == 0                                                  \
            && charset == CHARSET_ASCII)                                   \
          {                                                                \
            /* We should insert this designation sequence as is so         \
               that it is surely written back to a file.  */               \
            coding->spec.iso2022.last_invalid_designation_register = -1;   \
            goto label_invalid_code;                                       \
          }                                                                \
        coding->spec.iso2022.last_invalid_designation_register = -1;       \
        /* When the direction mode bit is set, designate the reverse      \
           charset if one exists.  */                                      \
        if ((coding->mode & CODING_MODE_DIRECTION)                         \
            && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
          charset = CHARSET_REVERSE_CHARSET (charset);                     \
        CODING_SPEC_ISO_DESIGNATION (coding, (reg)) = charset;             \
      }                                                                    \
    else                                                                   \
      {                                                                    \
        coding->spec.iso2022.last_invalid_designation_register = (reg);    \
        goto label_invalid_code;                                           \
      }                                                                    \
  } while (0)
1672
1673 /* Allocate a memory block for storing information about compositions.
1674    The block is chained to the already allocated blocks.  */
1675
1676 void
1677 coding_allocate_composition_data (coding, char_offset)
1678      struct coding_system *coding;
1679      int char_offset;
1680 {
1681   struct composition_data *cmp_data
1682     = (struct composition_data *) xmalloc (sizeof *cmp_data);
1683
1684   cmp_data->char_offset = char_offset;
1685   cmp_data->used = 0;
1686   cmp_data->prev = coding->cmp_data;
1687   cmp_data->next = NULL;
1688   if (coding->cmp_data)
1689     coding->cmp_data->next = cmp_data;
1690   coding->cmp_data = cmp_data;
1691   coding->cmp_data_start = 0;
1692   coding->composing = COMPOSITION_NO;
1693 }
1694
/* Handle a composition start sequence: ESC 0, ESC 2, ESC 3, or ESC 4.
     ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
     ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
     ESC 3 : altchar composition :  ESC 3 ALT ... ESC 0 CHAR ... ESC 1
     ESC 4 : alt&rule composition : ESC 4 ALT RULE .. ALT ESC 0 CHAR ... ESC 1

   C1 is the byte that followed ESC.  The macro refers to the
   variables `coding' and `dst' of the expansion site and may jump to
   `label_end_of_loop' there.  */

#define DECODE_COMPOSITION_START(c1)                                       \
  do {                                                                     \
    if (coding->composing == COMPOSITION_DISABLED)                         \
      {                                                                    \
        /* Composition is disabled: store the two bytes of the            \
           sequence in the destination instead of interpreting it.  */     \
        dst[0] = ISO_CODE_ESC;                                             \
        dst[1] = c1 & 0x7f;                                                \
        dst += 2;                                                          \
        coding->produced_char += 2;                                        \
      }                                                                    \
    else if (COMPOSING_P (coding))                                         \
      {                                                                    \
        /* We are already handling a composition.  If the method is        \
           one of the following two, the codes following the current       \
           escape sequence are actual characters stored in a buffer.  */   \
        if (coding->composing == COMPOSITION_WITH_ALTCHARS                 \
            || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)        \
          {                                                                \
            coding->composing = COMPOSITION_RELATIVE;                      \
            coding->composition_rule_follows = 0;                          \
          }                                                                \
      }                                                                    \
    else                                                                   \
      {                                                                    \
        /* This is surely the start of a composition.  We must be sure     \
           that coding->cmp_data has enough space to store the             \
           information about the composition.  If not, terminate the       \
           current decoding loop, allocate one more memory block for       \
           coding->cmp_data in the caller, then start the decoding         \
           loop again.  We can't allocate memory here directly because     \
           it may cause buffer/string relocation.  */                      \
        if (! (coding->cmp_data                                            \
               && (coding->cmp_data->used                                  \
                   + COMPOSITION_DATA_MAX_BUNCH_LENGTH                     \
                   < COMPOSITION_DATA_SIZE)))                              \
          {                                                                \
            coding->result = CODING_FINISH_INSUFFICIENT_CMP;               \
            goto label_end_of_loop;                                        \
          }                                                                \
        /* Map the byte after ESC to the composition method.  */           \
        if (c1 == '0')                                                     \
          coding->composing = COMPOSITION_RELATIVE;                        \
        else if (c1 == '2')                                                \
          coding->composing = COMPOSITION_WITH_RULE;                       \
        else if (c1 == '3')                                                \
          coding->composing = COMPOSITION_WITH_ALTCHARS;                   \
        else                                                               \
          coding->composing = COMPOSITION_WITH_RULE_ALTCHARS;              \
        CODING_ADD_COMPOSITION_START (coding, coding->produced_char,       \
                                      coding->composing);                  \
        coding->composition_rule_follows = 0;                              \
      }                                                                    \
  } while (0)
1747
/* Handle the composition end sequence ESC 1.  C1 is the byte that
   followed ESC.  While a composition is in progress, close it at the
   current produced_char position; otherwise store ESC and C1 in the
   destination as two bytes.  Refers to the variables `coding' and
   `dst' of the expansion site.  */

#define DECODE_COMPOSITION_END(c1)                                      \
  do {                                                                  \
    if (COMPOSING_P (coding))                                           \
      {                                                                 \
        CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
        coding->composing = COMPOSITION_NO;                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        dst[0] = ISO_CODE_ESC;                                          \
        dst[1] = c1;                                                    \
        dst += 2;                                                       \
        coding->produced_char += 2;                                     \
      }                                                                 \
  } while (0)
1764
/* Decode a composition rule from the byte C1 (and maybe one more byte
   read from SRC into the variable `c2' of the expansion site) and
   store one encoded composition rule in coding->cmp_data.

   C1 must be an lvalue; it is modified (32 is subtracted from it).
   If the adjusted value is 93 or more, a rule of 0 is stored.  */

#define DECODE_COMPOSITION_RULE(c1)                                     \
  do {                                                                  \
    int rule = 0;                                                       \
    (c1) -= 32;                                                         \
    if ((c1) >= 81)                                                     \
      {                                                                 \
        if ((c1) < 93)          /* new format (after ver.21) */         \
          {                                                             \
            ONE_MORE_BYTE (c2);                                         \
            rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);          \
          }                                                             \
      }                                                                 \
    else                        /* old format (before ver.21) */        \
      {                                                                 \
        int nref = (c1) % 9;                                            \
        int gref = (c1) / 9;                                            \
        /* In the old format, reference point 4 stands for 10.  */      \
        nref = (nref == 4 ? 10 : nref);                                 \
        gref = (gref == 4 ? 10 : gref);                                 \
        rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
      }                                                                 \
    CODING_ADD_COMPOSITION_COMPONENT (coding, rule);                    \
    coding->composition_rule_follows = 0;                               \
  } while (0)
1789
1790
1791 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1792
1793 static void
1794 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1795      struct coding_system *coding;
1796      unsigned char *source, *destination;
1797      int src_bytes, dst_bytes;
1798 {
1799   unsigned char *src = source;
1800   unsigned char *src_end = source + src_bytes;
1801   unsigned char *dst = destination;
1802   unsigned char *dst_end = destination + dst_bytes;
1803   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1804   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1805   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1806   /* SRC_BASE remembers the start position in source in each loop.
1807      The loop will be exited when there's not enough source code
1808      (within macro ONE_MORE_BYTE), or when there's not enough
1809      destination area to produce a character (within macro
1810      EMIT_CHAR).  */
1811   unsigned char *src_base;
1812   int c, charset;
1813   Lisp_Object translation_table;
1814   Lisp_Object safe_chars;
1815
1816   safe_chars = coding_safe_chars (coding->symbol);
1817
1818   if (NILP (Venable_character_translation))
1819     translation_table = Qnil;
1820   else
1821     {
1822       translation_table = coding->translation_table_for_decode;
1823       if (NILP (translation_table))
1824         translation_table = Vstandard_translation_table_for_decode;
1825     }
1826
1827   coding->result = CODING_FINISH_NORMAL;
1828
1829   while (1)
1830     {
1831       int c1, c2 = 0;
1832
1833       src_base = src;
1834       ONE_MORE_BYTE (c1);
1835
1836       /* We produce no character or one character.  */
1837       switch (iso_code_class [c1])
1838         {
1839         case ISO_0x20_or_0x7F:
1840           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1841             {
1842               DECODE_COMPOSITION_RULE (c1);
1843               continue;
1844             }
1845           if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
1846             {
1847               /* This is SPACE or DEL.  */
1848               charset = CHARSET_ASCII;
1849               break;
1850             }
1851           /* This is a graphic character, we fall down ...  */
1852
1853         case ISO_graphic_plane_0:
1854           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1855             {
1856               DECODE_COMPOSITION_RULE (c1);
1857               continue;
1858             }
1859           charset = charset0;
1860           break;
1861
1862         case ISO_0xA0_or_0xFF:
1863           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1864               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1865             goto label_invalid_code;
1866           /* This is a graphic character, we fall down ... */
1867
1868         case ISO_graphic_plane_1:
1869           if (charset1 < 0)
1870             goto label_invalid_code;
1871           charset = charset1;
1872           break;
1873
1874         case ISO_control_0:
1875           if (COMPOSING_P (coding))
1876             DECODE_COMPOSITION_END ('1');
1877
1878           /* All ISO2022 control characters in this class have the
1879              same representation in Emacs internal format.  */
1880           if (c1 == '\n'
1881               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1882               && (coding->eol_type == CODING_EOL_CR
1883                   || coding->eol_type == CODING_EOL_CRLF))
1884             {
1885               coding->result = CODING_FINISH_INCONSISTENT_EOL;
1886               goto label_end_of_loop;
1887             }
1888           charset = CHARSET_ASCII;
1889           break;
1890
1891         case ISO_control_1:
1892           if (COMPOSING_P (coding))
1893             DECODE_COMPOSITION_END ('1');
1894           goto label_invalid_code;
1895
1896         case ISO_carriage_return:
1897           if (COMPOSING_P (coding))
1898             DECODE_COMPOSITION_END ('1');
1899
1900           if (coding->eol_type == CODING_EOL_CR)
1901             c1 = '\n';
1902           else if (coding->eol_type == CODING_EOL_CRLF)
1903             {
1904               ONE_MORE_BYTE (c1);
1905               if (c1 != ISO_CODE_LF)
1906                 {
1907                   src--;
1908                   c1 = '\r';
1909                 }
1910             }
1911           charset = CHARSET_ASCII;
1912           break;
1913
1914         case ISO_shift_out:
1915           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1916               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1917             goto label_invalid_code;
1918           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1919           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1920           continue;
1921
1922         case ISO_shift_in:
1923           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1924             goto label_invalid_code;
1925           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1926           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1927           continue;
1928
1929         case ISO_single_shift_2_7:
1930         case ISO_single_shift_2:
1931           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1932             goto label_invalid_code;
1933           /* SS2 is handled as an escape sequence of ESC 'N' */
1934           c1 = 'N';
1935           goto label_escape_sequence;
1936
1937         case ISO_single_shift_3:
1938           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1939             goto label_invalid_code;
1940           /* SS2 is handled as an escape sequence of ESC 'O' */
1941           c1 = 'O';
1942           goto label_escape_sequence;
1943
1944         case ISO_control_sequence_introducer:
1945           /* CSI is handled as an escape sequence of ESC '[' ...  */
1946           c1 = '[';
1947           goto label_escape_sequence;
1948
1949         case ISO_escape:
1950           ONE_MORE_BYTE (c1);
1951         label_escape_sequence:
1952           /* Escape sequences handled by Emacs are invocation,
1953              designation, direction specification, and character
1954              composition specification.  */
1955           switch (c1)
1956             {
1957             case '&':           /* revision of following character set */
1958               ONE_MORE_BYTE (c1);
1959               if (!(c1 >= '@' && c1 <= '~'))
1960                 goto label_invalid_code;
1961               ONE_MORE_BYTE (c1);
1962               if (c1 != ISO_CODE_ESC)
1963                 goto label_invalid_code;
1964               ONE_MORE_BYTE (c1);
1965               goto label_escape_sequence;
1966
1967             case '$':           /* designation of 2-byte character set */
1968               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1969                 goto label_invalid_code;
1970               ONE_MORE_BYTE (c1);
1971               if (c1 >= '@' && c1 <= 'B')
1972                 {       /* designation of JISX0208.1978, GB2312.1980,
1973                            or JISX0208.1980 */
1974                   DECODE_DESIGNATION (0, 2, 94, c1);
1975                 }
1976               else if (c1 >= 0x28 && c1 <= 0x2B)
1977                 {       /* designation of DIMENSION2_CHARS94 character set */
1978                   ONE_MORE_BYTE (c2);
1979                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1980                 }
1981               else if (c1 >= 0x2C && c1 <= 0x2F)
1982                 {       /* designation of DIMENSION2_CHARS96 character set */
1983                   ONE_MORE_BYTE (c2);
1984                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1985                 }
1986               else
1987                 goto label_invalid_code;
1988               /* We must update these variables now.  */
1989               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1990               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1991               continue;
1992
1993             case 'n':           /* invocation of locking-shift-2 */
1994               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1995                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1996                 goto label_invalid_code;
1997               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1998               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1999               continue;
2000
2001             case 'o':           /* invocation of locking-shift-3 */
2002               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
2003                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
2004                 goto label_invalid_code;
2005               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
2006               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
2007               continue;
2008
2009             case 'N':           /* invocation of single-shift-2 */
2010               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2011                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
2012                 goto label_invalid_code;
2013               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
2014               ONE_MORE_BYTE (c1);
2015               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2016                 goto label_invalid_code;
2017               break;
2018
2019             case 'O':           /* invocation of single-shift-3 */
2020               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2021                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
2022                 goto label_invalid_code;
2023               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
2024               ONE_MORE_BYTE (c1);
2025               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2026                 goto label_invalid_code;
2027               break;
2028
2029             case '0': case '2': case '3': case '4': /* start composition */
2030               DECODE_COMPOSITION_START (c1);
2031               continue;
2032
2033             case '1':           /* end composition */
2034               DECODE_COMPOSITION_END (c1);
2035               continue;
2036
2037             case '[':           /* specification of direction */
2038               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
2039                 goto label_invalid_code;
2040               /* For the moment, nested direction is not supported.
2041                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
2042                  left-to-right, and nonzero means right-to-left.  */
2043               ONE_MORE_BYTE (c1);
2044               switch (c1)
2045                 {
2046                 case ']':       /* end of the current direction */
2047                   coding->mode &= ~CODING_MODE_DIRECTION;
2048
2049                 case '0':       /* end of the current direction */
2050                 case '1':       /* start of left-to-right direction */
2051                   ONE_MORE_BYTE (c1);
2052                   if (c1 == ']')
2053                     coding->mode &= ~CODING_MODE_DIRECTION;
2054                   else
2055                     goto label_invalid_code;
2056                   break;
2057
2058                 case '2':       /* start of right-to-left direction */
2059                   ONE_MORE_BYTE (c1);
2060                   if (c1 == ']')
2061                     coding->mode |= CODING_MODE_DIRECTION;
2062                   else
2063                     goto label_invalid_code;
2064                   break;
2065
2066                 default:
2067                   goto label_invalid_code;
2068                 }
2069               continue;
2070
2071             case '%':
2072               if (COMPOSING_P (coding))
2073                 DECODE_COMPOSITION_END ('1');
2074               ONE_MORE_BYTE (c1);
2075               if (c1 == '/')
2076                 {
2077                   /* CTEXT extended segment:
2078                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
2079                      We keep these bytes as is for the moment.
2080                      They may be decoded by post-read-conversion.  */
2081                   int dim, M, L;
2082                   int size, required;
2083                   int produced_chars;
2084
2085                   ONE_MORE_BYTE (dim);
2086                   ONE_MORE_BYTE (M);
2087                   ONE_MORE_BYTE (L);
2088                   size = ((M - 128) * 128) + (L - 128);
2089                   required = 8 + size * 2;
2090                   if (dst + required > (dst_bytes ? dst_end : src))
2091                     goto label_end_of_loop;
2092                   *dst++ = ISO_CODE_ESC;
2093                   *dst++ = '%';
2094                   *dst++ = '/';
2095                   *dst++ = dim;
2096                   produced_chars = 4;
2097                   dst += CHAR_STRING (M, dst), produced_chars++;
2098                   dst += CHAR_STRING (L, dst), produced_chars++;
2099                   while (size-- > 0)
2100                     {
2101                       ONE_MORE_BYTE (c1);
2102                       dst += CHAR_STRING (c1, dst), produced_chars++;
2103                     }
2104                   coding->produced_char += produced_chars;
2105                 }
2106               else if (c1 == 'G')
2107                 {
2108                   unsigned char *d = dst;
2109                   int produced_chars;
2110
2111                   /* XFree86 extension for embedding UTF-8 in CTEXT:
2112                      ESC % G --UTF-8-BYTES-- ESC % @
2113                      We keep these bytes as is for the moment.
2114                      They may be decoded by post-read-conversion.  */
2115                   if (d + 6 > (dst_bytes ? dst_end : src))
2116                     goto label_end_of_loop;
2117                   *d++ = ISO_CODE_ESC;
2118                   *d++ = '%';
2119                   *d++ = 'G';
2120                   produced_chars = 3;
2121                   while (d + 1 < (dst_bytes ? dst_end : src))
2122                     {
2123                       ONE_MORE_BYTE (c1);
2124                       if (c1 == ISO_CODE_ESC
2125                           && src + 1 < src_end
2126                           && src[0] == '%'
2127                           && src[1] == '@')
2128                         {
2129                           src += 2;
2130                           break;
2131                         }
2132                       d += CHAR_STRING (c1, d), produced_chars++;
2133                     }
2134                   if (d + 3 > (dst_bytes ? dst_end : src))
2135                     goto label_end_of_loop;
2136                   *d++ = ISO_CODE_ESC;
2137                   *d++ = '%';
2138                   *d++ = '@';
2139                   dst = d;
2140                   coding->produced_char += produced_chars + 3;
2141                 }
2142               else
2143                 goto label_invalid_code;
2144               continue;
2145
2146             default:
2147               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
2148                 goto label_invalid_code;
2149               if (c1 >= 0x28 && c1 <= 0x2B)
2150                 {       /* designation of DIMENSION1_CHARS94 character set */
2151                   ONE_MORE_BYTE (c2);
2152                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
2153                 }
2154               else if (c1 >= 0x2C && c1 <= 0x2F)
2155                 {       /* designation of DIMENSION1_CHARS96 character set */
2156                   ONE_MORE_BYTE (c2);
2157                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
2158                 }
2159               else
2160                 goto label_invalid_code;
2161               /* We must update these variables now.  */
2162               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
2163               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
2164               continue;
2165             }
2166         }
2167
2168       /* Now we know CHARSET and 1st position code C1 of a character.
2169          Produce a multibyte sequence for that character while getting
2170          2nd position code C2 if necessary.  */
2171       if (CHARSET_DIMENSION (charset) == 2)
2172         {
2173           ONE_MORE_BYTE (c2);
2174           if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0)
2175             /* C2 is not in a valid range.  */
2176             goto label_invalid_code;
2177         }
2178       c = DECODE_ISO_CHARACTER (charset, c1, c2);
2179       EMIT_CHAR (c);
2180       continue;
2181
2182     label_invalid_code:
2183       coding->errors++;
2184       if (COMPOSING_P (coding))
2185         DECODE_COMPOSITION_END ('1');
2186       src = src_base;
2187       c = *src++;
2188       EMIT_CHAR (c);
2189     }
2190
2191  label_end_of_loop:
2192   coding->consumed = coding->consumed_char = src_base - source;
2193   coding->produced = dst - destination;
2194   return;
2195 }
2196
2197
2198 /* ISO2022 encoding stuff.  */
2199
/*
   Saying just "ISO2022" is not enough when encoding; we have to
   specify more details.  In Emacs, each ISO2022 coding system
   variant has the following specifications:
        1. Initial designation to G0 through G3.
        2. Allows short-form designation?
        3. ASCII should be designated to G0 before control characters?
        4. ASCII should be designated to G0 at end of line?
        5. 7-bit environment or 8-bit environment?
        6. Use locking-shift?
        7. Use single-shift?
   And the following two apply only to Japanese:
        8. Use ASCII in place of JIS0201-1976-Roman?
        9. Use JISX0208-1983 in place of JISX0208-1978?
   These specifications are encoded in `coding->flags' as flag bits
   defined by the macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
   details.
*/
2218
/* Emit at DST the escape sequence that designates CHARSET to graphic
   register REG, advancing DST, and record the designation in CODING.
   When the <final-char> of CHARSET is '@', 'A', or 'B' and the coding
   system CODING allows it, the short-form designation sequence is
   produced.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    char *intermediate_char_94 = "()*+";                                \
    char *intermediate_char_96 = ",-./";                                \
    unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);        \
    int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset);    \
                                                                        \
    /* A revision number below 255 is announced first, as              \
       ESC & ('@' + revision).  */                                      \
    if (revision < 255)                                                 \
      {                                                                 \
        *dst++ = ISO_CODE_ESC;                                          \
        *dst++ = '&';                                                   \
        *dst++ = '@' + revision;                                        \
      }                                                                 \
    *dst++ = ISO_CODE_ESC;                                              \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      {                                                                 \
        /* Charsets whose dimension is not 1 are introduced by '$'.  */ \
        *dst++ = '$';                                                   \
        if (CHARSET_CHARS (charset) != 94)                              \
          *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
        else if (! ((coding->flags & CODING_FLAG_ISO_SHORT_FORM)        \
                    && reg == 0                                         \
                    && final_char >= '@' && final_char <= 'B'))         \
          /* Short form does not apply: emit the intermediate char.  */ \
          *dst++ = (unsigned char) (intermediate_char_94[reg]);         \
      }                                                                 \
    else if (CHARSET_CHARS (charset) == 94)                             \
      *dst++ = (unsigned char) (intermediate_char_94[reg]);             \
    else                                                                \
      *dst++ = (unsigned char) (intermediate_char_96[reg]);             \
    *dst++ = final_char;                                                \
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
  } while (0)
2261
/* ENCODE_SINGLE_SHIFT_2 and ENCODE_SINGLE_SHIFT_3 emit the ISO2022
   single-shift functions (single-shift-2 and single-shift-3) at DST.
   When CODING_FLAG_ISO_SEVEN_BITS is set in coding->flags the two-byte
   escape sequence is produced, otherwise the one-byte control
   character.  Both macros put CODING into single-shifting state.  */

#define ENCODE_SINGLE_SHIFT_2                           \
  do {                                                  \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)) \
      *dst++ = ISO_CODE_SS2;                            \
    else                                                \
      {                                                 \
        *dst++ = ISO_CODE_ESC;                          \
        *dst++ = 'N';                                   \
      }                                                 \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
  } while (0)

#define ENCODE_SINGLE_SHIFT_3                           \
  do {                                                  \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)) \
      *dst++ = ISO_CODE_SS3;                            \
    else                                                \
      {                                                 \
        *dst++ = ISO_CODE_ESC;                          \
        *dst++ = 'O';                                   \
      }                                                 \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
  } while (0)
2283
/* ISO2022 locking-shift functions: shift-in, shift-out,
   locking-shift-2, and locking-shift-3.  Each macro emits the control
   character or escape sequence at DST and stores the corresponding
   graphic register number (0 through 3) as invocation 0 of CODING.  */

#define ENCODE_SHIFT_IN                         \
  do {                                          \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
    *dst++ = ISO_CODE_SI;                       \
  } while (0)

#define ENCODE_SHIFT_OUT                        \
  do {                                          \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
    *dst++ = ISO_CODE_SO;                       \
  } while (0)

#define ENCODE_LOCKING_SHIFT_2                  \
  do {                                          \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \
    *dst++ = ISO_CODE_ESC;                      \
    *dst++ = 'n';                               \
  } while (0)

#define ENCODE_LOCKING_SHIFT_3                  \
  do {                                          \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \
    *dst++ = ISO_CODE_ESC;                      \
    *dst++ = 'o';                               \
  } while (0)
2311
/* Emit the code for a DIMENSION1 character of CHARSET whose
   position-code is C1.  If CHARSET is not invoked to either graphic
   plane, the necessary designation and invocation sequences are
   produced first.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        /* Single-shifting state applies to this character only;       \
           it is cleared once the code has been written.  */            \
        if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))             \
          *dst++ = c1 | 0x80;                                           \
        else                                                            \
          *dst++ = c1 & 0x7F;                                           \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))           \
      {                                                                 \
        *dst++ = c1 & 0x7F;                                             \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))           \
      {                                                                 \
        *dst++ = c1 | 0x80;                                             \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not invoked to any graphic plane yet: invoke it      \
       (designating it to a graphic register first if necessary),      \
       then go around the loop again to produce the character.  */      \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
2344
/* Emit the codes for a DIMENSION2 character of CHARSET whose
   position-codes are C1 and C2.  If CHARSET is not invoked to either
   graphic plane, the necessary designation and invocation codes are
   produced first.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        /* Single-shifting state applies to this character only;       \
           it is cleared once both codes have been written.  */         \
        if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))             \
          {                                                             \
            *dst++ = c1 | 0x80;                                         \
            *dst++ = c2 | 0x80;                                         \
          }                                                             \
        else                                                            \
          {                                                             \
            *dst++ = c1 & 0x7F;                                         \
            *dst++ = c2 & 0x7F;                                         \
          }                                                             \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))           \
      {                                                                 \
        *dst++ = c1 & 0x7F;                                             \
        *dst++ = c2 & 0x7F;                                             \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))           \
      {                                                                 \
        *dst++ = c1 | 0x80;                                             \
        *dst++ = c2 | 0x80;                                             \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not invoked to any graphic plane yet: invoke it      \
       (designating it to a graphic register first if necessary),      \
       then go around the loop again to produce the character.  */      \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
2377
/* Encode character C at DST.  C is split into its charset and
   position codes.  For an undefined charset the position codes are
   written out as they are.  Otherwise the character goes through the
   DIMENSION1 or DIMENSION2 encoder, after charset_latin_jisx0201 has
   been substituted for ASCII when CODING_FLAG_ISO_USE_ROMAN is set,
   or charset_jisx0208_1978 for charset_jisx0208 when
   CODING_FLAG_ISO_USE_OLDJIS is set.  */

#define ENCODE_ISO_CHARACTER(c)                                 \
  do {                                                          \
    int charset, c1, c2;                                        \
                                                                \
    SPLIT_CHAR (c, charset, c1, c2);                            \
    if (! CHARSET_DEFINED_P (charset))                          \
      {                                                         \
        *dst++ = c1;                                            \
        if (c2 >= 0)                                            \
          *dst++ = c2;                                          \
      }                                                         \
    else if (CHARSET_DIMENSION (charset) != 1)                  \
      {                                                         \
        if (charset == charset_jisx0208                         \
            && coding->flags & CODING_FLAG_ISO_USE_OLDJIS)      \
          charset = charset_jisx0208_1978;                      \
        ENCODE_ISO_CHARACTER_DIMENSION2 (charset, c1, c2);      \
      }                                                         \
    else                                                        \
      {                                                         \
        if (charset == CHARSET_ASCII                            \
            && coding->flags & CODING_FLAG_ISO_USE_ROMAN)       \
          charset = charset_latin_jisx0201;                     \
        ENCODE_ISO_CHARACTER_DIMENSION1 (charset, c1);          \
      }                                                         \
  } while (0)
2407
2408
/* Emit CODING_REPLACEMENT_CHARACTER in place of character C: once,
   and a second time when the charset of C has a width greater
   than 1.  */

#define ENCODE_UNSAFE_CHARACTER(c)                              \
  do {                                                          \
    ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER);        \
    if (1 < CHARSET_WIDTH (CHAR_CHARSET (c)))                   \
      {                                                         \
        ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER);    \
      }                                                         \
  } while (0)
2417
2418
2419 /* Produce designation and invocation codes at a place pointed by DST
2420    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
2421    Return new DST.  */
2422
2423 unsigned char *
2424 encode_invocation_designation (charset, coding, dst)
2425      int charset;
2426      struct coding_system *coding;
2427      unsigned char *dst;
2428 {
2429   int reg;                      /* graphic register number */
2430
2431   /* At first, check designations.  */
2432   for (reg = 0; reg < 4; reg++)
2433     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
2434       break;
2435
2436   if (reg >= 4)
2437     {
2438       /* CHARSET is not yet designated to any graphic registers.  */
2439       /* At first check the requested designation.  */
2440       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2441       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
2442         /* Since CHARSET requests no special designation, designate it
2443            to graphic register 0.  */
2444         reg = 0;
2445
2446       ENCODE_DESIGNATION (charset, reg, coding);
2447     }
2448
2449   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
2450       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
2451     {
2452       /* Since the graphic register REG is not invoked to any graphic
2453          planes, invoke it to graphic plane 0.  */
2454       switch (reg)
2455         {
2456         case 0:                 /* graphic register 0 */
2457           ENCODE_SHIFT_IN;
2458           break;
2459
2460         case 1:                 /* graphic register 1 */
2461           ENCODE_SHIFT_OUT;
2462           break;
2463
2464         case 2:                 /* graphic register 2 */
2465           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2466             ENCODE_SINGLE_SHIFT_2;
2467           else
2468             ENCODE_LOCKING_SHIFT_2;
2469           break;
2470
2471         case 3:                 /* graphic register 3 */
2472           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2473             ENCODE_SINGLE_SHIFT_3;
2474           else
2475             ENCODE_LOCKING_SHIFT_3;
2476           break;
2477         }
2478     }
2479
2480   return dst;
2481 }
2482
/* Emit the 2-byte code for the encoded composition rule RULE.  RULE
   is decoded into GREF and NREF, which are written as the bytes
   32 + 81 + GREF and 32 + NREF.  */

#define ENCODE_COMPOSITION_RULE(rule)           \
  do {                                          \
    int gref, nref;                             \
                                                \
    COMPOSITION_DECODE_RULE (rule, gref, nref); \
    dst[0] = 32 + 81 + gref;                    \
    dst[1] = 32 + nref;                         \
    dst += 2;                                   \
  } while (0)
2492
/* Emit the escape sequence that starts a composition: ESC 0 for
   COMPOSITION_RELATIVE, ESC 3 for COMPOSITION_WITH_ALTCHARS, and
   ESC 4 for any other method.  DATA points to an array of integers
   which specify information about the composition (see the comment
   in coding.h for its format); DATA[3] becomes the composition
   method of CODING.  For ESC 3 and ESC 4, cmp_data_index is set to
   cmp_data_start + 4 and composition_rule_follows is cleared.  */

#define ENCODE_COMPOSITION_START(coding, data)                          \
  do {                                                                  \
    coding->composing = data[3];                                        \
    *dst++ = ISO_CODE_ESC;                                              \
    if (coding->composing != COMPOSITION_RELATIVE)                      \
      {                                                                 \
        if (coding->composing == COMPOSITION_WITH_ALTCHARS)             \
          *dst++ = '3';                                                 \
        else                                                            \
          *dst++ = '4';                                                 \
        coding->cmp_data_index = coding->cmp_data_start + 4;            \
        coding->composition_rule_follows = 0;                           \
      }                                                                 \
    else                                                                \
      *dst++ = '0';                                                     \
  } while (0)
2512
/* Emit ESC 1, which indicates the end of the current composition,
   and move CODING past the data of that composition: DATA[0] is
   added to cmp_data_start, and if that reaches the end of the
   current composition data block while a next block exists, the next
   block becomes current.  */

#define ENCODE_COMPOSITION_END(coding, data)                    \
  do {                                                          \
    *dst++ = ISO_CODE_ESC, *dst++ = '1';                        \
    coding->composing = COMPOSITION_NO;                         \
    coding->cmp_data_start += data[0];                          \
    if (coding->cmp_data->next                                  \
        && coding->cmp_data_start == coding->cmp_data->used)    \
      {                                                         \
        coding->cmp_data_start = 0;                             \
        coding->cmp_data = coding->cmp_data->next;              \
      }                                                         \
  } while (0)
2528
/* Emit the composition start sequence ESC 0 and switch CODING to
   COMPOSITION_RELATIVE.  Here the sequence does not start a new
   composition; it means that the components (alternate chars and
   composition rules) of the composition have just been produced and
   that the actual text follows in SRC.  */

#define ENCODE_COMPOSITION_FAKE_START(coding)   \
  do {                                          \
    coding->composing = COMPOSITION_RELATIVE;   \
    *dst++ = ISO_CODE_ESC, *dst++ = '0';        \
  } while (0)
2540
/* The following three macros produce codes for indicating direction
   of text.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER emits the introducer at DST:
   ESC [ when CODING_FLAG_ISO_SEVEN_BITS is set in coding->flags,
   otherwise the single byte ISO_CODE_CSI.  The flag is tested as a
   bit (`&'), the same way the single-shift macros test it; comparing
   the whole flags word for equality would miss the flag whenever any
   other flag bit is set.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
  do {                                                  \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
      *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
    else                                                \
      *dst++ = ISO_CODE_CSI;                            \
  } while (0)

/* Introducer followed by `2 ]': start of right-to-left direction.
   Written as a do-while statement because the introducer macro is
   itself a statement and cannot appear inside a comma expression.  */
#define ENCODE_DIRECTION_R2L                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '2', *dst++ = ']';                 \
  } while (0)

/* Introducer followed by `0 ]': end of the current direction.  */
#define ENCODE_DIRECTION_L2R                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '0', *dst++ = ']';                 \
  } while (0)
2556
/* Emit the designation and invocation codes that bring the graphic
   planes and registers back to their initial state: shift-in when
   invocation 0 of CODING is not graphic register 0, then a
   designation for every register whose initial designation is
   non-negative and differs from its current designation.  */
#define ENCODE_RESET_PLANE_AND_REGISTER                                     \
  do {                                                                      \
    int reg = 0;                                                            \
    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
      ENCODE_SHIFT_IN;                                                      \
    while (reg < 4)                                                         \
      {                                                                     \
        if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0          \
            && (CODING_SPEC_ISO_DESIGNATION (coding, reg)                   \
                != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)))      \
          ENCODE_DESIGNATION                                                \
            (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
        reg++;                                                              \
      }                                                                     \
  } while (0)
2571
2572 /* Produce designation sequences of charsets in the line started from
2573    SRC to a place pointed by DST, and return updated DST.
2574
2575    If the current block ends before any end-of-line, we may fail to
2576    find all the necessary designations.  */
2577
2578 static unsigned char *
2579 encode_designation_at_bol (coding, translation_table, src, src_end, dst)
2580      struct coding_system *coding;
2581      Lisp_Object translation_table;
2582      unsigned char *src, *src_end, *dst;
2583 {
2584   int charset, c, found = 0, reg;
2585   /* Table of charsets to be designated to each graphic register.  */
2586   int r[4];
2587
2588   for (reg = 0; reg < 4; reg++)
2589     r[reg] = -1;
2590
2591   while (found < 4)
2592     {
2593       ONE_MORE_CHAR (c);
2594       if (c == '\n')
2595         break;
2596
2597       charset = CHAR_CHARSET (c);
2598       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2599       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
2600         {
2601           found++;
2602           r[reg] = charset;
2603         }
2604     }
2605
2606  label_end_of_loop:
2607   if (found)
2608     {
2609       for (reg = 0; reg < 4; reg++)
2610         if (r[reg] >= 0
2611             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
2612           ENCODE_DESIGNATION (r[reg], reg, coding);
2613     }
2614
2615   return dst;
2616 }
2617
2618 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
2619
2620 static void
2621 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
2622      struct coding_system *coding;
2623      unsigned char *source, *destination;
2624      int src_bytes, dst_bytes;
2625 {
2626   unsigned char *src = source;
2627   unsigned char *src_end = source + src_bytes;
2628   unsigned char *dst = destination;
2629   unsigned char *dst_end = destination + dst_bytes;
2630   /* Since the maximum bytes produced by each loop is 20, we subtract 19
2631      from DST_END to assure overflow checking is necessary only at the
2632      head of loop.  */
2633   unsigned char *adjusted_dst_end = dst_end - 19;
2634   /* SRC_BASE remembers the start position in source in each loop.
2635      The loop will be exited when there's not enough source text to
2636      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2637      there's not enough destination area to produce encoded codes
2638      (within macro EMIT_BYTES).  */
2639   unsigned char *src_base;
2640   int c;
2641   Lisp_Object translation_table;
2642   Lisp_Object safe_chars;
2643
2644   if (coding->flags & CODING_FLAG_ISO_SAFE)
2645     coding->mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
2646
2647   safe_chars = coding_safe_chars (coding->symbol);
2648
2649   if (NILP (Venable_character_translation))
2650     translation_table = Qnil;
2651   else
2652     {
2653       translation_table = coding->translation_table_for_encode;
2654       if (NILP (translation_table))
2655         translation_table = Vstandard_translation_table_for_encode;
2656     }
2657
2658   coding->consumed_char = 0;
2659   coding->errors = 0;
2660   while (1)
2661     {
2662       src_base = src;
2663
2664       if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
2665         {
2666           coding->result = CODING_FINISH_INSUFFICIENT_DST;
2667           break;
2668         }
2669
2670       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
2671           && CODING_SPEC_ISO_BOL (coding))
2672         {
2673           /* We have to produce designation sequences if any now.  */
2674           dst = encode_designation_at_bol (coding, translation_table,
2675                                            src, src_end, dst);
2676           CODING_SPEC_ISO_BOL (coding) = 0;
2677         }
2678
2679       /* Check composition start and end.  */
2680       if (coding->composing != COMPOSITION_DISABLED
2681           && coding->cmp_data_start < coding->cmp_data->used)
2682         {
2683           struct composition_data *cmp_data = coding->cmp_data;
2684           int *data = cmp_data->data + coding->cmp_data_start;
2685           int this_pos = cmp_data->char_offset + coding->consumed_char;
2686
2687           if (coding->composing == COMPOSITION_RELATIVE)
2688             {
2689               if (this_pos == data[2])
2690                 {
2691                   ENCODE_COMPOSITION_END (coding, data);
2692                   cmp_data = coding->cmp_data;
2693                   data = cmp_data->data + coding->cmp_data_start;
2694                 }
2695             }
2696           else if (COMPOSING_P (coding))
2697             {
2698               /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHAR  */
2699               if (coding->cmp_data_index == coding->cmp_data_start + data[0])
2700                 /* We have consumed components of the composition.
2701                    What follows in SRC is the composition's base
2702                    text.  */
2703                 ENCODE_COMPOSITION_FAKE_START (coding);
2704               else
2705                 {
2706                   int c = cmp_data->data[coding->cmp_data_index++];
2707                   if (coding->composition_rule_follows)
2708                     {
2709                       ENCODE_COMPOSITION_RULE (c);
2710                       coding->composition_rule_follows = 0;
2711                     }
2712                   else
2713                     {
2714                       if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
2715                           && ! CODING_SAFE_CHAR_P (safe_chars, c))
2716                         ENCODE_UNSAFE_CHARACTER (c);
2717                       else
2718                         ENCODE_ISO_CHARACTER (c);
2719                       if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
2720                         coding->composition_rule_follows = 1;
2721                     }
2722                   continue;
2723                 }
2724             }
2725           if (!COMPOSING_P (coding))
2726             {
2727               if (this_pos == data[1])
2728                 {
2729                   ENCODE_COMPOSITION_START (coding, data);
2730                   continue;
2731                 }
2732             }
2733         }
2734
2735       ONE_MORE_CHAR (c);
2736
2737       /* Now encode the character C.  */
2738       if (c < 0x20 || c == 0x7F)
2739         {
2740           if (c == '\r')
2741             {
2742               if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2743                 {
2744                   if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2745                     ENCODE_RESET_PLANE_AND_REGISTER;
2746                   *dst++ = c;
2747                   continue;
2748                 }
2749               /* fall down to treat '\r' as '\n' ...  */
2750               c = '\n';
2751             }
2752           if (c == '\n')
2753             {
2754               if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
2755                 ENCODE_RESET_PLANE_AND_REGISTER;
2756               if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
2757                 bcopy (coding->spec.iso2022.initial_designation,
2758                        coding->spec.iso2022.current_designation,
2759                        sizeof coding->spec.iso2022.initial_designation);
2760               if (coding->eol_type == CODING_EOL_LF
2761                   || coding->eol_type == CODING_EOL_UNDECIDED)
2762                 *dst++ = ISO_CODE_LF;
2763               else if (coding->eol_type == CODING_EOL_CRLF)
2764                 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
2765               else
2766                 *dst++ = ISO_CODE_CR;
2767               CODING_SPEC_ISO_BOL (coding) = 1;
2768             }
2769           else
2770             {
2771               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2772                 ENCODE_RESET_PLANE_AND_REGISTER;
2773               *dst++ = c;
2774             }
2775         }
2776       else if (ASCII_BYTE_P (c))
2777         ENCODE_ISO_CHARACTER (c);
2778       else if (SINGLE_BYTE_CHAR_P (c))
2779         {
2780           *dst++ = c;
2781           coding->errors++;
2782         }
2783       else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
2784                && ! CODING_SAFE_CHAR_P (safe_chars, c))
2785         ENCODE_UNSAFE_CHARACTER (c);
2786       else
2787         ENCODE_ISO_CHARACTER (c);
2788
2789       coding->consumed_char++;
2790     }
2791
2792  label_end_of_loop:
2793   coding->consumed = src_base - source;
2794   coding->produced = coding->produced_char = dst - destination;
2795 }
2796
2797 \f
2798 /*** 4. SJIS and BIG5 handlers ***/
2799
2800 /* Although SJIS and BIG5 are not ISO coding systems, they are used
2801    quite widely.  So, for the moment, Emacs supports them in the bare
2802    C code.  But, in the future, they may be supported only by CCL.  */
2803
2804 /* SJIS is a coding system encoding three character sets: ASCII, right
2805    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
2806    as is.  A character of charset katakana-jisx0201 is encoded by
2807    "position-code + 0x80".  A character of charset japanese-jisx0208
2808    is encoded in 2-byte but two position-codes are divided and shifted
2809    so that it fits in the range below.
2810
2811    --- CODE RANGE of SJIS ---
2812    (character set)      (range)
2813    ASCII                0x00 .. 0x7F
2814    KATAKANA-JISX0201    0xA1 .. 0xDF
2815    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
2816             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
2817    -------------------------------
2818
2819 */
2820
2821 /* BIG5 is a coding system encoding two character sets: ASCII and
2822    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
2823    character set and is encoded in two bytes.
2824
2825    --- CODE RANGE of BIG5 ---
2826    (character set)      (range)
2827    ASCII                0x00 .. 0x7F
2828    Big5 (1st byte)      0xA1 .. 0xFE
2829         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2830    --------------------------
2831
2832    Since the number of characters in Big5 is larger than maximum
2833    characters in Emacs' charset (96x96), it can't be handled as one
2834    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2835    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2836    contains frequently used characters and the latter contains less
2837    frequently used characters.  */
2838
/* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
   are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
   C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
   format.  CHARSET is `charset_big5_1' or `charset_big5_2'.

   Every macro argument is parenthesized in the expansion, so input
   arguments may be arbitrary expressions.  Output arguments (CHARSET,
   C1 and C2 for DECODE_BIG5; B1 and B2 for ENCODE_BIG5) must be
   lvalues.  The output position-codes may be the same variables as
   the input position-codes: the inputs are not read any more once
   the first output position-code has been stored.  */

/* Number of Big5 characters which have the same code in 1st byte:
   the 2nd bytes 0xA1..0xFE plus the 2nd bytes 0x40..0x7E.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* TEMP is the index of the character counted from the BIG5 code
   (0xA1, 0x40).  Codes whose 1st byte is 0xC9 or more belong to
   `charset_big5_2' and are counted from (0xC9, 0x40) instead.  The
   index is then split into two position-codes starting at 0x21.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = (((b1) - 0xA1) * BIG5_SAME_ROW                                  \
         + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));                         \
    if ((b1) < 0xC9)                                                    \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
      }                                                                 \
    (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
    (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
  } while (0)

/* Inverse of DECODE_BIG5: rebuild the index from C1 and C2, shift it
   for `charset_big5_2', and split it into the BIG5 1st byte and a
   2nd byte that skips the unused range between 0x7E and 0xA1.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);                  \
    if ((charset) == charset_big5_2)                                    \
      temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
    (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
    (b2) = temp % BIG5_SAME_ROW;                                        \
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
  } while (0)
2871
2872 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2873    Check if a text is encoded in SJIS.  If it is, return
2874    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2875
2876 static int
2877 detect_coding_sjis (src, src_end, multibytep)
2878      unsigned char *src, *src_end;
2879      int multibytep;
2880 {
2881   int c;
2882   /* Dummy for ONE_MORE_BYTE.  */
2883   struct coding_system dummy_coding;
2884   struct coding_system *coding = &dummy_coding;
2885
2886   while (1)
2887     {
2888       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2889       if (c < 0x80)
2890         continue;
2891       if (c == 0x80 || c == 0xA0 || c > 0xEF)
2892         return 0;
2893       if (c <= 0x9F || c >= 0xE0)
2894         {
2895           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2896           if (c < 0x40 || c == 0x7F || c > 0xFC)
2897             return 0;
2898         }
2899     }
2900  label_end_of_loop:
2901   return CODING_CATEGORY_MASK_SJIS;
2902 }
2903
2904 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2905    Check if a text is encoded in BIG5.  If it is, return
2906    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2907
2908 static int
2909 detect_coding_big5 (src, src_end, multibytep)
2910      unsigned char *src, *src_end;
2911      int multibytep;
2912 {
2913   int c;
2914   /* Dummy for ONE_MORE_BYTE.  */
2915   struct coding_system dummy_coding;
2916   struct coding_system *coding = &dummy_coding;
2917
2918   while (1)
2919     {
2920       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2921       if (c < 0x80)
2922         continue;
2923       if (c < 0xA1 || c > 0xFE)
2924         return 0;
2925       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2926       if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE)
2927         return 0;
2928     }
2929  label_end_of_loop:
2930   return CODING_CATEGORY_MASK_BIG5;
2931 }
2932
2933 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2934    Check if a text is encoded in UTF-8.  If it is, return
2935    CODING_CATEGORY_MASK_UTF_8, else return 0.  */
2936
/* Nonzero if the bits of C selected by MASK are equal to PREFIX.  */
#define UTF_8_PREFIX_P(c, mask, prefix) (((c) & (mask)) == (prefix))

/* An octet below 0x80 forms a sequence by itself.  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
/* 10xxxxxx: an octet which follows a leading octet.  */
#define UTF_8_EXTRA_OCTET_P(c)     UTF_8_PREFIX_P (c, 0xC0, 0x80)
/* 110xxxxx, 1110xxxx, 11110xxx, 111110xx, 1111110x: leading octets
   of 2-, 3-, 4-, 5- and 6-octet sequences respectively.  */
#define UTF_8_2_OCTET_LEADING_P(c) UTF_8_PREFIX_P (c, 0xE0, 0xC0)
#define UTF_8_3_OCTET_LEADING_P(c) UTF_8_PREFIX_P (c, 0xF0, 0xE0)
#define UTF_8_4_OCTET_LEADING_P(c) UTF_8_PREFIX_P (c, 0xF8, 0xF0)
#define UTF_8_5_OCTET_LEADING_P(c) UTF_8_PREFIX_P (c, 0xFC, 0xF8)
#define UTF_8_6_OCTET_LEADING_P(c) UTF_8_PREFIX_P (c, 0xFE, 0xFC)
2944
2945 static int
2946 detect_coding_utf_8 (src, src_end, multibytep)
2947      unsigned char *src, *src_end;
2948      int multibytep;
2949 {
2950   unsigned char c;
2951   int seq_maybe_bytes;
2952   /* Dummy for ONE_MORE_BYTE.  */
2953   struct coding_system dummy_coding;
2954   struct coding_system *coding = &dummy_coding;
2955
2956   while (1)
2957     {
2958       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2959       if (UTF_8_1_OCTET_P (c))
2960         continue;
2961       else if (UTF_8_2_OCTET_LEADING_P (c))
2962         seq_maybe_bytes = 1;
2963       else if (UTF_8_3_OCTET_LEADING_P (c))
2964         seq_maybe_bytes = 2;
2965       else if (UTF_8_4_OCTET_LEADING_P (c))
2966         seq_maybe_bytes = 3;
2967       else if (UTF_8_5_OCTET_LEADING_P (c))
2968         seq_maybe_bytes = 4;
2969       else if (UTF_8_6_OCTET_LEADING_P (c))
2970         seq_maybe_bytes = 5;
2971       else
2972         return 0;
2973
2974       do
2975         {
2976           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2977           if (!UTF_8_EXTRA_OCTET_P (c))
2978             return 0;
2979           seq_maybe_bytes--;
2980         }
2981       while (seq_maybe_bytes > 0);
2982     }
2983
2984  label_end_of_loop:
2985   return CODING_CATEGORY_MASK_UTF_8;
2986 }
2987
2988 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2989    Check if a text is encoded in UTF-16 by looking for a byte order
2990    mark in its first two bytes.  Return CODING_CATEGORY_MASK_UTF_16_BE
2991    if they are 0xFE 0xFF, CODING_CATEGORY_MASK_UTF_16_LE if they are
2992    0xFF 0xFE, else return 0.  */
2993
/* Nonzero if VAL is one of the two 16-bit values which are never
   valid in UTF-16 text.  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF))

/* Nonzero if VAL is a high (leading) surrogate, 0xD800..0xDBFF.
   The top six bits identify the surrogate kind, hence the 0xFC00
   mask; a narrower mask would also match low surrogates and values
   such as 0xF800.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if VAL is a low (trailing) surrogate, 0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
3003
3004 static int
3005 detect_coding_utf_16 (src, src_end, multibytep)
3006      unsigned char *src, *src_end;
3007      int multibytep;
3008 {
3009   unsigned char c1, c2;
3010   /* Dummy for ONE_MORE_BYTE_CHECK_MULTIBYTE.  */
3011   struct coding_system dummy_coding;
3012   struct coding_system *coding = &dummy_coding;
3013
3014   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
3015   ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep);
3016
3017   if ((c1 == 0xFF) && (c2 == 0xFE))
3018     return CODING_CATEGORY_MASK_UTF_16_LE;
3019   else if ((c1 == 0xFE) && (c2 == 0xFF))
3020     return CODING_CATEGORY_MASK_UTF_16_BE;
3021
3022  label_end_of_loop:
3023   return 0;
3024 }
3025
3026 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3027    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
3028
3029 static void
3030 decode_coding_sjis_big5 (coding, source, destination,
3031                          src_bytes, dst_bytes, sjis_p)
3032      struct coding_system *coding;
3033      unsigned char *source, *destination;
3034      int src_bytes, dst_bytes;
3035      int sjis_p;
3036 {
3037   unsigned char *src = source;
3038   unsigned char *src_end = source + src_bytes;
3039   unsigned char *dst = destination;
3040   unsigned char *dst_end = destination + dst_bytes;
3041   /* SRC_BASE remembers the start position in source in each loop.
3042      The loop will be exited when there's not enough source code
3043      (within macro ONE_MORE_BYTE), or when there's not enough
3044      destination area to produce a character (within macro
3045      EMIT_CHAR).  */
3046   unsigned char *src_base;
3047   Lisp_Object translation_table;
3048
3049   if (NILP (Venable_character_translation))
3050     translation_table = Qnil;
3051   else
3052     {
3053       translation_table = coding->translation_table_for_decode;
3054       if (NILP (translation_table))
3055         translation_table = Vstandard_translation_table_for_decode;
3056     }
3057
3058   coding->produced_char = 0;
3059   while (1)
3060     {
3061       int c, charset, c1, c2 = 0;
3062
3063       src_base = src;
3064       ONE_MORE_BYTE (c1);
3065
3066       if (c1 < 0x80)
3067         {
3068           charset = CHARSET_ASCII;
3069           if (c1 < 0x20)
3070             {
3071               if (c1 == '\r')
3072                 {
3073                   if (coding->eol_type == CODING_EOL_CRLF)
3074                     {
3075                       ONE_MORE_BYTE (c2);
3076                       if (c2 == '\n')
3077                         c1 = c2;
3078                       else
3079                         /* To process C2 again, SRC is subtracted by 1.  */
3080                         src--;
3081                     }
3082                   else if (coding->eol_type == CODING_EOL_CR)
3083                     c1 = '\n';
3084                 }
3085               else if (c1 == '\n'
3086                        && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3087                        && (coding->eol_type == CODING_EOL_CR
3088                            || coding->eol_type == CODING_EOL_CRLF))
3089                 {
3090                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
3091                   goto label_end_of_loop;
3092                 }
3093             }
3094         }
3095       else
3096         {
3097           if (sjis_p)
3098             {
3099               if (c1 == 0x80 || c1 == 0xA0 || c1 > 0xEF)
3100                 goto label_invalid_code;
3101               if (c1 <= 0x9F || c1 >= 0xE0)
3102                 {
3103                   /* SJIS -> JISX0208 */
3104                   ONE_MORE_BYTE (c2);
3105                   if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
3106                     goto label_invalid_code;
3107                   DECODE_SJIS (c1, c2, c1, c2);
3108                   charset = charset_jisx0208;
3109                 }
3110               else
3111                 /* SJIS -> JISX0201-Kana */
3112                 charset = charset_katakana_jisx0201;
3113             }
3114           else
3115             {
3116               /* BIG5 -> Big5 */
3117               if (c1 < 0xA0 || c1 > 0xFE)
3118                 goto label_invalid_code;
3119               ONE_MORE_BYTE (c2);
3120               if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
3121                 goto label_invalid_code;
3122               DECODE_BIG5 (c1, c2, charset, c1, c2);
3123             }
3124         }
3125
3126       c = DECODE_ISO_CHARACTER (charset, c1, c2);
3127       EMIT_CHAR (c);
3128       continue;
3129
3130     label_invalid_code:
3131       coding->errors++;
3132       src = src_base;
3133       c = *src++;
3134       EMIT_CHAR (c);
3135     }
3136
3137  label_end_of_loop:
3138   coding->consumed = coding->consumed_char = src_base - source;
3139   coding->produced = dst - destination;
3140   return;
3141 }
3142
3143 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
3144    This function can encode charsets `ascii', `katakana-jisx0201',
3145    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
3146    are sure that all these charsets are registered as official charset
3147    (i.e. do not have extended leading-codes).  Characters of other
3148    charsets are produced without any encoding.  If SJIS_P is 1, encode
3149    SJIS text, else encode BIG5 text.

        When encoding SJIS, a character of `charset_jisx0208_1978' is
        encoded in the same way as one of `charset_jisx0208', and a
        character of `charset_latin_jisx0201' is emitted as its
        position-code.

        If CODING->mode has CODING_MODE_INHIBIT_UNENCODABLE_CHAR set, a
        character which can't be encoded is replaced by
        CODING_REPLACEMENT_CHARACTER, emitted twice when CHARSET_WIDTH
        of its charset is greater than 1.

        On return, CODING->consumed, CODING->produced and
        CODING->produced_char are set; CODING->consumed_char has been
        incremented once for each character encoded.  */
3150
3151 static void
3152 encode_coding_sjis_big5 (coding, source, destination,
3153                          src_bytes, dst_bytes, sjis_p)
3154      struct coding_system *coding;
3155      unsigned char *source, *destination;
3156      int src_bytes, dst_bytes;
3157      int sjis_p;
3158 {
3159   unsigned char *src = source;
3160   unsigned char *src_end = source + src_bytes;
3161   unsigned char *dst = destination;
3162   unsigned char *dst_end = destination + dst_bytes;
3163   /* SRC_BASE remembers the start position in source in each loop.
3164      The loop will be exited when there's not enough source text to
3165      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3166      there's not enough destination area to produce encoded codes
3167      (within macro EMIT_BYTES).  */
3168   unsigned char *src_base;
3169   Lisp_Object translation_table;
3170
3171   if (NILP (Venable_character_translation))
3172     translation_table = Qnil;
3173   else
3174     {
3175       translation_table = coding->translation_table_for_encode;
3176       if (NILP (translation_table))
3177         translation_table = Vstandard_translation_table_for_encode;
3178     }
3179
3180   while (1)
3181     {
3182       int c, charset, c1, c2;
3183
3184       src_base = src;
3185       ONE_MORE_CHAR (c);
3186
3187       /* Now encode the character C.  */
3188       if (SINGLE_BYTE_CHAR_P (c))  /* Only end-of-line codes are converted here.  */
3189         {
3190           switch (c)
3191             {
3192             case '\r':
3193               if (!(coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
3194                 {
3195                   EMIT_ONE_BYTE (c);
3196                   break;
3197                 }
3198               c = '\n';  /* Selective display: treat '\r' as '\n' and fall through.  */
3199             case '\n':
3200               if (coding->eol_type == CODING_EOL_CRLF)
3201                 {
3202                   EMIT_TWO_BYTES ('\r', c);
3203                   break;
3204                 }
3205               else if (coding->eol_type == CODING_EOL_CR)
3206                 c = '\r';  /* Fall through to emit the single byte.  */
3207             default:
3208               EMIT_ONE_BYTE (c);
3209             }
3210         }
3211       else
3212         {
3213           SPLIT_CHAR (c, charset, c1, c2);
3214           if (sjis_p)
3215             {
3216               if (charset == charset_jisx0208
3217                   || charset == charset_jisx0208_1978)
3218                 {
3219                   ENCODE_SJIS (c1, c2, c1, c2);
3220                   EMIT_TWO_BYTES (c1, c2);
3221                 }
3222               else if (charset == charset_katakana_jisx0201)
3223                 EMIT_ONE_BYTE (c1 | 0x80);  /* "position-code + 0x80" of the SJIS code range.  */
3224               else if (charset == charset_latin_jisx0201)
3225                 EMIT_ONE_BYTE (c1);
3226               else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
3227                 {
3228                   EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3229                   if (CHARSET_WIDTH (charset) > 1)
3230                     EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3231                 }
3232               else
3233                 /* There's no way other than producing the internal
3234                    codes as is.  */
3235                 EMIT_BYTES (src_base, src);
3236             }
3237           else
3238             {
3239               if (charset == charset_big5_1 || charset == charset_big5_2)
3240                 {
3241                   ENCODE_BIG5 (charset, c1, c2, c1, c2);
3242                   EMIT_TWO_BYTES (c1, c2);
3243                 }
3244               else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
3245                 {
3246                   EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3247                   if (CHARSET_WIDTH (charset) > 1)
3248                     EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3249                 }
3250               else
3251                 /* There's no way other than producing the internal
3252                    codes as is.  */
3253                 EMIT_BYTES (src_base, src);
3254             }
3255         }
3256       coding->consumed_char++;  /* One more source character has been encoded.  */
3257     }
3258
3259  label_end_of_loop:
3260   coding->consumed = src_base - source;
3261   coding->produced = coding->produced_char = dst - destination;
3262 }
3263
3264 \f
3265 /*** 5. CCL handlers ***/
3266
3267 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3268    Check if a text is encoded in a coding system of which
3269    encoder/decoder are written in CCL program.  If it is, return
3270    CODING_CATEGORY_MASK_CCL, else return 0.  */
3271
3272 static int
3273 detect_coding_ccl (src, src_end, multibytep)
3274      unsigned char *src, *src_end;
3275      int multibytep;
3276 {
3277   unsigned char *valid;
3278   int c;
3279   /* Dummy for ONE_MORE_BYTE.  */
3280   struct coding_system dummy_coding;
3281   struct coding_system *coding = &dummy_coding;
3282
3283   /* No coding system is assigned to coding-category-ccl.  */
3284   if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
3285     return 0;
3286
3287   valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
3288   while (1)
3289     {
3290       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
3291       if (! valid[c])
3292         return 0;
3293     }
3294  label_end_of_loop:
3295   return CODING_CATEGORY_MASK_CCL;
3296 }
3297
3298 \f
3299 /*** 6. End-of-line handlers ***/
3300
3301 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3302
3303 static void
3304 decode_eol (coding, source, destination, src_bytes, dst_bytes)
3305      struct coding_system *coding;
3306      unsigned char *source, *destination;
3307      int src_bytes, dst_bytes;
3308 {
3309   unsigned char *src = source;
3310   unsigned char *dst = destination;
3311   unsigned char *src_end = src + src_bytes;
3312   unsigned char *dst_end = dst + dst_bytes;
3313   Lisp_Object translation_table;
3314   /* SRC_BASE remembers the start position in source in each loop.
3315      The loop will be exited when there's not enough source code
3316      (within macro ONE_MORE_BYTE), or when there's not enough
3317      destination area to produce a character (within macro
3318      EMIT_CHAR).  */
3319   unsigned char *src_base;
3320   int c;
3321
3322   translation_table = Qnil;
3323   switch (coding->eol_type)
3324     {
3325     case CODING_EOL_CRLF:
3326       while (1)
3327         {
3328           src_base = src;
3329           ONE_MORE_BYTE (c);
3330           if (c == '\r')
3331             {
3332               ONE_MORE_BYTE (c);
3333               if (c != '\n')
3334                 {
3335                   src--;
3336                   c = '\r';
3337                 }
3338             }
3339           else if (c == '\n'
3340                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
3341             {
3342               coding->result = CODING_FINISH_INCONSISTENT_EOL;
3343               goto label_end_of_loop;
3344             }
3345           EMIT_CHAR (c);
3346         }
3347       break;
3348
3349     case CODING_EOL_CR:
3350       while (1)
3351         {
3352           src_base = src;
3353           ONE_MORE_BYTE (c);
3354           if (c == '\n')
3355             {
3356               if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3357                 {
3358                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
3359                   goto label_end_of_loop;
3360                 }
3361             }
3362           else if (c == '\r')
3363             c = '\n';
3364           EMIT_CHAR (c);
3365         }
3366       break;
3367
3368     default:                    /* no need for EOL handling */
3369       while (1)
3370         {
3371           src_base = src;
3372           ONE_MORE_BYTE (c);
3373           EMIT_CHAR (c);
3374         }
3375     }
3376
3377  label_end_of_loop:
3378   coding->consumed = coding->consumed_char = src_base - source;
3379   coding->produced = dst - destination;
3380   return;
3381 }
3382
3383 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
3384    format of end-of-line according to `coding->eol_type'.  It also
3385    convert multibyte form 8-bit characters to unibyte if
3386    CODING->src_multibyte is nonzero.  If `coding->mode &
3387    CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
3388    also means end-of-line.  */
3389
3390 static void
3391 encode_eol (coding, source, destination, src_bytes, dst_bytes)
3392      struct coding_system *coding;
3393      const unsigned char *source;
3394      unsigned char *destination;
3395      int src_bytes, dst_bytes;
3396 {
3397   const unsigned char *src = source;
3398   unsigned char *dst = destination;
3399   const unsigned char *src_end = src + src_bytes;
3400   unsigned char *dst_end = dst + dst_bytes;
3401   Lisp_Object translation_table;
3402   /* SRC_BASE remembers the start position in source in each loop.
3403      The loop will be exited when there's not enough source text to
3404      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3405      there's not enough destination area to produce encoded codes
3406      (within macro EMIT_BYTES).  */
3407   const unsigned char *src_base;
3408   unsigned char *tmp;
3409   int c;
3410   int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
3411
3412   translation_table = Qnil;
3413   if (coding->src_multibyte
3414       && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL)
3415     {
3416       src_end--;
3417       src_bytes--;
3418       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
3419     }
3420
3421   if (coding->eol_type == CODING_EOL_CRLF)
3422     {
3423       while (src < src_end)
3424         {
3425           src_base = src;
3426           c = *src++;
3427           if (c >= 0x20)
3428             EMIT_ONE_BYTE (c);
3429           else if (c == '\n' || (c == '\r' && selective_display))
3430             EMIT_TWO_BYTES ('\r', '\n');
3431           else
3432             EMIT_ONE_BYTE (c);
3433         }
3434       src_base = src;
3435     label_end_of_loop:
3436       ;
3437     }
3438   else
3439     {
3440       if (!dst_bytes || src_bytes <= dst_bytes)
3441         {
3442           safe_bcopy (src, dst, src_bytes);
3443           src_base = src_end;
3444           dst += src_bytes;
3445         }
3446       else
3447         {
3448           if (coding->src_multibyte
3449               && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL)
3450             dst_bytes--;
3451           safe_bcopy (src, dst, dst_bytes);
3452           src_base = src + dst_bytes;
3453           dst = destination + dst_bytes;
3454           coding->result = CODING_FINISH_INSUFFICIENT_DST;
3455         }
3456       if (coding->eol_type == CODING_EOL_CR)
3457         {
3458           for (tmp = destination; tmp < dst; tmp++)
3459             if (*tmp == '\n') *tmp = '\r';
3460         }
3461       else if (selective_display)
3462         {
3463           for (tmp = destination; tmp < dst; tmp++)
3464             if (*tmp == '\r') *tmp = '\n';
3465         }
3466     }
3467   if (coding->src_multibyte)
3468     dst = destination + str_as_unibyte (destination, dst - destination);
3469
3470   coding->consumed = src_base - source;
3471   coding->produced = dst - destination;
3472   coding->produced_char = coding->produced;
3473 }
3474
3475 \f
3476 /*** 7. C library functions ***/
3477
3478 /* In Emacs Lisp, a coding system is represented by a Lisp symbol which
3479    has a property `coding-system'.  The value of this property is a
3480    vector of length 5 (called the coding-vector).  Among elements of
3481    this vector, the first (element[0]) and the fifth (element[4])
3482    carry important information for decoding/encoding.  Before
3483    decoding/encoding, this information should be set in fields of a
3484    structure of type `coding_system'.
3485
3486    The value of the property `coding-system' can be a symbol of another
3487    subsidiary coding-system.  In that case, Emacs gets coding-vector
3488    from that symbol.
3489
3490    `element[0]' contains information to be set in `coding->type'.  The
3491    value and its meaning is as follows:
3492
3493    0 -- coding_type_emacs_mule
3494    1 -- coding_type_sjis
3495    2 -- coding_type_iso2022
3496    3 -- coding_type_big5
3497    4 -- coding_type_ccl encoder/decoder written in CCL
3498    nil -- coding_type_no_conversion
3499    t -- coding_type_undecided (automatic conversion on decoding,
3500                                no-conversion on encoding)
3501
3502    `element[4]' contains information to be set in `coding->flags' and
3503    `coding->spec'.  The meaning varies by `coding->type'.
3504
3505    If `coding->type' is `coding_type_iso2022', element[4] is a vector
3506    of length 32 (of which the first 13 sub-elements are used now).
3507    Meanings of these sub-elements are:
3508
3509    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
3510         If the value is an integer of valid charset, the charset is
3511         assumed to be designated to graphic register N initially.
3512
3513         If the value is negative, it is the negated ID of a charset which
3514         reserves graphic register N, which means that the charset is
3515         not designated initially but should be designated to graphic
3516         register N just before encoding a character in that charset.
3517
3518         If the value is nil, graphic register N is never used on
3519         encoding.
3520
3521    sub-element[N] where N is 4 through 11: to be set in `coding->flags'
3522         Each value takes t or nil.  See the section ISO2022 of
3523         `coding.h' for more information.
3524
3525    If `coding->type' is `coding_type_big5', element[4] is t to denote
3526    BIG5-ETen or nil to denote BIG5-HKU.
3527
3528    If `coding->type' takes the other value, element[4] is ignored.
3529
3530    Emacs Lisp's coding systems also carry information about format of
3531    end-of-line in a value of property `eol-type'.  If the value is
3532    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
3533    means CODING_EOL_CR.  If it is not integer, it should be a vector
3534    of subsidiary coding systems of which property `eol-type' has one
3535    of the above values.
3536
3537 */
3538
3539 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
3540    and set it in CODING.  If CODING_SYSTEM_SYMBOL is invalid, CODING
3541    is setup so that no conversion is necessary and return -1, else
3542    return 0.

        (In the definition below the first argument is named
        CODING_SYSTEM.)

        CODING is zero-cleared first, so whatever it held before the call
        is discarded; in particular `cmp_data' is reset to NULL without
        being freed here.

        The coding spec is fetched from the Qcoding_system property of
        the symbol.  It must be a vector of 5 elements whose element 3
        is a cons (used below as a property list); otherwise the
        coding system is treated as invalid.

        If element 0 of the spec is a symbol, the function returns 0
        right away: t selects coding_type_undecided, any other symbol
        selects coding_type_no_conversion.  Otherwise element 0 is used
        as an integer: 0 emacs-mule, 1 sjis, 2 iso2022, 3 big5, 4 ccl,
        5 raw-text; any other value is invalid.

        On the invalid path, CODING ends up with type
        coding_type_no_conversion, category CODING_CATEGORY_IDX_BINARY,
        common_flags 0, eol_type CODING_EOL_LF, and nil pre-write and
        post-read conversion functions.  */
3543
3544 int
3545 setup_coding_system (coding_system, coding)
3546      Lisp_Object coding_system;
3547      struct coding_system *coding;
3548 {
3549   Lisp_Object coding_spec, coding_type, eol_type, plist;
3550   Lisp_Object val;
3551
3552   /* At first, zero clear all members.  */
3553   bzero (coding, sizeof (struct coding_system));
3554
3555   /* Initialize some fields required for all kinds of coding systems.  */
3556   coding->symbol = coding_system;
3557   coding->heading_ascii = -1;
3558   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
3559   coding->composing = COMPOSITION_DISABLED;
3560   coding->cmp_data = NULL;
3561
3562   if (NILP (coding_system))
3563     goto label_invalid_coding_system;
3564
3565   coding_spec = Fget (coding_system, Qcoding_system);
3566
3567   if (!VECTORP (coding_spec)
3568       || XVECTOR (coding_spec)->size != 5
3569       || !CONSP (XVECTOR (coding_spec)->contents[3]))
3570     goto label_invalid_coding_system;
3571
       /* Decide the end-of-line format.  When inhibit_eol_conversion is
          set, the Qeol_type property is not consulted (nil is used
          instead).  A vector value means the format is still undecided
          and detection is requested.
          NOTE(review): XFASTINT below is also applied when EOL_TYPE is
          not an integer (e.g. nil); this relies on such values never
          comparing equal to 1 or 2 -- confirm.  */
3572   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
3573   if (VECTORP (eol_type))
3574     {
3575       coding->eol_type = CODING_EOL_UNDECIDED;
3576       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
3577     }
3578   else if (XFASTINT (eol_type) == 1)
3579     {
3580       coding->eol_type = CODING_EOL_CRLF;
3581       coding->common_flags
3582         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3583     }
3584   else if (XFASTINT (eol_type) == 2)
3585     {
3586       coding->eol_type = CODING_EOL_CR;
3587       coding->common_flags
3588         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3589     }
3590   else
3591     coding->eol_type = CODING_EOL_LF;
3592
3593   coding_type = XVECTOR (coding_spec)->contents[0];
3594   /* Try short cut.  */
3595   if (SYMBOLP (coding_type))
3596     {
3597       if (EQ (coding_type, Qt))
3598         {
3599           coding->type = coding_type_undecided;
3600           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
3601         }
3602       else
3603         coding->type = coding_type_no_conversion;
3604       /* Initialize this member.  Any thing other than
3605          CODING_CATEGORY_IDX_UTF_16_BE and
3606          CODING_CATEGORY_IDX_UTF_16_LE are ok because they have
3607          special treatment in detect_eol.  */
3608       coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
3609
3610       return 0;
3611     }
3612
3613   /* Get values of coding system properties:
3614      `post-read-conversion', `pre-write-conversion',
3615      `translation-table-for-decode', `translation-table-for-encode'.  */
3616   plist = XVECTOR (coding_spec)->contents[3];
3617   /* Pre & post conversion functions should be disabled if
3618      inhibit_pre_post_conversion is nonzero.  This is the case that a
3619      code conversion function is called while those functions are running.  */
3620   if (! inhibit_pre_post_conversion)
3621     {
3622       coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
3623       coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
3624     }
       /* A translation table property may be given as a symbol, in which
          case the table is looked up as a property of that symbol.
          Anything that is not a char-table in the end is dropped.  */
3625   val = Fplist_get (plist, Qtranslation_table_for_decode);
3626   if (SYMBOLP (val))
3627     val = Fget (val, Qtranslation_table_for_decode);
3628   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
3629   val = Fplist_get (plist, Qtranslation_table_for_encode);
3630   if (SYMBOLP (val))
3631     val = Fget (val, Qtranslation_table_for_encode);
3632   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
       /* The coding category is mandatory, and its index (stored as a
          property of the category value) must be an integer; otherwise
          the coding system is invalid.  */
3633   val = Fplist_get (plist, Qcoding_category);
3634   if (!NILP (val))
3635     {
3636       val = Fget (val, Qcoding_category_index);
3637       if (INTEGERP (val))
3638         coding->category_idx = XINT (val);
3639       else
3640         goto label_invalid_coding_system;
3641     }
3642   else
3643     goto label_invalid_coding_system;
3644
3645   /* If the coding system has non-nil `composition' property, enable
3646      composition handling.  */
3647   val = Fplist_get (plist, Qcomposition);
3648   if (!NILP (val))
3649     coding->composing = COMPOSITION_NO;
3650
       /* Symbols were handled by the short cut above; from here on
          CODING_TYPE is used as an integer selecting the handler.  */
3651   switch (XFASTINT (coding_type))
3652     {
3653     case 0:
3654       coding->type = coding_type_emacs_mule;
3655       coding->common_flags
3656         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
           /* NOTE(review): both masks were already set just above, so
              the two tests below do not change common_flags.  */
3657       if (!NILP (coding->post_read_conversion))
3658         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
3659       if (!NILP (coding->pre_write_conversion))
3660         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
3661       break;
3662
3663     case 1:
3664       coding->type = coding_type_sjis;
3665       coding->common_flags
3666         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3667       break;
3668
3669     case 2:
3670       coding->type = coding_type_iso2022;
3671       coding->common_flags
3672         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3673       {
             /* This VAL shadows the function-level VAL inside this block.  */
3674         Lisp_Object val, temp;
3675         Lisp_Object *flags;
3676         int i, charset, reg_bits = 0;
3677
3678         val = XVECTOR (coding_spec)->contents[4];
3679
3680         if (!VECTORP (val) || XVECTOR (val)->size != 32)
3681           goto label_invalid_coding_system;
3682
             /* Elements 4 through 16 of the flag vector each switch on one
                CODING_FLAG_ISO_XXX bit when non-nil.  */
3683         flags = XVECTOR (val)->contents;
3684         coding->flags
3685           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
3686              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
3687              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
3688              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
3689              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
3690              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
3691              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
3692              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
3693              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
3694              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
3695              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3696              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
3697              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
3698              );
3699
3700         /* Invoke graphic register 0 to plane 0.  */
3701         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
3702         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
3703         CODING_SPEC_ISO_INVOCATION (coding, 1)
3704           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
3705         /* Not single shifting at first.  */
3706         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
3707         /* Beginning of buffer should also be regarded as bol. */
3708         CODING_SPEC_ISO_BOL (coding) = 1;
3709
             /* Start with 255 as the revision number of every charset, then
                take the numbers listed in Vcharset_revision_alist.  An
                entry is honored only if its car names a known charset and
                its cdr is an integer I with 0 <= I and I + '@' < 128.  */
3710         for (charset = 0; charset <= MAX_CHARSET; charset++)
3711           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
3712         val = Vcharset_revision_alist;
3713         while (CONSP (val))
3714           {
3715             charset = get_charset_id (Fcar_safe (XCAR (val)));
3716             if (charset >= 0
3717                 && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
3718                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
3719               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
3720             val = XCDR (val);
3721           }
3722
3723         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
3724            FLAGS[REG] can be one of below:
3725                 integer CHARSET: CHARSET occupies register I,
3726                 t: designate nothing to REG initially, but can be used
3727                   by any charsets,
3728                 list of integer, nil, or t: designate the first
3729                   element (if integer) to REG initially, the remaining
3730                   elements (if integer) is designated to REG on request,
3731                   if an element is t, REG can be used by any charsets,
3732                 nil: REG is never used.  */
3733         for (charset = 0; charset <= MAX_CHARSET; charset++)
3734           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3735             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
3736         for (i = 0; i < 4; i++)
3737           {
3738             if ((INTEGERP (flags[i])
3739                  && (charset = XINT (flags[i]), CHARSET_VALID_P (charset)))
3740                 || (charset = get_charset_id (flags[i])) >= 0)
3741               {
3742                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3743                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
3744               }
3745             else if (EQ (flags[i], Qt))
3746               {
3747                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3748                 reg_bits |= 1 << i;
3749                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3750               }
3751             else if (CONSP (flags[i]))
3752               {
3753                 Lisp_Object tail;
3754                 tail = flags[i];
3755
3756                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3757                 if ((INTEGERP (XCAR (tail))
3758                      && (charset = XINT (XCAR (tail)),
3759                          CHARSET_VALID_P (charset)))
3760                     || (charset = get_charset_id (XCAR (tail))) >= 0)
3761                   {
3762                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3763                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
3764                   }
3765                 else
3766                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3767                 tail = XCDR (tail);
3768                 while (CONSP (tail))
3769                   {
3770                     if ((INTEGERP (XCAR (tail))
3771                          && (charset = XINT (XCAR (tail)),
3772                              CHARSET_VALID_P (charset)))
3773                         || (charset = get_charset_id (XCAR (tail))) >= 0)
3774                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3775                         = i;
3776                     else if (EQ (XCAR (tail), Qt))
3777                       reg_bits |= 1 << i;
3778                     tail = XCDR (tail);
3779                   }
3780               }
3781             else
3782               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3783
3784             CODING_SPEC_ISO_DESIGNATION (coding, i)
3785               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3786           }
3787
             /* REG_BITS has bit I set for each register I that was marked
                with t above.  Without locking shift, drop the registers
                that cannot be reached.  */
3788         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3789           {
3790             /* REG 1 can be used only by locking shift in 7-bit env.  */
3791             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3792               reg_bits &= ~2;
3793             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3794               /* Without any shifting, only REG 0 and 1 can be used.  */
3795               reg_bits &= 3;
3796           }
3797
3798         if (reg_bits)
3799           for (charset = 0; charset <= MAX_CHARSET; charset++)
3800             {
3801               if (CHARSET_DEFINED_P (charset)
3802                   && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3803                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
3804                 {
3805                   /* There exist some default graphic registers to be
3806                      used by CHARSET.  */
3807
3808                   /* We had better avoid designating a charset of
3809                      CHARS96 to REG 0 as far as possible.  */
3810                   if (CHARSET_CHARS (charset) == 96)
3811                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3812                       = (reg_bits & 2
3813                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3814                   else
3815                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3816                       = (reg_bits & 1
3817                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3818                 }
3819             }
3820       }
3821       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3822       coding->spec.iso2022.last_invalid_designation_register = -1;
3823       break;
3824
3825     case 3:
3826       coding->type = coding_type_big5;
3827       coding->common_flags
3828         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3829       coding->flags
3830         = (NILP (XVECTOR (coding_spec)->contents[4])
3831            ? CODING_FLAG_BIG5_HKU
3832            : CODING_FLAG_BIG5_ETEN);
3833       break;
3834
3835     case 4:
3836       coding->type = coding_type_ccl;
3837       coding->common_flags
3838         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3839       {
             /* Element 4 must be a cons whose car and cdr are both accepted
                by setup_ccl_program, for the decoder and the encoder
                respectively.  */
3840         val = XVECTOR (coding_spec)->contents[4];
3841         if (! CONSP (val)
3842             || setup_ccl_program (&(coding->spec.ccl.decoder),
3843                                   XCAR (val)) < 0
3844             || setup_ccl_program (&(coding->spec.ccl.encoder),
3845                                   XCDR (val)) < 0)
3846           goto label_invalid_coding_system;
3847
             /* Fill the table of valid codes from the Qvalid_codes
                property: each list element is either a single code or a
                (START . END) pair, and must lie within 0..255 to have any
                effect.  */
3848         bzero (coding->spec.ccl.valid_codes, 256);
3849         val = Fplist_get (plist, Qvalid_codes);
3850         if (CONSP (val))
3851           {
3852             Lisp_Object this;
3853
3854             for (; CONSP (val); val = XCDR (val))
3855               {
3856                 this = XCAR (val);
3857                 if (INTEGERP (this)
3858                     && XINT (this) >= 0 && XINT (this) < 256)
3859                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
3860                 else if (CONSP (this)
3861                          && INTEGERP (XCAR (this))
3862                          && INTEGERP (XCDR (this)))
3863                   {
3864                     int start = XINT (XCAR (this));
3865                     int end = XINT (XCDR (this));
3866
3867                     if (start >= 0 && start <= end && end < 256)
3868                       while (start <= end)
3869                         coding->spec.ccl.valid_codes[start++] = 1;
3870                   }
3871               }
3872           }
3873       }
3874       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3875       coding->spec.ccl.cr_carryover = 0;
3876       coding->spec.ccl.eight_bit_carryover[0] = 0;
3877       break;
3878
3879     case 5:
           /* raw-text adds no flags of its own; common_flags keeps only
              what the end-of-line handling above stored.  */
3880       coding->type = coding_type_raw_text;
3881       break;
3882
3883     default:
3884       goto label_invalid_coding_system;
3885     }
3886   return 0;
3887
3888  label_invalid_coding_system:
3889   coding->type = coding_type_no_conversion;
3890   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3891   coding->common_flags = 0;
3892   coding->eol_type = CODING_EOL_LF;
3893   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3894   return -1;
3895 }
3896
3897 /* Free memory blocks allocated for storing composition information.  */
3898
3899 void
3900 coding_free_composition_data (coding)
3901      struct coding_system *coding;
3902 {
3903   struct composition_data *cmp_data = coding->cmp_data, *next;
3904
3905   if (!cmp_data)
3906     return;
3907   /* Memory blocks are chained.  At first, rewind to the first, then,
3908      free blocks one by one.  */
3909   while (cmp_data->prev)
3910     cmp_data = cmp_data->prev;
3911   while (cmp_data)
3912     {
3913       next = cmp_data->next;
3914       xfree (cmp_data);
3915       cmp_data = next;
3916     }
3917   coding->cmp_data = NULL;
3918 }
3919
3920 /* Set `char_offset' member of all memory blocks pointed by
3921    coding->cmp_data to POS.  */
3922
3923 void
3924 coding_adjust_composition_offset (coding, pos)
3925      struct coding_system *coding;
3926      int pos;
3927 {
3928   struct composition_data *cmp_data;
3929
3930   for (cmp_data = coding->cmp_data; cmp_data; cmp_data = cmp_data->next)
3931     cmp_data->char_offset = pos;
3932 }
3933
3934 /* Setup raw-text or one of its subsidiaries in the structure
3935    coding_system CODING according to the already setup value eol_type
3936    in CODING.  CODING should be setup for some coding system in
3937    advance.  */
3938
3939 void
3940 setup_raw_text_coding_system (coding)
3941      struct coding_system *coding;
3942 {
3943   if (coding->type != coding_type_raw_text)
3944     {
3945       coding->symbol = Qraw_text;
3946       coding->type = coding_type_raw_text;
3947       if (coding->eol_type != CODING_EOL_UNDECIDED)
3948         {
3949           Lisp_Object subsidiaries;
3950           subsidiaries = Fget (Qraw_text, Qeol_type);
3951
3952           if (VECTORP (subsidiaries)
3953               && XVECTOR (subsidiaries)->size == 3)
3954             coding->symbol
3955               = XVECTOR (subsidiaries)->contents[coding->eol_type];
3956         }
3957       setup_coding_system (coding->symbol, coding);
3958     }
3959   return;
3960 }
3961
3962 /* Emacs has a mechanism to automatically detect a coding system if it
3963    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3964    it's impossible to distinguish some coding systems accurately
3965    because they use the same range of codes.  So, at first, coding
3966    systems are grouped into the following categories:
3967
3968    o coding-category-emacs-mule
3969
3970         The category for a coding system which has the same code range
3971         as Emacs' internal format.  Assigned the coding-system (Lisp
3972         symbol) `emacs-mule' by default.
3973
3974    o coding-category-sjis
3975
3976         The category for a coding system which has the same code range
3977         as SJIS.  Assigned the coding-system (Lisp
3978         symbol) `japanese-shift-jis' by default.
3979
3980    o coding-category-iso-7
3981
3982         The category for a coding system which has the same code range
3983         as ISO2022 of 7-bit environment.  This doesn't use any locking
3984         shift and single shift functions.  This can encode/decode all
3985         charsets.  Assigned the coding-system (Lisp symbol)
3986         `iso-2022-7bit' by default.
3987
3988    o coding-category-iso-7-tight
3989
3990         Same as coding-category-iso-7 except that this can
3991         encode/decode only the specified charsets.
3992
3993    o coding-category-iso-8-1
3994
3995         The category for a coding system which has the same code range
3996         as ISO2022 of 8-bit environment and graphic plane 1 used only
3997         for DIMENSION1 charset.  This doesn't use any locking shift
3998         and single shift functions.  Assigned the coding-system (Lisp
3999         symbol) `iso-latin-1' by default.
4000
4001    o coding-category-iso-8-2
4002
4003         The category for a coding system which has the same code range
4004         as ISO2022 of 8-bit environment and graphic plane 1 used only
4005         for DIMENSION2 charset.  This doesn't use any locking shift
4006         and single shift functions.  Assigned the coding-system (Lisp
4007         symbol) `japanese-iso-8bit' by default.
4008
4009    o coding-category-iso-7-else
4010
4011         The category for a coding system which has the same code range
4012         as ISO2022 of 7-bit environment but uses locking shift or
4013         single shift functions.  Assigned the coding-system (Lisp
4014         symbol) `iso-2022-7bit-lock' by default.
4015
4016    o coding-category-iso-8-else
4017
4018         The category for a coding system which has the same code range
4019         as ISO2022 of 8-bit environment but uses locking shift or
4020         single shift functions.  Assigned the coding-system (Lisp
4021         symbol) `iso-2022-8bit-ss2' by default.
4022
4023    o coding-category-big5
4024
4025         The category for a coding system which has the same code range
4026         as BIG5.  Assigned the coding-system (Lisp symbol)
4027         `cn-big5' by default.
4028
4029    o coding-category-utf-8
4030
4031         The category for a coding system which has the same code range
4032         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
4033         symbol) `utf-8' by default.
4034
4035    o coding-category-utf-16-be
4036
4037         The category for a coding system in which a text has an
4038         Unicode signature (cf. Unicode Standard) in the order of BIG
4039         endian at the head.  Assigned the coding-system (Lisp symbol)
4040         `utf-16-be' by default.
4041
4042    o coding-category-utf-16-le
4043
4044         The category for a coding system in which a text has an
4045         Unicode signature (cf. Unicode Standard) in the order of
4046         LITTLE endian at the head.  Assigned the coding-system (Lisp
4047         symbol) `utf-16-le' by default.
4048
4049    o coding-category-ccl
4050
4051         The category for a coding system of which encoder/decoder is
4052         written in CCL programs.  The default value is nil, i.e., no
4053         coding system is assigned.
4054
4055    o coding-category-binary
4056
4057         The category for a coding system not categorized in any of the
4058         above.  Assigned the coding-system (Lisp symbol)
4059         `no-conversion' by default.
4060
4061    Each of them is a Lisp symbol and the value is an actual
4062    `coding-system' (this is also a Lisp symbol) assigned by a user.
4063    What Emacs does actually is to detect a category of coding system.
4064    Then, it uses a `coding-system' assigned to it.  If Emacs can't
4065    decide a single possible category, it selects a category of the
4066    highest priority.  Priorities of categories are also specified by a
4067    user in a Lisp variable `coding-category-list'.
4068
4069 */
4070
/* Table indexed by byte value and consulted by detect_coding_mask: a
   nonzero entry means the byte is skipped while scanning the head of
   the text.  detect_coding_mask itself rewrites the entries for
   ISO_CODE_SO, ISO_CODE_SI and ISO_CODE_ESC on every call; where the
   remaining entries are initialized is not visible in this part of
   the file.  */
4071 static
4072 int ascii_skip_code[256];
4073
4074 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
4075    If it detects possible coding systems, return an integer in which
4076    appropriate flag bits are set.  Flag bits are defined by macros
4077    CODING_CATEGORY_MASK_XXX in `coding.h'.  If PRIORITIES is non-NULL,
4078    it should point the table `coding_priorities'.  In that case, only
4079    the flag bit for a coding system of the highest priority is set in
4080    the returned value.  If MULTIBYTEP is nonzero, 8-bit codes of the
4081    range 0x80..0x9F are in multibyte form.
4082
4083    How many ASCII characters are at the head is returned as *SKIP.  */
4084
4085 static int
4086 detect_coding_mask (source, src_bytes, priorities, skip, multibytep)
4087      unsigned char *source;
4088      int src_bytes, *priorities, *skip;
4089      int multibytep;
4090 {
4091   register unsigned char c;
4092   unsigned char *src = source, *src_end = source + src_bytes;
4093   unsigned int mask, utf16_examined_p, iso2022_examined_p;
4094   int i;
4095
4096   /* At first, skip all ASCII characters and control characters except
4097      for three ISO2022 specific control characters.  */
4098   ascii_skip_code[ISO_CODE_SO] = 0;
4099   ascii_skip_code[ISO_CODE_SI] = 0;
4100   ascii_skip_code[ISO_CODE_ESC] = 0;
4101
4102  label_loop_detect_coding:
4103   while (src < src_end && ascii_skip_code[*src]) src++;
4104   *skip = src - source;
4105
4106   if (src >= src_end)
4107     /* We found nothing other than ASCII.  There's nothing to do.  */
4108     return 0;
4109
4110   c = *src;
4111   /* The text seems to be encoded in some multilingual coding system.
4112      Now, try to find in which coding system the text is encoded.  */
4113   if (c < 0x80)
4114     {
4115       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
4116       /* C is an ISO2022 specific control code of C0.  */
4117       mask = detect_coding_iso2022 (src, src_end, multibytep);
4118       if (mask == 0)
4119         {
4120           /* No valid ISO2022 code follows C.  Try again.  */
4121           src++;
4122           if (c == ISO_CODE_ESC)
4123             ascii_skip_code[ISO_CODE_ESC] = 1;
4124           else
4125             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
4126           goto label_loop_detect_coding;
4127         }
4128       if (priorities)
4129         {
4130           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
4131             {
4132               if (mask & priorities[i])
4133                 return priorities[i];
4134             }
4135           return CODING_CATEGORY_MASK_RAW_TEXT;
4136         }
4137     }
4138   else
4139     {
4140       int try;
4141
4142       if (multibytep && c == LEADING_CODE_8_BIT_CONTROL)
4143         c = src[1] - 0x20;
4144
4145       if (c < 0xA0)
4146         {
4147           /* C is the first byte of SJIS character code,
4148              or a leading-code of Emacs' internal format (emacs-mule),
4149              or the first byte of UTF-16.  */
4150           try = (CODING_CATEGORY_MASK_SJIS
4151                   | CODING_CATEGORY_MASK_EMACS_MULE
4152                   | CODING_CATEGORY_MASK_UTF_16_BE
4153                   | CODING_CATEGORY_MASK_UTF_16_LE);
4154
4155           /* Or, if C is a special latin extra code,
4156              or is an ISO2022 specific control code of C1 (SS2 or SS3),
4157              or is an ISO2022 control-sequence-introducer (CSI),
4158              we should also consider the possibility of ISO2022 codings.  */
4159           if ((VECTORP (Vlatin_extra_code_table)
4160                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
4161               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
4162               || (c == ISO_CODE_CSI
4163                   && (src < src_end
4164                       && (*src == ']'
4165                           || ((*src == '0' || *src == '1' || *src == '2')
4166                               && src + 1 < src_end
4167                               && src[1] == ']')))))
4168             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
4169                      | CODING_CATEGORY_MASK_ISO_8BIT);
4170         }
4171       else
4172         /* C is a character of ISO2022 in graphic plane right,
4173            or a SJIS's 1-byte character code (i.e. JISX0201),
4174            or the first byte of BIG5's 2-byte code,
4175            or the first byte of UTF-8/16.  */
4176         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
4177                 | CODING_CATEGORY_MASK_ISO_8BIT
4178                 | CODING_CATEGORY_MASK_SJIS
4179                 | CODING_CATEGORY_MASK_BIG5
4180                 | CODING_CATEGORY_MASK_UTF_8
4181                 | CODING_CATEGORY_MASK_UTF_16_BE
4182                 | CODING_CATEGORY_MASK_UTF_16_LE);
4183
4184       /* Or, we may have to consider the possibility of CCL.  */
4185       if (coding_system_table[CODING_CATEGORY_IDX_CCL]
4186           && (coding_system_table[CODING_CATEGORY_IDX_CCL]
4187               ->spec.ccl.valid_codes)[c])
4188         try |= CODING_CATEGORY_MASK_CCL;
4189
4190       mask = 0;
4191       utf16_examined_p = iso2022_examined_p = 0;
4192       if (priorities)
4193         {
4194           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
4195             {
4196               if (!iso2022_examined_p
4197                   && (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
4198                 {
4199                   mask |= detect_coding_iso2022 (src, src_end, multibytep);
4200                   iso2022_examined_p = 1;
4201                 }
4202               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
4203                 mask |= detect_coding_sjis (src, src_end, multibytep);
4204               else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8)
4205                 mask |= detect_coding_utf_8 (src, src_end, multibytep);
4206               else if (!utf16_examined_p
4207                        && (priorities[i] & try &
4208                            CODING_CATEGORY_MASK_UTF_16_BE_LE))
4209                 {
4210                   mask |= detect_coding_utf_16 (src, src_end, multibytep);
4211                   utf16_examined_p = 1;
4212                 }
4213               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
4214                 mask |= detect_coding_big5 (src, src_end, multibytep);
4215               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
4216                 mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4217               else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
4218                 mask |= detect_coding_ccl (src, src_end, multibytep);
4219               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
4220                 mask |= CODING_CATEGORY_MASK_RAW_TEXT;
4221               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
4222                 mask |= CODING_CATEGORY_MASK_BINARY;
4223               if (mask & priorities[i])
4224                 return priorities[i];
4225             }
4226           return CODING_CATEGORY_MASK_RAW_TEXT;
4227         }
4228       if (try & CODING_CATEGORY_MASK_ISO)
4229         mask |= detect_coding_iso2022 (src, src_end, multibytep);
4230       if (try & CODING_CATEGORY_MASK_SJIS)
4231         mask |= detect_coding_sjis (src, src_end, multibytep);
4232       if (try & CODING_CATEGORY_MASK_BIG5)
4233         mask |= detect_coding_big5 (src, src_end, multibytep);
4234       if (try & CODING_CATEGORY_MASK_UTF_8)
4235         mask |= detect_coding_utf_8 (src, src_end, multibytep);
4236       if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE)
4237         mask |= detect_coding_utf_16 (src, src_end, multibytep);
4238       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
4239         mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4240       if (try & CODING_CATEGORY_MASK_CCL)
4241         mask |= detect_coding_ccl (src, src_end, multibytep);
4242     }
4243   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
4244 }
4245
4246 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
4247    The information of the detected coding system is set in CODING.  */
4248
4249 void
4250 detect_coding (coding, src, src_bytes)
4251      struct coding_system *coding;
4252      const unsigned char *src;
4253      int src_bytes;
4254 {
4255   unsigned int idx;
4256   int skip, mask;
4257   Lisp_Object val;
4258
4259   val = Vcoding_category_list;
4260   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip,
4261                              coding->src_multibyte);
4262   coding->heading_ascii = skip;
4263
4264   if (!mask) return;
4265
4266   /* We found a single coding system of the highest priority in MASK.  */
4267   idx = 0;
4268   while (mask && ! (mask & 1)) mask >>= 1, idx++;
4269   if (! mask)
4270     idx = CODING_CATEGORY_IDX_RAW_TEXT;
4271
4272   val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[idx]);
4273
4274   if (coding->eol_type != CODING_EOL_UNDECIDED)
4275     {
4276       Lisp_Object tmp;
4277
4278       tmp = Fget (val, Qeol_type);
4279       if (VECTORP (tmp))
4280         val = XVECTOR (tmp)->contents[coding->eol_type];
4281     }
4282
4283   /* Setup this new coding system while preserving some slots.  */
4284   {
4285     int src_multibyte = coding->src_multibyte;
4286     int dst_multibyte = coding->dst_multibyte;
4287
4288     setup_coding_system (val, coding);
4289     coding->src_multibyte = src_multibyte;
4290     coding->dst_multibyte = dst_multibyte;
4291     coding->heading_ascii = skip;
4292   }
4293 }
4294
4295 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
4296    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
4297    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
4298
4299    How many non-eol characters are at the head is returned as *SKIP.  */
4300
4301 #define MAX_EOL_CHECK_COUNT 3
4302
4303 static int
4304 detect_eol_type (source, src_bytes, skip)
4305      unsigned char *source;
4306      int src_bytes, *skip;
4307 {
4308   unsigned char *src = source, *src_end = src + src_bytes;
4309   unsigned char c;
4310   int total = 0;                /* How many end-of-lines are found so far.  */
4311   int eol_type = CODING_EOL_UNDECIDED;
4312   int this_eol_type;
4313
4314   *skip = 0;
4315
4316   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
4317     {
4318       c = *src++;
4319       if (c == '\n' || c == '\r')
4320         {
4321           if (*skip == 0)
4322             *skip = src - 1 - source;
4323           total++;
4324           if (c == '\n')
4325             this_eol_type = CODING_EOL_LF;
4326           else if (src >= src_end || *src != '\n')
4327             this_eol_type = CODING_EOL_CR;
4328           else
4329             this_eol_type = CODING_EOL_CRLF, src++;
4330
4331           if (eol_type == CODING_EOL_UNDECIDED)
4332             /* This is the first end-of-line.  */
4333             eol_type = this_eol_type;
4334           else if (eol_type != this_eol_type)
4335             {
4336               /* The found type is different from what found before.  */
4337               eol_type = CODING_EOL_INCONSISTENT;
4338               break;
4339             }
4340         }
4341     }
4342
4343   if (*skip == 0)
4344     *skip = src_end - source;
4345   return eol_type;
4346 }
4347
4348 /* Like detect_eol_type, but detect EOL type in 2-octet
4349    big-endian/little-endian format for coding systems utf-16-be and
4350    utf-16-le.  */
4351
4352 static int
4353 detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
4354      unsigned char *source;
4355      int src_bytes, *skip, big_endian_p;
4356 {
4357   unsigned char *src = source, *src_end = src + src_bytes;
4358   unsigned int c1, c2;
4359   int total = 0;                /* How many end-of-lines are found so far.  */
4360   int eol_type = CODING_EOL_UNDECIDED;
4361   int this_eol_type;
4362   int msb, lsb;
4363
4364   if (big_endian_p)
4365     msb = 0, lsb = 1;
4366   else
4367     msb = 1, lsb = 0;
4368
4369   *skip = 0;
4370
4371   while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT)
4372     {
4373       c1 = (src[msb] << 8) | (src[lsb]);
4374       src += 2;
4375
4376       if (c1 == '\n' || c1 == '\r')
4377         {
4378           if (*skip == 0)
4379             *skip = src - 2 - source;
4380           total++;
4381           if (c1 == '\n')
4382             {
4383               this_eol_type = CODING_EOL_LF;
4384             }
4385           else
4386             {
4387               if ((src + 1) >= src_end)
4388                 {
4389                   this_eol_type = CODING_EOL_CR;
4390                 }
4391               else
4392                 {
4393                   c2 = (src[msb] << 8) | (src[lsb]);
4394                   if (c2 == '\n')
4395                     this_eol_type = CODING_EOL_CRLF, src += 2;
4396                   else
4397                     this_eol_type = CODING_EOL_CR;
4398                 }
4399             }
4400
4401           if (eol_type == CODING_EOL_UNDECIDED)
4402             /* This is the first end-of-line.  */
4403             eol_type = this_eol_type;
4404           else if (eol_type != this_eol_type)
4405             {
4406               /* The found type is different from what found before.  */
4407               eol_type = CODING_EOL_INCONSISTENT;
4408               break;
4409             }
4410         }
4411     }
4412
4413   if (*skip == 0)
4414     *skip = src_end - source;
4415   return eol_type;
4416 }
4417
4418 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
4419    is encoded.  If it detects an appropriate format of end-of-line, it
4420    sets the information in *CODING.  */
4421
4422 void
4423 detect_eol (coding, src, src_bytes)
4424      struct coding_system *coding;
4425      const unsigned char *src;
4426      int src_bytes;
4427 {
4428   Lisp_Object val;
4429   int skip;
4430   int eol_type;
4431
4432   switch (coding->category_idx)
4433     {
4434     case CODING_CATEGORY_IDX_UTF_16_BE:
4435       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 1);
4436       break;
4437     case CODING_CATEGORY_IDX_UTF_16_LE:
4438       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 0);
4439       break;
4440     default:
4441       eol_type = detect_eol_type (src, src_bytes, &skip);
4442       break;
4443     }
4444
4445   if (coding->heading_ascii > skip)
4446     coding->heading_ascii = skip;
4447   else
4448     skip = coding->heading_ascii;
4449
4450   if (eol_type == CODING_EOL_UNDECIDED)
4451     return;
4452   if (eol_type == CODING_EOL_INCONSISTENT)
4453     {
4454 #if 0
4455       /* This code is suppressed until we find a better way to
4456          distinguish raw text file and binary file.  */
4457
4458       /* If we have already detected that the coding is raw-text, the
4459          coding should actually be no-conversion.  */
4460       if (coding->type == coding_type_raw_text)
4461         {
4462           setup_coding_system (Qno_conversion, coding);
4463           return;
4464         }
4465       /* Else, let's decode only text code anyway.  */
4466 #endif /* 0 */
4467       eol_type = CODING_EOL_LF;
4468     }
4469
4470   val = Fget (coding->symbol, Qeol_type);
4471   if (VECTORP (val) && XVECTOR (val)->size == 3)
4472     {
4473       int src_multibyte = coding->src_multibyte;
4474       int dst_multibyte = coding->dst_multibyte;
4475       struct composition_data *cmp_data = coding->cmp_data;
4476
4477       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
4478       coding->src_multibyte = src_multibyte;
4479       coding->dst_multibyte = dst_multibyte;
4480       coding->heading_ascii = skip;
4481       coding->cmp_data = cmp_data;
4482     }
4483 }
4484
4485 #define CONVERSION_BUFFER_EXTRA_ROOM 256
4486
4487 #define DECODING_BUFFER_MAG(coding)                     \
4488   (coding->type == coding_type_iso2022                  \
4489    ? 3                                                  \
4490    : (coding->type == coding_type_ccl                   \
4491       ? coding->spec.ccl.decoder.buf_magnification      \
4492       : 2))
4493
4494 /* Return maximum size (bytes) of a buffer enough for decoding
4495    SRC_BYTES of text encoded in CODING.  */
4496
4497 int
4498 decoding_buffer_size (coding, src_bytes)
4499      struct coding_system *coding;
4500      int src_bytes;
4501 {
4502   return (src_bytes * DECODING_BUFFER_MAG (coding)
4503           + CONVERSION_BUFFER_EXTRA_ROOM);
4504 }
4505
4506 /* Return maximum size (bytes) of a buffer enough for encoding
4507    SRC_BYTES of text to CODING.  */
4508
4509 int
4510 encoding_buffer_size (coding, src_bytes)
4511      struct coding_system *coding;
4512      int src_bytes;
4513 {
4514   int magnification;
4515
4516   if (coding->type == coding_type_ccl)
4517     {
4518       magnification = coding->spec.ccl.encoder.buf_magnification;
4519       if (coding->eol_type == CODING_EOL_CRLF)
4520         magnification *= 2;
4521     }
4522   else if (CODING_REQUIRE_ENCODING (coding))
4523     magnification = 3;
4524   else
4525     magnification = 1;
4526
4527   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
4528 }
4529
/* Working buffer for code conversion.  */
struct conversion_buffer
{
  int size;                     /* size of data.  */
  int on_stack;                 /* 1 if allocated by alloca.  */
  unsigned char *data;
};

/* Don't use alloca for allocating memory space larger than this, lest
   we overflow their stack.  The value is parenthesized so that the
   macro can be used safely inside any expression.  */
#define MAX_ALLOCA (16 * 1024)

/* Allocate LEN bytes of memory for BUF (struct conversion_buffer).
   Requests smaller than MAX_ALLOCA are served by alloca (so the
   memory lives only as long as the calling function's frame), the
   others by xmalloc.  BUF and LEN are evaluated more than once, so
   they must not have side effects.  */
#define allocate_conversion_buffer(buf, len)            \
  do {                                                  \
    if ((len) < MAX_ALLOCA)                             \
      {                                                 \
        (buf).data = (unsigned char *) alloca (len);    \
        (buf).on_stack = 1;                             \
      }                                                 \
    else                                                \
      {                                                 \
        (buf).data = (unsigned char *) xmalloc (len);   \
        (buf).on_stack = 0;                             \
      }                                                 \
    (buf).size = (len);                                 \
  } while (0)
4557
4558 /* Double the allocated memory for *BUF.  */
4559 static void
4560 extend_conversion_buffer (buf)
4561      struct conversion_buffer *buf;
4562 {
4563   if (buf->on_stack)
4564     {
4565       unsigned char *save = buf->data;
4566       buf->data = (unsigned char *) xmalloc (buf->size * 2);
4567       bcopy (save, buf->data, buf->size);
4568       buf->on_stack = 0;
4569     }
4570   else
4571     {
4572       buf->data = (unsigned char *) xrealloc (buf->data, buf->size * 2);
4573     }
4574   buf->size *= 2;
4575 }
4576
4577 /* Free the allocated memory for BUF if it is not on stack.  */
4578 static void
4579 free_conversion_buffer (buf)
4580      struct conversion_buffer *buf;
4581 {
4582   if (!buf->on_stack)
4583     xfree (buf->data);
4584 }
4585
4586 int
4587 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
4588      struct coding_system *coding;
4589      unsigned char *source, *destination;
4590      int src_bytes, dst_bytes, encodep;
4591 {
4592   struct ccl_program *ccl
4593     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
4594   unsigned char *dst = destination;
4595
4596   ccl->suppress_error = coding->suppress_error;
4597   ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4598   if (encodep)
4599     {
4600       /* On encoding, EOL format is converted within ccl_driver.  For
4601          that, setup proper information in the structure CCL.  */
4602       ccl->eol_type = coding->eol_type;
4603       if (ccl->eol_type ==CODING_EOL_UNDECIDED)
4604         ccl->eol_type = CODING_EOL_LF;
4605       ccl->cr_consumed = coding->spec.ccl.cr_carryover;
4606       ccl->eight_bit_control = coding->dst_multibyte;
4607     }
4608   else
4609     ccl->eight_bit_control = 1;
4610   ccl->multibyte = coding->src_multibyte;
4611   if (coding->spec.ccl.eight_bit_carryover[0] != 0)
4612     {
4613       /* Move carryover bytes to DESTINATION.  */
4614       unsigned char *p = coding->spec.ccl.eight_bit_carryover;
4615       while (*p)
4616         *dst++ = *p++;
4617       coding->spec.ccl.eight_bit_carryover[0] = 0;
4618       if (dst_bytes)
4619         dst_bytes -= dst - destination;
4620     }
4621
4622   coding->produced = (ccl_driver (ccl, source, dst, src_bytes, dst_bytes,
4623                                   &(coding->consumed))
4624                       + dst - destination);
4625
4626   if (encodep)
4627     {
4628       coding->produced_char = coding->produced;
4629       coding->spec.ccl.cr_carryover = ccl->cr_consumed;
4630     }
4631   else if (!ccl->eight_bit_control)
4632     {
4633       /* The produced bytes forms a valid multibyte sequence. */
4634       coding->produced_char
4635         = multibyte_chars_in_text (destination, coding->produced);
4636       coding->spec.ccl.eight_bit_carryover[0] = 0;
4637     }
4638   else
4639     {
4640       /* On decoding, the destination should always multibyte.  But,
4641          CCL program might have been generated an invalid multibyte
4642          sequence.  Here we make such a sequence valid as
4643          multibyte.  */
4644       int bytes
4645         = dst_bytes ? dst_bytes : source + coding->consumed - destination;
4646
4647       if ((coding->consumed < src_bytes
4648            || !ccl->last_block)
4649           && coding->produced >= 1
4650           && destination[coding->produced - 1] >= 0x80)
4651         {
4652           /* We should not convert the tailing 8-bit codes to
4653              multibyte form even if they doesn't form a valid
4654              multibyte sequence.  They may form a valid sequence in
4655              the next call.  */
4656           int carryover = 0;
4657
4658           if (destination[coding->produced - 1] < 0xA0)
4659             carryover = 1;
4660           else if (coding->produced >= 2)
4661             {
4662               if (destination[coding->produced - 2] >= 0x80)
4663                 {
4664                   if (destination[coding->produced - 2] < 0xA0)
4665                     carryover = 2;
4666                   else if (coding->produced >= 3
4667                            && destination[coding->produced - 3] >= 0x80
4668                            && destination[coding->produced - 3] < 0xA0)
4669                     carryover = 3;
4670                 }
4671             }
4672           if (carryover > 0)
4673             {
4674               BCOPY_SHORT (destination + coding->produced - carryover,
4675                            coding->spec.ccl.eight_bit_carryover,
4676                            carryover);
4677               coding->spec.ccl.eight_bit_carryover[carryover] = 0;
4678               coding->produced -= carryover;
4679             }
4680         }
4681       coding->produced = str_as_multibyte (destination, bytes,
4682                                            coding->produced,
4683                                            &(coding->produced_char));
4684     }
4685
4686   switch (ccl->status)
4687     {
4688     case CCL_STAT_SUSPEND_BY_SRC:
4689       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
4690       break;
4691     case CCL_STAT_SUSPEND_BY_DST:
4692       coding->result = CODING_FINISH_INSUFFICIENT_DST;
4693       break;
4694     case CCL_STAT_QUIT:
4695     case CCL_STAT_INVALID_CMD:
4696       coding->result = CODING_FINISH_INTERRUPT;
4697       break;
4698     default:
4699       coding->result = CODING_FINISH_NORMAL;
4700       break;
4701     }
4702   return coding->result;
4703 }
4704
4705 /* Decode EOL format of the text at PTR of BYTES length destructively
4706    according to CODING->eol_type.  This is called after the CCL
4707    program produced a decoded text at PTR.  If we do CRLF->LF
4708    conversion, update CODING->produced and CODING->produced_char.  */
4709
4710 static void
4711 decode_eol_post_ccl (coding, ptr, bytes)
4712      struct coding_system *coding;
4713      unsigned char *ptr;
4714      int bytes;
4715 {
4716   Lisp_Object val, saved_coding_symbol;
4717   unsigned char *pend = ptr + bytes;
4718   int dummy;
4719
4720   /* Remember the current coding system symbol.  We set it back when
4721      an inconsistent EOL is found so that `last-coding-system-used' is
4722      set to the coding system that doesn't specify EOL conversion.  */
4723   saved_coding_symbol = coding->symbol;
4724
4725   coding->spec.ccl.cr_carryover = 0;
4726   if (coding->eol_type == CODING_EOL_UNDECIDED)
4727     {
4728       /* Here, to avoid the call of setup_coding_system, we directly
4729          call detect_eol_type.  */
4730       coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
4731       if (coding->eol_type == CODING_EOL_INCONSISTENT)
4732         coding->eol_type = CODING_EOL_LF;
4733       if (coding->eol_type != CODING_EOL_UNDECIDED)
4734         {
4735           val = Fget (coding->symbol, Qeol_type);
4736           if (VECTORP (val) && XVECTOR (val)->size == 3)
4737             coding->symbol = XVECTOR (val)->contents[coding->eol_type];
4738         }
4739       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4740     }
4741
4742   if (coding->eol_type == CODING_EOL_LF
4743       || coding->eol_type == CODING_EOL_UNDECIDED)
4744     {
4745       /* We have nothing to do.  */
4746       ptr = pend;
4747     }
4748   else if (coding->eol_type == CODING_EOL_CRLF)
4749     {
4750       unsigned char *pstart = ptr, *p = ptr;
4751
4752       if (! (coding->mode & CODING_MODE_LAST_BLOCK)
4753           && *(pend - 1) == '\r')
4754         {
4755           /* If the last character is CR, we can't handle it here
4756              because LF will be in the not-yet-decoded source text.
4757              Record that the CR is not yet processed.  */
4758           coding->spec.ccl.cr_carryover = 1;
4759           coding->produced--;
4760           coding->produced_char--;
4761           pend--;
4762         }
4763       while (ptr < pend)
4764         {
4765           if (*ptr == '\r')
4766             {
4767               if (ptr + 1 < pend && *(ptr + 1) == '\n')
4768                 {
4769                   *p++ = '\n';
4770                   ptr += 2;
4771                 }
4772               else
4773                 {
4774                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4775                     goto undo_eol_conversion;
4776                   *p++ = *ptr++;
4777                 }
4778             }
4779           else if (*ptr == '\n'
4780                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4781             goto undo_eol_conversion;
4782           else
4783             *p++ = *ptr++;
4784           continue;
4785
4786         undo_eol_conversion:
4787           /* We have faced with inconsistent EOL format at PTR.
4788              Convert all LFs before PTR back to CRLFs.  */
4789           for (p--, ptr--; p >= pstart; p--)
4790             {
4791               if (*p == '\n')
4792                 *ptr-- = '\n', *ptr-- = '\r';
4793               else
4794                 *ptr-- = *p;
4795             }
4796           /*  If carryover is recorded, cancel it because we don't
4797               convert CRLF anymore.  */
4798           if (coding->spec.ccl.cr_carryover)
4799             {
4800               coding->spec.ccl.cr_carryover = 0;
4801               coding->produced++;
4802               coding->produced_char++;
4803               pend++;
4804             }
4805           p = ptr = pend;
4806           coding->eol_type = CODING_EOL_LF;
4807           coding->symbol = saved_coding_symbol;
4808         }
4809       if (p < pend)
4810         {
4811           /* As each two-byte sequence CRLF was converted to LF, (PEND
4812              - P) is the number of deleted characters.  */
4813           coding->produced -= pend - p;
4814           coding->produced_char -= pend - p;
4815         }
4816     }
4817   else                  /* i.e. coding->eol_type == CODING_EOL_CR */
4818     {
4819       unsigned char *p = ptr;
4820
4821       for (; ptr < pend; ptr++)
4822         {
4823           if (*ptr == '\r')
4824             *ptr = '\n';
4825           else if (*ptr == '\n'
4826                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4827             {
4828               for (; p < ptr; p++)
4829                 {
4830                   if (*p == '\n')
4831                     *p = '\r';
4832                 }
4833               ptr = pend;
4834               coding->eol_type = CODING_EOL_LF;
4835               coding->symbol = saved_coding_symbol;
4836             }
4837         }
4838     }
4839 }
4840
4841 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
4842    decoding, it may detect coding system and format of end-of-line if
4843    those are not yet decided.  The source should be unibyte, the
4844    result is multibyte if CODING->dst_multibyte is nonzero, else
4845    unibyte.  */
4846
4847 int
4848 decode_coding (coding, source, destination, src_bytes, dst_bytes)
4849      struct coding_system *coding;
4850      const unsigned char *source;
4851      unsigned char *destination;
4852      int src_bytes, dst_bytes;
4853 {
4854   int extra = 0;
4855
4856   if (coding->type == coding_type_undecided)
4857     detect_coding (coding, source, src_bytes);
4858
4859   if (coding->eol_type == CODING_EOL_UNDECIDED
4860       && coding->type != coding_type_ccl)
4861     {
4862       detect_eol (coding, source, src_bytes);
4863       /* We had better recover the original eol format if we
4864          encounter an inconsistent eol format while decoding.  */
4865       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4866     }
4867
4868   coding->produced = coding->produced_char = 0;
4869   coding->consumed = coding->consumed_char = 0;
4870   coding->errors = 0;
4871   coding->result = CODING_FINISH_NORMAL;
4872
4873   switch (coding->type)
4874     {
4875     case coding_type_sjis:
4876       decode_coding_sjis_big5 (coding, source, destination,
4877                                src_bytes, dst_bytes, 1);
4878       break;
4879
4880     case coding_type_iso2022:
4881       decode_coding_iso2022 (coding, source, destination,
4882                              src_bytes, dst_bytes);
4883       break;
4884
4885     case coding_type_big5:
4886       decode_coding_sjis_big5 (coding, source, destination,
4887                                src_bytes, dst_bytes, 0);
4888       break;
4889
4890     case coding_type_emacs_mule:
4891       decode_coding_emacs_mule (coding, source, destination,
4892                                 src_bytes, dst_bytes);
4893       break;
4894
4895     case coding_type_ccl:
4896       if (coding->spec.ccl.cr_carryover)
4897         {
4898           /* Put the CR which was not processed by the previous call
4899              of decode_eol_post_ccl in DESTINATION.  It will be
4900              decoded together with the following LF by the call to
4901              decode_eol_post_ccl below.  */
4902           *destination = '\r';
4903           coding->produced++;
4904           coding->produced_char++;
4905           dst_bytes--;
4906           extra = coding->spec.ccl.cr_carryover;
4907         }
4908       ccl_coding_driver (coding, source, destination + extra,
4909                          src_bytes, dst_bytes, 0);
4910       if (coding->eol_type != CODING_EOL_LF)
4911         {
4912           coding->produced += extra;
4913           coding->produced_char += extra;
4914           decode_eol_post_ccl (coding, destination, coding->produced);
4915         }
4916       break;
4917
4918     default:
4919       decode_eol (coding, source, destination, src_bytes, dst_bytes);
4920     }
4921
4922   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4923       && coding->mode & CODING_MODE_LAST_BLOCK
4924       && coding->consumed == src_bytes)
4925     coding->result = CODING_FINISH_NORMAL;
4926
4927   if (coding->mode & CODING_MODE_LAST_BLOCK
4928       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4929     {
4930       const unsigned char *src = source + coding->consumed;
4931       unsigned char *dst = destination + coding->produced;
4932
4933       src_bytes -= coding->consumed;
4934       coding->errors++;
4935       if (COMPOSING_P (coding))
4936         DECODE_COMPOSITION_END ('1');
4937       while (src_bytes--)
4938         {
4939           int c = *src++;
4940           dst += CHAR_STRING (c, dst);
4941           coding->produced_char++;
4942         }
4943       coding->consumed = coding->consumed_char = src - source;
4944       coding->produced = dst - destination;
4945       coding->result = CODING_FINISH_NORMAL;
4946     }
4947
4948   if (!coding->dst_multibyte)
4949     {
4950       coding->produced = str_as_unibyte (destination, coding->produced);
4951       coding->produced_char = coding->produced;
4952     }
4953
4954   return coding->result;
4955 }
4956
4957 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  The
4958    multibyteness of the source is CODING->src_multibyte, the
4959    multibyteness of the result is always unibyte.  */
4960
4961 int
4962 encode_coding (coding, source, destination, src_bytes, dst_bytes)
4963      struct coding_system *coding;
4964      const unsigned char *source;
4965      unsigned char *destination;
4966      int src_bytes, dst_bytes;
4967 {
4968   coding->produced = coding->produced_char = 0;
4969   coding->consumed = coding->consumed_char = 0;
4970   coding->errors = 0;
4971   coding->result = CODING_FINISH_NORMAL;
4972
4973   switch (coding->type)
4974     {
4975     case coding_type_sjis:
4976       encode_coding_sjis_big5 (coding, source, destination,
4977                                src_bytes, dst_bytes, 1);
4978       break;
4979
4980     case coding_type_iso2022:
4981       encode_coding_iso2022 (coding, source, destination,
4982                              src_bytes, dst_bytes);
4983       break;
4984
4985     case coding_type_big5:
4986       encode_coding_sjis_big5 (coding, source, destination,
4987                                src_bytes, dst_bytes, 0);
4988       break;
4989
4990     case coding_type_emacs_mule:
4991       encode_coding_emacs_mule (coding, source, destination,
4992                                 src_bytes, dst_bytes);
4993       break;
4994
4995     case coding_type_ccl:
4996       ccl_coding_driver (coding, source, destination,
4997                          src_bytes, dst_bytes, 1);
4998       break;
4999
5000     default:
5001       encode_eol (coding, source, destination, src_bytes, dst_bytes);
5002     }
5003
5004   if (coding->mode & CODING_MODE_LAST_BLOCK
5005       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
5006     {
5007       const unsigned char *src = source + coding->consumed;
5008       unsigned char *dst = destination + coding->produced;
5009
5010       if (coding->type == coding_type_iso2022)
5011         ENCODE_RESET_PLANE_AND_REGISTER;
5012       if (COMPOSING_P (coding))
5013         *dst++ = ISO_CODE_ESC, *dst++ = '1';
5014       if (coding->consumed < src_bytes)
5015         {
5016           int len = src_bytes - coding->consumed;
5017
5018           BCOPY_SHORT (src, dst, len);
5019           if (coding->src_multibyte)
5020             len = str_as_unibyte (dst, len);
5021           dst += len;
5022           coding->consumed = src_bytes;
5023         }
5024       coding->produced = coding->produced_char = dst - destination;
5025       coding->result = CODING_FINISH_NORMAL;
5026     }
5027
5028   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
5029       && coding->consumed == src_bytes)
5030     coding->result = CODING_FINISH_NORMAL;
5031
5032   return coding->result;
5033 }
5034
5035 /* Scan text in the region between *BEG and *END (byte positions),
5036    skip characters which we don't have to decode by coding system
5037    CODING at the head and tail, then set *BEG and *END to the region
5038    of the text we actually have to convert.  The caller should move
5039    the gap out of the region in advance if the region is from a
5040    buffer.
5041
5042    If STR is not NULL, *BEG and *END are indices into STR.  */
5043
5044 static void
5045 shrink_decoding_region (beg, end, coding, str)
5046      int *beg, *end;
5047      struct coding_system *coding;
5048      unsigned char *str;
5049 {
5050   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
5051   int eol_conversion;
5052   Lisp_Object translation_table;
5053
5054   if (coding->type == coding_type_ccl
5055       || coding->type == coding_type_undecided
5056       || coding->eol_type != CODING_EOL_LF
5057       || !NILP (coding->post_read_conversion)
5058       || coding->composing != COMPOSITION_DISABLED)
5059     {
5060       /* We can't skip any data.  */
5061       return;
5062     }
5063   if (coding->type == coding_type_no_conversion
5064       || coding->type == coding_type_raw_text
5065       || coding->type == coding_type_emacs_mule)
5066     {
5067       /* We need no conversion, but don't have to skip any data here.
5068          Decoding routine handles them effectively anyway.  */
5069       return;
5070     }
5071
5072   translation_table = coding->translation_table_for_decode;
5073   if (NILP (translation_table) && !NILP (Venable_character_translation))
5074     translation_table = Vstandard_translation_table_for_decode;
5075   if (CHAR_TABLE_P (translation_table))
5076     {
5077       int i;
5078       for (i = 0; i < 128; i++)
5079         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
5080           break;
5081       if (i < 128)
5082         /* Some ASCII character should be translated.  We give up
5083            shrinking.  */
5084         return;
5085     }
5086
5087   if (coding->heading_ascii >= 0)
5088     /* Detection routine has already found how much we can skip at the
5089        head.  */
5090     *beg += coding->heading_ascii;
5091
5092   if (str)
5093     {
5094       begp_orig = begp = str + *beg;
5095       endp_orig = endp = str + *end;
5096     }
5097   else
5098     {
5099       begp_orig = begp = BYTE_POS_ADDR (*beg);
5100       endp_orig = endp = begp + *end - *beg;
5101     }
5102
5103   eol_conversion = (coding->eol_type == CODING_EOL_CR
5104                     || coding->eol_type == CODING_EOL_CRLF);
5105
5106   switch (coding->type)
5107     {
5108     case coding_type_sjis:
5109     case coding_type_big5:
5110       /* We can skip all ASCII characters at the head.  */
5111       if (coding->heading_ascii < 0)
5112         {
5113           if (eol_conversion)
5114             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
5115           else
5116             while (begp < endp && *begp < 0x80) begp++;
5117         }
5118       /* We can skip all ASCII characters at the tail except for the
5119          second byte of SJIS or BIG5 code.  */
5120       if (eol_conversion)
5121         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
5122       else
5123         while (begp < endp && endp[-1] < 0x80) endp--;
5124       /* Do not consider LF as ascii if preceded by CR, since that
5125          confuses eol decoding. */
5126       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
5127         endp++;
5128       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
5129         endp++;
5130       break;
5131
5132     case coding_type_iso2022:
5133       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
5134         /* We can't skip any data.  */
5135         break;
5136       if (coding->heading_ascii < 0)
5137         {
5138           /* We can skip all ASCII characters at the head except for a
5139              few control codes.  */
5140           while (begp < endp && (c = *begp) < 0x80
5141                  && c != ISO_CODE_CR && c != ISO_CODE_SO
5142                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
5143                  && (!eol_conversion || c != ISO_CODE_LF))
5144             begp++;
5145         }
5146       switch (coding->category_idx)
5147         {
5148         case CODING_CATEGORY_IDX_ISO_8_1:
5149         case CODING_CATEGORY_IDX_ISO_8_2:
5150           /* We can skip all ASCII characters at the tail.  */
5151           if (eol_conversion)
5152             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
5153           else
5154             while (begp < endp && endp[-1] < 0x80) endp--;
5155           /* Do not consider LF as ascii if preceded by CR, since that
5156              confuses eol decoding. */
5157           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
5158             endp++;
5159           break;
5160
5161         case CODING_CATEGORY_IDX_ISO_7:
5162         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
5163           {
5164             /* We can skip all characters at the tail except for 8-bit
5165                codes and ESC and the following 2-byte at the tail.  */
5166             unsigned char *eight_bit = NULL;
5167
5168             if (eol_conversion)
5169               while (begp < endp
5170                      && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
5171                 {
5172                   if (!eight_bit && c & 0x80) eight_bit = endp;
5173                   endp--;
5174                 }
5175             else
5176               while (begp < endp
5177                      && (c = endp[-1]) != ISO_CODE_ESC)
5178                 {
5179                   if (!eight_bit && c & 0x80) eight_bit = endp;
5180                   endp--;
5181                 }
5182             /* Do not consider LF as ascii if preceded by CR, since that
5183                confuses eol decoding. */
5184             if (begp < endp && endp < endp_orig
5185                 && endp[-1] == '\r' && endp[0] == '\n')
5186               endp++;
5187             if (begp < endp && endp[-1] == ISO_CODE_ESC)
5188               {
5189                 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
5190                   /* This is an ASCII designation sequence.  We can
5191                      surely skip the tail.  But, if we have
5192                      encountered an 8-bit code, skip only the codes
5193                      after that.  */
5194                   endp = eight_bit ? eight_bit : endp + 2;
5195                 else
5196                   /* Hmmm, we can't skip the tail.  */
5197                   endp = endp_orig;
5198               }
5199             else if (eight_bit)
5200               endp = eight_bit;
5201           }
5202         }
5203       break;
5204
5205     default:
5206       abort ();
5207     }
5208   *beg += begp - begp_orig;
5209   *end += endp - endp_orig;
5210   return;
5211 }
5212
5213 /* Like shrink_decoding_region but for encoding.  */
5214
5215 static void
5216 shrink_encoding_region (beg, end, coding, str)
5217      int *beg, *end;
5218      struct coding_system *coding;
5219      unsigned char *str;
5220 {
5221   unsigned char *begp_orig, *begp, *endp_orig, *endp;
5222   int eol_conversion;
5223   Lisp_Object translation_table;
5224
5225   if (coding->type == coding_type_ccl
5226       || coding->eol_type == CODING_EOL_CRLF
5227       || coding->eol_type == CODING_EOL_CR
5228       || (coding->cmp_data && coding->cmp_data->used > 0))
5229     {
5230       /* We can't skip any data.  */
5231       return;
5232     }
5233   if (coding->type == coding_type_no_conversion
5234       || coding->type == coding_type_raw_text
5235       || coding->type == coding_type_emacs_mule
5236       || coding->type == coding_type_undecided)
5237     {
5238       /* We need no conversion, but don't have to skip any data here.
5239          Encoding routine handles them effectively anyway.  */
5240       return;
5241     }
5242
5243   translation_table = coding->translation_table_for_encode;
5244   if (NILP (translation_table) && !NILP (Venable_character_translation))
5245     translation_table = Vstandard_translation_table_for_encode;
5246   if (CHAR_TABLE_P (translation_table))
5247     {
5248       int i;
5249       for (i = 0; i < 128; i++)
5250         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
5251           break;
5252       if (i < 128)
5253         /* Some ASCII character should be translated.  We give up
5254            shrinking.  */
5255         return;
5256     }
5257
5258   if (str)
5259     {
5260       begp_orig = begp = str + *beg;
5261       endp_orig = endp = str + *end;
5262     }
5263   else
5264     {
5265       begp_orig = begp = BYTE_POS_ADDR (*beg);
5266       endp_orig = endp = begp + *end - *beg;
5267     }
5268
5269   eol_conversion = (coding->eol_type == CODING_EOL_CR
5270                     || coding->eol_type == CODING_EOL_CRLF);
5271
5272   /* Here, we don't have to check coding->pre_write_conversion because
5273      the caller is expected to have handled it already.  */
5274   switch (coding->type)
5275     {
5276     case coding_type_iso2022:
5277       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
5278         /* We can't skip any data.  */
5279         break;
5280       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
5281         {
5282           unsigned char *bol = begp;
5283           while (begp < endp && *begp < 0x80)
5284             {
5285               begp++;
5286               if (begp[-1] == '\n')
5287                 bol = begp;
5288             }
5289           begp = bol;
5290           goto label_skip_tail;
5291         }
5292       /* fall down ... */
5293
5294     case coding_type_sjis:
5295     case coding_type_big5:
5296       /* We can skip all ASCII characters at the head and tail.  */
5297       if (eol_conversion)
5298         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
5299       else
5300         while (begp < endp && *begp < 0x80) begp++;
5301     label_skip_tail:
5302       if (eol_conversion)
5303         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
5304       else
5305         while (begp < endp && *(endp - 1) < 0x80) endp--;
5306       break;
5307
5308     default:
5309       abort ();
5310     }
5311
5312   *beg += begp - begp_orig;
5313   *end += endp - endp_orig;
5314   return;
5315 }
5316
/* Shrinking a conversion region carries some overhead of its own, so
   it is only attempted when the region is longer than this value.  */
static int shrink_conversion_region_threshhold = 1024;

/* When the region [*BEG, *END) is longer than
   shrink_conversion_region_threshhold, narrow it by calling
   shrink_encoding_region (ENCODEP nonzero) or shrink_decoding_region
   (ENCODEP zero) with the remaining arguments.  Shorter regions are
   left alone.  */
#define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep)        \
  do {                                                                  \
    if (*(end) - *(beg) <= shrink_conversion_region_threshhold)         \
      break;                                                            \
    if (encodep)                                                        \
      shrink_encoding_region (beg, end, coding, str);                   \
    else                                                                \
      shrink_decoding_region (beg, end, coding, str);                   \
  } while (0)
5330
5331 static Lisp_Object
5332 code_convert_region_unwind (arg)
5333      Lisp_Object arg;
5334 {
5335   inhibit_pre_post_conversion = 0;
5336   Vlast_coding_system_used = arg;
5337   return Qnil;
5338 }
5339
5340 /* Store information about all compositions in the range FROM and TO
5341    of OBJ in memory blocks pointed by CODING->cmp_data.  OBJ is a
5342    buffer or a string, defaults to the current buffer.  */
5343
5344 void
5345 coding_save_composition (coding, from, to, obj)
5346      struct coding_system *coding;
5347      int from, to;
5348      Lisp_Object obj;
5349 {
5350   Lisp_Object prop;
5351   int start, end;
5352
5353   if (coding->composing == COMPOSITION_DISABLED)
5354     return;
5355   if (!coding->cmp_data)
5356     coding_allocate_composition_data (coding, from);
5357   if (!find_composition (from, to, &start, &end, &prop, obj)
5358       || end > to)
5359     return;
5360   if (start < from
5361       && (!find_composition (end, to, &start, &end, &prop, obj)
5362           || end > to))
5363     return;
5364   coding->composing = COMPOSITION_NO;
5365   do
5366     {
5367       if (COMPOSITION_VALID_P (start, end, prop))
5368         {
5369           enum composition_method method = COMPOSITION_METHOD (prop);
5370           if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
5371               >= COMPOSITION_DATA_SIZE)
5372             coding_allocate_composition_data (coding, from);
5373           /* For relative composition, we remember start and end
5374              positions, for the other compositions, we also remember
5375              components.  */
5376           CODING_ADD_COMPOSITION_START (coding, start - from, method);
5377           if (method != COMPOSITION_RELATIVE)
5378             {
5379               /* We must store a*/
5380               Lisp_Object val, ch;
5381
5382               val = COMPOSITION_COMPONENTS (prop);
5383               if (CONSP (val))
5384                 while (CONSP (val))
5385                   {
5386                     ch = XCAR (val), val = XCDR (val);
5387                     CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5388                   }
5389               else if (VECTORP (val) || STRINGP (val))
5390                 {
5391                   int len = (VECTORP (val)
5392                              ? XVECTOR (val)->size : SCHARS (val));
5393                   int i;
5394                   for (i = 0; i < len; i++)
5395                     {
5396                       ch = (STRINGP (val)
5397                             ? Faref (val, make_number (i))
5398                             : XVECTOR (val)->contents[i]);
5399                       CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5400                     }
5401                 }
5402               else              /* INTEGERP (val) */
5403                 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val));
5404             }
5405           CODING_ADD_COMPOSITION_END (coding, end - from);
5406         }
5407       start = end;
5408     }
5409   while (start < to
5410          && find_composition (start, to, &start, &end, &prop, obj)
5411          && end <= to);
5412
5413   /* Make coding->cmp_data point to the first memory block.  */
5414   while (coding->cmp_data->prev)
5415     coding->cmp_data = coding->cmp_data->prev;
5416   coding->cmp_data_start = 0;
5417 }
5418
5419 /* Reflect the saved information about compositions to OBJ.
5420    CODING->cmp_data points to a memory block for the information.  OBJ
5421    is a buffer or a string, defaults to the current buffer.  */
5422
5423 void
5424 coding_restore_composition (coding, obj)
5425      struct coding_system *coding;
5426      Lisp_Object obj;
5427 {
5428   struct composition_data *cmp_data = coding->cmp_data;
5429
5430   if (!cmp_data)
5431     return;
5432
5433   while (cmp_data->prev)
5434     cmp_data = cmp_data->prev;
5435
5436   while (cmp_data)
5437     {
5438       int i;
5439
5440       for (i = 0; i < cmp_data->used && cmp_data->data[i] > 0;
5441            i += cmp_data->data[i])
5442         {
5443           int *data = cmp_data->data + i;
5444           enum composition_method method = (enum composition_method) data[3];
5445           Lisp_Object components;
5446
5447           if (data[0] < 0 || i + data[0] > cmp_data->used)
5448             /* Invalid composition data.  */
5449             break;
5450
5451           if (method == COMPOSITION_RELATIVE)
5452             components = Qnil;
5453           else
5454             {
5455               int len = data[0] - 4, j;
5456               Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5457
5458               if (method == COMPOSITION_WITH_RULE_ALTCHARS
5459                   && len % 2 == 0)
5460                 len --;
5461               if (len < 1)
5462                 /* Invalid composition data.  */
5463                 break;
5464               for (j = 0; j < len; j++)
5465                 args[j] = make_number (data[4 + j]);
5466               components = (method == COMPOSITION_WITH_ALTCHARS
5467                             ? Fstring (len, args)
5468                             : Fvector (len, args));
5469             }
5470           compose_text (data[1], data[2], components, Qnil, obj);
5471         }
5472       cmp_data = cmp_data->next;
5473     }
5474 }
5475
5476 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
5477    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
5478    coding system CODING, and return the status code of code conversion
5479    (currently, this value has no meaning).
5480
5481    How many characters (and bytes) are converted to how many
5482    characters (and bytes) are recorded in members of the structure
5483    CODING.
5484
5485    If REPLACE is nonzero, we do various things as if the original text
5486    is deleted and a new text is inserted.  See the comments in
5487    replace_range (insdel.c) to know what we are doing.
5488
5489    If REPLACE is zero, it is assumed that the source text is unibyte.
5490    Otherwise, it is assumed that the source text is multibyte.  */
5491
5492 int
5493 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
5494      int from, from_byte, to, to_byte, encodep, replace;
5495      struct coding_system *coding;
5496 {
5497   int len = to - from, len_byte = to_byte - from_byte;
5498   int nchars_del = 0, nbytes_del = 0;
5499   int require, inserted, inserted_byte;
5500   int head_skip, tail_skip, total_skip = 0;
5501   Lisp_Object saved_coding_symbol;
5502   int first = 1;
5503   unsigned char *src, *dst;
5504   Lisp_Object deletion;
5505   int orig_point = PT, orig_len = len;
5506   int prev_Z;
5507   int multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
5508
5509   deletion = Qnil;
5510   saved_coding_symbol = coding->symbol;
5511
5512   if (from < PT && PT < to)
5513     {
5514       TEMP_SET_PT_BOTH (from, from_byte);
5515       orig_point = from;
5516     }
5517
5518   if (replace)
5519     {
5520       int saved_from = from;
5521       int saved_inhibit_modification_hooks;
5522
5523       prepare_to_modify_buffer (from, to, &from);
5524       if (saved_from != from)
5525         {
5526           to = from + len;
5527           from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
5528           len_byte = to_byte - from_byte;
5529         }
5530
5531       /* The code conversion routine can not preserve text properties
5532          for now.  So, we must remove all text properties in the
5533          region.  Here, we must suppress all modification hooks.  */
5534       saved_inhibit_modification_hooks = inhibit_modification_hooks;
5535       inhibit_modification_hooks = 1;
5536       Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
5537       inhibit_modification_hooks = saved_inhibit_modification_hooks;
5538     }
5539
5540   if (! encodep && CODING_REQUIRE_DETECTION (coding))
5541     {
5542       /* We must detect encoding of text and eol format.  */
5543
5544       if (from < GPT && to > GPT)
5545         move_gap_both (from, from_byte);
5546       if (coding->type == coding_type_undecided)
5547         {
5548           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
5549           if (coding->type == coding_type_undecided)
5550             {
5551               /* It seems that the text contains only ASCII, but we
5552                  should not leave it undecided because the deeper
5553                  decoding routine (decode_coding) tries to detect the
5554                  encodings again in vain.  */
5555               coding->type = coding_type_emacs_mule;
5556               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
5557               /* As emacs-mule decoder will handle composition, we
5558                  need this setting to allocate coding->cmp_data
5559                  later.  */
5560               coding->composing = COMPOSITION_NO;
5561             }
5562         }
5563       if (coding->eol_type == CODING_EOL_UNDECIDED
5564           && coding->type != coding_type_ccl)
5565         {
5566           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
5567           if (coding->eol_type == CODING_EOL_UNDECIDED)
5568             coding->eol_type = CODING_EOL_LF;
5569           /* We had better recover the original eol format if we
5570              encounter an inconsistent eol format while decoding.  */
5571           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5572         }
5573     }
5574
5575   /* Now we convert the text.  */
5576
5577   /* For encoding, we must process pre-write-conversion in advance.  */
5578   if (! inhibit_pre_post_conversion
5579       && encodep
5580       && SYMBOLP (coding->pre_write_conversion)
5581       && ! NILP (Ffboundp (coding->pre_write_conversion)))
5582     {
5583       /* The function in pre-write-conversion may put a new text in a
5584          new buffer.  */
5585       struct buffer *prev = current_buffer;
5586       Lisp_Object new;
5587
5588       record_unwind_protect (code_convert_region_unwind,
5589                              Vlast_coding_system_used);
5590       /* We should not call any more pre-write/post-read-conversion
5591          functions while this pre-write-conversion is running.  */
5592       inhibit_pre_post_conversion = 1;
5593       call2 (coding->pre_write_conversion,
5594              make_number (from), make_number (to));
5595       inhibit_pre_post_conversion = 0;
5596       /* Discard the unwind protect.  */
5597       specpdl_ptr--;
5598
5599       if (current_buffer != prev)
5600         {
5601           len = ZV - BEGV;
5602           new = Fcurrent_buffer ();
5603           set_buffer_internal_1 (prev);
5604           del_range_2 (from, from_byte, to, to_byte, 0);
5605           TEMP_SET_PT_BOTH (from, from_byte);
5606           insert_from_buffer (XBUFFER (new), 1, len, 0);
5607           Fkill_buffer (new);
5608           if (orig_point >= to)
5609             orig_point += len - orig_len;
5610           else if (orig_point > from)
5611             orig_point = from;
5612           orig_len = len;
5613           to = from + len;
5614           from_byte = CHAR_TO_BYTE (from);
5615           to_byte = CHAR_TO_BYTE (to);
5616           len_byte = to_byte - from_byte;
5617           TEMP_SET_PT_BOTH (from, from_byte);
5618         }
5619     }
5620
5621   if (replace)
5622     {
5623       if (! EQ (current_buffer->undo_list, Qt))
5624         deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
5625       else
5626         {
5627           nchars_del = to - from;
5628           nbytes_del = to_byte - from_byte;
5629         }
5630     }
5631
5632   if (coding->composing != COMPOSITION_DISABLED)
5633     {
5634       if (encodep)
5635         coding_save_composition (coding, from, to, Fcurrent_buffer ());
5636       else
5637         coding_allocate_composition_data (coding, from);
5638     }
5639
5640   /* Try to skip the heading and tailing ASCIIs.  */
5641   if (coding->type != coding_type_ccl)
5642     {
5643       int from_byte_orig = from_byte, to_byte_orig = to_byte;
5644
5645       if (from < GPT && GPT < to)
5646         move_gap_both (from, from_byte);
5647       SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
5648       if (from_byte == to_byte
5649           && (encodep || NILP (coding->post_read_conversion))
5650           && ! CODING_REQUIRE_FLUSHING (coding))
5651         {
5652           coding->produced = len_byte;
5653           coding->produced_char = len;
5654           if (!replace)
5655             /* We must record and adjust for this new text now.  */
5656             adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
5657           return 0;
5658         }
5659
5660       head_skip = from_byte - from_byte_orig;
5661       tail_skip = to_byte_orig - to_byte;
5662       total_skip = head_skip + tail_skip;
5663       from += head_skip;
5664       to -= tail_skip;
5665       len -= total_skip; len_byte -= total_skip;
5666     }
5667
5668   /* For conversion, we must put the gap before the text in addition to
5669      making the gap larger for efficient decoding.  The required gap
5670      size starts from 2000 which is the magic number used in make_gap.
5671      But, after one batch of conversion, it will be incremented if we
5672      find that it is not enough .  */
5673   require = 2000;
5674
5675   if (GAP_SIZE  < require)
5676     make_gap (require - GAP_SIZE);
5677   move_gap_both (from, from_byte);
5678
5679   inserted = inserted_byte = 0;
5680
5681   GAP_SIZE += len_byte;
5682   ZV -= len;
5683   Z -= len;
5684   ZV_BYTE -= len_byte;
5685   Z_BYTE -= len_byte;
5686
5687   if (GPT - BEG < BEG_UNCHANGED)
5688     BEG_UNCHANGED = GPT - BEG;
5689   if (Z - GPT < END_UNCHANGED)
5690     END_UNCHANGED = Z - GPT;
5691
5692   if (!encodep && coding->src_multibyte)
5693     {
5694       /* Decoding routines expects that the source text is unibyte.
5695          We must convert 8-bit characters of multibyte form to
5696          unibyte.  */
5697       int len_byte_orig = len_byte;
5698       len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte);
5699       if (len_byte < len_byte_orig)
5700         safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte,
5701                     len_byte);
5702       coding->src_multibyte = 0;
5703     }
5704
5705   for (;;)
5706     {
5707       int result;
5708
5709       /* The buffer memory is now:
5710          +--------+converted-text+---------+-------original-text-------+---+
5711          |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
5712                   |<---------------------- GAP ----------------------->|  */
5713       src = GAP_END_ADDR - len_byte;
5714       dst = GPT_ADDR + inserted_byte;
5715
5716       if (encodep)
5717         result = encode_coding (coding, src, dst, len_byte, 0);
5718       else
5719         {
5720           if (coding->composing != COMPOSITION_DISABLED)
5721             coding->cmp_data->char_offset = from + inserted;
5722           result = decode_coding (coding, src, dst, len_byte, 0);
5723         }
5724
5725       /* The buffer memory is now:
5726          +--------+-------converted-text----+--+------original-text----+---+
5727          |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
5728                   |<---------------------- GAP ----------------------->|  */
5729
5730       inserted += coding->produced_char;
5731       inserted_byte += coding->produced;
5732       len_byte -= coding->consumed;
5733
5734       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5735         {
5736           coding_allocate_composition_data (coding, from + inserted);
5737           continue;
5738         }
5739
5740       src += coding->consumed;
5741       dst += coding->produced;
5742
5743       if (result == CODING_FINISH_NORMAL)
5744         {
5745           src += len_byte;
5746           break;
5747         }
5748       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
5749         {
5750           unsigned char *pend = dst, *p = pend - inserted_byte;
5751           Lisp_Object eol_type;
5752
5753           /* Encode LFs back to the original eol format (CR or CRLF).  */
5754           if (coding->eol_type == CODING_EOL_CR)
5755             {
5756               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
5757             }
5758           else
5759             {
5760               int count = 0;
5761
5762               while (p < pend) if (*p++ == '\n') count++;
5763               if (src - dst < count)
5764                 {
5765                   /* We don't have sufficient room for encoding LFs
5766                      back to CRLF.  We must record converted and
5767                      not-yet-converted text back to the buffer
5768                      content, enlarge the gap, then record them out of
5769                      the buffer contents again.  */
5770                   int add = len_byte + inserted_byte;
5771
5772                   GAP_SIZE -= add;
5773                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5774                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
5775                   make_gap (count - GAP_SIZE);
5776                   GAP_SIZE += add;
5777                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5778                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5779                   /* Don't forget to update SRC, DST, and PEND.  */
5780                   src = GAP_END_ADDR - len_byte;
5781                   dst = GPT_ADDR + inserted_byte;
5782                   pend = dst;
5783                 }
5784               inserted += count;
5785               inserted_byte += count;
5786               coding->produced += count;
5787               p = dst = pend + count;
5788               while (count)
5789                 {
5790                   *--p = *--pend;
5791                   if (*p == '\n') count--, *--p = '\r';
5792                 }
5793             }
5794
5795           /* Suppress eol-format conversion in the further conversion.  */
5796           coding->eol_type = CODING_EOL_LF;
5797
5798           /* Set the coding system symbol to that for Unix-like EOL.  */
5799           eol_type = Fget (saved_coding_symbol, Qeol_type);
5800           if (VECTORP (eol_type)
5801               && XVECTOR (eol_type)->size == 3
5802               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
5803             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
5804           else
5805             coding->symbol = saved_coding_symbol;
5806
5807           continue;
5808         }
5809       if (len_byte <= 0)
5810         {
5811           if (coding->type != coding_type_ccl
5812               || coding->mode & CODING_MODE_LAST_BLOCK)
5813             break;
5814           coding->mode |= CODING_MODE_LAST_BLOCK;
5815           continue;
5816         }
5817       if (result == CODING_FINISH_INSUFFICIENT_SRC)
5818         {
5819           /* The source text ends in invalid codes.  Let's just
5820              make them valid buffer contents, and finish conversion.  */
5821           if (multibyte_p)
5822             {
5823               unsigned char *start = dst;
5824
5825               inserted += len_byte;
5826               while (len_byte--)
5827                 {
5828                   int c = *src++;
5829                   dst += CHAR_STRING (c, dst);
5830                 }
5831
5832               inserted_byte += dst - start;
5833             }
5834           else
5835             {
5836               inserted += len_byte;
5837               inserted_byte += len_byte;
5838               while (len_byte--)
5839                 *dst++ = *src++;
5840             }
5841           break;
5842         }
5843       if (result == CODING_FINISH_INTERRUPT)
5844         {
5845           /* The conversion procedure was interrupted by a user.  */
5846           break;
5847         }
5848       /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST  */
5849       if (coding->consumed < 1)
5850         {
5851           /* It's quite strange to require more memory without
5852              consuming any bytes.  Perhaps CCL program bug.  */
5853           break;
5854         }
5855       if (first)
5856         {
5857           /* We have just done the first batch of conversion which was
5858              stopped because of insufficient gap.  Let's reconsider the
5859              required gap size (i.e. SRT - DST) now.
5860
5861              We have converted ORIG bytes (== coding->consumed) into
5862              NEW bytes (coding->produced).  To convert the remaining
5863              LEN bytes, we may need REQUIRE bytes of gap, where:
5864                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
5865                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
5866              Here, we are sure that NEW >= ORIG.  */
5867           float ratio;
5868
5869           if (coding->produced <= coding->consumed)
5870             {
5871               /* This happens because of CCL-based coding system with
5872                  eol-type CRLF.  */
5873               require = 0;
5874             }
5875           else
5876             {
5877               ratio = (coding->produced - coding->consumed) / coding->consumed;
5878               require = len_byte * ratio;
5879             }
5880           first = 0;
5881         }
5882       if ((src - dst) < (require + 2000))
5883         {
5884           /* See the comment above the previous call of make_gap.  */
5885           int add = len_byte + inserted_byte;
5886
5887           GAP_SIZE -= add;
5888           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5889           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5890           make_gap (require + 2000);
5891           GAP_SIZE += add;
5892           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5893           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5894         }
5895     }
5896   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
5897
5898   if (encodep && coding->dst_multibyte)
5899     {
5900       /* The output is unibyte.  We must convert 8-bit characters to
5901          multibyte form.  */
5902       if (inserted_byte * 2 > GAP_SIZE)
5903         {
5904           GAP_SIZE -= inserted_byte;
5905           ZV += inserted_byte; Z += inserted_byte;
5906           ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
5907           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5908           make_gap (inserted_byte - GAP_SIZE);
5909           GAP_SIZE += inserted_byte;
5910           ZV -= inserted_byte; Z -= inserted_byte;
5911           ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte;
5912           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5913         }
5914       inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
5915     }
5916
5917   /* If we shrank the conversion area, adjust it now.  */
5918   if (total_skip > 0)
5919     {
5920       if (tail_skip > 0)
5921         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
5922       inserted += total_skip; inserted_byte += total_skip;
5923       GAP_SIZE += total_skip;
5924       GPT -= head_skip; GPT_BYTE -= head_skip;
5925       ZV -= total_skip; ZV_BYTE -= total_skip;
5926       Z -= total_skip; Z_BYTE -= total_skip;
5927       from -= head_skip; from_byte -= head_skip;
5928       to += tail_skip; to_byte += tail_skip;
5929     }
5930
5931   prev_Z = Z;
5932   if (! EQ (current_buffer->undo_list, Qt))
5933     adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
5934   else
5935     adjust_after_replace_noundo (from, from_byte, nchars_del, nbytes_del,
5936                                  inserted, inserted_byte);
5937   inserted = Z - prev_Z;
5938
5939   if (!encodep && coding->cmp_data && coding->cmp_data->used)
5940     coding_restore_composition (coding, Fcurrent_buffer ());
5941   coding_free_composition_data (coding);
5942
5943   if (! inhibit_pre_post_conversion
5944       && ! encodep && ! NILP (coding->post_read_conversion))
5945     {
5946       Lisp_Object val;
5947       Lisp_Object saved_coding_system;
5948
5949       if (from != PT)
5950         TEMP_SET_PT_BOTH (from, from_byte);
5951       prev_Z = Z;
5952       record_unwind_protect (code_convert_region_unwind,
5953                              Vlast_coding_system_used);
5954       saved_coding_system = Vlast_coding_system_used;
5955       Vlast_coding_system_used = coding->symbol;
5956       /* We should not call any more pre-write/post-read-conversion
5957          functions while this post-read-conversion is running.  */
5958       inhibit_pre_post_conversion = 1;
5959       val = call1 (coding->post_read_conversion, make_number (inserted));
5960       inhibit_pre_post_conversion = 0;
5961       coding->symbol = Vlast_coding_system_used;
5962       Vlast_coding_system_used = saved_coding_system;
5963       /* Discard the unwind protect.  */
5964       specpdl_ptr--;
5965       CHECK_NUMBER (val);
5966       inserted += Z - prev_Z;
5967     }
5968
5969   if (orig_point >= from)
5970     {
5971       if (orig_point >= from + orig_len)
5972         orig_point += inserted - orig_len;
5973       else
5974         orig_point = from;
5975       TEMP_SET_PT (orig_point);
5976     }
5977
5978   if (replace)
5979     {
5980       signal_after_change (from, to - from, inserted);
5981       update_compositions (from, from + inserted, CHECK_BORDER);
5982     }
5983
5984   {
5985     coding->consumed = to_byte - from_byte;
5986     coding->consumed_char = to - from;
5987     coding->produced = inserted_byte;
5988     coding->produced_char = inserted;
5989   }
5990
5991   return 0;
5992 }
5993
5994 Lisp_Object
5995 run_pre_post_conversion_on_str (str, coding, encodep)
5996      Lisp_Object str;
5997      struct coding_system *coding;
5998      int encodep;
5999 {
6000   int count = SPECPDL_INDEX ();
6001   struct gcpro gcpro1, gcpro2;
6002   int multibyte = STRING_MULTIBYTE (str);
6003   Lisp_Object buffer;
6004   struct buffer *buf;
6005   Lisp_Object old_deactivate_mark;
6006
6007   record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
6008   record_unwind_protect (code_convert_region_unwind,
6009                          Vlast_coding_system_used);
6010   /* It is not crucial to specbind this.  */
6011   old_deactivate_mark = Vdeactivate_mark;
6012   GCPRO2 (str, old_deactivate_mark);
6013
6014   buffer = Fget_buffer_create (build_string (" *code-converting-work*"));
6015   buf = XBUFFER (buffer);
6016
6017   delete_all_overlays (buf);
6018   buf->directory = current_buffer->directory;
6019   buf->read_only = Qnil;
6020   buf->filename = Qnil;
6021   buf->undo_list = Qt;
6022   eassert (buf->overlays_before == NULL);
6023   eassert (buf->overlays_after == NULL);
6024
6025   set_buffer_internal (buf);
6026   /* We must insert the contents of STR as is without
6027      unibyte<->multibyte conversion.  For that, we adjust the
6028      multibyteness of the working buffer to that of STR.  */
6029   Ferase_buffer ();
6030   buf->enable_multibyte_characters = multibyte ? Qt : Qnil;
6031
6032   insert_from_string (str, 0, 0,
6033                       SCHARS (str), SBYTES (str), 0);
6034   UNGCPRO;
6035   inhibit_pre_post_conversion = 1;
6036   if (encodep)
6037     call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
6038   else
6039     {
6040       Vlast_coding_system_used = coding->symbol;
6041       TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
6042       call1 (coding->post_read_conversion, make_number (Z - BEG));
6043       coding->symbol = Vlast_coding_system_used;
6044     }
6045   inhibit_pre_post_conversion = 0;
6046   Vdeactivate_mark = old_deactivate_mark;
6047   str = make_buffer_string (BEG, Z, 1);
6048   return unbind_to (count, str);
6049 }
6050
6051 Lisp_Object
6052 decode_coding_string (str, coding, nocopy)
6053      Lisp_Object str;
6054      struct coding_system *coding;
6055      int nocopy;
6056 {
6057   int len;
6058   struct conversion_buffer buf;
6059   int from, to_byte;
6060   Lisp_Object saved_coding_symbol;
6061   int result;
6062   int require_decoding;
6063   int shrinked_bytes = 0;
6064   Lisp_Object newstr;
6065   int consumed, consumed_char, produced, produced_char;
6066
6067   from = 0;
6068   to_byte = SBYTES (str);
6069
6070   saved_coding_symbol = coding->symbol;
6071   coding->src_multibyte = STRING_MULTIBYTE (str);
6072   coding->dst_multibyte = 1;
6073   if (CODING_REQUIRE_DETECTION (coding))
6074     {
6075       /* See the comments in code_convert_region.  */
6076       if (coding->type == coding_type_undecided)
6077         {
6078           detect_coding (coding, SDATA (str), to_byte);
6079           if (coding->type == coding_type_undecided)
6080             {
6081               coding->type = coding_type_emacs_mule;
6082               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
6083               /* As emacs-mule decoder will handle composition, we
6084                  need this setting to allocate coding->cmp_data
6085                  later.  */
6086               coding->composing = COMPOSITION_NO;
6087             }
6088         }
6089       if (coding->eol_type == CODING_EOL_UNDECIDED
6090           && coding->type != coding_type_ccl)
6091         {
6092           saved_coding_symbol = coding->symbol;
6093           detect_eol (coding, SDATA (str), to_byte);
6094           if (coding->eol_type == CODING_EOL_UNDECIDED)
6095             coding->eol_type = CODING_EOL_LF;
6096           /* We had better recover the original eol format if we
6097              encounter an inconsistent eol format while decoding.  */
6098           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
6099         }
6100     }
6101
6102   if (coding->type == coding_type_no_conversion
6103       || coding->type == coding_type_raw_text)
6104     coding->dst_multibyte = 0;
6105
6106   require_decoding = CODING_REQUIRE_DECODING (coding);
6107
6108   if (STRING_MULTIBYTE (str))
6109     {
6110       /* Decoding routines expect the source text to be unibyte.  */
6111       str = Fstring_as_unibyte (str);
6112       to_byte = SBYTES (str);
6113       nocopy = 1;
6114       coding->src_multibyte = 0;
6115     }
6116
6117   /* Try to skip the heading and tailing ASCIIs.  */
6118   if (require_decoding && coding->type != coding_type_ccl)
6119     {
6120       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
6121                                 0);
6122       if (from == to_byte)
6123         require_decoding = 0;
6124       shrinked_bytes = from + (SBYTES (str) - to_byte);
6125     }
6126
6127   if (!require_decoding
6128       && !(SYMBOLP (coding->post_read_conversion)
6129            && !NILP (Ffboundp (coding->post_read_conversion))))
6130     {
6131       coding->consumed = SBYTES (str);
6132       coding->consumed_char = SCHARS (str);
6133       if (coding->dst_multibyte)
6134         {
6135           str = Fstring_as_multibyte (str);
6136           nocopy = 1;
6137         }
6138       coding->produced = SBYTES (str);
6139       coding->produced_char = SCHARS (str);
6140       return (nocopy ? str : Fcopy_sequence (str));
6141     }
6142
6143   if (coding->composing != COMPOSITION_DISABLED)
6144     coding_allocate_composition_data (coding, from);
6145   len = decoding_buffer_size (coding, to_byte - from);
6146   allocate_conversion_buffer (buf, len);
6147
6148   consumed = consumed_char = produced = produced_char = 0;
6149   while (1)
6150     {
6151       result = decode_coding (coding, SDATA (str) + from + consumed,
6152                               buf.data + produced, to_byte - from - consumed,
6153                               buf.size - produced);
6154       consumed += coding->consumed;
6155       consumed_char += coding->consumed_char;
6156       produced += coding->produced;
6157       produced_char += coding->produced_char;
6158       if (result == CODING_FINISH_NORMAL
6159           || (result == CODING_FINISH_INSUFFICIENT_SRC
6160               && coding->consumed == 0))
6161         break;
6162       if (result == CODING_FINISH_INSUFFICIENT_CMP)
6163         coding_allocate_composition_data (coding, from + produced_char);
6164       else if (result == CODING_FINISH_INSUFFICIENT_DST)
6165         extend_conversion_buffer (&buf);
6166       else if (result == CODING_FINISH_INCONSISTENT_EOL)
6167         {
6168           Lisp_Object eol_type;
6169
6170           /* Recover the original EOL format.  */
6171           if (coding->eol_type == CODING_EOL_CR)
6172             {
6173               unsigned char *p;
6174               for (p = buf.data; p < buf.data + produced; p++)
6175                 if (*p == '\n') *p = '\r';
6176             }
6177           else if (coding->eol_type == CODING_EOL_CRLF)
6178             {
6179               int num_eol = 0;
6180               unsigned char *p0, *p1;
6181               for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++)
6182                 if (*p0 == '\n') num_eol++;
6183               if (produced + num_eol >= buf.size)
6184                 extend_conversion_buffer (&buf);
6185               for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;)
6186                 {
6187                   *--p1 = *--p0;
6188                   if (*p0 == '\n') *--p1 = '\r';
6189                 }
6190               produced += num_eol;
6191               produced_char += num_eol;
6192             }
6193           /* Suppress eol-format conversion in the further conversion.  */
6194           coding->eol_type = CODING_EOL_LF;
6195
6196           /* Set the coding system symbol to that for Unix-like EOL.  */
6197           eol_type = Fget (saved_coding_symbol, Qeol_type);
6198           if (VECTORP (eol_type)
6199               && XVECTOR (eol_type)->size == 3
6200               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
6201             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
6202           else
6203             coding->symbol = saved_coding_symbol;
6204
6205
6206         }
6207     }
6208
6209   coding->consumed = consumed;
6210   coding->consumed_char = consumed_char;
6211   coding->produced = produced;
6212   coding->produced_char = produced_char;
6213
6214   if (coding->dst_multibyte)
6215     newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes,
6216                                            produced + shrinked_bytes);
6217   else
6218     newstr = make_uninit_string (produced + shrinked_bytes);
6219   if (from > 0)
6220     STRING_COPYIN (newstr, 0, SDATA (str), from);
6221   STRING_COPYIN (newstr, from, buf.data, produced);
6222   if (shrinked_bytes > from)
6223     STRING_COPYIN (newstr, from + produced,
6224                    SDATA (str) + to_byte,
6225                    shrinked_bytes - from);
6226   free_conversion_buffer (&buf);
6227
6228   if (coding->cmp_data && coding->cmp_data->used)
6229     coding_restore_composition (coding, newstr);
6230   coding_free_composition_data (coding);
6231
6232   if (SYMBOLP (coding->post_read_conversion)
6233       && !NILP (Ffboundp (coding->post_read_conversion)))
6234     newstr = run_pre_post_conversion_on_str (newstr, coding, 0);
6235
6236   return newstr;
6237 }
6238
6239 Lisp_Object
6240 encode_coding_string (str, coding, nocopy)
6241      Lisp_Object str;
6242      struct coding_system *coding;
6243      int nocopy;
6244 {
6245   int len;
6246   struct conversion_buffer buf;
6247   int from, to, to_byte;
6248   int result;
6249   int shrinked_bytes = 0;
6250   Lisp_Object newstr;
6251   int consumed, consumed_char, produced, produced_char;
6252
6253   if (SYMBOLP (coding->pre_write_conversion)
6254       && !NILP (Ffboundp (coding->pre_write_conversion)))
6255     str = run_pre_post_conversion_on_str (str, coding, 1);
6256
6257   from = 0;
6258   to = SCHARS (str);
6259   to_byte = SBYTES (str);
6260
6261   /* Encoding routines determine the multibyteness of the source text
6262      by coding->src_multibyte.  */
6263   coding->src_multibyte = STRING_MULTIBYTE (str);
6264   coding->dst_multibyte = 0;
6265   if (! CODING_REQUIRE_ENCODING (coding))
6266     {
6267       coding->consumed = SBYTES (str);
6268       coding->consumed_char = SCHARS (str);
6269       if (STRING_MULTIBYTE (str))
6270         {
6271           str = Fstring_as_unibyte (str);
6272           nocopy = 1;
6273         }
6274       coding->produced = SBYTES (str);
6275       coding->produced_char = SCHARS (str);
6276       return (nocopy ? str : Fcopy_sequence (str));
6277     }
6278
6279   if (coding->composing != COMPOSITION_DISABLED)
6280     coding_save_composition (coding, from, to, str);
6281
6282   /* Try to skip the heading and tailing ASCIIs.  */
6283   if (coding->type != coding_type_ccl)
6284     {
6285       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
6286                                 1);
6287       if (from == to_byte)
6288         return (nocopy ? str : Fcopy_sequence (str));
6289       shrinked_bytes = from + (SBYTES (str) - to_byte);
6290     }
6291
6292   len = encoding_buffer_size (coding, to_byte - from);
6293   allocate_conversion_buffer (buf, len);
6294
6295   consumed = consumed_char = produced = produced_char = 0;
6296   while (1)
6297     {
6298       result = encode_coding (coding, SDATA (str) + from + consumed,
6299                               buf.data + produced, to_byte - from - consumed,
6300                               buf.size - produced);
6301       consumed += coding->consumed;
6302       consumed_char += coding->consumed_char;
6303       produced += coding->produced;
6304       produced_char += coding->produced_char;
6305       if (result == CODING_FINISH_NORMAL
6306           || (result == CODING_FINISH_INSUFFICIENT_SRC
6307               && coding->consumed == 0))
6308         break;
6309       /* Now result should be CODING_FINISH_INSUFFICIENT_DST.  */
6310       extend_conversion_buffer (&buf);
6311     }
6312
6313   coding->consumed = consumed;
6314   coding->consumed_char = consumed_char;
6315   coding->produced = produced;
6316   coding->produced_char = produced_char;
6317
6318   newstr = make_uninit_string (produced + shrinked_bytes);
6319   if (from > 0)
6320     STRING_COPYIN (newstr, 0, SDATA (str), from);
6321   STRING_COPYIN (newstr, from, buf.data, produced);
6322   if (shrinked_bytes > from)
6323     STRING_COPYIN (newstr, from + produced,
6324                    SDATA (str) + to_byte,
6325                    shrinked_bytes - from);
6326
6327   free_conversion_buffer (&buf);
6328   coding_free_composition_data (coding);
6329
6330   return newstr;
6331 }
6332
6333 \f
6334 #ifdef emacs
6335 /*** 8. Emacs Lisp library functions ***/
6336
6337 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6338        doc: /* Return t if OBJECT is nil or a coding-system.
6339 See the documentation of `make-coding-system' for information
6340 about coding-system objects.  */)
6341      (obj)
6342      Lisp_Object obj;
6343 {
6344   if (NILP (obj))
6345     return Qt;
6346   if (!SYMBOLP (obj))
6347     return Qnil;
6348   if (! NILP (Fget (obj, Qcoding_system_define_form)))
6349     return Qt;
6350   /* Get coding-spec vector for OBJ.  */
6351   obj = Fget (obj, Qcoding_system);
6352   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
6353           ? Qt : Qnil);
6354 }
6355
6356 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6357        Sread_non_nil_coding_system, 1, 1, 0,
6358        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
6359      (prompt)
6360      Lisp_Object prompt;
6361 {
6362   Lisp_Object val;
6363   do
6364     {
6365       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6366                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6367     }
6368   while (SCHARS (val) == 0);
6369   return (Fintern (val, Qnil));
6370 }
6371
6372 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6373        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6374 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.  */)
6375      (prompt, default_coding_system)
6376      Lisp_Object prompt, default_coding_system;
6377 {
6378   Lisp_Object val;
6379   if (SYMBOLP (default_coding_system))
6380     default_coding_system = SYMBOL_NAME (default_coding_system);
6381   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6382                           Qt, Qnil, Qcoding_system_history,
6383                           default_coding_system, Qnil);
6384   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
6385 }
6386
6387 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6388        1, 1, 0,
6389        doc: /* Check validity of CODING-SYSTEM.
6390 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
6391 It is valid if it is nil or a symbol with a non-nil `coding-system' property.
6392 The value of this property should be a vector of length 5.  */)
6393      (coding_system)
6394      Lisp_Object coding_system;
6395 {
6396   Lisp_Object define_form;
6397
6398   define_form = Fget (coding_system, Qcoding_system_define_form);
6399   if (! NILP (define_form))
6400     {
6401       Fput (coding_system, Qcoding_system_define_form, Qnil);
6402       safe_eval (define_form);
6403     }
6404   if (!NILP (Fcoding_system_p (coding_system)))
6405     return coding_system;
6406   while (1)
6407     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6408 }
6409 \f
6410 Lisp_Object
6411 detect_coding_system (src, src_bytes, highest, multibytep)
6412      const unsigned char *src;
6413      int src_bytes, highest;
6414      int multibytep;
6415 {
6416   int coding_mask, eol_type;
6417   Lisp_Object val, tmp;
6418   int dummy;
6419
6420   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy, multibytep);
6421   eol_type  = detect_eol_type (src, src_bytes, &dummy);
6422   if (eol_type == CODING_EOL_INCONSISTENT)
6423     eol_type = CODING_EOL_UNDECIDED;
6424
6425   if (!coding_mask)
6426     {
6427       val = Qundecided;
6428       if (eol_type != CODING_EOL_UNDECIDED)
6429         {
6430           Lisp_Object val2;
6431           val2 = Fget (Qundecided, Qeol_type);
6432           if (VECTORP (val2))
6433             val = XVECTOR (val2)->contents[eol_type];
6434         }
6435       return (highest ? val : Fcons (val, Qnil));
6436     }
6437
6438   /* At first, gather possible coding systems in VAL.  */
6439   val = Qnil;
6440   for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp))
6441     {
6442       Lisp_Object category_val, category_index;
6443
6444       category_index = Fget (XCAR (tmp), Qcoding_category_index);
6445       category_val = Fsymbol_value (XCAR (tmp));
6446       if (!NILP (category_val)
6447           && NATNUMP (category_index)
6448           && (coding_mask & (1 << XFASTINT (category_index))))
6449         {
6450           val = Fcons (category_val, val);
6451           if (highest)
6452             break;
6453         }
6454     }
6455   if (!highest)
6456     val = Fnreverse (val);
6457
6458   /* Then, replace the elements with subsidiary coding systems.  */
6459   for (tmp = val; CONSP (tmp); tmp = XCDR (tmp))
6460     {
6461       if (eol_type != CODING_EOL_UNDECIDED
6462           && eol_type != CODING_EOL_INCONSISTENT)
6463         {
6464           Lisp_Object eol;
6465           eol = Fget (XCAR (tmp), Qeol_type);
6466           if (VECTORP (eol))
6467             XSETCAR (tmp, XVECTOR (eol)->contents[eol_type]);
6468         }
6469     }
6470   return (highest ? XCAR (val) : val);
6471 }
6472
6473 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6474        2, 3, 0,
6475        doc: /* Detect how the byte sequence in the region is encoded.
6476 Return a list of possible coding systems used on decoding a byte
6477 sequence containing the bytes in the region between START and END when
6478 the coding system `undecided' is specified.  The list is ordered by
6479 priority decided in the current language environment.
6480
6481 If only ASCII characters are found, it returns a list of single element
6482 `undecided' or its subsidiary coding system according to a detected
6483 end-of-line format.
6484
6485 If optional argument HIGHEST is non-nil, return the coding system of
6486 highest priority.  */)
6487      (start, end, highest)
6488      Lisp_Object start, end, highest;
6489 {
6490   int from, to;
6491   int from_byte, to_byte;
6492   int include_anchor_byte = 0;
6493
6494   CHECK_NUMBER_COERCE_MARKER (start);
6495   CHECK_NUMBER_COERCE_MARKER (end);
6496
6497   validate_region (&start, &end);
6498   from = XINT (start), to = XINT (end);
6499   from_byte = CHAR_TO_BYTE (from);
6500   to_byte = CHAR_TO_BYTE (to);
6501
6502   if (from < GPT && to >= GPT)
6503     move_gap_both (to, to_byte);
6504   /* If we an anchor byte `\0' follows the region, we include it in
6505      the detecting source.  Then code detectors can handle the tailing
6506      byte sequence more accurately.
6507
6508      Fix me: This is not a perfect solution.  It is better that we
6509      add one more argument, say LAST_BLOCK, to all detect_coding_XXX.
6510   */
6511   if (to == Z || (to == GPT && GAP_SIZE > 0))
6512     include_anchor_byte = 1;
6513   return detect_coding_system (BYTE_POS_ADDR (from_byte),
6514                                to_byte - from_byte + include_anchor_byte,
6515                                !NILP (highest),
6516                                !NILP (current_buffer
6517                                       ->enable_multibyte_characters));
6518 }
6519
6520 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6521        1, 2, 0,
6522        doc: /* Detect how the byte sequence in STRING is encoded.
6523 Return a list of possible coding systems used on decoding a byte
6524 sequence containing the bytes in STRING when the coding system
6525 `undecided' is specified.  The list is ordered by priority decided in
6526 the current language environment.
6527
6528 If only ASCII characters are found, it returns a list of single element
6529 `undecided' or its subsidiary coding system according to a detected
6530 end-of-line format.
6531
6532 If optional argument HIGHEST is non-nil, return the coding system of
6533 highest priority.  */)
6534      (string, highest)
6535      Lisp_Object string, highest;
6536 {
6537   CHECK_STRING (string);
6538
6539   return detect_coding_system (SDATA (string),
6540                                /* "+ 1" is to include the anchor byte
6541                                   `\0'.  With this, code detectors can
6542                                   handle the tailing bytes more
6543                                   accurately.  */
6544                                SBYTES (string) + 1,
6545                                !NILP (highest),
6546                                STRING_MULTIBYTE (string));
6547 }
6548
6549 /*  Subroutine for Fsafe_coding_systems_region_internal.
6550
6551     Return a list of coding systems that safely encode the multibyte
6552     text between P and PEND.  SAFE_CODINGS, if non-nil, is an alist of
6553     possible coding systems.  If it is nil, it means that we have not
6554     yet found any coding systems.
6555
6556     WORK_TABLE is a copy of the char-table Vchar_coding_system_table.  An
6557     element of WORK_TABLE is set to t once the element is looked up.
6558
6559     If a non-ASCII single byte char is found, set
6560     *single_byte_char_found to 1.  */
6561
6562 static Lisp_Object
6563 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
6564      unsigned char *p, *pend;
6565      Lisp_Object safe_codings, work_table;
6566      int *single_byte_char_found;
6567 {
6568   int c, len;
6569   Lisp_Object val, ch;
6570   Lisp_Object prev, tail;
6571
6572   while (p < pend)
6573     {
6574       c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
6575       p += len;
6576       if (ASCII_BYTE_P (c))
6577         /* We can ignore ASCII characters here.  */
6578         continue;
6579       if (SINGLE_BYTE_CHAR_P (c))
6580         *single_byte_char_found = 1;
6581       if (NILP (safe_codings))
6582         /* Already all coding systems are excluded.  But, we can't
6583            terminate the loop here because non-ASCII single-byte char
6584            must be found.  */
6585         continue;
6586       /* Check the safe coding systems for C.  */
6587       ch = make_number (c);
6588       val = Faref (work_table, ch);
6589       if (EQ (val, Qt))
6590         /* This element was already checked.  Ignore it.  */
6591         continue;
6592       /* Remember that we checked this element.  */
6593       Faset (work_table, ch, Qt);
6594
6595       for (prev = tail = safe_codings; CONSP (tail); tail = XCDR (tail))
6596         {
6597           Lisp_Object elt, translation_table, hash_table, accept_latin_extra;
6598           int encodable;
6599
6600           elt = XCAR (tail);
6601           if (CONSP (XCDR (elt)))
6602             {
6603               /* This entry has this format now:
6604                  ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
6605                           ACCEPT-LATIN-EXTRA ) */
6606               val = XCDR (elt);
6607               encodable = ! NILP (Faref (XCAR (val), ch));
6608               if (! encodable)
6609                 {
6610                   val = XCDR (val);
6611                   translation_table = XCAR (val);
6612                   hash_table = XCAR (XCDR (val));
6613                   accept_latin_extra = XCAR (XCDR (XCDR (val)));
6614                 }
6615             }
6616           else
6617             {
6618               /* This entry has this format now: ( CODING . SAFE-CHARS) */
6619               encodable = ! NILP (Faref (XCDR (elt), ch));
6620               if (! encodable)
6621                 {
6622                   /* Transform the format to:
6623                      ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
6624                        ACCEPT-LATIN-EXTRA )  */
6625                   val = Fget (XCAR (elt), Qcoding_system);
6626                   translation_table
6627                     = Fplist_get (AREF (val, 3),
6628                                   Qtranslation_table_for_encode);
6629                   if (SYMBOLP (translation_table))
6630                     translation_table = Fget (translation_table,
6631                                               Qtranslation_table);
6632                   hash_table
6633                     = (CHAR_TABLE_P (translation_table)
6634                        ? XCHAR_TABLE (translation_table)->extras[1]
6635                        : Qnil);
6636                   accept_latin_extra
6637                     = ((EQ (AREF (val, 0), make_number (2))
6638                         && VECTORP (AREF (val, 4)))
6639                        ? AREF (AREF (val, 4), 16)
6640                        : Qnil);
6641                   XSETCAR (tail, list5 (XCAR (elt), XCDR (elt),
6642                                         translation_table, hash_table,
6643                                         accept_latin_extra));
6644                 }
6645             }
6646
6647           if (! encodable
6648               && ((CHAR_TABLE_P (translation_table)
6649                    && ! NILP (Faref (translation_table, ch)))
6650                   || (HASH_TABLE_P (hash_table)
6651                       && ! NILP (Fgethash (ch, hash_table, Qnil)))
6652                   || (SINGLE_BYTE_CHAR_P (c)
6653                       && ! NILP (accept_latin_extra)
6654                       && VECTORP (Vlatin_extra_code_table)
6655                       && ! NILP (AREF (Vlatin_extra_code_table, c)))))
6656             encodable = 1;
6657           if (encodable)
6658             prev = tail;
6659           else
6660             {
6661               /* Exclude this coding system from SAFE_CODINGS.  */
6662               if (EQ (tail, safe_codings))
6663                 safe_codings = XCDR (safe_codings);
6664               else
6665                 XSETCDR (prev, XCDR (tail));
6666             }
6667         }
6668     }
6669   return safe_codings;
6670 }
6671
6672 DEFUN ("find-coding-systems-region-internal",
6673        Ffind_coding_systems_region_internal,
6674        Sfind_coding_systems_region_internal, 2, 2, 0,
6675        doc: /* Internal use only.  */)
6676      (start, end)
6677      Lisp_Object start, end;
6678 {
6679   Lisp_Object work_table, safe_codings;
6680   int non_ascii_p = 0;
6681   int single_byte_char_found = 0;
6682   const unsigned char *p1, *p1end, *p2, *p2end, *p;
6683
6684   if (STRINGP (start))
6685     {
6686       if (!STRING_MULTIBYTE (start))
6687         return Qt;
6688       p1 = SDATA (start), p1end = p1 + SBYTES (start);
6689       p2 = p2end = p1end;
6690       if (SCHARS (start) != SBYTES (start))
6691         non_ascii_p = 1;
6692     }
6693   else
6694     {
6695       int from, to, stop;
6696
6697       CHECK_NUMBER_COERCE_MARKER (start);
6698       CHECK_NUMBER_COERCE_MARKER (end);
6699       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6700         args_out_of_range (start, end);
6701       if (NILP (current_buffer->enable_multibyte_characters))
6702         return Qt;
6703       from = CHAR_TO_BYTE (XINT (start));
6704       to = CHAR_TO_BYTE (XINT (end));
6705       stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
6706       p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
6707       if (stop == to)
6708         p2 = p2end = p1end;
6709       else
6710         p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
6711       if (XINT (end) - XINT (start) != to - from)
6712         non_ascii_p = 1;
6713     }
6714
6715   if (!non_ascii_p)
6716     {
6717       /* We are sure that the text contains no multibyte character.
6718          Check if it contains eight-bit-graphic.  */
6719       p = p1;
6720       for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
6721       if (p == p1end)
6722         {
6723           for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
6724           if (p == p2end)
6725             return Qt;
6726         }
6727     }
6728
6729   /* The text contains non-ASCII characters.  */
6730
6731   work_table = Fmake_char_table (Qchar_coding_system, Qnil);
6732   safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars));
6733
6734   safe_codings = find_safe_codings (p1, p1end, safe_codings, work_table,
6735                                     &single_byte_char_found);
6736   if (p2 < p2end)
6737     safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
6738                                       &single_byte_char_found);
6739   if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars)))
6740     safe_codings = Qt;
6741   else
6742     {
6743       /* Turn safe_codings to a list of coding systems... */
6744       Lisp_Object val;
6745
6746       if (single_byte_char_found)
6747         /* ... and append these for eight-bit chars.  */
6748         val = Fcons (Qraw_text,
6749                      Fcons (Qemacs_mule, Fcons (Qno_conversion, Qnil)));
6750       else
6751         /* ... and append generic coding systems.  */
6752         val = Fcopy_sequence (XCAR (Vcoding_system_safe_chars));
6753
6754       for (; CONSP (safe_codings); safe_codings = XCDR (safe_codings))
6755         val = Fcons (XCAR (XCAR (safe_codings)), val);
6756       safe_codings = val;
6757     }
6758
6759   return safe_codings;
6760 }
6761
6762
6763 /* Search from position POS for such characters that are unencodable
6764    accoding to SAFE_CHARS, and return a list of their positions.  P
6765    points where in the memory the character at POS exists.  Limit the
6766    search at PEND or when Nth unencodable characters are found.
6767
6768    If SAFE_CHARS is a char table, an element for an unencodable
6769    character is nil.
6770
6771    If SAFE_CHARS is nil, all non-ASCII characters are unencodable.
6772
6773    Otherwise, SAFE_CHARS is t, and only eight-bit-contrl and
6774    eight-bit-graphic characters are unencodable.  */
6775
6776 static Lisp_Object
6777 unencodable_char_position (safe_chars, pos, p, pend, n)
6778      Lisp_Object safe_chars;
6779      int pos;
6780      unsigned char *p, *pend;
6781      int n;
6782 {
6783   Lisp_Object pos_list;
6784
6785   pos_list = Qnil;
6786   while (p < pend)
6787     {
6788       int len;
6789       int c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, len);
6790
6791       if (c >= 128
6792           && (CHAR_TABLE_P (safe_chars)
6793               ? NILP (CHAR_TABLE_REF (safe_chars, c))
6794               : (NILP (safe_chars) || c < 256)))
6795         {
6796           pos_list = Fcons (make_number (pos), pos_list);
6797           if (--n <= 0)
6798             break;
6799         }
6800       pos++;
6801       p += len;
6802     }
6803   return Fnreverse (pos_list);
6804 }
6805
6806
DEFUN ("unencodable-char-position", Funencodable_char_position,
       Sunencodable_char_position, 3, 5, 0,
       doc: /*
Return position of first un-encodable character in a region.
START and END specify the region and CODING-SYSTEM specifies the
encoding to check.  Return nil if CODING-SYSTEM does encode the region.

If optional 4th argument COUNT is non-nil, it specifies at most how
many un-encodable characters to search.  In this case, the value is a
list of positions.

If optional 5th argument STRING is non-nil, it is a string to search
for un-encodable characters.  In that case, START and END are indexes
to the string.  */)
     (start, end, coding_system, count, string)
     Lisp_Object start, end, coding_system, count, string;
{
  int n;
  Lisp_Object safe_chars;
  struct coding_system coding;
  Lisp_Object positions;
  int from, to;
  unsigned char *p, *pend;

  if (NILP (string))
    {
      /* Search the current buffer between START and END.  */
      validate_region (&start, &end);
      from = XINT (start);
      to = XINT (end);
      /* Nothing is reported for a unibyte buffer.  */
      if (NILP (current_buffer->enable_multibyte_characters))
        return Qnil;
      p = CHAR_POS_ADDR (from);
      /* When the region ends exactly at the gap, use the gap start as
         the end address.  NOTE(review): presumably CHAR_POS_ADDR (to)
         would yield an address beyond the gap here -- confirm.  */
      if (to == GPT)
        pend = GPT_ADDR;
      else
        pend = CHAR_POS_ADDR (to);
    }
  else
    {
      /* Search STRING; START and END are character indexes into it.  */
      CHECK_STRING (string);
      CHECK_NATNUM (start);
      CHECK_NATNUM (end);
      from = XINT (start);
      to = XINT (end);
      if (from > to
          || to > SCHARS (string))
        args_out_of_range_3 (string, start, end);
      /* Nothing is reported for a unibyte string.  */
      if (! STRING_MULTIBYTE (string))
        return Qnil;
      p = SDATA (string) + string_char_to_byte (string, from);
      pend = SDATA (string) + string_char_to_byte (string, to);
    }

  /* The return value of setup_coding_system is not examined here; only
     coding.type is consulted below.  */
  setup_coding_system (Fcheck_coding_system (coding_system), &coding);

  /* Without COUNT only the first position is wanted.  */
  if (NILP (count))
    n = 1;
  else
    {
      CHECK_NATNUM (count);
      n = XINT (count);
    }

  /* For these two types the answer is always nil.  */
  if (coding.type == coding_type_no_conversion
      || coding.type == coding_type_raw_text)
    return Qnil;

  /* For an undecided coding system no safe-chars table is used.  */
  if (coding.type == coding_type_undecided)
    safe_chars = Qnil;
  else
    safe_chars = coding_safe_chars (coding_system);

  /* A string, or a buffer region lying entirely on one side of the
     gap, is scanned in a single pass.  */
  if (STRINGP (string)
      || from >= GPT || to <= GPT)
    positions = unencodable_char_position (safe_chars, from, p, pend, n);
  else
    {
      /* The region straddles the gap: scan the part before the gap,
         then, if more positions are still wanted, the part after it,
         and concatenate the two lists.  */
      Lisp_Object args[2];

      args[0] = unencodable_char_position (safe_chars, from, p, GPT_ADDR, n);
      n -= XINT (Flength (args[0]));
      if (n <= 0)
        positions = args[0];
      else
        {
          args[1] = unencodable_char_position (safe_chars, GPT, GAP_END_ADDR,
                                               pend, n);
          positions = Fappend (2, args);
        }
    }

  /* Without COUNT the value is a single position (or nil), otherwise
     the whole list.  */
  return  (NILP (count) ? Fcar (positions) : positions);
}
6900
6901
6902 Lisp_Object
6903 code_convert_region1 (start, end, coding_system, encodep)
6904      Lisp_Object start, end, coding_system;
6905      int encodep;
6906 {
6907   struct coding_system coding;
6908   int from, to;
6909
6910   CHECK_NUMBER_COERCE_MARKER (start);
6911   CHECK_NUMBER_COERCE_MARKER (end);
6912   CHECK_SYMBOL (coding_system);
6913
6914   validate_region (&start, &end);
6915   from = XFASTINT (start);
6916   to = XFASTINT (end);
6917
6918   if (NILP (coding_system))
6919     return make_number (to - from);
6920
6921   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6922     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
6923
6924   coding.mode |= CODING_MODE_LAST_BLOCK;
6925   coding.src_multibyte = coding.dst_multibyte
6926     = !NILP (current_buffer->enable_multibyte_characters);
6927   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
6928                        &coding, encodep, 1);
6929   Vlast_coding_system_used = coding.symbol;
6930   return make_number (coding.produced_char);
6931 }
6932
6933 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
6934        3, 3, "r\nzCoding system: ",
6935        doc: /* Decode the current region from the specified coding system.
6936 When called from a program, takes three arguments:
6937 START, END, and CODING-SYSTEM.  START and END are buffer positions.
6938 This function sets `last-coding-system-used' to the precise coding system
6939 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6940 not fully specified.)
6941 It returns the length of the decoded text.  */)
6942      (start, end, coding_system)
6943      Lisp_Object start, end, coding_system;
6944 {
6945   return code_convert_region1 (start, end, coding_system, 0);
6946 }
6947
6948 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
6949        3, 3, "r\nzCoding system: ",
6950        doc: /* Encode the current region into the specified coding system.
6951 When called from a program, takes three arguments:
6952 START, END, and CODING-SYSTEM.  START and END are buffer positions.
6953 This function sets `last-coding-system-used' to the precise coding system
6954 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6955 not fully specified.)
6956 It returns the length of the encoded text.  */)
6957      (start, end, coding_system)
6958      Lisp_Object start, end, coding_system;
6959 {
6960   return code_convert_region1 (start, end, coding_system, 1);
6961 }
6962
6963 Lisp_Object
6964 code_convert_string1 (string, coding_system, nocopy, encodep)
6965      Lisp_Object string, coding_system, nocopy;
6966      int encodep;
6967 {
6968   struct coding_system coding;
6969
6970   CHECK_STRING (string);
6971   CHECK_SYMBOL (coding_system);
6972
6973   if (NILP (coding_system))
6974     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
6975
6976   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6977     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
6978
6979   coding.mode |= CODING_MODE_LAST_BLOCK;
6980   string = (encodep
6981             ? encode_coding_string (string, &coding, !NILP (nocopy))
6982             : decode_coding_string (string, &coding, !NILP (nocopy)));
6983   Vlast_coding_system_used = coding.symbol;
6984
6985   return string;
6986 }
6987
6988 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
6989        2, 3, 0,
6990        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
6991 Optional arg NOCOPY non-nil means it is OK to return STRING itself
6992 if the decoding operation is trivial.
6993 This function sets `last-coding-system-used' to the precise coding system
6994 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6995 not fully specified.)  */)
6996      (string, coding_system, nocopy)
6997      Lisp_Object string, coding_system, nocopy;
6998 {
6999   return code_convert_string1 (string, coding_system, nocopy, 0);
7000 }
7001
7002 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
7003        2, 3, 0,
7004        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
7005 Optional arg NOCOPY non-nil means it is OK to return STRING itself
7006 if the encoding operation is trivial.
7007 This function sets `last-coding-system-used' to the precise coding system
7008 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7009 not fully specified.)  */)
7010      (string, coding_system, nocopy)
7011      Lisp_Object string, coding_system, nocopy;
7012 {
7013   return code_convert_string1 (string, coding_system, nocopy, 1);
7014 }
7015
7016 /* Encode or decode STRING according to CODING_SYSTEM.
7017    Do not set Vlast_coding_system_used.
7018
7019    This function is called only from macros DECODE_FILE and
7020    ENCODE_FILE, thus we ignore character composition.  */
7021
7022 Lisp_Object
7023 code_convert_string_norecord (string, coding_system, encodep)
7024      Lisp_Object string, coding_system;
7025      int encodep;
7026 {
7027   struct coding_system coding;
7028
7029   CHECK_STRING (string);
7030   CHECK_SYMBOL (coding_system);
7031
7032   if (NILP (coding_system))
7033     return string;
7034
7035   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
7036     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
7037
7038   coding.composing = COMPOSITION_DISABLED;
7039   coding.mode |= CODING_MODE_LAST_BLOCK;
7040   return (encodep
7041           ? encode_coding_string (string, &coding, 1)
7042           : decode_coding_string (string, &coding, 1));
7043 }
7044 \f
7045 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
7046        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7047 Return the corresponding character.  */)
7048      (code)
7049      Lisp_Object code;
7050 {
7051   unsigned char c1, c2, s1, s2;
7052   Lisp_Object val;
7053
7054   CHECK_NUMBER (code);
7055   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
7056   if (s1 == 0)
7057     {
7058       if (s2 < 0x80)
7059         XSETFASTINT (val, s2);
7060       else if (s2 >= 0xA0 || s2 <= 0xDF)
7061         XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0));
7062       else
7063         error ("Invalid Shift JIS code: %x", XFASTINT (code));
7064     }
7065   else
7066     {
7067       if ((s1 < 0x80 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF)
7068           || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
7069         error ("Invalid Shift JIS code: %x", XFASTINT (code));
7070       DECODE_SJIS (s1, s2, c1, c2);
7071       XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2));
7072     }
7073   return val;
7074 }
7075
7076 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
7077        doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7078 Return the corresponding code in SJIS.  */)
7079      (ch)
7080      Lisp_Object ch;
7081 {
7082   int charset, c1, c2, s1, s2;
7083   Lisp_Object val;
7084
7085   CHECK_NUMBER (ch);
7086   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
7087   if (charset == CHARSET_ASCII)
7088     {
7089       val = ch;
7090     }
7091   else if (charset == charset_jisx0208
7092            && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
7093     {
7094       ENCODE_SJIS (c1, c2, s1, s2);
7095       XSETFASTINT (val, (s1 << 8) | s2);
7096     }
7097   else if (charset == charset_katakana_jisx0201
7098            && c1 > 0x20 && c2 < 0xE0)
7099     {
7100       XSETFASTINT (val, c1 | 0x80);
7101     }
7102   else
7103     error ("Can't encode to shift_jis: %d", XFASTINT (ch));
7104   return val;
7105 }
7106
7107 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
7108        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7109 Return the corresponding character.  */)
7110      (code)
7111      Lisp_Object code;
7112 {
7113   int charset;
7114   unsigned char b1, b2, c1, c2;
7115   Lisp_Object val;
7116
7117   CHECK_NUMBER (code);
7118   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
7119   if (b1 == 0)
7120     {
7121       if (b2 >= 0x80)
7122         error ("Invalid BIG5 code: %x", XFASTINT (code));
7123       val = code;
7124     }
7125   else
7126     {
7127       if ((b1 < 0xA1 || b1 > 0xFE)
7128           || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
7129         error ("Invalid BIG5 code: %x", XFASTINT (code));
7130       DECODE_BIG5 (b1, b2, charset, c1, c2);
7131       XSETFASTINT (val, MAKE_CHAR (charset, c1, c2));
7132     }
7133   return val;
7134 }
7135
7136 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
7137        doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7138 Return the corresponding character code in Big5.  */)
7139      (ch)
7140      Lisp_Object ch;
7141 {
7142   int charset, c1, c2, b1, b2;
7143   Lisp_Object val;
7144
7145   CHECK_NUMBER (ch);
7146   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
7147   if (charset == CHARSET_ASCII)
7148     {
7149       val = ch;
7150     }
7151   else if ((charset == charset_big5_1
7152             && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
7153            || (charset == charset_big5_2
7154                && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
7155     {
7156       ENCODE_BIG5 (charset, c1, c2, b1, b2);
7157       XSETFASTINT (val, (b1 << 8) | b2);
7158     }
7159   else
7160     error ("Can't encode to Big5: %d", XFASTINT (ch));
7161   return val;
7162 }
7163 \f
7164 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
7165        Sset_terminal_coding_system_internal, 1, 1, 0,
7166        doc: /* Internal use only.  */)
7167      (coding_system)
7168      Lisp_Object coding_system;
7169 {
7170   CHECK_SYMBOL (coding_system);
7171   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
7172   /* We had better not send unsafe characters to terminal.  */
7173   terminal_coding.mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
7174   /* Character composition should be disabled.  */
7175   terminal_coding.composing = COMPOSITION_DISABLED;
7176   /* Error notification should be suppressed.  */
7177   terminal_coding.suppress_error = 1;
7178   terminal_coding.src_multibyte = 1;
7179   terminal_coding.dst_multibyte = 0;
7180   return Qnil;
7181 }
7182
7183 DEFUN ("set-safe-terminal-coding-system-internal", Fset_safe_terminal_coding_system_internal,
7184        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
7185        doc: /* Internal use only.  */)
7186      (coding_system)
7187      Lisp_Object coding_system;
7188 {
7189   CHECK_SYMBOL (coding_system);
7190   setup_coding_system (Fcheck_coding_system (coding_system),
7191                        &safe_terminal_coding);
7192   /* Character composition should be disabled.  */
7193   safe_terminal_coding.composing = COMPOSITION_DISABLED;
7194   /* Error notification should be suppressed.  */
7195   terminal_coding.suppress_error = 1;
7196   safe_terminal_coding.src_multibyte = 1;
7197   safe_terminal_coding.dst_multibyte = 0;
7198   return Qnil;
7199 }
7200
7201 DEFUN ("terminal-coding-system", Fterminal_coding_system,
7202        Sterminal_coding_system, 0, 0, 0,
7203        doc: /* Return coding system specified for terminal output.  */)
7204      ()
7205 {
7206   return terminal_coding.symbol;
7207 }
7208
7209 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
7210        Sset_keyboard_coding_system_internal, 1, 1, 0,
7211        doc: /* Internal use only.  */)
7212      (coding_system)
7213      Lisp_Object coding_system;
7214 {
7215   CHECK_SYMBOL (coding_system);
7216   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
7217   /* Character composition should be disabled.  */
7218   keyboard_coding.composing = COMPOSITION_DISABLED;
7219   return Qnil;
7220 }
7221
7222 DEFUN ("keyboard-coding-system", Fkeyboard_coding_system,
7223        Skeyboard_coding_system, 0, 0, 0,
7224        doc: /* Return coding system specified for decoding keyboard input.  */)
7225      ()
7226 {
7227   return keyboard_coding.symbol;
7228 }
7229
7230 \f
7231 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7232        Sfind_operation_coding_system,  1, MANY, 0,
7233        doc: /* Choose a coding system for an operation based on the target name.
7234 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7235 DECODING-SYSTEM is the coding system to use for decoding
7236 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7237 for encoding (in case OPERATION does encoding).
7238
7239 The first argument OPERATION specifies an I/O primitive:
7240   For file I/O, `insert-file-contents' or `write-region'.
7241   For process I/O, `call-process', `call-process-region', or `start-process'.
7242   For network I/O, `open-network-stream'.
7243
7244 The remaining arguments should be the same arguments that were passed
7245 to the primitive.  Depending on which primitive, one of those arguments
7246 is selected as the TARGET.  For example, if OPERATION does file I/O,
7247 whichever argument specifies the file name is TARGET.
7248
7249 TARGET has a meaning which depends on OPERATION:
7250   For file I/O, TARGET is a file name.
7251   For process I/O, TARGET is a process name.
7252   For network I/O, TARGET is a service name or a port number
7253
7254 This function looks up what specified for TARGET in,
7255 `file-coding-system-alist', `process-coding-system-alist',
7256 or `network-coding-system-alist' depending on OPERATION.
7257 They may specify a coding system, a cons of coding systems,
7258 or a function symbol to call.
7259 In the last case, we call the function with one argument,
7260 which is a list of all the arguments given to this function.
7261
7262 usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
7263      (nargs, args)
7264      int nargs;
7265      Lisp_Object *args;
7266 {
7267   Lisp_Object operation, target_idx, target, val;
7268   register Lisp_Object chain;
7269
7270   if (nargs < 2)
7271     error ("Too few arguments");
7272   operation = args[0];
7273   if (!SYMBOLP (operation)
7274       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
7275     error ("Invalid first argument");
7276   if (nargs < 1 + XINT (target_idx))
7277     error ("Too few arguments for operation: %s",
7278            SDATA (SYMBOL_NAME (operation)));
7279   /* For write-region, if the 6th argument (i.e. VISIT, the 5th
7280      argument to write-region) is string, it must be treated as a
7281      target file name.  */
7282   if (EQ (operation, Qwrite_region)
7283       && nargs > 5
7284       && STRINGP (args[5]))
7285     target_idx = make_number (4);
7286   target = args[XINT (target_idx) + 1];
7287   if (!(STRINGP (target)
7288         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
7289     error ("Invalid argument %d", XINT (target_idx) + 1);
7290
7291   chain = ((EQ (operation, Qinsert_file_contents)
7292             || EQ (operation, Qwrite_region))
7293            ? Vfile_coding_system_alist
7294            : (EQ (operation, Qopen_network_stream)
7295               ? Vnetwork_coding_system_alist
7296               : Vprocess_coding_system_alist));
7297   if (NILP (chain))
7298     return Qnil;
7299
7300   for (; CONSP (chain); chain = XCDR (chain))
7301     {
7302       Lisp_Object elt;
7303       elt = XCAR (chain);
7304
7305       if (CONSP (elt)
7306           && ((STRINGP (target)
7307                && STRINGP (XCAR (elt))
7308                && fast_string_match (XCAR (elt), target) >= 0)
7309               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
7310         {
7311           val = XCDR (elt);
7312           /* Here, if VAL is both a valid coding system and a valid
7313              function symbol, we return VAL as a coding system.  */
7314           if (CONSP (val))
7315             return val;
7316           if (! SYMBOLP (val))
7317             return Qnil;
7318           if (! NILP (Fcoding_system_p (val)))
7319             return Fcons (val, val);
7320           if (! NILP (Ffboundp (val)))
7321             {
7322               val = call1 (val, Flist (nargs, args));
7323               if (CONSP (val))
7324                 return val;
7325               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7326                 return Fcons (val, val);
7327             }
7328           return Qnil;
7329         }
7330     }
7331   return Qnil;
7332 }
7333
7334 DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
7335        Supdate_coding_systems_internal, 0, 0, 0,
7336        doc: /* Update internal database for ISO2022 and CCL based coding systems.
7337 When values of any coding categories are changed, you must
7338 call this function.  */)
7339      ()
7340 {
7341   int i;
7342
7343   for (i = CODING_CATEGORY_IDX_EMACS_MULE; i < CODING_CATEGORY_IDX_MAX; i++)
7344     {
7345       Lisp_Object val;
7346
7347       val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[i]);
7348       if (!NILP (val))
7349         {
7350           if (! coding_system_table[i])
7351             coding_system_table[i] = ((struct coding_system *)
7352                                       xmalloc (sizeof (struct coding_system)));
7353           setup_coding_system (val, coding_system_table[i]);
7354         }
7355       else if (coding_system_table[i])
7356         {
7357           xfree (coding_system_table[i]);
7358           coding_system_table[i] = NULL;
7359         }
7360     }
7361
7362   return Qnil;
7363 }
7364
7365 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
7366        Sset_coding_priority_internal, 0, 0, 0,
7367        doc: /* Update internal database for the current value of `coding-category-list'.
7368 This function is internal use only.  */)
7369      ()
7370 {
7371   int i = 0, idx;
7372   Lisp_Object val;
7373
7374   val = Vcoding_category_list;
7375
7376   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
7377     {
7378       if (! SYMBOLP (XCAR (val)))
7379         break;
7380       idx = XFASTINT (Fget (XCAR (val), Qcoding_category_index));
7381       if (idx >= CODING_CATEGORY_IDX_MAX)
7382         break;
7383       coding_priorities[i++] = (1 << idx);
7384       val = XCDR (val);
7385     }
7386   /* If coding-category-list is valid and contains all coding
7387      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
7388      the following code saves Emacs from crashing.  */
7389   while (i < CODING_CATEGORY_IDX_MAX)
7390     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
7391
7392   return Qnil;
7393 }
7394
7395 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7396        Sdefine_coding_system_internal, 1, 1, 0,
7397        doc: /* Register CODING-SYSTEM as a base coding system.
7398 This function is internal use only.  */)
7399      (coding_system)
7400      Lisp_Object coding_system;
7401 {
7402   Lisp_Object safe_chars, slot;
7403
7404   if (NILP (Fcheck_coding_system (coding_system)))
7405     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
7406   safe_chars = coding_safe_chars (coding_system);
7407   if (! EQ (safe_chars, Qt) && ! CHAR_TABLE_P (safe_chars))
7408     error ("No valid safe-chars property for %s",
7409            SDATA (SYMBOL_NAME (coding_system)));
7410   if (EQ (safe_chars, Qt))
7411     {
7412       if (NILP (Fmemq (coding_system, XCAR (Vcoding_system_safe_chars))))
7413         XSETCAR (Vcoding_system_safe_chars,
7414                  Fcons (coding_system, XCAR (Vcoding_system_safe_chars)));
7415     }
7416   else
7417     {
7418       slot = Fassq (coding_system, XCDR (Vcoding_system_safe_chars));
7419       if (NILP (slot))
7420         XSETCDR (Vcoding_system_safe_chars,
7421                  nconc2 (XCDR (Vcoding_system_safe_chars),
7422                          Fcons (Fcons (coding_system, safe_chars), Qnil)));
7423       else
7424         XSETCDR (slot, safe_chars);
7425     }
7426   return Qnil;
7427 }
7428
7429 #endif /* emacs */
7430
7431 \f
7432 /*** 9. Post-amble ***/
7433
/* Fill in the byte-classification tables emacs_code_class and
   iso_code_class, set up the static coding structures from Qnil, and
   reset the remaining file-level state (coding_system_table,
   ascii_skip_code, system_eol_type, inhibit_pre_post_conversion).

   Several table entries are first set by a range loop and then
   overridden by a later single assignment, so the statement order
   matters.  */

void
init_coding_once ()
{
  int i;

  /* Emacs' internal format specific initialize routine.  */
  /* 0x00..0x20 are control codes, except that linefeed and carriage
     return get classes of their own just below.  */
  for (i = 0; i <= 0x20; i++)
    emacs_code_class[i] = EMACS_control_code;
  emacs_code_class[0x0A] = EMACS_linefeed_code;
  emacs_code_class[0x0D] = EMACS_carriage_return_code;
  /* 0x21..0x7E are ASCII; 0x7F is a control code again.  */
  for (i = 0x21 ; i < 0x7F; i++)
    emacs_code_class[i] = EMACS_ascii_code;
  emacs_code_class[0x7F] = EMACS_control_code;
  /* 0x80..0xFE start out invalid; the private leading codes are then
     given their classes.  NOTE(review): index 0xFF is not assigned by
     this loop -- confirm that this is intended.  */
  for (i = 0x80; i < 0xFF; i++)
    emacs_code_class[i] = EMACS_invalid_code;
  emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
  emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
  emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
  emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;

  /* ISO2022 specific initialize routine.  */
  /* The loops cover 0x00..0x1F, 0x21..0x7E, 0x80..0x9F and 0xA1..0xFE;
     the four boundary bytes 0x20, 0x7F, 0xA0 and 0xFF are assigned
     separately below.  */
  for (i = 0; i < 0x20; i++)
    iso_code_class[i] = ISO_control_0;
  for (i = 0x21; i < 0x7F; i++)
    iso_code_class[i] = ISO_graphic_plane_0;
  for (i = 0x80; i < 0xA0; i++)
    iso_code_class[i] = ISO_control_1;
  for (i = 0xA1; i < 0xFF; i++)
    iso_code_class[i] = ISO_graphic_plane_1;
  iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
  iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
  /* Individual control functions replace whatever class the loops
     above gave to their byte values.  */
  iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
  iso_code_class[ISO_CODE_SO] = ISO_shift_out;
  iso_code_class[ISO_CODE_SI] = ISO_shift_in;
  iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
  iso_code_class[ISO_CODE_ESC] = ISO_escape;
  iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
  iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
  iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;

  /* Set up each static coding structure from Qnil.  */
  setup_coding_system (Qnil, &keyboard_coding);
  setup_coding_system (Qnil, &terminal_coding);
  setup_coding_system (Qnil, &safe_terminal_coding);
  setup_coding_system (Qnil, &default_buffer_file_coding);

  /* Start with an all-zero coding_system_table.  */
  bzero (coding_system_table, sizeof coding_system_table);

  /* ascii_skip_code is 1 for the bytes 0..127 and 0 for the rest.  */
  bzero (ascii_skip_code, sizeof ascii_skip_code);
  for (i = 0; i < 128; i++)
    ascii_skip_code[i] = 1;

  /* The system end-of-line type is CRLF on MSDOS and WINDOWSNT builds,
     LF elsewhere.  */
#if defined (MSDOS) || defined (WINDOWSNT)
  system_eol_type = CODING_EOL_CRLF;
#else
  system_eol_type = CODING_EOL_LF;
#endif

  inhibit_pre_post_conversion = 0;
}
7493
7494 #ifdef emacs
7495
7496 void
7497 syms_of_coding ()
7498 {
7499   Qtarget_idx = intern ("target-idx");
7500   staticpro (&Qtarget_idx);
7501
7502   Qcoding_system_history = intern ("coding-system-history");
7503   staticpro (&Qcoding_system_history);
7504   Fset (Qcoding_system_history, Qnil);
7505
7506   /* Target FILENAME is the first argument.  */
7507   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
7508   /* Target FILENAME is the third argument.  */
7509   Fput (Qwrite_region, Qtarget_idx, make_number (2));
7510
7511   Qcall_process = intern ("call-process");
7512   staticpro (&Qcall_process);
7513   /* Target PROGRAM is the first argument.  */
7514   Fput (Qcall_process, Qtarget_idx, make_number (0));
7515
7516   Qcall_process_region = intern ("call-process-region");
7517   staticpro (&Qcall_process_region);
7518   /* Target PROGRAM is the third argument.  */
7519   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
7520
7521   Qstart_process = intern ("start-process");
7522   staticpro (&Qstart_process);
7523   /* Target PROGRAM is the third argument.  */
7524   Fput (Qstart_process, Qtarget_idx, make_number (2));
7525
7526   Qopen_network_stream = intern ("open-network-stream");
7527   staticpro (&Qopen_network_stream);
7528   /* Target SERVICE is the fourth argument.  */
7529   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
7530
7531   Qcoding_system = intern ("coding-system");
7532   staticpro (&Qcoding_system);
7533
7534   Qeol_type = intern ("eol-type");
7535   staticpro (&Qeol_type);
7536
7537   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
7538   staticpro (&Qbuffer_file_coding_system);
7539
7540   Qpost_read_conversion = intern ("post-read-conversion");
7541   staticpro (&Qpost_read_conversion);
7542
7543   Qpre_write_conversion = intern ("pre-write-conversion");
7544   staticpro (&Qpre_write_conversion);
7545
7546   Qno_conversion = intern ("no-conversion");
7547   staticpro (&Qno_conversion);
7548
7549   Qundecided = intern ("undecided");
7550   staticpro (&Qundecided);
7551
7552   Qcoding_system_p = intern ("coding-system-p");
7553   staticpro (&Qcoding_system_p);
7554
7555   Qcoding_system_error = intern ("coding-system-error");
7556   staticpro (&Qcoding_system_error);
7557
7558   Fput (Qcoding_system_error, Qerror_conditions,
7559         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
7560   Fput (Qcoding_system_error, Qerror_message,
7561         build_string ("Invalid coding system"));
7562
7563   Qcoding_category = intern ("coding-category");
7564   staticpro (&Qcoding_category);
7565   Qcoding_category_index = intern ("coding-category-index");
7566   staticpro (&Qcoding_category_index);
7567
7568   Vcoding_category_table
7569     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
7570   staticpro (&Vcoding_category_table);
7571   {
7572     int i;
7573     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
7574       {
7575         XVECTOR (Vcoding_category_table)->contents[i]
7576           = intern (coding_category_name[i]);
7577         Fput (XVECTOR (Vcoding_category_table)->contents[i],
7578               Qcoding_category_index, make_number (i));
7579       }
7580   }
7581
7582   Vcoding_system_safe_chars = Fcons (Qnil, Qnil);
7583   staticpro (&Vcoding_system_safe_chars);
7584
7585   Qtranslation_table = intern ("translation-table");
7586   staticpro (&Qtranslation_table);
7587   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
7588
7589   Qtranslation_table_id = intern ("translation-table-id");
7590   staticpro (&Qtranslation_table_id);
7591
7592   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
7593   staticpro (&Qtranslation_table_for_decode);
7594
7595   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
7596   staticpro (&Qtranslation_table_for_encode);
7597
7598   Qsafe_chars = intern ("safe-chars");
7599   staticpro (&Qsafe_chars);
7600
7601   Qchar_coding_system = intern ("char-coding-system");
7602   staticpro (&Qchar_coding_system);
7603
7604   /* Intern this now in case it isn't already done.
7605      Setting this variable twice is harmless.
7606      But don't staticpro it here--that is done in alloc.c.  */
7607   Qchar_table_extra_slots = intern ("char-table-extra-slots");
7608   Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
7609   Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (0));
7610
7611   Qvalid_codes = intern ("valid-codes");
7612   staticpro (&Qvalid_codes);
7613
7614   Qemacs_mule = intern ("emacs-mule");
7615   staticpro (&Qemacs_mule);
7616
7617   Qraw_text = intern ("raw-text");
7618   staticpro (&Qraw_text);
7619
7620   Qutf_8 = intern ("utf-8");
7621   staticpro (&Qutf_8);
7622
7623   Qcoding_system_define_form = intern ("coding-system-define-form");
7624   staticpro (&Qcoding_system_define_form);
7625
7626   defsubr (&Scoding_system_p);
7627   defsubr (&Sread_coding_system);
7628   defsubr (&Sread_non_nil_coding_system);
7629   defsubr (&Scheck_coding_system);
7630   defsubr (&Sdetect_coding_region);
7631   defsubr (&Sdetect_coding_string);
7632   defsubr (&Sfind_coding_systems_region_internal);
7633   defsubr (&Sunencodable_char_position);
7634   defsubr (&Sdecode_coding_region);
7635   defsubr (&Sencode_coding_region);
7636   defsubr (&Sdecode_coding_string);
7637   defsubr (&Sencode_coding_string);
7638   defsubr (&Sdecode_sjis_char);
7639   defsubr (&Sencode_sjis_char);
7640   defsubr (&Sdecode_big5_char);
7641   defsubr (&Sencode_big5_char);
7642   defsubr (&Sset_terminal_coding_system_internal);
7643   defsubr (&Sset_safe_terminal_coding_system_internal);
7644   defsubr (&Sterminal_coding_system);
7645   defsubr (&Sset_keyboard_coding_system_internal);
7646   defsubr (&Skeyboard_coding_system);
7647   defsubr (&Sfind_operation_coding_system);
7648   defsubr (&Supdate_coding_systems_internal);
7649   defsubr (&Sset_coding_priority_internal);
7650   defsubr (&Sdefine_coding_system_internal);
7651
7652   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
7653                doc: /* List of coding systems.
7654
7655 Do not alter the value of this variable manually.  This variable should be
7656 updated by the functions `make-coding-system' and
7657 `define-coding-system-alias'.  */);
7658   Vcoding_system_list = Qnil;
7659
7660   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
7661                doc: /* Alist of coding system names.
7662 Each element is one element list of coding system name.
7663 This variable is given to `completing-read' as TABLE argument.
7664
7665 Do not alter the value of this variable manually.  This variable should be
7666 updated by the functions `make-coding-system' and
7667 `define-coding-system-alias'.  */);
7668   Vcoding_system_alist = Qnil;
7669
7670   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
7671                doc: /* List of coding-categories (symbols) ordered by priority.
7672
7673 On detecting a coding system, Emacs tries code detection algorithms
7674 associated with each coding-category one by one in this order.  When
7675 one algorithm agrees with a byte sequence of source text, the coding
7676 system bound to the corresponding coding-category is selected.  */);
7677   {
7678     int i;
7679
7680     Vcoding_category_list = Qnil;
7681     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
7682       Vcoding_category_list
7683         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
7684                  Vcoding_category_list);
7685   }
7686
7687   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
7688                doc: /* Specify the coding system for read operations.
7689 It is useful to bind this variable with `let', but do not set it globally.
7690 If the value is a coding system, it is used for decoding on read operation.
7691 If not, an appropriate element is used from one of the coding system alists:
7692 There are three such tables, `file-coding-system-alist',
7693 `process-coding-system-alist', and `network-coding-system-alist'.  */);
7694   Vcoding_system_for_read = Qnil;
7695
7696   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
7697                doc: /* Specify the coding system for write operations.
7698 Programs bind this variable with `let', but you should not set it globally.
7699 If the value is a coding system, it is used for encoding of output,
7700 when writing it to a file and when sending it to a file or subprocess.
7701
7702 If this does not specify a coding system, an appropriate element
7703 is used from one of the coding system alists:
7704 There are three such tables, `file-coding-system-alist',
7705 `process-coding-system-alist', and `network-coding-system-alist'.
7706 For output to files, if the above procedure does not specify a coding system,
7707 the value of `buffer-file-coding-system' is used.  */);
7708   Vcoding_system_for_write = Qnil;
7709
7710   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
7711                doc: /* Coding system used in the latest file or process I/O.
7712 Also set by `encode-coding-region', `decode-coding-region',
7713 `encode-coding-string' and `decode-coding-string'.  */);
7714   Vlast_coding_system_used = Qnil;
7715
7716   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
7717                doc: /* *Non-nil means always inhibit code conversion of end-of-line format.
7718 See info node `Coding Systems' and info node `Text and Binary' concerning
7719 such conversion.  */);
7720   inhibit_eol_conversion = 0;
7721
7722   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
7723                doc: /* Non-nil means process buffer inherits coding system of process output.
7724 Bind it to t if the process output is to be treated as if it were a file
7725 read from some filesystem.  */);
7726   inherit_process_coding_system = 0;
7727
7728   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
7729                doc: /* Alist to decide a coding system to use for a file I/O operation.
7730 The format is ((PATTERN . VAL) ...),
7731 where PATTERN is a regular expression matching a file name,
7732 VAL is a coding system, a cons of coding systems, or a function symbol.
7733 If VAL is a coding system, it is used for both decoding and encoding
7734 the file contents.
7735 If VAL is a cons of coding systems, the car part is used for decoding,
7736 and the cdr part is used for encoding.
7737 If VAL is a function symbol, the function must return a coding system
7738 or a cons of coding systems which are used as above.  The function gets
7739 the arguments with which `find-operation-coding-system' was called.
7740
7741 See also the function `find-operation-coding-system'
7742 and the variable `auto-coding-alist'.  */);
7743   Vfile_coding_system_alist = Qnil;
7744
7745   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
7746     doc: /* Alist to decide a coding system to use for a process I/O operation.
7747 The format is ((PATTERN . VAL) ...),
7748 where PATTERN is a regular expression matching a program name,
7749 VAL is a coding system, a cons of coding systems, or a function symbol.
7750 If VAL is a coding system, it is used for both decoding what is received
7751 from the program and encoding what is sent to the program.
7752 If VAL is a cons of coding systems, the car part is used for decoding,
7753 and the cdr part is used for encoding.
7754 If VAL is a function symbol, the function must return a coding system
7755 or a cons of coding systems which are used as above.
7756
7757 See also the function `find-operation-coding-system'.  */);
7758   Vprocess_coding_system_alist = Qnil;
7759
7760   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
7761     doc: /* Alist to decide a coding system to use for a network I/O operation.
7762 The format is ((PATTERN . VAL) ...),
7763 where PATTERN is a regular expression matching a network service name
7764 or is a port number to connect to,
7765 VAL is a coding system, a cons of coding systems, or a function symbol.
7766 If VAL is a coding system, it is used for both decoding what is received
7767 from the network stream and encoding what is sent to the network stream.
7768 If VAL is a cons of coding systems, the car part is used for decoding,
7769 and the cdr part is used for encoding.
7770 If VAL is a function symbol, the function must return a coding system
7771 or a cons of coding systems which are used as above.
7772
7773 See also the function `find-operation-coding-system'.  */);
7774   Vnetwork_coding_system_alist = Qnil;
7775
7776   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
7777                doc: /* Coding system to use with system messages.
7778 Also used for decoding keyboard input on X Window system.  */);
7779   Vlocale_coding_system = Qnil;
7780
7781   /* The eol mnemonics are reset in startup.el system-dependently.  */
7782   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
7783                doc: /* *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
7784   eol_mnemonic_unix = build_string (":");
7785
7786   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
7787                doc: /* *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
7788   eol_mnemonic_dos = build_string ("\\");
7789
7790   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
7791                doc: /* *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
7792   eol_mnemonic_mac = build_string ("/");
7793
7794   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
7795                doc: /* *String displayed in mode line when end-of-line format is not yet determined.  */);
7796   eol_mnemonic_undecided = build_string (":");
7797
7798   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
7799                doc: /* *Non-nil enables character translation while encoding and decoding.  */);
7800   Venable_character_translation = Qt;
7801
7802   DEFVAR_LISP ("standard-translation-table-for-decode",
7803                &Vstandard_translation_table_for_decode,
7804                doc: /* Table for translating characters while decoding.  */);
7805   Vstandard_translation_table_for_decode = Qnil;
7806
7807   DEFVAR_LISP ("standard-translation-table-for-encode",
7808                &Vstandard_translation_table_for_encode,
7809                doc: /* Table for translating characters while encoding.  */);
7810   Vstandard_translation_table_for_encode = Qnil;
7811
7812   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
7813                doc: /* Alist of charsets vs revision numbers.
7814 While encoding, if a charset (car part of an element) is found,
7815 designate it with the escape sequence identifying revision (cdr part of the element).  */);
7816   Vcharset_revision_alist = Qnil;
7817
7818   DEFVAR_LISP ("default-process-coding-system",
7819                &Vdefault_process_coding_system,
7820                doc: /* Cons of coding systems used for process I/O by default.
7821 The car part is used for decoding a process output,
7822 the cdr part is used for encoding a text to be sent to a process.  */);
7823   Vdefault_process_coding_system = Qnil;
7824
7825   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
7826                doc: /* Table of extra Latin codes in the range 128..159 (inclusive).
7827 This is a vector of length 256.
7828 If the Nth element is non-nil, the existence of code N in a file
7829 \(or output of subprocess) doesn't prevent it from being detected as
7830 a coding system of ISO 2022 variant which has a flag
7831 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
7832 or reading output of a subprocess.
7833 Only the 128th through 159th elements have a meaning.  */);
7834   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
7835
7836   DEFVAR_LISP ("select-safe-coding-system-function",
7837                &Vselect_safe_coding_system_function,
7838                doc: /* Function to call to select safe coding system for encoding a text.
7839
7840 If set, this function is called to force a user to select a proper
7841 coding system which can encode the text in the case that a default
7842 coding system used in each operation can't encode the text.
7843
7844 The default value is `select-safe-coding-system' (which see).  */);
7845   Vselect_safe_coding_system_function = Qnil;
7846
7847   DEFVAR_BOOL ("coding-system-require-warning",
7848                &coding_system_require_warning,
7849                doc: /* Internal use only.
7850 If non-nil, on writing a file, `select-safe-coding-system-function' is
7851 called even if `coding-system-for-write' is non-nil.  The command
7852 `universal-coding-system-argument' binds this variable to t temporarily.  */);
7853   coding_system_require_warning = 0;
7854
7855
7856   DEFVAR_BOOL ("inhibit-iso-escape-detection",
7857                &inhibit_iso_escape_detection,
7858                doc: /* If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
7859
7860 By default, on reading a file, Emacs tries to detect how the text is
7861 encoded.  This code detection is sensitive to escape sequences.  If
7862 the sequence is valid as ISO2022, the code is determined as one of
7863 the ISO2022 encodings, and the file is decoded by the corresponding
7864 coding system (e.g. `iso-2022-7bit').
7865
7866 However, there may be a case that you want to read escape sequences in
7867 a file as is.  In such a case, you can set this variable to non-nil.
7868 Then, as the code detection ignores any escape sequences, no file is
7869 detected as encoded in some ISO2022 encoding.  The result is that all
7870 escape sequences become visible in a buffer.
7871
7872 The default value is nil, and it is strongly recommended not to change
7873 it.  That is because many Emacs Lisp source files that contain
7874 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
7875 in Emacs's distribution, and they won't be decoded correctly on
7876 reading if you suppress escape sequence detection.
7877
7878 The other way to read escape sequences in a file without decoding is
7879 to explicitly specify some coding system that doesn't use ISO2022's
7880 escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument].  */);
7881   inhibit_iso_escape_detection = 0;
7882
7883   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
7884                doc: /* Char table for translating self-inserting characters.
7885 This is applied to the result of input methods, not their input.  See also
7886 `keyboard-translate-table'.  */);
7887     Vtranslation_table_for_input = Qnil;
7888 }
7889
7890 char *
7891 emacs_strerror (error_number)
7892      int error_number;
7893 {
7894   char *str;
7895
7896   synchronize_system_messages_locale ();
7897   str = strerror (error_number);
7898
7899   if (! NILP (Vlocale_coding_system))
7900     {
7901       Lisp_Object dec = code_convert_string_norecord (build_string (str),
7902                                                       Vlocale_coding_system,
7903                                                       0);
7904       str = (char *) SDATA (dec);
7905     }
7906
7907   return str;
7908 }
7909
7910 #endif /* emacs */
7911
7912 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
7913    (do not change this comment) */