src/coding.c

   1 /* Coding system handler (conversion, detection, etc.).
   2    Copyright (C) 1995,97,1998,2002,2003  Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4    Copyright (C) 2001,2002,2003  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2, or (at your option)
  11 any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs; see the file COPYING.  If not, write to
  20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  21 Boston, MA 02111-1307, USA.  */
  22
  23 /*** TABLE OF CONTENTS ***
  24
  25   0. General comments
  26   1. Preamble
  27   2. Emacs' internal format (emacs-mule) handlers
  28   3. ISO2022 handlers
  29   4. Shift-JIS and BIG5 handlers
  30   5. CCL handlers
  31   6. End-of-line handlers
  32   7. C library functions
  33   8. Emacs Lisp library functions
  34   9. Post-amble
  35
  36 */
  37
  38 /*** 0. General comments ***/
  39
  40
  41 /*** GENERAL NOTE on CODING SYSTEMS ***
  42
  43   A coding system is an encoding mechanism for one or more character
  44   sets.  Here's a list of coding systems which Emacs can handle.  When
  45   we say "decode", it means converting some other coding system to
  46   Emacs' internal format (emacs-mule), and when we say "encode",
  47   it means converting the coding system emacs-mule to some other
  48   coding system.
  49
  50   0. Emacs' internal format (emacs-mule)
  51
  52   Emacs itself holds a multi-lingual character in buffers and strings
  53   in a special format.  Details are described in section 2.
  54
  55   1. ISO2022
  56
  57   The most famous coding system for multiple character sets.  X's
  58   Compound Text, various EUCs (Extended Unix Code), and coding
  59   systems used in Internet communication such as ISO-2022-JP are
  60   all variants of ISO2022.  Details are described in section 3.
  61
  62   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  63
  64   A coding system to encode character sets: ASCII, JISX0201, and
  65   JISX0208.  Widely used for PC's in Japan.  Details are described in
  66   section 4.
  67
  68   3. BIG5
  69
  70   A coding system to encode the character sets ASCII and Big5.  Widely
  71   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  72   described in section 4.  In this file, when we write "BIG5"
  73   (all uppercase), we mean the coding system, and when we write
  74   "Big5" (capitalized), we mean the character set.
  75
  76   4. Raw text
  77
  78   A coding system for text containing random 8-bit code.  Emacs does
  79   no code conversion on such text except for end-of-line format.
  80
  81   5. Other
  82
  83   If a user wants to read/write text encoded in a coding system not
  84   listed above, he can supply a decoder and an encoder for it as CCL
  85   (Code Conversion Language) programs.  Emacs executes the CCL program
  86   while reading/writing.
  87
  88   Emacs represents a coding system by a Lisp symbol that has a property
  89   `coding-system'.  But, before actually using the coding system, the
  90   information about it is set in a structure of type `struct
  91   coding_system' for rapid processing.  See section 6 for more details.
  92
  93 */
  94
  95 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  96
  97   How end-of-line of text is encoded depends on the operating system.
  98   For instance, Unix's format is just one byte of `line-feed' code,
  99   whereas DOS's format is two-byte sequence of `carriage-return' and
 100   `line-feed' codes.  MacOS's format is usually one byte of
 101   `carriage-return'.
 102
 103   Since text character encoding and end-of-line encoding are
 104   independent, any coding system described above can have any
 105   end-of-line format.  So Emacs has information about end-of-line
 106   format in each coding-system.  See section 6 for more details.
 107
 108 */
 109
 110 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 111
 112   These functions check if a text between SRC and SRC_END is encoded
 113   in the coding system category XXX.  Each returns an integer value in
 114   which appropriate flag bits for the category XXX are set.  The flag
 115   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 116   template for these functions.  If MULTIBYTEP is nonzero, 8-bit codes
 117   of the range 0x80..0x9F are in multibyte form.  */
 118 #if 0
 119 int
 120 detect_coding_emacs_mule (src, src_end, multibytep)
 121      unsigned char *src, *src_end;
 122      int multibytep;
 123 {
 124   ...
 125 }
 126 #endif
 127
 128 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 129
 130   These functions decode SRC_BYTES length of unibyte text at SOURCE
 131   encoded in CODING to Emacs' internal format.  The resulting
 132   multibyte text goes to a place pointed to by DESTINATION, the length
 133   of which should not exceed DST_BYTES.
 134
 135   These functions set the information about original and decoded texts
 136   in the members `produced', `produced_char', `consumed', and
 137   `consumed_char' of the structure *CODING.  They also set the member
 138   `result' to one of CODING_FINISH_XXX indicating how the decoding
 139   finished.
 140
 141   DST_BYTES zero means that the source area and destination area are
 142   overlapped, which means that we can produce a decoded text until it
 143   reaches the head of the not-yet-decoded source text.
 144
 145   Below is a template for these functions.  */
 146 #if 0
 147 static void
 148 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 149      struct coding_system *coding;
 150      unsigned char *source, *destination;
 151      int src_bytes, dst_bytes;
 152 {
 153   ...
 154 }
 155 #endif
 156
 157 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 158
 159   These functions encode SRC_BYTES length text at SOURCE from Emacs'
 160   internal multibyte format to CODING.  The resulting unibyte text
 161   goes to a place pointed to by DESTINATION, the length of which
 162   should not exceed DST_BYTES.
 163
 164   These functions set the information about original and encoded texts
 165   in the members `produced', `produced_char', `consumed', and
 166   `consumed_char' of the structure *CODING.  They also set the member
 167   `result' to one of CODING_FINISH_XXX indicating how the encoding
 168   finished.
 169
 170   DST_BYTES zero means that the source area and destination area are
 171   overlapped, which means that we can produce encoded text until it
 172   reaches the head of the not-yet-encoded source text.
 173
 174   Below is a template for these functions.  */
 175 #if 0
 176 static void
 177 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 178      struct coding_system *coding;
 179      unsigned char *source, *destination;
 180      int src_bytes, dst_bytes;
 181 {
 182   ...
 183 }
 184 #endif
 185
 186 /*** COMMONLY USED MACROS ***/
 187
 188 /* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
 189    get one and two bytes from the source text respectively.
 190    If there are not enough bytes in the source, they jump to
 191    `label_end_of_loop'.  The caller should set variables `coding',
 192    `src' and `src_end' to appropriate pointers in advance.  These
 193    macros are called from decoding routines `decode_coding_XXX', thus
 194    it is assumed that the source text is unibyte.  */
 195
 196 #define ONE_MORE_BYTE(c1)                                       \
 197   do {                                                          \
 198     if (src >= src_end)                                         \
 199       {                                                         \
 200         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 201         goto label_end_of_loop;                                 \
 202       }                                                         \
 203     c1 = *src++;                                                \
 204   } while (0)
 205
 206 #define TWO_MORE_BYTES(c1, c2)                                  \
 207   do {                                                          \
 208     if (src + 1 >= src_end)                                     \
 209       {                                                         \
 210         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 211         goto label_end_of_loop;                                 \
 212       }                                                         \
 213     c1 = *src++;                                                \
 214     c2 = *src++;                                                \
 215   } while (0)
 216
 217
 218 /* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte
 219    form if MULTIBYTEP is nonzero.  In that case a
        LEADING_CODE_8_BIT_CONTROL byte and the byte following it are
        consumed together, and C1 is set to the following byte minus 0x20
        (i.e. the original 8-bit code).

        Every read is bounds-checked against `src_end'.  If the source is
        exhausted, including the case where it ends right after a
        LEADING_CODE_8_BIT_CONTROL byte, CODING_FINISH_INSUFFICIENT_SRC
        is stored in `coding->result' and control jumps to
        `label_end_of_loop'.  The caller must provide `coding', `src'
        and `src_end'.  */
 220
 221 #define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep)           \
 222   do {                                                          \
 223     if (src >= src_end)                                         \
 224       {                                                         \
 225         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 226         goto label_end_of_loop;                                 \
 227       }                                                         \
 228     c1 = *src++;                                                \
 229     if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL)         \
           {                                                         \
             /* Never read the trailing byte past `src_end'.  */    \
             if (src >= src_end)                                     \
               {                                                     \
                 coding->result = CODING_FINISH_INSUFFICIENT_SRC;    \
                 goto label_end_of_loop;                             \
               }                                                     \
 230         c1 = *src++ - 0x20;                                     \
           }                                                         \
 231   } while (0)
 232
 233 /* Set C to the next character at the source text pointed by `src'.
 234    If there are not enough characters in the source, set
 235    `coding->result' to CODING_FINISH_INSUFFICIENT_SRC and jump to
 236    `label_end_of_loop'.  The caller should set variables `coding',
 237    `src', `src_end', and `translation_table' to appropriate values
 238    in advance.  This macro is used in encoding routines
 239    `encode_coding_XXX', thus it assumes that the source text is in
 240    multibyte form except for 8-bit characters.  8-bit characters are
 241    in multibyte form if coding->src_multibyte is nonzero, else they
        are represented by a single byte.

        If `translation_table' is not nil, C is passed through
        translate_char before `src' is advanced past the character.

        The macro declares its own locals `len' and `bytes', so C must
        not be an expression that mentions variables with those names.
        C is assigned (and, with a translation table, read) more than
        once.  */
 242
 243 #define ONE_MORE_CHAR(c)                                        \
 244   do {                                                          \
 245     int len = src_end - src;                                    \
 246     int bytes;                                                  \
 247     if (len <= 0)                                               \
 248       {                                                         \
 249         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 250         goto label_end_of_loop;                                 \
 251       }                                                         \
 252     if (coding->src_multibyte                                   \
 253         || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes))        \
 254       c = STRING_CHAR_AND_LENGTH (src, len, bytes);             \
 255     else                                                        \
 256       c = *src, bytes = 1;                                      \
 257     if (!NILP (translation_table))                              \
 258       c = translate_char (translation_table, c, -1, 0, 0);      \
 259     src += bytes;                                               \
 260   } while (0)
 261
 262
 263 /* Produce a multibyte form of character C to `dst'.  If there's not
 264    enough space at `dst', set `coding->result' to
 265    CODING_FINISH_INSUFFICIENT_DST and jump to `label_end_of_loop'.
 266    The space limit is `dst_end', or `src' when `dst_bytes' is zero.
 267
 268    If we are now in the middle of a composition sequence, the decoded
 269    character may be ALTCHAR (for the current composition).  In that
 270    case, the character goes to coding->cmp_data->data instead of
 271    `dst'.

        More precisely:
          - C is written to `dst' (and `coding->produced_char' is
            incremented) when no composition is in progress, or when the
            composition method is COMPOSITION_RELATIVE or
            COMPOSITION_WITH_RULE.
          - C is recorded with CODING_ADD_COMPOSITION_COMPONENT when a
            composition is in progress and its method is not
            COMPOSITION_RELATIVE.  `coding->composition_rule_follows' is
            then set to whether the method differs from
            COMPOSITION_WITH_ALTCHARS.  That comparison is made after
            the component has been recorded, and recording may itself
            reset `coding->composing' to COMPOSITION_NO.

        C is evaluated several times, so it must not have side effects.
        The caller must provide `coding', `src', `dst', `dst_end' and
        `dst_bytes'.
 272
 273    This macro is used in decoding routines.  */
 272
 273 #define EMIT_CHAR(c)                                                    \
 274   do {                                                                  \
 275     if (! COMPOSING_P (coding)                                          \
 276         || coding->composing == COMPOSITION_RELATIVE                    \
 277         || coding->composing == COMPOSITION_WITH_RULE)                  \
 278       {                                                                 \
 279         int bytes = CHAR_BYTES (c);                                     \
 280         if ((dst + bytes) > (dst_bytes ? dst_end : src))                \
 281           {                                                             \
 282             coding->result = CODING_FINISH_INSUFFICIENT_DST;            \
 283             goto label_end_of_loop;                                     \
 284           }                                                             \
 285         dst += CHAR_STRING (c, dst);                                    \
 286         coding->produced_char++;                                        \
 287       }                                                                 \
 288                                                                         \
 289     if (COMPOSING_P (coding)                                            \
 290         && coding->composing != COMPOSITION_RELATIVE)                   \
 291       {                                                                 \
 292         CODING_ADD_COMPOSITION_COMPONENT (coding, c);                   \
 293         coding->composition_rule_follows                                \
 294           = coding->composing != COMPOSITION_WITH_ALTCHARS;             \
 295       }                                                                 \
 296   } while (0)
 297
 298
 299 #define EMIT_ONE_BYTE(c)                                        \
 300   do {                                                          \
 301     if (dst >= (dst_bytes ? dst_end : src))                     \
 302       {                                                         \
 303         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 304         goto label_end_of_loop;                                 \
 305       }                                                         \
 306     *dst++ = c;                                                 \
 307   } while (0)
 308
 309 #define EMIT_TWO_BYTES(c1, c2)                                  \
 310   do {                                                          \
 311     if (dst + 2 > (dst_bytes ? dst_end : src))                  \
 312       {                                                         \
 313         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 314         goto label_end_of_loop;                                 \
 315       }                                                         \
 316     *dst++ = c1, *dst++ = c2;                                   \
 317   } while (0)
 318
 319 #define EMIT_BYTES(from, to)                                    \
 320   do {                                                          \
 321     if (dst + (to - from) > (dst_bytes ? dst_end : src))        \
 322       {                                                         \
 323         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 324         goto label_end_of_loop;                                 \
 325       }                                                         \
 326     while (from < to)                                           \
 327       *dst++ = *from++;                                         \
 328   } while (0)
 329
 330 \f
 331 /*** 1. Preamble ***/
 332
 333 #ifdef emacs
 334 #include <config.h>
 335 #endif
 336
 337 #include <stdio.h>
 338
 339 #ifdef emacs
 340
 341 #include "lisp.h"
 342 #include "buffer.h"
 343 #include "charset.h"
 344 #include "composite.h"
 345 #include "ccl.h"
 346 #include "coding.h"
 347 #include "window.h"
 348
 349 #else  /* not emacs */
 350
 351 #include "mulelib.h"
 352
 353 #endif /* not emacs */
 354
     /* Lisp objects used as symbols by this file.  Qcoding_system is the
        property under which coding_safe_chars looks up the spec of a
        coding system symbol, and Qsafe_chars is the key it reads from
        the property list stored in that spec.
        NOTE(review): the other Q-variables are presumably symbols named
        after their suffix; their initialization is outside this chunk,
        so confirm it there.  */
 355 Lisp_Object Qcoding_system, Qeol_type;
 356 Lisp_Object Qbuffer_file_coding_system;
 357 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 358 Lisp_Object Qno_conversion, Qundecided;
 359 Lisp_Object Qcoding_system_history;
 360 Lisp_Object Qsafe_chars;
 361 Lisp_Object Qvalid_codes;
 362
     /* Qinsert_file_contents and Qwrite_region are only declared here;
        they are defined in another translation unit.  */
 363 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 364 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 365 Lisp_Object Qstart_process, Qopen_network_stream;
 366 Lisp_Object Qtarget_idx;
 367
     /* NOTE(review): presumably holds a Lisp function used to pick a safe
        coding system; its definition and users are not visible in this
        chunk -- confirm.  */
 368 Lisp_Object Vselect_safe_coding_system_function;
 369
     /* NOTE(review): integer flag; its meaning is not documented in the
        visible part of the file -- confirm where it is read.  */
 370 int coding_system_require_warning;
 371
 372 /* Mnemonic string for each format of end-of-line.  */
 373 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 374 /* Mnemonic string to indicate format of end-of-line is not yet
 375    decided.  */
 376 Lisp_Object eol_mnemonic_undecided;
 377
 378 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 379    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 380 int system_eol_type;
 381
 382 #ifdef emacs
 383
 384 /* Information about which coding system is safe for which chars.
 385    The value has the form (GENERIC-LIST . NON-GENERIC-ALIST).
 386
 387    GENERIC-LIST is a list of generic coding systems which can encode
 388    any characters.
 389
 390    NON-GENERIC-ALIST is an alist of non generic coding systems vs the
 391    corresponding char table that contains safe chars.  */
 392 Lisp_Object Vcoding_system_safe_chars;
 393
     /* NOTE(review): presumably the list and the alist of the known
        coding systems; they are filled in outside this chunk --
        confirm.  */
 394 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 395
     /* NOTE(review): presumably the symbols for the coding-system
        predicate and the coding-system error condition -- confirm.  */
 396 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 397
 398 /* Coding system emacs-mule and raw-text are for converting only
 399    end-of-line format.  */
 400 Lisp_Object Qemacs_mule, Qraw_text;
 401
 402 /* Coding-systems are handed between Emacs Lisp programs and C internal
 403    routines by the following three variables.  */
 404 /* Coding-system for reading files and receiving data from process.  */
 405 Lisp_Object Vcoding_system_for_read;
 406 /* Coding-system for writing files and sending data to process.  */
 407 Lisp_Object Vcoding_system_for_write;
 408 /* Coding-system actually used in the latest I/O.  */
 409 Lisp_Object Vlast_coding_system_used;
 410
 411 /* A vector of length 256 which contains information about special
 412    Latin codes (especially for dealing with Microsoft codes).  */
 413 Lisp_Object Vlatin_extra_code_table;
 414
 415 /* Flag to inhibit code conversion of end-of-line format.  */
 416 int inhibit_eol_conversion;
 417
 418 /* Flag to inhibit ISO2022 escape sequence detection.  */
 419 int inhibit_iso_escape_detection;
 420
 421 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 422 int inherit_process_coding_system;
 423
 424 /* Coding system to be used to encode text for terminal display.  */
 425 struct coding_system terminal_coding;
 426
 427 /* Coding system to be used to encode text for terminal display when
 428    terminal coding system is nil.  */
 429 struct coding_system safe_terminal_coding;
 430
 431 /* Coding system of what is sent from terminal keyboard.  */
 432 struct coding_system keyboard_coding;
 433
 434 /* Default coding system to be used to write a file.  */
 435 struct coding_system default_buffer_file_coding;
 436
     /* NOTE(review): presumably alists that select coding systems for
        file, process and network I/O respectively; their format is not
        visible in this chunk -- confirm.  */
 437 Lisp_Object Vfile_coding_system_alist;
 438 Lisp_Object Vprocess_coding_system_alist;
 439 Lisp_Object Vnetwork_coding_system_alist;
 440
     /* NOTE(review): presumably the coding system associated with the
        current locale -- confirm where it is set and used.  */
 441 Lisp_Object Vlocale_coding_system;
 442
 443 #endif /* emacs */
 444
     /* NOTE(review): presumably the symbols `coding-category' and
        `coding-category-index'; initialized outside this chunk --
        confirm.  */
 445 Lisp_Object Qcoding_category, Qcoding_category_index;
 446
 447 /* List of symbols `coding-category-xxx' ordered by priority.  */
 448 Lisp_Object Vcoding_category_list;
 449
 450 /* Table of coding categories (Lisp symbols).  */
 451 Lisp_Object Vcoding_category_table;
 452
 453 /* Table of the symbol name of each coding category.  The array is
        sized by CODING_CATEGORY_IDX_MAX and fifteen names are listed.
        NOTE(review): presumably entry N names the category whose index
        constant equals N, so the order here has to match the
        declaration of those constants -- confirm against coding.h.  */
 454 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
 455   "coding-category-emacs-mule",
 456   "coding-category-sjis",
 457   "coding-category-iso-7",
 458   "coding-category-iso-7-tight",
 459   "coding-category-iso-8-1",
 460   "coding-category-iso-8-2",
 461   "coding-category-iso-7-else",
 462   "coding-category-iso-8-else",
 463   "coding-category-ccl",
 464   "coding-category-big5",
 465   "coding-category-utf-8",
 466   "coding-category-utf-16-be",
 467   "coding-category-utf-16-le",
 468   "coding-category-raw-text",
 469   "coding-category-binary"
 470 };
 471
 472 /* Table of pointers to the coding system corresponding to each
 473    coding category.  */
 474 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 475
 476 /* Table of coding category masks.  Nth element is a mask for a coding
 477    category of which priority is Nth.  */
 478 static
 479 int coding_priorities[CODING_CATEGORY_IDX_MAX];
 480
 481 /* Flag to tell if we look up translation table on character code
 482    conversion.  */
 483 Lisp_Object Venable_character_translation;
 484 /* Standard translation table to look up on decoding (reading).  */
 485 Lisp_Object Vstandard_translation_table_for_decode;
 486 /* Standard translation table to look up on encoding (writing).  */
 487 Lisp_Object Vstandard_translation_table_for_encode;
 488
     /* NOTE(review): presumably symbols related to translation tables
        (named after their suffix); initialized outside this chunk --
        confirm.  */
 489 Lisp_Object Qtranslation_table;
 490 Lisp_Object Qtranslation_table_id;
 491 Lisp_Object Qtranslation_table_for_decode;
 492 Lisp_Object Qtranslation_table_for_encode;
 493
 494 /* Alist of charsets vs revision number.  */
 495 Lisp_Object Vcharset_revision_alist;
 496
 497 /* Default coding systems used for process I/O.  */
 498 Lisp_Object Vdefault_process_coding_system;
 499
 500 /* Char table for translating Quail and self-inserting input.  */
 501 Lisp_Object Vtranslation_table_for_input;
 502
 503 /* Global flag to tell that we can't call post-read-conversion and
 504    pre-write-conversion functions.  Usually the value is zero, but it
 505    is set to 1 temporarily while such functions are running.  This is
 506    to avoid infinitely recursive calls.  */
 507 static int inhibit_pre_post_conversion;
 508
     /* NOTE(review): presumably the symbol `char-coding-system';
        initialized outside this chunk -- confirm.  */
 509 Lisp_Object Qchar_coding_system;
 510
 511 /* Return the `safe-chars' property of CODING_SYSTEM (a symbol).
 512    The property is looked up under Qsafe_chars in the property list
        kept in slot 3 of the vector stored as the Qcoding_system
        property of CODING_SYSTEM.  If the value found is a char table it
        is returned as is; otherwise t is returned.

        The validity of CODING_SYSTEM is not checked: the caller must
        make sure that its Qcoding_system property really is such a
        vector.  */
 513
 514 Lisp_Object
 515 coding_safe_chars (coding_system)
 516      Lisp_Object coding_system;
 517 {
 518   Lisp_Object coding_spec, plist, safe_chars;
 519
 520   coding_spec = Fget (coding_system, Qcoding_system);
 521   plist = XVECTOR (coding_spec)->contents[3];
 522   safe_chars = Fplist_get (plist, Qsafe_chars);
 523   return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
 524 }
 525
 526 #define CODING_SAFE_CHAR_P(safe_chars, c) \
 527   (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c)))
 528
 529 \f
 530 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 531
 532 /* Emacs' internal format for representation of multiple character
 533    sets is a kind of multi-byte encoding, i.e. characters are
 534    represented by variable-length sequences of one-byte codes.
 535
 536    ASCII characters and control characters (e.g. `tab', `newline') are
 537    represented by one-byte sequences which are their ASCII codes, in
 538    the range 0x00 through 0x7F.
 539
 540    8-bit characters of the range 0x80..0x9F are represented by
 541    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
 542    code + 0x20).
 543
 544    8-bit characters of the range 0xA0..0xFF are represented by
 545    one-byte sequences which are their 8-bit code.
 546
 547    The other characters are represented by a sequence of `base
 548    leading-code', optional `extended leading-code', and one or two
 549    `position-code's.  The length of the sequence is determined by the
 550    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
 551    whereas extended leading-code and position-code take the range 0xA0
 552    through 0xFF.  See `charset.h' for more details about leading-code
 553    and position-code.
 554
 555    --- CODE RANGE of Emacs' internal format ---
 556    character set        range
 557    -------------        -----
 558    ascii                0x00..0x7F
 559    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
 560    eight-bit-graphic    0xA0..0xFF
 561    ELSE                 0x81..0x9D + [0xA0..0xFF]+
 562    ---------------------------------------------
 563
 564    As this is the internal character representation, the format is
 565    usually not used externally (i.e. in a file or in a data sent to a
 566    process).  But, it is possible to have a text externally in this
 567    format (i.e. by encoding by the coding system `emacs-mule').
 568
 569    In that case, a sequence of one-byte codes has a slightly different
 570    form.
 571
 572    Firstly, all characters in eight-bit-control are represented by
 573    one-byte sequences which are their 8-bit code.
 574
 575    Next, character composition data are represented by the byte
 576    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
 577    where,
 578         METHOD is 0xF0 plus one of composition method (enum
 579         composition_method),
 580
 581         BYTES is 0xA0 plus the byte length of these composition data,
 582
 583         CHARS is 0xA0 plus the number of characters composed by these
 584         data,
 585
 586         COMPONENTs are characters of multibyte form or composition
 587         rules encoded by two-byte of ASCII codes.
 588
 589    In addition, for backward compatibility, the following formats are
 590    also recognized as composition data on decoding.
 591
 592    0x80 MSEQ ...
 593    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
 594
 595    Here,
 596         MSEQ is a multibyte form but in these special format:
 597           ASCII: 0xA0 ASCII_CODE+0x80,
 598           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
 599         RULE is a one byte code of the range 0xA0..0xF0 that
 600         represents a composition rule.
 601   */
 602
     /* Table with one `enum emacs_code_class_type' entry for each of the
        256 possible byte values.
        NOTE(review): presumably indexed by a byte of emacs-mule text to
        classify it; the table is filled in outside this chunk --
        confirm.  */
 603 enum emacs_code_class_type emacs_code_class[256];
 604
 605 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 606    Check if a text is encoded in Emacs' internal format.  If it is,
 607    return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.

        SRC and SRC_END delimit the text to examine.  Bytes are fetched
        with ONE_MORE_BYTE_CHECK_MULTIBYTE, so if MULTIBYTEP is nonzero a
        two-byte eight-bit-control sequence is seen as one 8-bit code.

        The text is rejected (0 is returned) as soon as an ESC, SI or SO
        control code is found, or a code in the range 0x81..0x9F does not
        start a sequence accepted by UNIBYTE_STR_AS_MULTIBYTE_P.
        Reaching the end of the text without a rejection counts as a
        match.  */
 608
 609 static int
 610 detect_coding_emacs_mule (src, src_end, multibytep)
 611       unsigned char *src, *src_end;
 612       int multibytep;
 613 {
 614   unsigned char c;
       /* Nonzero while the components of an old-style composition
          sequence (introduced by 0x80) are being scanned.  */
 615   int composing = 0;
 616   /* The byte-fetching macro stores a result code through `coding'
          when the source runs out.  That code is not used here, so a
          throwaway structure is sufficient.  */
 617   struct coding_system dummy_coding;
 618   struct coding_system *coding = &dummy_coding;
 619
 620   while (1)
 621     {
           /* Jumps to label_end_of_loop once the source is exhausted.  */
 622       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
 623
 624       if (composing)
 625         {
               /* Undo the special form of composition components (see the
                  MSEQ description in the section 2 header comment): a
                  byte below 0xA0 ends the composition, 0xA0 introduces
                  an ASCII component stored as ASCII_CODE+0x80, and any
                  other byte is a leading code stored as
                  LEADING_CODE+0x20.  */
 626           if (c < 0xA0)
 627             composing = 0;
 628           else if (c == 0xA0)
 629             {
 630               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
 631               c &= 0x7F;
 632             }
 633           else
 634             c -= 0x20;
 635         }
 636
 637       if (c < 0x20)
 638         {
               /* NOTE(review): presumably these controls are rejected
                  because they indicate ISO2022-style escapes and shifts
                  -- confirm.  */
 639           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 640             return 0;
 641         }
 642       else if (c >= 0x80 && c < 0xA0)
 643         {
 644           if (c == 0x80)
 645             /* Old leading code for a composite character.  */
 646             composing = 1;
 647           else
 648             {
                   /* Validate the sequence that starts at the byte just
                      read and skip over it.  BYTES is presumably set by
                      UNIBYTE_STR_AS_MULTIBYTE_P to the length of that
                      sequence (it is used that way below).
                      NOTE(review): when C was adjusted above (composition
                      component, or two-byte eight-bit-control form), the
                      byte at SRC - 1 differs from C -- confirm that
                      validating from SRC - 1 is intended then.  */
 649               unsigned char *src_base = src - 1;
 650               int bytes;
 651
 652               if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base,
 653                                                bytes))
 654                 return 0;
 655               src = src_base + bytes;
 656             }
 657         }
 658     }
       /* Reached only when the whole text was scanned without rejection.  */
 659  label_end_of_loop:
 660   return CODING_CATEGORY_MASK_EMACS_MULE;
 661 }
 662
 663
 664 /* Open a new composition record in the composition data of CODING.
        The record starts at index `coding->cmp_data->used', which is
        remembered in `coding->cmp_data_start', and takes four slots:
        slot 0 is set to -1, slot 1 to START plus
        `coding->cmp_data->char_offset', slot 2 is not written here, and
        slot 3 to METHOD converted to int.  `used' is then advanced by
        four.  CODING is evaluated several times.  */
 665
 666 #define CODING_ADD_COMPOSITION_START(coding, start, method)     \
 667   do {                                                          \
 668     coding->cmp_data_start = coding->cmp_data->used;            \
 669     coding->cmp_data->data[coding->cmp_data->used] = -1;        \
 670     coding->cmp_data->data[coding->cmp_data->used + 1]          \
 671       = coding->cmp_data->char_offset + start;                  \
 672     coding->cmp_data->data[coding->cmp_data->used + 3]          \
 673       = (int) method;                                           \
 674     coding->cmp_data->used += 4;                                \
 675   } while (0)
 676
 677 /* Finish the composition record that starts at index
        `coding->cmp_data_start' of the composition data of CODING: store
        the number of data entries used since that index in slot 0, and
        END plus `coding->cmp_data->char_offset' in slot 2.  CODING is
        evaluated several times.  */
 678
 679 #define CODING_ADD_COMPOSITION_END(coding, end)                 \
 680   do {                                                          \
 681     coding->cmp_data->data[coding->cmp_data_start]              \
 682       = coding->cmp_data->used - coding->cmp_data_start;        \
 683     coding->cmp_data->data[coding->cmp_data_start + 2]          \
 684       = coding->cmp_data->char_offset + end;                    \
 685   } while (0)
 686
 687 /* Record one COMPONENT (alternate character or composition rule) at
        the end of the composition data of CODING.

        If the current composition record (counted from
        `coding->cmp_data_start') thereby reaches
        COMPOSITION_DATA_MAX_BUNCH_LENGTH entries, the record is closed
        with CODING_ADD_COMPOSITION_END at `coding->produced_char' and
        `coding->composing' is reset to COMPOSITION_NO.

        COMPONENT is evaluated once; CODING is evaluated several
        times.  */
 688
 689 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component)             \
 690   do {                                                                  \
 691     coding->cmp_data->data[coding->cmp_data->used++] = component;       \
 692     if (coding->cmp_data->used - coding->cmp_data_start                 \
 693         == COMPOSITION_DATA_MAX_BUNCH_LENGTH)                           \
 694       {                                                                 \
 695         CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
 696         coding->composing = COMPOSITION_NO;                             \
 697       }                                                                 \
 698   } while (0)
 699
 700
 701 /* Yield the byte pointed to by SRC and advance SRC past it.  If SRC
 702    has already reached SRC_END, yield -1 and leave SRC unchanged.  */
 703
 704 #define SAFE_ONE_MORE_BYTE() (src < src_end ? *src++ : -1)
 705
 706
/* Decode a character represented as a component of composition
   sequence of Emacs 20 style at SRC.  Set C to that character, store
   its multibyte form sequence at P, and set P to the end of that
   sequence.  If no valid character is found, set C to -1.

   The macro uses SRC and SRC_END (through SAFE_ONE_MORE_BYTE) and
   CODING from the scope in which it is expanded.  Note that on the
   failure paths the bytes already read from SRC stay consumed, and
   in the leading-code case the bytes already stored through P are
   not rolled back.  */

#define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p)                \
  do {                                                          \
    int bytes;                                                  \
                                                                \
    c = SAFE_ONE_MORE_BYTE ();                                  \
    /* A negative value means the source is exhausted.  */      \
    if (c < 0)                                                  \
      break;                                                    \
    /* A character head can't be a component here.  */          \
    if (CHAR_HEAD_P (c))                                        \
      c = -1;                                                   \
    else if (c == 0xA0)                                         \
      {                                                         \
        /* 0xA0 introduces a one-byte character: the next    */ \
        /* byte less 0xA0.  A byte below 0xA0 (or the end    */ \
        /* of the source, -1) is invalid.                    */ \
        c = SAFE_ONE_MORE_BYTE ();                              \
        if (c < 0xA0)                                           \
          c = -1;                                               \
        else                                                    \
          {                                                     \
            c -= 0xA0;                                          \
            *p++ = c;                                           \
          }                                                     \
      }                                                         \
    else if (BASE_LEADING_CODE_P (c - 0x20))                    \
      {                                                         \
        /* A leading code shifted up by 0x20.  Store the     */ \
        /* real leading code, then copy the trailing bytes   */ \
        /* that BYTES_BY_CHAR_HEAD announces for it.         */ \
        unsigned char *p0 = p;                                  \
                                                                \
        c -= 0x20;                                              \
        *p++ = c;                                               \
        bytes = BYTES_BY_CHAR_HEAD (c);                         \
        while (--bytes)                                         \
          {                                                     \
            c = SAFE_ONE_MORE_BYTE ();                          \
            if (c < 0)                                          \
              break;                                            \
            *p++ = c;                                           \
          }                                                     \
        /* Accept the bytes stored at P0 only if they form   */ \
        /* a multibyte sequence, or, when CODING->flags is   */ \
        /* set, an 8-bit control leading code followed by a  */ \
        /* non-head byte.                                    */ \
        if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes)      \
            || (coding->flags /* We are recovering a file.  */  \
                && p0[0] == LEADING_CODE_8_BIT_CONTROL          \
                && ! CHAR_HEAD_P (p0[1])))                      \
          c = STRING_CHAR (p0, bytes);                          \
        else                                                    \
          c = -1;                                               \
      }                                                         \
    else                                                        \
      c = -1;                                                   \
  } while (0)
 757
 758
 759 /* Decode a composition rule represented as a component of composition
 760    sequence of Emacs 20 style at SRC.  Set C to the rule.  If not
 761    valid rule is found, set C to -1.  */
 762
 763 #define DECODE_EMACS_MULE_COMPOSITION_RULE(c)           \
 764   do {                                                  \
 765     c = SAFE_ONE_MORE_BYTE ();                          \
 766     c -= 0xA0;                                          \
 767     if (c < 0 || c >= 81)                               \
 768       c = -1;                                           \
 769     else                                                \
 770       {                                                 \
 771         gref = c / 9, nref = c % 9;                     \
 772         c = COMPOSITION_ENCODE_RULE (gref, nref);       \
 773       }                                                 \
 774   } while (0)
 775
 776
 777 /* Decode composition sequence encoded by `emacs-mule' at the source
 778    pointed by SRC.  SRC_END is the end of source.  Store information
 779    of the composition in CODING->cmp_data.
 780
 781    For backward compatibility, decode also a composition sequence of
 782    Emacs 20 style.  In that case, the composition sequence contains
 783    characters that should be extracted into a buffer or string.  Store
 784    those characters at *DESTINATION in multibyte form.
 785
 786    If we encounter an invalid byte sequence, return 0.
 787    If we encounter an insufficient source or destination, or
 788    insufficient space in CODING->cmp_data, return 1.
 789    Otherwise, return consumed bytes in the source.
 790
 791 */
 792 static INLINE int
 793 decode_composition_emacs_mule (coding, src, src_end,
 794                                destination, dst_end, dst_bytes)
 795      struct coding_system *coding;
 796      unsigned char *src, *src_end, **destination, *dst_end;
 797      int dst_bytes;
 798 {
 799   unsigned char *dst = *destination;
 800   int method, data_len, nchars;
 801   unsigned char *src_base = src++;
 802   /* Store components of composition.  */
 803   int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH];
 804   int ncomponent;
 805   /* Store multibyte form of characters to be composed.  This is for
 806      Emacs 20 style composition sequence.  */
 807   unsigned char buf[MAX_COMPOSITION_COMPONENTS * MAX_MULTIBYTE_LENGTH];
 808   unsigned char *bufp = buf;
 809   int c, i, gref, nref;
 810
 811   if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
 812       >= COMPOSITION_DATA_SIZE)
 813     {
 814       coding->result = CODING_FINISH_INSUFFICIENT_CMP;
 815       return -1;
 816     }
 817
 818   ONE_MORE_BYTE (c);
 819   if (c - 0xF0 >= COMPOSITION_RELATIVE
 820            && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS)
 821     {
 822       int with_rule;
 823
 824       method = c - 0xF0;
 825       with_rule = (method == COMPOSITION_WITH_RULE
 826                    || method == COMPOSITION_WITH_RULE_ALTCHARS);
 827       ONE_MORE_BYTE (c);
 828       data_len = c - 0xA0;
 829       if (data_len < 4
 830           || src_base + data_len > src_end)
 831         return 0;
 832       ONE_MORE_BYTE (c);
 833       nchars = c - 0xA0;
 834       if (c < 1)
 835         return 0;
 836       for (ncomponent = 0; src < src_base + data_len; ncomponent++)
 837         {
 838           /* If it is longer than this, it can't be valid.  */
 839           if (ncomponent >= COMPOSITION_DATA_MAX_BUNCH_LENGTH)
 840             return 0;
 841
 842           if (ncomponent % 2 && with_rule)
 843             {
 844               ONE_MORE_BYTE (gref);
 845               gref -= 32;
 846               ONE_MORE_BYTE (nref);
 847               nref -= 32;
 848               c = COMPOSITION_ENCODE_RULE (gref, nref);
 849             }
 850           else
 851             {
 852               int bytes;
 853               if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
 854                   || (coding->flags /* We are recovering a file.  */
 855                       && src[0] == LEADING_CODE_8_BIT_CONTROL
 856                       && ! CHAR_HEAD_P (src[1])))
 857                 c = STRING_CHAR (src, bytes);
 858               else
 859                 c = *src, bytes = 1;
 860               src += bytes;
 861             }
 862           component[ncomponent] = c;
 863         }
 864     }
 865   else
 866     {
 867       /* This may be an old Emacs 20 style format.  See the comment at
 868          the section 2 of this file.  */
 869       while (src < src_end && !CHAR_HEAD_P (*src)) src++;
 870       if (src == src_end
 871           && !(coding->mode & CODING_MODE_LAST_BLOCK))
 872         goto label_end_of_loop;
 873
 874       src_end = src;
 875       src = src_base + 1;
 876       if (c < 0xC0)
 877         {
 878           method = COMPOSITION_RELATIVE;
 879           for (ncomponent = 0; ncomponent < MAX_COMPOSITION_COMPONENTS;)
 880             {
 881               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 882               if (c < 0)
 883                 break;
 884               component[ncomponent++] = c;
 885             }
 886           if (ncomponent < 2)
 887             return 0;
 888           nchars = ncomponent;
 889         }
 890       else if (c == 0xFF)
 891         {
 892           method = COMPOSITION_WITH_RULE;
 893           src++;
 894           DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 895           if (c < 0)
 896             return 0;
 897           component[0] = c;
 898           for (ncomponent = 1;
 899                ncomponent < MAX_COMPOSITION_COMPONENTS * 2 - 1;)
 900             {
 901               DECODE_EMACS_MULE_COMPOSITION_RULE (c);
 902               if (c < 0)
 903                 break;
 904               component[ncomponent++] = c;
 905               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 906               if (c < 0)
 907                 break;
 908               component[ncomponent++] = c;
 909             }
 910           if (ncomponent < 3)
 911             return 0;
 912           nchars = (ncomponent + 1) / 2;
 913         }
 914       else
 915         return 0;
 916     }
 917
 918   if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src))
 919     {
 920       CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method);
 921       for (i = 0; i < ncomponent; i++)
 922         CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]);
 923       CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars);
 924       if (buf < bufp)
 925         {
 926           unsigned char *p = buf;
 927           EMIT_BYTES (p, bufp);
 928           *destination += bufp - buf;
 929           coding->produced_char += nchars;
 930         }
 931       return (src - src_base);
 932     }
 933  label_end_of_loop:
 934   return -1;
 935 }
 936
 937 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
 938
 939 static void
 940 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
 941      struct coding_system *coding;
 942      unsigned char *source, *destination;
 943      int src_bytes, dst_bytes;
 944 {
 945   unsigned char *src = source;
 946   unsigned char *src_end = source + src_bytes;
 947   unsigned char *dst = destination;
 948   unsigned char *dst_end = destination + dst_bytes;
 949   /* SRC_BASE remembers the start position in source in each loop.
 950      The loop will be exited when there's not enough source code, or
 951      when there's not enough destination area to produce a
 952      character.  */
 953   unsigned char *src_base;
 954
 955   coding->produced_char = 0;
 956   while ((src_base = src) < src_end)
 957     {
 958       unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
 959       int bytes;
 960
 961       if (*src == '\r')
 962         {
 963           int c = *src++;
 964
 965           if (coding->eol_type == CODING_EOL_CR)
 966             c = '\n';
 967           else if (coding->eol_type == CODING_EOL_CRLF)
 968             {
 969               ONE_MORE_BYTE (c);
 970               if (c != '\n')
 971                 {
 972                   src--;
 973                   c = '\r';
 974                 }
 975             }
 976           *dst++ = c;
 977           coding->produced_char++;
 978           continue;
 979         }
 980       else if (*src == '\n')
 981         {
 982           if ((coding->eol_type == CODING_EOL_CR
 983                || coding->eol_type == CODING_EOL_CRLF)
 984               && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
 985             {
 986               coding->result = CODING_FINISH_INCONSISTENT_EOL;
 987               goto label_end_of_loop;
 988             }
 989           *dst++ = *src++;
 990           coding->produced_char++;
 991           continue;
 992         }
 993       else if (*src == 0x80 && coding->cmp_data)
 994         {
 995           /* Start of composition data.  */
 996           int consumed  = decode_composition_emacs_mule (coding, src, src_end,
 997                                                          &dst, dst_end,
 998                                                          dst_bytes);
 999           if (consumed < 0)
1000             goto label_end_of_loop;
1001           else if (consumed > 0)
1002             {
1003               src += consumed;
1004               continue;
1005             }
1006           bytes = CHAR_STRING (*src, tmp);
1007           p = tmp;
1008           src++;
1009         }
1010       else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
1011                || (coding->flags /* We are recovering a file.  */
1012                    && src[0] == LEADING_CODE_8_BIT_CONTROL
1013                    && ! CHAR_HEAD_P (src[1])))
1014         {
1015           p = src;
1016           src += bytes;
1017         }
1018       else
1019         {
1020           bytes = CHAR_STRING (*src, tmp);
1021           p = tmp;
1022           src++;
1023         }
1024       if (dst + bytes >= (dst_bytes ? dst_end : src))
1025         {
1026           coding->result = CODING_FINISH_INSUFFICIENT_DST;
1027           break;
1028         }
1029       while (bytes--) *dst++ = *p++;
1030       coding->produced_char++;
1031     }
1032  label_end_of_loop:
1033   coding->consumed = coding->consumed_char = src_base - source;
1034   coding->produced = dst - destination;
1035 }
1036
1037
/* Encode composition data stored at DATA into a special byte sequence
   starting by 0x80, and store it at DST.  Update
   CODING->cmp_data_start and maybe CODING->cmp_data for the next
   call.

   The sequence is built in a local buffer as
        0x80 METHOD COMPONENTS-BYTES COMPOSED-CHARS COMPONENT ...
   where METHOD is 0xF0 + DATA[3], COMPONENTS-BYTES is 0xA0 + the
   length of the whole sequence in bytes, and COMPOSED-CHARS is
   0xA0 + (DATA[2] - DATA[1]).  Components start at DATA[4] and
   DATA[0] is the number of elements used by this composition.  For
   the rule-based methods every second component is a rule, stored
   as the two bytes 0x20 + GREF and 0x20 + NREF.

   If the destination is too short, set CODING->result to
   CODING_FINISH_INSUFFICIENT_DST and jump to label_end_of_loop.
   SRC, DST, DST_END and DST_BYTES are taken from the scope in which
   the macro is used.  */

#define ENCODE_COMPOSITION_EMACS_MULE(coding, data)                     \
  do {                                                                  \
    unsigned char buf[1024];                                            \
    unsigned char *tail = buf + 4;                                      \
    unsigned char *head;                                                \
    int len = data[0];                                                  \
    int i;                                                              \
                                                                        \
    /* Components first; the four header bytes are filled in later.  */ \
    if (data[3] != COMPOSITION_WITH_RULE                                \
        && data[3] != COMPOSITION_WITH_RULE_ALTCHARS)                   \
      {                                                                 \
        for (i = 4; i < len; i++)                                       \
          tail += CHAR_STRING (data[i], tail);                          \
      }                                                                 \
    else                                                                \
      {                                                                 \
        tail += CHAR_STRING (data[4], tail);                            \
        i = 5;                                                          \
        while (i < len)                                                 \
          {                                                             \
            int gref, nref;                                             \
            COMPOSITION_DECODE_RULE (data[i], gref, nref);              \
            *tail++ = 0x20 + gref;                                      \
            *tail++ = 0x20 + nref;                                      \
            tail += CHAR_STRING (data[i + 1], tail);                    \
            i += 2;                                                     \
          }                                                             \
      }                                                                 \
    buf[0] = 0x80;                                                      \
    buf[1] = 0xF0 + data[3];            /* METHOD */                    \
    buf[2] = 0xA0 + (tail - buf);       /* COMPONENTS-BYTES */          \
    buf[3] = 0xA0 + (data[2] - data[1]); /* COMPOSED-CHARS */           \
                                                                        \
    if (dst + (tail - buf) + 4 > (dst_bytes ? dst_end : src))           \
      {                                                                 \
        coding->result = CODING_FINISH_INSUFFICIENT_DST;                \
        goto label_end_of_loop;                                         \
      }                                                                 \
    for (head = buf; head < tail; head++)                               \
      *dst++ = *head;                                                   \
    /* Step to the next composition, moving on to the next chunk of  */ \
    /* composition data when this one is used up.                    */ \
    coding->cmp_data_start += data[0];                                  \
    if (coding->cmp_data_start == coding->cmp_data->used                \
        && coding->cmp_data->next)                                      \
      {                                                                 \
        coding->cmp_data = coding->cmp_data->next;                      \
        coding->cmp_data_start = 0;                                     \
      }                                                                 \
  } while (0)
1087
1088
1089 static void encode_eol P_ ((struct coding_system *, const unsigned char *,
1090                             unsigned char *, int, int));
1091
1092 static void
1093 encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
1094      struct coding_system *coding;
1095      unsigned char *source, *destination;
1096      int src_bytes, dst_bytes;
1097 {
1098   unsigned char *src = source;
1099   unsigned char *src_end = source + src_bytes;
1100   unsigned char *dst = destination;
1101   unsigned char *dst_end = destination + dst_bytes;
1102   unsigned char *src_base;
1103   int c;
1104   int char_offset;
1105   int *data;
1106
1107   Lisp_Object translation_table;
1108
1109   translation_table = Qnil;
1110
1111   /* Optimization for the case that there's no composition.  */
1112   if (!coding->cmp_data || coding->cmp_data->used == 0)
1113     {
1114       encode_eol (coding, source, destination, src_bytes, dst_bytes);
1115       return;
1116     }
1117
1118   char_offset = coding->cmp_data->char_offset;
1119   data = coding->cmp_data->data + coding->cmp_data_start;
1120   while (1)
1121     {
1122       src_base = src;
1123
1124       /* If SRC starts a composition, encode the information about the
1125          composition in advance.  */
1126       if (coding->cmp_data_start < coding->cmp_data->used
1127           && char_offset + coding->consumed_char == data[1])
1128         {
1129           ENCODE_COMPOSITION_EMACS_MULE (coding, data);
1130           char_offset = coding->cmp_data->char_offset;
1131           data = coding->cmp_data->data + coding->cmp_data_start;
1132         }
1133
1134       ONE_MORE_CHAR (c);
1135       if (c == '\n' && (coding->eol_type == CODING_EOL_CRLF
1136                         || coding->eol_type == CODING_EOL_CR))
1137         {
1138           if (coding->eol_type == CODING_EOL_CRLF)
1139             EMIT_TWO_BYTES ('\r', c);
1140           else
1141             EMIT_ONE_BYTE ('\r');
1142         }
1143       else if (SINGLE_BYTE_CHAR_P (c))
1144         {
1145           if (coding->flags && ! ASCII_BYTE_P (c))
1146             {
1147               /* As we are auto saving, retain the multibyte form for
1148                  8-bit chars.  */
1149               unsigned char buf[MAX_MULTIBYTE_LENGTH];
1150               int bytes = CHAR_STRING (c, buf);
1151
1152               if (bytes == 1)
1153                 EMIT_ONE_BYTE (buf[0]);
1154               else
1155                 EMIT_TWO_BYTES (buf[0], buf[1]);
1156             }
1157           else
1158             EMIT_ONE_BYTE (c);
1159         }
1160       else
1161         EMIT_BYTES (src_base, src);
1162       coding->consumed_char++;
1163     }
1164  label_end_of_loop:
1165   coding->consumed = src_base - source;
1166   coding->produced = coding->produced_char = dst - destination;
1167   return;
1168 }
1169
1170 \f
1171 /*** 3. ISO2022 handlers ***/
1172
1173 /* The following note describes the coding system ISO2022 briefly.
1174    Since the intention of this note is to help understand the
1175    functions in this file, some parts are NOT ACCURATE or are OVERLY
1176    SIMPLIFIED.  For thorough understanding, please refer to the
1177    original document of ISO2022.  This is equivalent to the standard
1178    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
1179
1180    ISO2022 provides many mechanisms to encode several character sets
1181    in 7-bit and 8-bit environments.  For 7-bit environments, all text
1182    is encoded using bytes less than 128.  This may make the encoded
1183    text a little bit longer, but the text passes more easily through
1184    several types of gateway, some of which strip off the MSB (Most
1185    Significant Bit).
1186
1187    There are two kinds of character sets: control character sets and
1188    graphic character sets.  The former contain control characters such
1189    as `newline' and `escape' to provide control functions (control
1190    functions are also provided by escape sequences).  The latter
1191    contain graphic characters such as 'A' and '-'.  Emacs recognizes
1192    two control character sets and many graphic character sets.
1193
1194    Graphic character sets are classified into one of the following
1195    four classes, according to the number of bytes (DIMENSION) and
1196    number of characters in one dimension (CHARS) of the set:
1197    - DIMENSION1_CHARS94
1198    - DIMENSION1_CHARS96
1199    - DIMENSION2_CHARS94
1200    - DIMENSION2_CHARS96
1201
1202    In addition, each character set is assigned an identification tag,
1203    unique for each set, called the "final character" (denoted as <F>
1204    hereafter).  The <F> of each character set is decided by ECMA(*)
1205    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
1206    (0x30..0x3F are for private use only).
1207
1208    Note (*): ECMA = European Computer Manufacturers Association
1209
1210    Here are examples of graphic character sets [NAME(<F>)]:
1211         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
1212         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
1213         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
1214         o DIMENSION2_CHARS96 -- none for the moment
1215
1216    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
1217         C0 [0x00..0x1F] -- control character plane 0
1218         GL [0x20..0x7F] -- graphic character plane 0
1219         C1 [0x80..0x9F] -- control character plane 1
1220         GR [0xA0..0xFF] -- graphic character plane 1
1221
1222    A control character set is directly designated and invoked to C0 or
1223    C1 by an escape sequence.  The most common case is that:
1224    - ISO646's  control character set is designated/invoked to C0, and
1225    - ISO6429's control character set is designated/invoked to C1,
1226    and usually these designations/invocations are omitted in encoded
1227    text.  In a 7-bit environment, only C0 can be used, and a control
1228    character for C1 is encoded by an appropriate escape sequence to
1229    fit into the environment.  All control characters for C1 are
1230    defined to have corresponding escape sequences.
1231
1232    A graphic character set is at first designated to one of four
1233    graphic registers (G0 through G3), then these graphic registers are
1234    invoked to GL or GR.  These designations and invocations can be
1235    done independently.  The most common case is that G0 is invoked to
1236    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
1237    these invocations and designations are omitted in encoded text.
1238    In a 7-bit environment, only GL can be used.
1239
1240    When a graphic character set of CHARS94 is invoked to GL, codes
1241    0x20 and 0x7F of the GL area work as control characters SPACE and
1242    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
1243    be used.
1244
1245    There are two ways of invocation: locking-shift and single-shift.
1246    With locking-shift, the invocation lasts until the next different
1247    invocation, whereas with single-shift, the invocation affects the
1248    following character only and doesn't affect the locking-shift
1249    state.  Invocations are done by the following control characters or
1250    escape sequences:
1251
1252    ----------------------------------------------------------------------
1253    abbrev  function                  cntrl escape seq   description
1254    ----------------------------------------------------------------------
1255    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
1256    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
1257    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
1258    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
1259    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
1260    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
1261    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
1262    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
1263    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
1264    ----------------------------------------------------------------------
1265    (*) These are not used by any known coding system.
1266
1267    Control characters for these functions are defined by macros
1268    ISO_CODE_XXX in `coding.h'.
1269
1270    Designations are done by the following escape sequences:
1271    ----------------------------------------------------------------------
1272    escape sequence      description
1273    ----------------------------------------------------------------------
1274    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
1275    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
1276    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
1277    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
1278    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
1279    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
1280    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
1281    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
1282    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
1283    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
1284    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
1285    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
1286    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
1287    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
1288    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
1289    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
1290    ----------------------------------------------------------------------
1291
1292    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
1293    of dimension 1, chars 94, and final character <F>, etc...
1294
1295    Note (*): Although these designations are not allowed in ISO2022,
1296    Emacs accepts them on decoding, and produces them on encoding
1297    CHARS96 character sets in a coding system which is characterized as
1298    7-bit environment, non-locking-shift, and non-single-shift.
1299
1300    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
1301    '(' can be omitted.  We refer to this as "short-form" hereafter.
1302
   Now you may notice that there are a lot of ways of encoding the
   same multilingual text in ISO2022.  Actually, there exist many
   coding systems such as Compound Text (used in X11's inter client
   communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
   (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
   localized platforms), and all of these are variants of ISO2022.
1309
1310    In addition to the above, Emacs handles two more kinds of escape
1311    sequences: ISO6429's direction specification and Emacs' private
1312    sequence for specifying character composition.
1313
1314    ISO6429's direction specification takes the following form:
1315         o CSI ']'      -- end of the current direction
1316         o CSI '0' ']'  -- end of the current direction
1317         o CSI '1' ']'  -- start of left-to-right text
1318         o CSI '2' ']'  -- start of right-to-left text
1319    The control character CSI (0x9B: control sequence introducer) is
1320    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
1321
1322    Character composition specification takes the following form:
1323         o ESC '0' -- start relative composition
1324         o ESC '1' -- end composition
1325         o ESC '2' -- start rule-base composition (*)
1326         o ESC '3' -- start relative composition with alternate chars  (**)
1327         o ESC '4' -- start rule-base composition with alternate chars  (**)
1328   Since these are not standard escape sequences of any ISO standard,
1329   the use of them with these meanings is restricted to Emacs only.
1330
1331   (*) This form is used only in Emacs 20.5 and older versions,
1332   but the newer versions can safely decode it.
1333   (**) This form is used only in Emacs 21.1 and newer versions,
1334   and the older versions can't decode it.
1335
1336   Here's a list of example usages of these composition escape
1337   sequences (categorized by `enum composition_method').
1338
1339   COMPOSITION_RELATIVE:
1340         ESC 0 CHAR [ CHAR ] ESC 1
1341   COMPOSITION_WITH_RULE:
1342         ESC 2 CHAR [ RULE CHAR ] ESC 1
1343   COMPOSITION_WITH_ALTCHARS:
1344         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
1345   COMPOSITION_WITH_RULE_ALTCHARS:
1346         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
1347
/* Table of 256 entries of type `enum iso_code_class_type', i.e. one
   entry per possible byte value.  NOTE(review): presumably indexed by
   a byte to get its ISO2022 code class; confirm against the code
   that fills it in.  */
enum iso_code_class_type iso_code_class[256];
1349
/* Return nonzero if all of the following hold for the coding system
   at index IDX of coding_system_table:
     - the table entry is not null,
     - CHARSET is CHARSET_ASCII, or C satisfies CODING_SAFE_CHAR_P
       for the safe chars of that coding system,
     - the requested designation of CHARSET in that coding system is
       not CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION.
   Unless CHARSET is CHARSET_ASCII, the variable `safe_chars' of the
   enclosing scope is set as a side effect.

   CHARSET is parenthesized in the comparison so that an expression
   argument (e.g. `x & 1') is compared as a whole instead of being
   torn apart by the higher precedence of `=='.  */

#define CHARSET_OK(idx, charset, c)                                     \
  (coding_system_table[idx]                                             \
   && ((charset) == CHARSET_ASCII                                       \
       || (safe_chars = coding_safe_chars (coding_system_table[idx]->symbol), \
           CODING_SAFE_CHAR_P (safe_chars, c)))                         \
   && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
                                              charset)                  \
       != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
1358
/* Return 1 if the initial designation of graphic register 1 in the
   coding system at index IDX of coding_system_table is not negative,
   else 0.  */
#define SHIFT_OUT_OK(idx) \
  (0 <= CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1))
1361
/* Return 1 unless the `composing' member of the coding system at
   index IDX of coding_system_table is COMPOSITION_DISABLED.  */
#define COMPOSITION_OK(idx)     \
  (COMPOSITION_DISABLED != coding_system_table[idx]->composing)
1364
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
   Check if a text is encoded in ISO2022.  If it is, return an
   integer in which the appropriate flag bits among:
        CODING_CATEGORY_MASK_ISO_7
        CODING_CATEGORY_MASK_ISO_7_TIGHT
        CODING_CATEGORY_MASK_ISO_8_1
        CODING_CATEGORY_MASK_ISO_8_2
        CODING_CATEGORY_MASK_ISO_7_ELSE
        CODING_CATEGORY_MASK_ISO_8_ELSE
   are set.  If a code which should never appear in ISO2022 is found,
   return 0.  */
1376
1377 static int
1378 detect_coding_iso2022 (src, src_end, multibytep)
1379      unsigned char *src, *src_end;
1380      int multibytep;
1381 {
1382   int mask = CODING_CATEGORY_MASK_ISO;
1383   int mask_found = 0;
1384   int reg[4], shift_out = 0, single_shifting = 0;
1385   int c, c1, charset;
1386   /* Dummy for ONE_MORE_BYTE.  */
1387   struct coding_system dummy_coding;
1388   struct coding_system *coding = &dummy_coding;
1389   Lisp_Object safe_chars;
1390
1391   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
1392   while (mask && src < src_end)
1393     {
1394       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1395     retry:
1396       switch (c)
1397         {
1398         case ISO_CODE_ESC:
1399           if (inhibit_iso_escape_detection)
1400             break;
1401           single_shifting = 0;
1402           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1403           if (c >= '(' && c <= '/')
1404             {
1405               /* Designation sequence for a charset of dimension 1.  */
1406               ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1407               if (c1 < ' ' || c1 >= 0x80
1408                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
1409                 /* Invalid designation sequence.  Just ignore.  */
1410                 break;
1411               reg[(c - '(') % 4] = charset;
1412             }
1413           else if (c == '$')
1414             {
1415               /* Designation sequence for a charset of dimension 2.  */
1416               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1417               if (c >= '@' && c <= 'B')
1418                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
1419                 reg[0] = charset = iso_charset_table[1][0][c];
1420               else if (c >= '(' && c <= '/')
1421                 {
1422                   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1423                   if (c1 < ' ' || c1 >= 0x80
1424                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
1425                     /* Invalid designation sequence.  Just ignore.  */
1426                     break;
1427                   reg[(c - '(') % 4] = charset;
1428                 }
1429               else
1430                 /* Invalid designation sequence.  Just ignore.  */
1431                 break;
1432             }
1433           else if (c == 'N' || c == 'O')
1434             {
1435               /* ESC <Fe> for SS2 or SS3.  */
1436               mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
1437               break;
1438             }
1439           else if (c >= '0' && c <= '4')
1440             {
1441               /* ESC <Fp> for start/end composition.  */
1442               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7))
1443                 mask_found |= CODING_CATEGORY_MASK_ISO_7;
1444               else
1445                 mask &= ~CODING_CATEGORY_MASK_ISO_7;
1446               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT))
1447                 mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
1448               else
1449                 mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
1450               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_1))
1451                 mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1452               else
1453                 mask &= ~CODING_CATEGORY_MASK_ISO_8_1;
1454               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_2))
1455                 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1456               else
1457                 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1458               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_ELSE))
1459                 mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
1460               else
1461                 mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
1462               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_ELSE))
1463                 mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
1464               else
1465                 mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
1466               break;
1467             }
1468           else
1469             /* Invalid escape sequence.  Just ignore.  */
1470             break;
1471
1472           /* We found a valid designation sequence for CHARSET.  */
1473           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
1474           c = MAKE_CHAR (charset, 0, 0);
1475           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
1476             mask_found |= CODING_CATEGORY_MASK_ISO_7;
1477           else
1478             mask &= ~CODING_CATEGORY_MASK_ISO_7;
1479           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
1480             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
1481           else
1482             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
1483           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
1484             mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
1485           else
1486             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
1487           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
1488             mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
1489           else
1490             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
1491           break;
1492
1493         case ISO_CODE_SO:
1494           if (inhibit_iso_escape_detection)
1495             break;
1496           single_shifting = 0;
1497           if (shift_out == 0
1498               && (reg[1] >= 0
1499                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
1500                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
1501             {
1502               /* Locking shift out.  */
1503               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1504               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1505             }
1506           break;
1507
1508         case ISO_CODE_SI:
1509           if (inhibit_iso_escape_detection)
1510             break;
1511           single_shifting = 0;
1512           if (shift_out == 1)
1513             {
1514               /* Locking shift in.  */
1515               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1516               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1517             }
1518           break;
1519
1520         case ISO_CODE_CSI:
1521           single_shifting = 0;
1522         case ISO_CODE_SS2:
1523         case ISO_CODE_SS3:
1524           {
1525             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
1526
1527             if (inhibit_iso_escape_detection)
1528               break;
1529             if (c != ISO_CODE_CSI)
1530               {
1531                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1532                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1533                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1534                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1535                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1536                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1537                 single_shifting = 1;
1538               }
1539             if (VECTORP (Vlatin_extra_code_table)
1540                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1541               {
1542                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1543                     & CODING_FLAG_ISO_LATIN_EXTRA)
1544                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1545                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1546                     & CODING_FLAG_ISO_LATIN_EXTRA)
1547                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1548               }
1549             mask &= newmask;
1550             mask_found |= newmask;
1551           }
1552           break;
1553
1554         default:
1555           if (c < 0x80)
1556             {
1557               single_shifting = 0;
1558               break;
1559             }
1560           else if (c < 0xA0)
1561             {
1562               single_shifting = 0;
1563               if (VECTORP (Vlatin_extra_code_table)
1564                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1565                 {
1566                   int newmask = 0;
1567
1568                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1569                       & CODING_FLAG_ISO_LATIN_EXTRA)
1570                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1571                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1572                       & CODING_FLAG_ISO_LATIN_EXTRA)
1573                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1574                   mask &= newmask;
1575                   mask_found |= newmask;
1576                 }
1577               else
1578                 return 0;
1579             }
1580           else
1581             {
1582               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
1583                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
1584               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1585               /* Check the length of succeeding codes of the range
1586                  0xA0..0FF.  If the byte length is odd, we exclude
1587                  CODING_CATEGORY_MASK_ISO_8_2.  We can check this only
1588                  when we are not single shifting.  */
1589               if (!single_shifting
1590                   && mask & CODING_CATEGORY_MASK_ISO_8_2)
1591                 {
1592                   int i = 1;
1593
1594                   c = -1;
1595                   while (src < src_end)
1596                     {
1597                       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1598                       if (c < 0xA0)
1599                         break;
1600                       i++;
1601                     }
1602
1603                   if (i & 1 && src < src_end)
1604                     mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1605                   else
1606                     mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1607                   if (c >= 0)
1608                     /* This means that we have read one extra byte.  */
1609                     goto retry;
1610                 }
1611             }
1612           break;
1613         }
1614     }
1615  label_end_of_loop:
1616   return (mask & mask_found);
1617 }
1618
/* Decode the character whose charset is CHARSET, whose 1st position
   code is C1 and whose 2nd position code is C2, and return the
   decoded character code.  If the variable `translation_table' (taken
   from the expansion site) is non-nil, return instead the code
   produced by translate_char with that table.  */

#define DECODE_ISO_CHARACTER(charset, c1, c2)                           \
  (! NILP (translation_table)                                           \
   ? translate_char (translation_table, -1, charset, c1, c2)            \
   : MAKE_CHAR (charset, c1, c2))
1628
/* Set designation state into CODING: designate to graphic register
   REG the charset that ISO_CHARSET_TABLE gives for DIMENSION, CHARS
   and FINAL_CHAR.

   The designation is accepted only if that charset is non-negative
   and either REG is the register CODING requests for it or the
   charset's base character satisfies CODING_SAFE_CHAR_P for
   `safe_chars'.  In every other case -- including FINAL_CHAR outside
   '0'..127 -- control jumps to `label_invalid_code'.

   The expansion site must provide `coding', `safe_chars' and the
   label `label_invalid_code'.  REG and FINAL_CHAR are evaluated
   exactly once, before the macro's own `charset' and `c' come into
   scope, so arbitrary expressions may be passed for them.  */
#define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
  do {                                                                     \
    int designation_reg = (reg);                                           \
    int designation_final = (final_char);                                  \
    int charset, c;                                                        \
                                                                           \
    if (designation_final < '0' || designation_final >= 128)               \
      goto label_invalid_code;                                             \
    charset = ISO_CHARSET_TABLE (make_number (dimension),                  \
                                 make_number (chars),                      \
                                 make_number (designation_final));         \
    c = MAKE_CHAR (charset, 0, 0);                                         \
    if (charset >= 0                                                       \
        && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)        \
            == designation_reg                                             \
            || CODING_SAFE_CHAR_P (safe_chars, c)))                        \
      {                                                                    \
        if (coding->spec.iso2022.last_invalid_designation_register == 0    \
            && designation_reg == 0                                        \
            && charset == CHARSET_ASCII)                                   \
          {                                                                \
            /* We should insert this designation sequence as is so         \
               that it is surely written back to a file.  */               \
            coding->spec.iso2022.last_invalid_designation_register = -1;   \
            goto label_invalid_code;                                       \
          }                                                                \
        coding->spec.iso2022.last_invalid_designation_register = -1;       \
        /* In right-to-left mode use the reverse charset if one exists.  */ \
        if ((coding->mode & CODING_MODE_DIRECTION)                         \
            && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
          charset = CHARSET_REVERSE_CHARSET (charset);                     \
        CODING_SPEC_ISO_DESIGNATION (coding, designation_reg) = charset;   \
      }                                                                    \
    else                                                                   \
      {                                                                    \
        /* Remember which register the rejected designation named.  */     \
        coding->spec.iso2022.last_invalid_designation_register             \
          = designation_reg;                                               \
        goto label_invalid_code;                                           \
      }                                                                    \
  } while (0)
1665
1666 /* Allocate a memory block for storing information about compositions.
1667    The block is chained to the already allocated blocks.  */
1668
1669 void
1670 coding_allocate_composition_data (coding, char_offset)
1671      struct coding_system *coding;
1672      int char_offset;
1673 {
1674   struct composition_data *cmp_data
1675     = (struct composition_data *) xmalloc (sizeof *cmp_data);
1676
1677   cmp_data->char_offset = char_offset;
1678   cmp_data->used = 0;
1679   cmp_data->prev = coding->cmp_data;
1680   cmp_data->next = NULL;
1681   if (coding->cmp_data)
1682     coding->cmp_data->next = cmp_data;
1683   coding->cmp_data = cmp_data;
1684   coding->cmp_data_start = 0;
1685 }
1686
/* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
   ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
   ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
   ESC 3 : altchar composition :  ESC 3 ALT ... ESC 0 CHAR ... ESC 1
   ESC 4 : alt&rule composition : ESC 4 ALT RULE .. ALT ESC 0 CHAR ... ESC 1

   C1 is the byte that followed ESC; it may be any expression.  The
   expansion site must provide `coding', `dst' and the label
   `label_end_of_loop'.  When composition is disabled the two bytes of
   the sequence are stored at DST without any bounds check.  */

#define DECODE_COMPOSITION_START(c1)                                       \
  do {                                                                     \
    if (coding->composing == COMPOSITION_DISABLED)                         \
      {                                                                    \
        /* Pass the escape sequence through as ordinary text.  */          \
        *dst++ = ISO_CODE_ESC;                                             \
        *dst++ = (c1) & 0x7f;                                              \
        coding->produced_char += 2;                                        \
      }                                                                    \
    else if (!COMPOSING_P (coding))                                        \
      {                                                                    \
        /* This is surely the start of a composition.  We must be sure     \
           that coding->cmp_data has enough space to store the             \
           information about the composition.  If not, terminate the       \
           current decoding loop, allocate one more memory block for       \
           coding->cmp_data in the caller, then start the decoding         \
           loop again.  We can't allocate memory here directly because     \
           it may cause buffer/string relocation.  */                      \
        if (!coding->cmp_data                                              \
            || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
                >= COMPOSITION_DATA_SIZE))                                 \
          {                                                                \
            coding->result = CODING_FINISH_INSUFFICIENT_CMP;               \
            goto label_end_of_loop;                                        \
          }                                                                \
        coding->composing = ((c1) == '0' ? COMPOSITION_RELATIVE            \
                             : (c1) == '2' ? COMPOSITION_WITH_RULE         \
                             : (c1) == '3' ? COMPOSITION_WITH_ALTCHARS     \
                             : COMPOSITION_WITH_RULE_ALTCHARS);            \
        CODING_ADD_COMPOSITION_START (coding, coding->produced_char,       \
                                      coding->composing);                  \
        coding->composition_rule_follows = 0;                              \
      }                                                                    \
    else                                                                   \
      {                                                                    \
        /* We are already handling a composition.  If the method is        \
           the following two, the codes following the current escape       \
           sequence are actual characters stored in a buffer.  */          \
        if (coding->composing == COMPOSITION_WITH_ALTCHARS                 \
            || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)        \
          {                                                                \
            coding->composing = COMPOSITION_RELATIVE;                      \
            coding->composition_rule_follows = 0;                          \
          }                                                                \
      }                                                                    \
  } while (0)
1739
/* Handle composition end sequence ESC 1.  C1 is the byte that
   followed ESC.  `coding' and `dst' are taken from the expansion
   site.  */

#define DECODE_COMPOSITION_END(c1)                                      \
  do {                                                                  \
    if (COMPOSING_P (coding))                                           \
      {                                                                 \
        /* Close the composition at the current character count.  */    \
        CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
        coding->composing = COMPOSITION_NO;                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        /* No composition is open: store the sequence itself at DST     \
           (no bounds check is made here).  */                          \
        *dst++ = ISO_CODE_ESC;                                          \
        *dst++ = c1;                                                    \
        coding->produced_char += 2;                                     \
      }                                                                 \
  } while (0)
1756
/* Decode a composition rule from the byte C1 (and maybe one more byte
   from SRC) and store one encoded composition rule in
   coding->cmp_data.

   C1 must be an lvalue: 32 is subtracted from it in place.  `coding'
   and `c2' are taken from the expansion site, as is whatever
   ONE_MORE_BYTE needs.  A value of 93 or more (after the subtraction)
   matches neither format and stores rule 0.  */

#define DECODE_COMPOSITION_RULE(c1)                                     \
  do {                                                                  \
    int encoded_rule = 0;                                               \
                                                                        \
    (c1) -= 32;                                                         \
    if ((c1) < 81)                                                      \
      {                                                                 \
        /* Old format (before ver.21): a single byte holds              \
           GREF * 9 + NREF, where the value 4 stands for 10.  */        \
        int global_ref = (c1) / 9;                                      \
        int new_ref = (c1) % 9;                                         \
                                                                        \
        encoded_rule                                                    \
          = COMPOSITION_ENCODE_RULE (global_ref == 4 ? 10 : global_ref, \
                                     new_ref == 4 ? 10 : new_ref);      \
      }                                                                 \
    else if ((c1) < 93)                                                 \
      {                                                                 \
        /* New format (after ver.21): the second reference point is     \
           carried by one more byte.  */                                \
        ONE_MORE_BYTE (c2);                                             \
        encoded_rule = COMPOSITION_ENCODE_RULE ((c1) - 81, (c2) - 32);  \
      }                                                                 \
    CODING_ADD_COMPOSITION_COMPONENT (coding, encoded_rule);            \
    coding->composition_rule_follows = 0;                               \
  } while (0)
1781
1782
1783 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1784
1785 static void
1786 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1787      struct coding_system *coding;
1788      unsigned char *source, *destination;
1789      int src_bytes, dst_bytes;
1790 {
1791   unsigned char *src = source;
1792   unsigned char *src_end = source + src_bytes;
1793   unsigned char *dst = destination;
1794   unsigned char *dst_end = destination + dst_bytes;
1795   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1796   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1797   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1798   /* SRC_BASE remembers the start position in source in each loop.
1799      The loop will be exited when there's not enough source code
1800      (within macro ONE_MORE_BYTE), or when there's not enough
1801      destination area to produce a character (within macro
1802      EMIT_CHAR).  */
1803   unsigned char *src_base;
1804   int c, charset;
1805   Lisp_Object translation_table;
1806   Lisp_Object safe_chars;
1807
1808   safe_chars = coding_safe_chars (coding->symbol);
1809
1810   if (NILP (Venable_character_translation))
1811     translation_table = Qnil;
1812   else
1813     {
1814       translation_table = coding->translation_table_for_decode;
1815       if (NILP (translation_table))
1816         translation_table = Vstandard_translation_table_for_decode;
1817     }
1818
1819   coding->result = CODING_FINISH_NORMAL;
1820
1821   while (1)
1822     {
1823       int c1, c2;
1824
1825       src_base = src;
1826       ONE_MORE_BYTE (c1);
1827
1828       /* We produce no character or one character.  */
1829       switch (iso_code_class [c1])
1830         {
1831         case ISO_0x20_or_0x7F:
1832           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1833             {
1834               DECODE_COMPOSITION_RULE (c1);
1835               continue;
1836             }
1837           if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
1838             {
1839               /* This is SPACE or DEL.  */
1840               charset = CHARSET_ASCII;
1841               break;
1842             }
1843           /* This is a graphic character, we fall down ...  */
1844
1845         case ISO_graphic_plane_0:
1846           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1847             {
1848               DECODE_COMPOSITION_RULE (c1);
1849               continue;
1850             }
1851           charset = charset0;
1852           break;
1853
1854         case ISO_0xA0_or_0xFF:
1855           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1856               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1857             goto label_invalid_code;
1858           /* This is a graphic character, we fall down ... */
1859
1860         case ISO_graphic_plane_1:
1861           if (charset1 < 0)
1862             goto label_invalid_code;
1863           charset = charset1;
1864           break;
1865
1866         case ISO_control_0:
1867           if (COMPOSING_P (coding))
1868             DECODE_COMPOSITION_END ('1');
1869
1870           /* All ISO2022 control characters in this class have the
1871              same representation in Emacs internal format.  */
1872           if (c1 == '\n'
1873               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1874               && (coding->eol_type == CODING_EOL_CR
1875                   || coding->eol_type == CODING_EOL_CRLF))
1876             {
1877               coding->result = CODING_FINISH_INCONSISTENT_EOL;
1878               goto label_end_of_loop;
1879             }
1880           charset = CHARSET_ASCII;
1881           break;
1882
1883         case ISO_control_1:
1884           if (COMPOSING_P (coding))
1885             DECODE_COMPOSITION_END ('1');
1886           goto label_invalid_code;
1887
1888         case ISO_carriage_return:
1889           if (COMPOSING_P (coding))
1890             DECODE_COMPOSITION_END ('1');
1891
1892           if (coding->eol_type == CODING_EOL_CR)
1893             c1 = '\n';
1894           else if (coding->eol_type == CODING_EOL_CRLF)
1895             {
1896               ONE_MORE_BYTE (c1);
1897               if (c1 != ISO_CODE_LF)
1898                 {
1899                   src--;
1900                   c1 = '\r';
1901                 }
1902             }
1903           charset = CHARSET_ASCII;
1904           break;
1905
1906         case ISO_shift_out:
1907           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1908               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1909             goto label_invalid_code;
1910           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1911           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1912           continue;
1913
1914         case ISO_shift_in:
1915           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1916             goto label_invalid_code;
1917           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1918           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1919           continue;
1920
1921         case ISO_single_shift_2_7:
1922         case ISO_single_shift_2:
1923           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1924             goto label_invalid_code;
1925           /* SS2 is handled as an escape sequence of ESC 'N' */
1926           c1 = 'N';
1927           goto label_escape_sequence;
1928
1929         case ISO_single_shift_3:
1930           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1931             goto label_invalid_code;
1932           /* SS2 is handled as an escape sequence of ESC 'O' */
1933           c1 = 'O';
1934           goto label_escape_sequence;
1935
1936         case ISO_control_sequence_introducer:
1937           /* CSI is handled as an escape sequence of ESC '[' ...  */
1938           c1 = '[';
1939           goto label_escape_sequence;
1940
1941         case ISO_escape:
1942           ONE_MORE_BYTE (c1);
1943         label_escape_sequence:
1944           /* Escape sequences handled by Emacs are invocation,
1945              designation, direction specification, and character
1946              composition specification.  */
1947           switch (c1)
1948             {
1949             case '&':           /* revision of following character set */
1950               ONE_MORE_BYTE (c1);
1951               if (!(c1 >= '@' && c1 <= '~'))
1952                 goto label_invalid_code;
1953               ONE_MORE_BYTE (c1);
1954               if (c1 != ISO_CODE_ESC)
1955                 goto label_invalid_code;
1956               ONE_MORE_BYTE (c1);
1957               goto label_escape_sequence;
1958
1959             case '$':           /* designation of 2-byte character set */
1960               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1961                 goto label_invalid_code;
1962               ONE_MORE_BYTE (c1);
1963               if (c1 >= '@' && c1 <= 'B')
1964                 {       /* designation of JISX0208.1978, GB2312.1980,
1965                            or JISX0208.1980 */
1966                   DECODE_DESIGNATION (0, 2, 94, c1);
1967                 }
1968               else if (c1 >= 0x28 && c1 <= 0x2B)
1969                 {       /* designation of DIMENSION2_CHARS94 character set */
1970                   ONE_MORE_BYTE (c2);
1971                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1972                 }
1973               else if (c1 >= 0x2C && c1 <= 0x2F)
1974                 {       /* designation of DIMENSION2_CHARS96 character set */
1975                   ONE_MORE_BYTE (c2);
1976                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1977                 }
1978               else
1979                 goto label_invalid_code;
1980               /* We must update these variables now.  */
1981               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1982               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1983               continue;
1984
1985             case 'n':           /* invocation of locking-shift-2 */
1986               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1987                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1988                 goto label_invalid_code;
1989               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1990               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1991               continue;
1992
1993             case 'o':           /* invocation of locking-shift-3 */
1994               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1995                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1996                 goto label_invalid_code;
1997               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1998               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1999               continue;
2000
2001             case 'N':           /* invocation of single-shift-2 */
2002               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2003                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
2004                 goto label_invalid_code;
2005               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
2006               ONE_MORE_BYTE (c1);
2007               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2008                 goto label_invalid_code;
2009               break;
2010
2011             case 'O':           /* invocation of single-shift-3 */
2012               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2013                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
2014                 goto label_invalid_code;
2015               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
2016               ONE_MORE_BYTE (c1);
2017               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2018                 goto label_invalid_code;
2019               break;
2020
2021             case '0': case '2': case '3': case '4': /* start composition */
2022               DECODE_COMPOSITION_START (c1);
2023               continue;
2024
2025             case '1':           /* end composition */
2026               DECODE_COMPOSITION_END (c1);
2027               continue;
2028
2029             case '[':           /* specification of direction */
2030               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
2031                 goto label_invalid_code;
2032               /* For the moment, nested direction is not supported.
2033                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
2034                  left-to-right, and nonzero means right-to-left.  */
2035               ONE_MORE_BYTE (c1);
2036               switch (c1)
2037                 {
2038                 case ']':       /* end of the current direction */
2039                   coding->mode &= ~CODING_MODE_DIRECTION;
2040
2041                 case '0':       /* end of the current direction */
2042                 case '1':       /* start of left-to-right direction */
2043                   ONE_MORE_BYTE (c1);
2044                   if (c1 == ']')
2045                     coding->mode &= ~CODING_MODE_DIRECTION;
2046                   else
2047                     goto label_invalid_code;
2048                   break;
2049
2050                 case '2':       /* start of right-to-left direction */
2051                   ONE_MORE_BYTE (c1);
2052                   if (c1 == ']')
2053                     coding->mode |= CODING_MODE_DIRECTION;
2054                   else
2055                     goto label_invalid_code;
2056                   break;
2057
2058                 default:
2059                   goto label_invalid_code;
2060                 }
2061               continue;
2062
2063             case '%':
2064               if (COMPOSING_P (coding))
2065                 DECODE_COMPOSITION_END ('1');
2066               ONE_MORE_BYTE (c1);
2067               if (c1 == '/')
2068                 {
2069                   /* CTEXT extended segment:
2070                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
2071                      We keep these bytes as is for the moment.
2072                      They may be decoded by post-read-conversion.  */
2073                   int dim, M, L;
2074                   int size, required;
2075                   int produced_chars;
2076
2077                   ONE_MORE_BYTE (dim);
2078                   ONE_MORE_BYTE (M);
2079                   ONE_MORE_BYTE (L);
2080                   size = ((M - 128) * 128) + (L - 128);
2081                   required = 8 + size * 2;
2082                   if (dst + required > (dst_bytes ? dst_end : src))
2083                     goto label_end_of_loop;
2084                   *dst++ = ISO_CODE_ESC;
2085                   *dst++ = '%';
2086                   *dst++ = '/';
2087                   *dst++ = dim;
2088                   produced_chars = 4;
2089                   dst += CHAR_STRING (M, dst), produced_chars++;
2090                   dst += CHAR_STRING (L, dst), produced_chars++;
2091                   while (size-- > 0)
2092                     {
2093                       ONE_MORE_BYTE (c1);
2094                       dst += CHAR_STRING (c1, dst), produced_chars++;
2095                     }
2096                   coding->produced_char += produced_chars;
2097                 }
2098               else if (c1 == 'G')
2099                 {
2100                   unsigned char *d = dst;
2101                   int produced_chars;
2102
2103                   /* XFree86 extension for embedding UTF-8 in CTEXT:
2104                      ESC % G --UTF-8-BYTES-- ESC % @
2105                      We keep these bytes as is for the moment.
2106                      They may be decoded by post-read-conversion.  */
2107                   if (d + 6 > (dst_bytes ? dst_end : src))
2108                     goto label_end_of_loop;
2109                   *d++ = ISO_CODE_ESC;
2110                   *d++ = '%';
2111                   *d++ = 'G';
2112                   produced_chars = 3;
2113                   while (d + 1 < (dst_bytes ? dst_end : src))
2114                     {
2115                       ONE_MORE_BYTE (c1);
2116                       if (c1 == ISO_CODE_ESC
2117                           && src + 1 < src_end
2118                           && src[0] == '%'
2119                           && src[1] == '@')
2120                         break;
2121                       d += CHAR_STRING (c1, d), produced_chars++;
2122                     }
2123                   if (d + 3 > (dst_bytes ? dst_end : src))
2124                     goto label_end_of_loop;
2125                   *d++ = ISO_CODE_ESC;
2126                   *d++ = '%';
2127                   *d++ = '@';
2128                   dst = d;
2129                   coding->produced_char += produced_chars + 3;
2130                 }
2131               else
2132                 goto label_invalid_code;
2133               continue;
2134
2135             default:
2136               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
2137                 goto label_invalid_code;
2138               if (c1 >= 0x28 && c1 <= 0x2B)
2139                 {       /* designation of DIMENSION1_CHARS94 character set */
2140                   ONE_MORE_BYTE (c2);
2141                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
2142                 }
2143               else if (c1 >= 0x2C && c1 <= 0x2F)
2144                 {       /* designation of DIMENSION1_CHARS96 character set */
2145                   ONE_MORE_BYTE (c2);
2146                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
2147                 }
2148               else
2149                 goto label_invalid_code;
2150               /* We must update these variables now.  */
2151               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
2152               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
2153               continue;
2154             }
2155         }
2156
2157       /* Now we know CHARSET and 1st position code C1 of a character.
2158          Produce a multibyte sequence for that character while getting
2159          2nd position code C2 if necessary.  */
2160       if (CHARSET_DIMENSION (charset) == 2)
2161         {
2162           ONE_MORE_BYTE (c2);
2163           if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0)
2164             /* C2 is not in a valid range.  */
2165             goto label_invalid_code;
2166         }
2167       c = DECODE_ISO_CHARACTER (charset, c1, c2);
2168       EMIT_CHAR (c);
2169       continue;
2170
2171     label_invalid_code:
2172       coding->errors++;
2173       if (COMPOSING_P (coding))
2174         DECODE_COMPOSITION_END ('1');
2175       src = src_base;
2176       c = *src++;
2177       EMIT_CHAR (c);
2178     }
2179
2180  label_end_of_loop:
2181   coding->consumed = coding->consumed_char = src_base - source;
2182   coding->produced = dst - destination;
2183   return;
2184 }
2185
2186
2187 /* ISO2022 encoding stuff.  */
2188
2189 /*
2190    It is not enough to say just "ISO2022" on encoding, we have to
2191    specify more details.  In Emacs, each ISO2022 coding system
2192    variant has the following specifications:
2193         1. Initial designation to G0 through G3.
2194         2. Allows short-form designation?
2195         3. ASCII should be designated to G0 before control characters?
2196         4. ASCII should be designated to G0 at end of line?
2197         5. 7-bit environment or 8-bit environment?
2198         6. Use locking-shift?
2199         7. Use Single-shift?
2200    And the following two are only for Japanese:
2201         8. Use ASCII in place of JIS0201-1976-Roman?
2202         9. Use JISX0208-1983 in place of JISX0208-1978?
2203    These specifications are encoded in `coding->flags' as flag bits
2204    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
2205    details.
2206 */
2207
/* Emit at DST the escape sequence that designates CHARSET to graphic
   register REG, advance DST past the bytes written, and record the
   designation in CODING.

   If CODING holds a revision number below 255 for CHARSET, the
   sequence ESC & <'@' + revision> is emitted first.  The intermediate
   byte is taken from "()*+" when CHARSET has 94 characters and from
   ",-./" otherwise, indexed by REG.  It is left out only in the short
   form: CODING has CODING_FLAG_ISO_SHORT_FORM, the dimension of
   CHARSET is not 1, CHARSET has 94 characters, REG is 0, and the
   <final-char> is '@', 'A', or 'B'.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    const char *desig_inter_94 = "()*+";                                \
    const char *desig_inter_96 = ",-./";                                \
    unsigned char desig_final = CHARSET_ISO_FINAL_CHAR (charset);       \
    int desig_revision                                                  \
      = CODING_SPEC_ISO_REVISION_NUMBER (coding, charset);              \
    int desig_is_94 = CHARSET_CHARS (charset) == 94;                    \
    int desig_multi = CHARSET_DIMENSION (charset) != 1;                 \
    /* Nonzero when the short form applies (no intermediate byte).  */  \
    int desig_short                                                     \
      = (desig_multi && desig_is_94                                     \
         && (coding->flags & CODING_FLAG_ISO_SHORT_FORM)                \
         && (reg) == 0                                                  \
         && desig_final >= '@' && desig_final <= 'B');                  \
                                                                        \
    if (desig_revision < 255)                                           \
      {                                                                 \
        *dst++ = ISO_CODE_ESC;                                          \
        *dst++ = '&';                                                   \
        *dst++ = '@' + desig_revision;                                  \
      }                                                                 \
    *dst++ = ISO_CODE_ESC;                                              \
    if (desig_multi)                                                    \
      *dst++ = '$';                                                     \
    if (! desig_is_94)                                                  \
      *dst++ = (unsigned char) (desig_inter_96[reg]);                   \
    else if (! desig_short)                                             \
      *dst++ = (unsigned char) (desig_inter_94[reg]);                   \
    *dst++ = desig_final;                                               \
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
  } while (0)
2250
/* Each of the following two macros emits an ISO2022 single-shift
   function (single-shift-2 or single-shift-3) at DST and marks CODING
   as being in the single-shifting state.  If CODING has
   CODING_FLAG_ISO_SEVEN_BITS, the two-byte escape sequence (ESC N or
   ESC O) is written; otherwise the single control code ISO_CODE_SS2
   or ISO_CODE_SS3 is written.  */

#define ENCODE_SINGLE_SHIFT_2                                   \
  do {                                                          \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      *dst++ = ISO_CODE_SS2;                                    \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'N';                                           \
      }                                                         \
  } while (0)

#define ENCODE_SINGLE_SHIFT_3                                   \
  do {                                                          \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      *dst++ = ISO_CODE_SS3;                                    \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'O';                                           \
      }                                                         \
  } while (0)
2272
/* Each of the following four macros emits an ISO2022 locking-shift
   function at DST and records in CODING the graphic register that is
   now invoked to graphic plane 0: shift-in (register 0), shift-out
   (register 1), locking-shift-2 (ESC n, register 2), and
   locking-shift-3 (ESC o, register 3).  */

#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;         \
    *dst++ = ISO_CODE_SI;                               \
  } while (0)

#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;         \
    *dst++ = ISO_CODE_SO;                               \
  } while (0)

#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;         \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'n';                                       \
  } while (0)

#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;         \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'o';                                       \
  } while (0)
2300
/* Emit at DST the one-byte code for position-code C1 of the
   DIMENSION1 character set CHARSET.

   In the single-shifting state, C1 is written with the high bit
   cleared if CODING has CODING_FLAG_ISO_SEVEN_BITS and with the high
   bit set otherwise, and the single-shifting state is left.
   Otherwise C1 is written with the high bit cleared if CHARSET is the
   charset of graphic plane 0, or with the high bit set if it is the
   charset of graphic plane 1.  If CHARSET is on neither plane,
   encode_invocation_designation is called to emit the needed
   designation and invocation codes and the checks are repeated.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        *dst++ = ((coding->flags & CODING_FLAG_ISO_SEVEN_BITS)          \
                  ? ((c1) & 0x7F) : ((c1) | 0x80));                     \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))           \
      {                                                                 \
        *dst++ = (c1) & 0x7F;                                           \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))           \
      {                                                                 \
        *dst++ = (c1) | 0x80;                                           \
        break;                                                          \
      }                                                                 \
    /* CHARSET is on no graphic plane yet: invoke it (designating it   \
       to a graphic register first if needed), then go around again    \
       to produce the character itself.  */                            \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
2333
/* Emit at DST the two-byte code for position-codes C1 and C2 of the
   DIMENSION2 character set CHARSET.

   In the single-shifting state, both bytes are written with the high
   bit cleared if CODING has CODING_FLAG_ISO_SEVEN_BITS and with the
   high bit set otherwise, and the single-shifting state is left.
   Otherwise both bytes are written with the high bit cleared if
   CHARSET is the charset of graphic plane 0, or with the high bit set
   if it is the charset of graphic plane 1.  If CHARSET is on neither
   plane, encode_invocation_designation is called to emit the needed
   designation and invocation codes and the checks are repeated.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          {                                                             \
            *dst++ = (c1) & 0x7F;                                       \
            *dst++ = (c2) & 0x7F;                                       \
          }                                                             \
        else                                                            \
          {                                                             \
            *dst++ = (c1) | 0x80;                                       \
            *dst++ = (c2) | 0x80;                                       \
          }                                                             \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))           \
      {                                                                 \
        *dst++ = (c1) & 0x7F;                                           \
        *dst++ = (c2) & 0x7F;                                           \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))           \
      {                                                                 \
        *dst++ = (c1) | 0x80;                                           \
        *dst++ = (c2) | 0x80;                                           \
        break;                                                          \
      }                                                                 \
    /* CHARSET is on no graphic plane yet: invoke it (designating it   \
       to a graphic register first if needed), then go around again    \
       to produce the character itself.  */                            \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
2366
/* Encode character C at DST.  C is split into its charset and
   position codes with SPLIT_CHAR.  For a defined charset the codes
   are emitted through ENCODE_ISO_CHARACTER_DIMENSION1 or
   ENCODE_ISO_CHARACTER_DIMENSION2 according to the dimension of the
   charset, after two substitutions: CHARSET_ASCII becomes
   charset_latin_jisx0201 when CODING_FLAG_ISO_USE_ROMAN is set, and
   charset_jisx0208 becomes charset_jisx0208_1978 when
   CODING_FLAG_ISO_USE_OLDJIS is set.  For an undefined charset the
   position codes are written as they are (the second one only if it
   is not negative).  */

#define ENCODE_ISO_CHARACTER(c)                                         \
  do {                                                                  \
    int iso_charset, iso_c1, iso_c2;                                    \
                                                                        \
    SPLIT_CHAR (c, iso_charset, iso_c1, iso_c2);                        \
    if (! CHARSET_DEFINED_P (iso_charset))                              \
      {                                                                 \
        *dst++ = iso_c1;                                                \
        if (iso_c2 >= 0)                                                \
          *dst++ = iso_c2;                                              \
      }                                                                 \
    else if (CHARSET_DIMENSION (iso_charset) != 1)                      \
      {                                                                 \
        if (iso_charset == charset_jisx0208                             \
            && (coding->flags & CODING_FLAG_ISO_USE_OLDJIS))            \
          iso_charset = charset_jisx0208_1978;                          \
        ENCODE_ISO_CHARACTER_DIMENSION2 (iso_charset, iso_c1, iso_c2);  \
      }                                                                 \
    else                                                                \
      {                                                                 \
        if (iso_charset == CHARSET_ASCII                                \
            && (coding->flags & CODING_FLAG_ISO_USE_ROMAN))             \
          iso_charset = charset_latin_jisx0201;                         \
        ENCODE_ISO_CHARACTER_DIMENSION1 (iso_charset, iso_c1);          \
      }                                                                 \
  } while (0)
2396
2397
/* Do not encode character C itself; encode
   CODING_REPLACEMENT_CHARACTER in its place, once if the width of
   C's charset is at most 1 and twice if it is greater.  */

#define ENCODE_UNSAFE_CHARACTER(c)                                      \
  do {                                                                  \
    int unsafe_repeat = CHARSET_WIDTH (CHAR_CHARSET (c)) > 1 ? 2 : 1;   \
                                                                        \
    while (unsafe_repeat-- > 0)                                         \
      ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER);              \
  } while (0)
2406
2407
2408 /* Produce designation and invocation codes at a place pointed by DST
2409    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
2410    Return new DST.  */
2411
2412 unsigned char *
2413 encode_invocation_designation (charset, coding, dst)
2414      int charset;
2415      struct coding_system *coding;
2416      unsigned char *dst;
2417 {
2418   int reg;                      /* graphic register number */
2419
2420   /* At first, check designations.  */
2421   for (reg = 0; reg < 4; reg++)
2422     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
2423       break;
2424
2425   if (reg >= 4)
2426     {
2427       /* CHARSET is not yet designated to any graphic registers.  */
2428       /* At first check the requested designation.  */
2429       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2430       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
2431         /* Since CHARSET requests no special designation, designate it
2432            to graphic register 0.  */
2433         reg = 0;
2434
2435       ENCODE_DESIGNATION (charset, reg, coding);
2436     }
2437
2438   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
2439       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
2440     {
2441       /* Since the graphic register REG is not invoked to any graphic
2442          planes, invoke it to graphic plane 0.  */
2443       switch (reg)
2444         {
2445         case 0:                 /* graphic register 0 */
2446           ENCODE_SHIFT_IN;
2447           break;
2448
2449         case 1:                 /* graphic register 1 */
2450           ENCODE_SHIFT_OUT;
2451           break;
2452
2453         case 2:                 /* graphic register 2 */
2454           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2455             ENCODE_SINGLE_SHIFT_2;
2456           else
2457             ENCODE_LOCKING_SHIFT_2;
2458           break;
2459
2460         case 3:                 /* graphic register 3 */
2461           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2462             ENCODE_SINGLE_SHIFT_3;
2463           else
2464             ENCODE_LOCKING_SHIFT_3;
2465           break;
2466         }
2467     }
2468
2469   return dst;
2470 }
2471
/* Emit at DST the two bytes that stand for the encoded composition
   rule RULE.  RULE is taken apart with COMPOSITION_DECODE_RULE; the
   first byte is 32 + 81 plus the first decoded value, the second byte
   is 32 plus the second decoded value.  */

#define ENCODE_COMPOSITION_RULE(rule)                           \
  do {                                                          \
    int rule_gref, rule_nref;                                   \
                                                                \
    COMPOSITION_DECODE_RULE (rule, rule_gref, rule_nref);       \
    *dst++ = (32 + 81) + rule_gref;                             \
    *dst++ = 32 + rule_nref;                                    \
  } while (0)
2481
/* Emit at DST the sequence that opens a composition (ESC 0, ESC 3,
   or ESC 4).  DATA points to an array of integers describing the
   composition; see the comment in coding.h for its format.  DATA[3]
   becomes the current composition method of CODING: ESC 0 is written
   for COMPOSITION_RELATIVE, ESC 3 for COMPOSITION_WITH_ALTCHARS, and
   ESC 4 for any other method.  For the latter two, CODING is also set
   up to walk the components: cmp_data_index is placed 4 entries past
   cmp_data_start and composition_rule_follows is cleared.  */

#define ENCODE_COMPOSITION_START(coding, data)                          \
  do {                                                                  \
    *dst++ = ISO_CODE_ESC;                                              \
    coding->composing = data[3];                                        \
    if (coding->composing != COMPOSITION_RELATIVE)                      \
      {                                                                 \
        coding->composition_rule_follows = 0;                           \
        coding->cmp_data_index = coding->cmp_data_start + 4;            \
        if (coding->composing == COMPOSITION_WITH_ALTCHARS)             \
          *dst++ = '3';                                                 \
        else                                                            \
          *dst++ = '4';                                                 \
      }                                                                 \
    else                                                                \
      *dst++ = '0';                                                     \
  } while (0)
2501
/* Emit ESC 1 at DST to close the current composition and mark CODING
   as no longer composing.  cmp_data_start is advanced by DATA[0]; if
   that reaches the `used' count of the current composition data and
   a next composition data exists, CODING moves on to it and
   cmp_data_start restarts from 0.  */

#define ENCODE_COMPOSITION_END(coding, data)                    \
  do {                                                          \
    coding->composing = COMPOSITION_NO;                         \
    *dst++ = ISO_CODE_ESC;                                      \
    *dst++ = '1';                                               \
    coding->cmp_data_start += data[0];                          \
    if (coding->cmp_data->next                                  \
        && coding->cmp_data_start == coding->cmp_data->used)    \
      {                                                         \
        coding->cmp_data_start = 0;                             \
        coding->cmp_data = coding->cmp_data->next;              \
      }                                                         \
  } while (0)
2517
/* Emit the composition start sequence ESC 0 at DST and switch CODING
   to COMPOSITION_RELATIVE.  Here the sequence does not open a new
   composition: it marks that the components (alternate chars and
   composition rules) of the current composition have just been
   produced and that the actual text follows in SRC.  */

#define ENCODE_COMPOSITION_FAKE_START(coding)           \
  do {                                                  \
    coding->composing = COMPOSITION_RELATIVE;           \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = '0';                                       \
  } while (0)
2529
/* The following three macros produce codes for indicating direction
   of text.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER emits the control sequence
   introducer at DST: the two bytes ESC [ if CODING has
   CODING_FLAG_ISO_SEVEN_BITS, the single byte ISO_CODE_CSI otherwise.
   `coding->flags' holds several flag bits at once, so the bit is
   tested with `&' rather than by comparing the whole word.

   ENCODE_DIRECTION_R2L and ENCODE_DIRECTION_L2R emit the introducer
   followed by `2' `]' and `0' `]' respectively.  Each is a single
   statement, to be used like the other ENCODE_XXX macros.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
  do {                                                  \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
      *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
    else                                                \
      *dst++ = ISO_CODE_CSI;                            \
  } while (0)

#define ENCODE_DIRECTION_R2L                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '2';                               \
    *dst++ = ']';                               \
  } while (0)

#define ENCODE_DIRECTION_L2R                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '0';                               \
    *dst++ = ']';                               \
  } while (0)
2545
/* Emit at DST the codes that bring the graphic planes and registers
   back to their initial state: a shift-in if graphic plane 0 does not
   have register 0 invoked, then, for each of the four graphic
   registers with a non-negative initial designation that differs from
   the current one, the sequence designating the initial charset.  */
#define ENCODE_RESET_PLANE_AND_REGISTER                                 \
  do {                                                                  \
    int reset_reg = 0;                                                  \
                                                                        \
    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                    \
      ENCODE_SHIFT_IN;                                                  \
    while (reset_reg < 4)                                               \
      {                                                                 \
        if (! (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reset_reg)  \
               < 0                                                      \
               || (CODING_SPEC_ISO_DESIGNATION (coding, reset_reg)      \
                   == CODING_SPEC_ISO_INITIAL_DESIGNATION (coding,      \
                                                           reset_reg)))) \
          ENCODE_DESIGNATION                                            \
            (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reset_reg),   \
             reset_reg, coding);                                        \
        reset_reg++;                                                    \
      }                                                                 \
  } while (0)
2560
2561 /* Produce designation sequences of charsets in the line started from
2562    SRC to a place pointed by DST, and return updated DST.
2563
2564    If the current block ends before any end-of-line, we may fail to
2565    find all the necessary designations.  */
2566
2567 static unsigned char *
2568 encode_designation_at_bol (coding, translation_table, src, src_end, dst)
2569      struct coding_system *coding;
2570      Lisp_Object translation_table;
2571      unsigned char *src, *src_end, *dst;
2572 {
2573   int charset, c, found = 0, reg;
2574   /* Table of charsets to be designated to each graphic register.  */
2575   int r[4];
2576
2577   for (reg = 0; reg < 4; reg++)
2578     r[reg] = -1;
2579
2580   while (found < 4)
2581     {
2582       ONE_MORE_CHAR (c);
2583       if (c == '\n')
2584         break;
2585
2586       charset = CHAR_CHARSET (c);
2587       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2588       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
2589         {
2590           found++;
2591           r[reg] = charset;
2592         }
2593     }
2594
2595  label_end_of_loop:
2596   if (found)
2597     {
2598       for (reg = 0; reg < 4; reg++)
2599         if (r[reg] >= 0
2600             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
2601           ENCODE_DESIGNATION (r[reg], reg, coding);
2602     }
2603
2604   return dst;
2605 }
2606
2607 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
2608
2609 static void
2610 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
2611      struct coding_system *coding;
2612      unsigned char *source, *destination;
2613      int src_bytes, dst_bytes;
2614 {
2615   unsigned char *src = source;
2616   unsigned char *src_end = source + src_bytes;
2617   unsigned char *dst = destination;
2618   unsigned char *dst_end = destination + dst_bytes;
2619   /* Since the maximum bytes produced by each loop is 20, we subtract 19
2620      from DST_END to assure overflow checking is necessary only at the
2621      head of loop.  */
2622   unsigned char *adjusted_dst_end = dst_end - 19;
2623   /* SRC_BASE remembers the start position in source in each loop.
2624      The loop will be exited when there's not enough source text to
2625      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2626      there's not enough destination area to produce encoded codes
2627      (within macro EMIT_BYTES).  */
2628   unsigned char *src_base;
2629   int c;
2630   Lisp_Object translation_table;
2631   Lisp_Object safe_chars;
2632
2633   if (coding->flags & CODING_FLAG_ISO_SAFE)
2634     coding->mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
2635
2636   safe_chars = coding_safe_chars (coding->symbol);
2637
2638   if (NILP (Venable_character_translation))
2639     translation_table = Qnil;
2640   else
2641     {
2642       translation_table = coding->translation_table_for_encode;
2643       if (NILP (translation_table))
2644         translation_table = Vstandard_translation_table_for_encode;
2645     }
2646
2647   coding->consumed_char = 0;
2648   coding->errors = 0;
2649   while (1)
2650     {
2651       src_base = src;
2652
2653       if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
2654         {
2655           coding->result = CODING_FINISH_INSUFFICIENT_DST;
2656           break;
2657         }
2658
2659       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
2660           && CODING_SPEC_ISO_BOL (coding))
2661         {
2662           /* We have to produce designation sequences if any now.  */
2663           dst = encode_designation_at_bol (coding, translation_table,
2664                                            src, src_end, dst);
2665           CODING_SPEC_ISO_BOL (coding) = 0;
2666         }
2667
2668       /* Check composition start and end.  */
2669       if (coding->composing != COMPOSITION_DISABLED
2670           && coding->cmp_data_start < coding->cmp_data->used)
2671         {
2672           struct composition_data *cmp_data = coding->cmp_data;
2673           int *data = cmp_data->data + coding->cmp_data_start;
2674           int this_pos = cmp_data->char_offset + coding->consumed_char;
2675
2676           if (coding->composing == COMPOSITION_RELATIVE)
2677             {
2678               if (this_pos == data[2])
2679                 {
2680                   ENCODE_COMPOSITION_END (coding, data);
2681                   cmp_data = coding->cmp_data;
2682                   data = cmp_data->data + coding->cmp_data_start;
2683                 }
2684             }
2685           else if (COMPOSING_P (coding))
2686             {
2687               /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHAR  */
2688               if (coding->cmp_data_index == coding->cmp_data_start + data[0])
2689                 /* We have consumed components of the composition.
2690                    What follows in SRC is the composition's base
2691                    text.  */
2692                 ENCODE_COMPOSITION_FAKE_START (coding);
2693               else
2694                 {
2695                   int c = cmp_data->data[coding->cmp_data_index++];
2696                   if (coding->composition_rule_follows)
2697                     {
2698                       ENCODE_COMPOSITION_RULE (c);
2699                       coding->composition_rule_follows = 0;
2700                     }
2701                   else
2702                     {
2703                       if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
2704                           && ! CODING_SAFE_CHAR_P (safe_chars, c))
2705                         ENCODE_UNSAFE_CHARACTER (c);
2706                       else
2707                         ENCODE_ISO_CHARACTER (c);
2708                       if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
2709                         coding->composition_rule_follows = 1;
2710                     }
2711                   continue;
2712                 }
2713             }
2714           if (!COMPOSING_P (coding))
2715             {
2716               if (this_pos == data[1])
2717                 {
2718                   ENCODE_COMPOSITION_START (coding, data);
2719                   continue;
2720                 }
2721             }
2722         }
2723
2724       ONE_MORE_CHAR (c);
2725
2726       /* Now encode the character C.  */
2727       if (c < 0x20 || c == 0x7F)
2728         {
2729           if (c == '\r')
2730             {
2731               if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2732                 {
2733                   if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2734                     ENCODE_RESET_PLANE_AND_REGISTER;
2735                   *dst++ = c;
2736                   continue;
2737                 }
2738               /* fall down to treat '\r' as '\n' ...  */
2739               c = '\n';
2740             }
2741           if (c == '\n')
2742             {
2743               if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
2744                 ENCODE_RESET_PLANE_AND_REGISTER;
2745               if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
2746                 bcopy (coding->spec.iso2022.initial_designation,
2747                        coding->spec.iso2022.current_designation,
2748                        sizeof coding->spec.iso2022.initial_designation);
2749               if (coding->eol_type == CODING_EOL_LF
2750                   || coding->eol_type == CODING_EOL_UNDECIDED)
2751                 *dst++ = ISO_CODE_LF;
2752               else if (coding->eol_type == CODING_EOL_CRLF)
2753                 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
2754               else
2755                 *dst++ = ISO_CODE_CR;
2756               CODING_SPEC_ISO_BOL (coding) = 1;
2757             }
2758           else
2759             {
2760               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2761                 ENCODE_RESET_PLANE_AND_REGISTER;
2762               *dst++ = c;
2763             }
2764         }
2765       else if (ASCII_BYTE_P (c))
2766         ENCODE_ISO_CHARACTER (c);
2767       else if (SINGLE_BYTE_CHAR_P (c))
2768         {
2769           *dst++ = c;
2770           coding->errors++;
2771         }
2772       else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
2773                && ! CODING_SAFE_CHAR_P (safe_chars, c))
2774         ENCODE_UNSAFE_CHARACTER (c);
2775       else
2776         ENCODE_ISO_CHARACTER (c);
2777
2778       coding->consumed_char++;
2779     }
2780
2781  label_end_of_loop:
2782   coding->consumed = src_base - source;
2783   coding->produced = coding->produced_char = dst - destination;
2784 }
2785
2786 \f
2787 /*** 4. SJIS and BIG5 handlers ***/
2788
2789 /* Although SJIS and BIG5 are not ISO coding systems, they are used
2790    quite widely.  So, for the moment, Emacs supports them in the bare
2791    C code.  But, in the future, they may be supported only by CCL.  */
2792
2793 /* SJIS is a coding system encoding three character sets: ASCII, right
2794    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
2795    as is.  A character of charset katakana-jisx0201 is encoded by
2796    "position-code + 0x80".  A character of charset japanese-jisx0208
2797    is encoded in 2-byte but two position-codes are divided and shifted
2798    so that it fits in the range below.
2799
2800    --- CODE RANGE of SJIS ---
2801    (character set)      (range)
2802    ASCII                0x00 .. 0x7F
2803    KATAKANA-JISX0201    0xA1 .. 0xDF
2804    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
2805             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
2806    -------------------------------
2807
2808 */
2809
2810 /* BIG5 is a coding system encoding two character sets: ASCII and
2811    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
2812    character set and is encoded in two bytes.
2813
2814    --- CODE RANGE of BIG5 ---
2815    (character set)      (range)
2816    ASCII                0x00 .. 0x7F
2817    Big5 (1st byte)      0xA1 .. 0xFE
2818         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2819    --------------------------
2820
2821    Since the number of characters in Big5 is larger than maximum
2822    characters in Emacs' charset (96x96), it can't be handled as one
2823    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2824    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2825    contains frequently used characters and the latter contains less
2826    frequently used characters.  */
2827
2828 /* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
2829    are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
2830    C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
2831    format.  CHARSET is `charset_big5_1' or `charset_big5_2'.  */
2832
/* Number of Big5 characters which have the same code in 1st byte:
   (0xFF - 0xA1) trail bytes in 0xA1..0xFE plus (0x7F - 0x40) trail
   bytes in 0x40..0x7E.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* Convert the BIG5 byte pair B1, B2 into CHARSET (charset_big5_1 or
   charset_big5_2) and the position-codes C1, C2.

   Every argument is parenthesized in the expansion, so arbitrary
   expressions may be passed for B1 and B2.  B1 and B2 are evaluated
   more than once and must be free of side effects.  B1 and B2 are
   read before C1 and C2 are stored, so the same variables may be
   used for both the input and the output.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    /* Linear index of the character among all Big5 characters.  */    \
    unsigned int temp                                                   \
      = (((b1) - 0xA1) * BIG5_SAME_ROW                                  \
         + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));                         \
    if ((b1) < 0xC9)                                                    \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        /* Make the index relative to the first row of big5-2.  */     \
        temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
      }                                                                 \
    (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
    (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
  } while (0)

/* Convert CHARSET (charset_big5_1 or charset_big5_2) and the
   position-codes C1, C2 into the BIG5 byte pair B1, B2.  This is the
   inverse of DECODE_BIG5.

   Every argument is parenthesized in the expansion.  C1 and C2 are
   read before B1 and B2 are stored, so the same variables may be
   used for both the input and the output.  B2 is evaluated more than
   once and must be a side-effect-free lvalue.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int temp = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);  \
    if ((charset) == charset_big5_2)                                    \
      temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
    (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
    (b2) = temp % BIG5_SAME_ROW;                                        \
    /* The first 0x3F columns map to 0x40..0x7E, the rest to           \
       0xA1..0xFE.  */                                                  \
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
  } while (0)
2860
2861 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2862    Check if a text is encoded in SJIS.  If it is, return
2863    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2864
2865 static int
2866 detect_coding_sjis (src, src_end, multibytep)
2867      unsigned char *src, *src_end;
2868      int multibytep;
2869 {
2870   int c;
2871   /* Dummy for ONE_MORE_BYTE.  */
2872   struct coding_system dummy_coding;
2873   struct coding_system *coding = &dummy_coding;
2874
2875   while (1)
2876     {
2877       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2878       if (c < 0x80)
2879         continue;
2880       if (c == 0x80 || c == 0xA0 || c > 0xEF)
2881         return 0;
2882       if (c <= 0x9F || c >= 0xE0)
2883         {
2884           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2885           if (c < 0x40 || c == 0x7F || c > 0xFC)
2886             return 0;
2887         }
2888     }
2889  label_end_of_loop:
2890   return CODING_CATEGORY_MASK_SJIS;
2891 }
2892
2893 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2894    Check if a text is encoded in BIG5.  If it is, return
2895    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2896
2897 static int
2898 detect_coding_big5 (src, src_end, multibytep)
2899      unsigned char *src, *src_end;
2900      int multibytep;
2901 {
2902   int c;
2903   /* Dummy for ONE_MORE_BYTE.  */
2904   struct coding_system dummy_coding;
2905   struct coding_system *coding = &dummy_coding;
2906
2907   while (1)
2908     {
2909       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2910       if (c < 0x80)
2911         continue;
2912       if (c < 0xA1 || c > 0xFE)
2913         return 0;
2914       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2915       if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE)
2916         return 0;
2917     }
2918  label_end_of_loop:
2919   return CODING_CATEGORY_MASK_BIG5;
2920 }
2921
2922 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2923    Check if a text is encoded in UTF-8.  If it is, return
2924    CODING_CATEGORY_MASK_UTF_8, else return 0.  */
2925
/* Predicates classifying a single UTF-8 octet C by its high bits.
   The trailing comment on each line shows the bit pattern tested.
   detect_coding_utf_8 passes an `unsigned char' for C.  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)               /* 0xxxxxxx */
#define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)     /* 10xxxxxx */
#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)     /* 110xxxxx */
#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)     /* 1110xxxx */
#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)     /* 11110xxx */
#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)     /* 111110xx */
#define UTF_8_6_OCTET_LEADING_P(c) (((c) & 0xFE) == 0xFC)     /* 1111110x */
2933
2934 static int
2935 detect_coding_utf_8 (src, src_end, multibytep)
2936      unsigned char *src, *src_end;
2937      int multibytep;
2938 {
2939   unsigned char c;
2940   int seq_maybe_bytes;
2941   /* Dummy for ONE_MORE_BYTE.  */
2942   struct coding_system dummy_coding;
2943   struct coding_system *coding = &dummy_coding;
2944
2945   while (1)
2946     {
2947       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2948       if (UTF_8_1_OCTET_P (c))
2949         continue;
2950       else if (UTF_8_2_OCTET_LEADING_P (c))
2951         seq_maybe_bytes = 1;
2952       else if (UTF_8_3_OCTET_LEADING_P (c))
2953         seq_maybe_bytes = 2;
2954       else if (UTF_8_4_OCTET_LEADING_P (c))
2955         seq_maybe_bytes = 3;
2956       else if (UTF_8_5_OCTET_LEADING_P (c))
2957         seq_maybe_bytes = 4;
2958       else if (UTF_8_6_OCTET_LEADING_P (c))
2959         seq_maybe_bytes = 5;
2960       else
2961         return 0;
2962
2963       do
2964         {
2965           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2966           if (!UTF_8_EXTRA_OCTET_P (c))
2967             return 0;
2968           seq_maybe_bytes--;
2969         }
2970       while (seq_maybe_bytes > 0);
2971     }
2972
2973  label_end_of_loop:
2974   return CODING_CATEGORY_MASK_UTF_8;
2975 }
2976
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
   Check if a text starts with a UTF-16 byte order mark.  If the
   first two bytes are 0xFE 0xFF (big endian), return
   CODING_CATEGORY_MASK_UTF_16_BE; if they are 0xFF 0xFE (little
   endian), return CODING_CATEGORY_MASK_UTF_16_LE; else return 0.  */
2982
/* Nonzero if the 16-bit code unit VAL is 0xFFFE or 0xFFFF.  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF))

/* Nonzero if the 16-bit code unit VAL is a high (leading) surrogate,
   i.e. in the range 0xD800..0xDBFF.  The top six bits select the
   range, so they must be masked with 0xFC00; masking with 0xD800
   would also match low surrogates and 0xF800..0xFFFF.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if the 16-bit code unit VAL is a low (trailing) surrogate,
   i.e. in the range 0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
2992
2993 static int
2994 detect_coding_utf_16 (src, src_end, multibytep)
2995      unsigned char *src, *src_end;
2996      int multibytep;
2997 {
2998   unsigned char c1, c2;
2999   /* Dummy for ONE_MORE_BYTE_CHECK_MULTIBYTE.  */
3000   struct coding_system dummy_coding;
3001   struct coding_system *coding = &dummy_coding;
3002
3003   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
3004   ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep);
3005
3006   if ((c1 == 0xFF) && (c2 == 0xFE))
3007     return CODING_CATEGORY_MASK_UTF_16_LE;
3008   else if ((c1 == 0xFE) && (c2 == 0xFF))
3009     return CODING_CATEGORY_MASK_UTF_16_BE;
3010
3011  label_end_of_loop:
3012   return 0;
3013 }
3014
3015 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3016    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
3017
3018 static void
3019 decode_coding_sjis_big5 (coding, source, destination,
3020                          src_bytes, dst_bytes, sjis_p)
3021      struct coding_system *coding;
3022      unsigned char *source, *destination;
3023      int src_bytes, dst_bytes;
3024      int sjis_p;
3025 {
3026   unsigned char *src = source;
3027   unsigned char *src_end = source + src_bytes;
3028   unsigned char *dst = destination;
3029   unsigned char *dst_end = destination + dst_bytes;
3030   /* SRC_BASE remembers the start position in source in each loop.
3031      The loop will be exited when there's not enough source code
3032      (within macro ONE_MORE_BYTE), or when there's not enough
3033      destination area to produce a character (within macro
3034      EMIT_CHAR).  */
3035   unsigned char *src_base;
3036   Lisp_Object translation_table;
3037
3038   if (NILP (Venable_character_translation))
3039     translation_table = Qnil;
3040   else
3041     {
3042       translation_table = coding->translation_table_for_decode;
3043       if (NILP (translation_table))
3044         translation_table = Vstandard_translation_table_for_decode;
3045     }
3046
3047   coding->produced_char = 0;
3048   while (1)
3049     {
3050       int c, charset, c1, c2;
3051
3052       src_base = src;
3053       ONE_MORE_BYTE (c1);
3054
3055       if (c1 < 0x80)
3056         {
3057           charset = CHARSET_ASCII;
3058           if (c1 < 0x20)
3059             {
3060               if (c1 == '\r')
3061                 {
3062                   if (coding->eol_type == CODING_EOL_CRLF)
3063                     {
3064                       ONE_MORE_BYTE (c2);
3065                       if (c2 == '\n')
3066                         c1 = c2;
3067                       else
3068                         /* To process C2 again, SRC is subtracted by 1.  */
3069                         src--;
3070                     }
3071                   else if (coding->eol_type == CODING_EOL_CR)
3072                     c1 = '\n';
3073                 }
3074               else if (c1 == '\n'
3075                        && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3076                        && (coding->eol_type == CODING_EOL_CR
3077                            || coding->eol_type == CODING_EOL_CRLF))
3078                 {
3079                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
3080                   goto label_end_of_loop;
3081                 }
3082             }
3083         }
3084       else
3085         {
3086           if (sjis_p)
3087             {
3088               if (c1 == 0x80 || c1 == 0xA0 || c1 > 0xEF)
3089                 goto label_invalid_code;
3090               if (c1 <= 0x9F || c1 >= 0xE0)
3091                 {
3092                   /* SJIS -> JISX0208 */
3093                   ONE_MORE_BYTE (c2);
3094                   if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
3095                     goto label_invalid_code;
3096                   DECODE_SJIS (c1, c2, c1, c2);
3097                   charset = charset_jisx0208;
3098                 }
3099               else
3100                 /* SJIS -> JISX0201-Kana */
3101                 charset = charset_katakana_jisx0201;
3102             }
3103           else
3104             {
3105               /* BIG5 -> Big5 */
3106               if (c1 < 0xA0 || c1 > 0xFE)
3107                 goto label_invalid_code;
3108               ONE_MORE_BYTE (c2);
3109               if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
3110                 goto label_invalid_code;
3111               DECODE_BIG5 (c1, c2, charset, c1, c2);
3112             }
3113         }
3114
3115       c = DECODE_ISO_CHARACTER (charset, c1, c2);
3116       EMIT_CHAR (c);
3117       continue;
3118
3119     label_invalid_code:
3120       coding->errors++;
3121       src = src_base;
3122       c = *src++;
3123       EMIT_CHAR (c);
3124     }
3125
3126  label_end_of_loop:
3127   coding->consumed = coding->consumed_char = src_base - source;
3128   coding->produced = dst - destination;
3129   return;
3130 }
3131
3132 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
3133    This function can encode charsets `ascii', `katakana-jisx0201',
3134    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
3135    are sure that all these charsets are registered as official charset
3136    (i.e. do not have extended leading-codes).  Characters of other
3137    charsets are produced without any encoding.  If SJIS_P is 1, encode
3138    SJIS text, else encode BIG5 text.  */
3139
3140 static void
3141 encode_coding_sjis_big5 (coding, source, destination,
3142                          src_bytes, dst_bytes, sjis_p)
3143      struct coding_system *coding;
3144      unsigned char *source, *destination;
3145      int src_bytes, dst_bytes;
3146      int sjis_p;
3147 {
3148   unsigned char *src = source;
3149   unsigned char *src_end = source + src_bytes;
3150   unsigned char *dst = destination;
3151   unsigned char *dst_end = destination + dst_bytes;
3152   /* SRC_BASE remembers the start position in source in each loop.
3153      The loop will be exited when there's not enough source text to
3154      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3155      there's not enough destination area to produce encoded codes
3156      (within macro EMIT_BYTES).  */
3157   unsigned char *src_base;
3158   Lisp_Object translation_table;
3159
3160   if (NILP (Venable_character_translation))
3161     translation_table = Qnil;
3162   else
3163     {
3164       translation_table = coding->translation_table_for_encode;
3165       if (NILP (translation_table))
3166         translation_table = Vstandard_translation_table_for_encode;
3167     }
3168
3169   while (1)
3170     {
3171       int c, charset, c1, c2;
3172
3173       src_base = src;
3174       ONE_MORE_CHAR (c);
3175
3176       /* Now encode the character C.  */
3177       if (SINGLE_BYTE_CHAR_P (c))
3178         {
3179           switch (c)
3180             {
3181             case '\r':
3182               if (!(coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
3183                 {
3184                   EMIT_ONE_BYTE (c);
3185                   break;
3186                 }
3187               c = '\n';
3188             case '\n':
3189               if (coding->eol_type == CODING_EOL_CRLF)
3190                 {
3191                   EMIT_TWO_BYTES ('\r', c);
3192                   break;
3193                 }
3194               else if (coding->eol_type == CODING_EOL_CR)
3195                 c = '\r';
3196             default:
3197               EMIT_ONE_BYTE (c);
3198             }
3199         }
3200       else
3201         {
3202           SPLIT_CHAR (c, charset, c1, c2);
3203           if (sjis_p)
3204             {
3205               if (charset == charset_jisx0208
3206                   || charset == charset_jisx0208_1978)
3207                 {
3208                   ENCODE_SJIS (c1, c2, c1, c2);
3209                   EMIT_TWO_BYTES (c1, c2);
3210                 }
3211               else if (charset == charset_katakana_jisx0201)
3212                 EMIT_ONE_BYTE (c1 | 0x80);
3213               else if (charset == charset_latin_jisx0201)
3214                 EMIT_ONE_BYTE (c1);
3215               else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
3216                 {
3217                   EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3218                   if (CHARSET_WIDTH (charset) > 1)
3219                     EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3220                 }
3221               else
3222                 /* There's no way other than producing the internal
3223                    codes as is.  */
3224                 EMIT_BYTES (src_base, src);
3225             }
3226           else
3227             {
3228               if (charset == charset_big5_1 || charset == charset_big5_2)
3229                 {
3230                   ENCODE_BIG5 (charset, c1, c2, c1, c2);
3231                   EMIT_TWO_BYTES (c1, c2);
3232                 }
3233               else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
3234                 {
3235                   EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3236                   if (CHARSET_WIDTH (charset) > 1)
3237                     EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3238                 }
3239               else
3240                 /* There's no way other than producing the internal
3241                    codes as is.  */
3242                 EMIT_BYTES (src_base, src);
3243             }
3244         }
3245       coding->consumed_char++;
3246     }
3247
3248  label_end_of_loop:
3249   coding->consumed = src_base - source;
3250   coding->produced = coding->produced_char = dst - destination;
3251 }
3252
3253 \f
3254 /*** 5. CCL handlers ***/
3255
3256 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3257    Check if a text is encoded in a coding system of which
3258    encoder/decoder are written in CCL program.  If it is, return
3259    CODING_CATEGORY_MASK_CCL, else return 0.  */
3260
3261 static int
3262 detect_coding_ccl (src, src_end, multibytep)
3263      unsigned char *src, *src_end;
3264      int multibytep;
3265 {
3266   unsigned char *valid;
3267   int c;
3268   /* Dummy for ONE_MORE_BYTE.  */
3269   struct coding_system dummy_coding;
3270   struct coding_system *coding = &dummy_coding;
3271
3272   /* No coding system is assigned to coding-category-ccl.  */
3273   if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
3274     return 0;
3275
3276   valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
3277   while (1)
3278     {
3279       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
3280       if (! valid[c])
3281         return 0;
3282     }
3283  label_end_of_loop:
3284   return CODING_CATEGORY_MASK_CCL;
3285 }
3286
3287 \f
3288 /*** 6. End-of-line handlers ***/
3289
3290 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3291
3292 static void
3293 decode_eol (coding, source, destination, src_bytes, dst_bytes)
3294      struct coding_system *coding;
3295      unsigned char *source, *destination;
3296      int src_bytes, dst_bytes;
3297 {
3298   unsigned char *src = source;
3299   unsigned char *dst = destination;
3300   unsigned char *src_end = src + src_bytes;
3301   unsigned char *dst_end = dst + dst_bytes;
3302   Lisp_Object translation_table;
3303   /* SRC_BASE remembers the start position in source in each loop.
3304      The loop will be exited when there's not enough source code
3305      (within macro ONE_MORE_BYTE), or when there's not enough
3306      destination area to produce a character (within macro
3307      EMIT_CHAR).  */
3308   unsigned char *src_base;
3309   int c;
3310
3311   translation_table = Qnil;
3312   switch (coding->eol_type)
3313     {
3314     case CODING_EOL_CRLF:
3315       while (1)
3316         {
3317           src_base = src;
3318           ONE_MORE_BYTE (c);
3319           if (c == '\r')
3320             {
3321               ONE_MORE_BYTE (c);
3322               if (c != '\n')
3323                 {
3324                   src--;
3325                   c = '\r';
3326                 }
3327             }
3328           else if (c == '\n'
3329                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
3330             {
3331               coding->result = CODING_FINISH_INCONSISTENT_EOL;
3332               goto label_end_of_loop;
3333             }
3334           EMIT_CHAR (c);
3335         }
3336       break;
3337
3338     case CODING_EOL_CR:
3339       while (1)
3340         {
3341           src_base = src;
3342           ONE_MORE_BYTE (c);
3343           if (c == '\n')
3344             {
3345               if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3346                 {
3347                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
3348                   goto label_end_of_loop;
3349                 }
3350             }
3351           else if (c == '\r')
3352             c = '\n';
3353           EMIT_CHAR (c);
3354         }
3355       break;
3356
3357     default:                    /* no need for EOL handling */
3358       while (1)
3359         {
3360           src_base = src;
3361           ONE_MORE_BYTE (c);
3362           EMIT_CHAR (c);
3363         }
3364     }
3365
3366  label_end_of_loop:
3367   coding->consumed = coding->consumed_char = src_base - source;
3368   coding->produced = dst - destination;
3369   return;
3370 }
3371
3372 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
3373    format of end-of-line according to `coding->eol_type'.  It also
3374    convert multibyte form 8-bit characters to unibyte if
3375    CODING->src_multibyte is nonzero.  If `coding->mode &
3376    CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
3377    also means end-of-line.  */
3378
3379 static void
3380 encode_eol (coding, source, destination, src_bytes, dst_bytes)
3381      struct coding_system *coding;
3382      const unsigned char *source;
3383      unsigned char *destination;
3384      int src_bytes, dst_bytes;
3385 {
3386   const unsigned char *src = source;
3387   unsigned char *dst = destination;
3388   const unsigned char *src_end = src + src_bytes;
3389   unsigned char *dst_end = dst + dst_bytes;
3390   Lisp_Object translation_table;
3391   /* SRC_BASE remembers the start position in source in each loop.
3392      The loop will be exited when there's not enough source text to
3393      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3394      there's not enough destination area to produce encoded codes
3395      (within macro EMIT_BYTES).  */
3396   const unsigned char *src_base;
3397   unsigned char *tmp;
3398   int c;
3399   int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
3400
3401   translation_table = Qnil;
3402   if (coding->src_multibyte
3403       && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL)
3404     {
3405       src_end--;
3406       src_bytes--;
3407       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
3408     }
3409
3410   if (coding->eol_type == CODING_EOL_CRLF)
3411     {
3412       while (src < src_end)
3413         {
3414           src_base = src;
3415           c = *src++;
3416           if (c >= 0x20)
3417             EMIT_ONE_BYTE (c);
3418           else if (c == '\n' || (c == '\r' && selective_display))
3419             EMIT_TWO_BYTES ('\r', '\n');
3420           else
3421             EMIT_ONE_BYTE (c);
3422         }
3423       src_base = src;
3424     label_end_of_loop:
3425       ;
3426     }
3427   else
3428     {
3429       if (!dst_bytes || src_bytes <= dst_bytes)
3430         {
3431           safe_bcopy (src, dst, src_bytes);
3432           src_base = src_end;
3433           dst += src_bytes;
3434         }
3435       else
3436         {
3437           if (coding->src_multibyte
3438               && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL)
3439             dst_bytes--;
3440           safe_bcopy (src, dst, dst_bytes);
3441           src_base = src + dst_bytes;
3442           dst = destination + dst_bytes;
3443           coding->result = CODING_FINISH_INSUFFICIENT_DST;
3444         }
3445       if (coding->eol_type == CODING_EOL_CR)
3446         {
3447           for (tmp = destination; tmp < dst; tmp++)
3448             if (*tmp == '\n') *tmp = '\r';
3449         }
3450       else if (selective_display)
3451         {
3452           for (tmp = destination; tmp < dst; tmp++)
3453             if (*tmp == '\r') *tmp = '\n';
3454         }
3455     }
3456   if (coding->src_multibyte)
3457     dst = destination + str_as_unibyte (destination, dst - destination);
3458
3459   coding->consumed = src_base - source;
3460   coding->produced = dst - destination;
3461   coding->produced_char = coding->produced;
3462 }
3463
3464 \f
3465 /*** 7. C library functions ***/
3466
3467 /* In Emacs Lisp, a coding system is represented by a Lisp symbol which
3468    has a property `coding-system'.  The value of this property is a
3469    vector of length 5 (called the coding-vector).  Among elements of
3470    this vector, the first (element[0]) and the fifth (element[4])
3471    carry important information for decoding/encoding.  Before
3472    decoding/encoding, this information should be set in fields of a
3473    structure of type `coding_system'.
3474
3475    The value of the property `coding-system' can be a symbol of another
3476    subsidiary coding-system.  In that case, Emacs gets coding-vector
3477    from that symbol.
3478
3479    `element[0]' contains information to be set in `coding->type'.  The
3480    value and its meaning is as follows:
3481
3482    0 -- coding_type_emacs_mule
3483    1 -- coding_type_sjis
3484    2 -- coding_type_iso2022
3485    3 -- coding_type_big5
3486    4 -- coding_type_ccl encoder/decoder written in CCL
3487    nil -- coding_type_no_conversion
3488    t -- coding_type_undecided (automatic conversion on decoding,
3489                                no-conversion on encoding)
3490
3491    `element[4]' contains information to be set in `coding->flags' and
3492    `coding->spec'.  The meaning varies by `coding->type'.
3493
3494    If `coding->type' is `coding_type_iso2022', element[4] is a vector
3495    of length 32 (of which the first 13 sub-elements are used now).
3496    Meanings of these sub-elements are:
3497
3498    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
3499         If the value is an integer of valid charset, the charset is
3500         assumed to be designated to graphic register N initially.
3501
        If the value is negative, it is the negated ID of a charset
        that reserves graphic register N: the charset is not
        designated initially, but should be designated to graphic
        register N just before encoding a character in that charset.
3506
3507         If the value is nil, graphic register N is never used on
3508         encoding.
3509
3510    sub-element[N] where N is 4 through 11: to be set in `coding->flags'
3511         Each value takes t or nil.  See the section ISO2022 of
3512         `coding.h' for more information.
3513
3514    If `coding->type' is `coding_type_big5', element[4] is t to denote
3515    BIG5-ETen or nil to denote BIG5-HKU.
3516
3517    If `coding->type' takes the other value, element[4] is ignored.
3518
3519    Emacs Lisp's coding systems also carry information about format of
3520    end-of-line in a value of property `eol-type'.  If the value is
3521    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
3522    means CODING_EOL_CR.  If it is not integer, it should be a vector
3523    of subsidiary coding systems of which property `eol-type' has one
3524    of the above values.
3525
3526 */
3527
3528 /* Extract information for decoding/encoding from the coding system
3529    symbol CODING_SYSTEM and set it in CODING.  If CODING_SYSTEM is
3530    invalid, CODING is setup so that no conversion is necessary and
3531    return -1, else return 0.

        CODING is zero-cleared first, so nothing stored in it before the
        call survives.

        CODING_SYSTEM is treated as invalid when it is nil, when its
        `coding-system' property is not a vector of 5 elements whose
        element[3] is a cons, when (for a non-symbol type in element[0])
        the `coding-category' entry of the property list in element[3] is
        nil or names a symbol without an integer `coding-category-index'
        property, when element[0] is not one of the type numbers 0
        through 5, or when the type-specific data in element[4] is
        malformed (ISO2022: not a vector of 32 elements; CCL: not a cons
        of two usable CCL programs).  */
3532
3533 int
3534 setup_coding_system (coding_system, coding)
3535      Lisp_Object coding_system;
3536      struct coding_system *coding;
3537 {
3538   Lisp_Object coding_spec, coding_type, eol_type, plist;
3539   Lisp_Object val;
3540
3541   /* At first, zero clear all members.  */
3542   bzero (coding, sizeof (struct coding_system));
3543
3544   /* Initialize some fields required for all kinds of coding systems.  */
3545   coding->symbol = coding_system;
3546   coding->heading_ascii = -1;
3547   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
3548   coding->composing = COMPOSITION_DISABLED;
3549   coding->cmp_data = NULL;
3550
3551   if (NILP (coding_system))
3552     goto label_invalid_coding_system;
3553
3554   coding_spec = Fget (coding_system, Qcoding_system);
3555
3556   if (!VECTORP (coding_spec)
3557       || XVECTOR (coding_spec)->size != 5
3558       || !CONSP (XVECTOR (coding_spec)->contents[3]))
3559     goto label_invalid_coding_system;
3560
       /* Decide the end-of-line format.  A vector value of `eol-type'
          means the format is to be detected; when inhibit_eol_conversion
          is nonzero the property is not consulted at all.
          NOTE(review): XFASTINT is applied to EOL_TYPE below without an
          INTEGERP check; a nil or other non-integer value is presumably
          expected to compare unequal to 1 and 2 and so end up as
          CODING_EOL_LF -- confirm.  */
3561   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
3562   if (VECTORP (eol_type))
3563     {
3564       coding->eol_type = CODING_EOL_UNDECIDED;
3565       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
3566     }
3567   else if (XFASTINT (eol_type) == 1)
3568     {
3569       coding->eol_type = CODING_EOL_CRLF;
3570       coding->common_flags
3571         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3572     }
3573   else if (XFASTINT (eol_type) == 2)
3574     {
3575       coding->eol_type = CODING_EOL_CR;
3576       coding->common_flags
3577         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3578     }
3579   else
3580     coding->eol_type = CODING_EOL_LF;
3581
3582   coding_type = XVECTOR (coding_spec)->contents[0];
3583   /* Try short cut.  A symbol here means either `t' (the coding system
          is still to be detected) or no conversion at all; in both cases
          the property list is not examined and we return at once.  */
3584   if (SYMBOLP (coding_type))
3585     {
3586       if (EQ (coding_type, Qt))
3587         {
3588           coding->type = coding_type_undecided;
3589           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
3590         }
3591       else
3592         coding->type = coding_type_no_conversion;
3593       /* Initialize this member.  Any thing other than
3594          CODING_CATEGORY_IDX_UTF_16_BE and
3595          CODING_CATEGORY_IDX_UTF_16_LE are ok because they have
3596          special treatment in detect_eol.  */
3597       coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
3598
3599       return 0;
3600     }
3601
3602   /* Get values of coding system properties:
3603      `post-read-conversion', `pre-write-conversion',
3604      `translation-table-for-decode', `translation-table-for-encode',
          and `coding-category'.  */
3605   plist = XVECTOR (coding_spec)->contents[3];
3606   /* Pre & post conversion functions should be disabled if
3607      inhibit_pre_post_conversion is nonzero.  This is the case that a code
3608      conversion function is called while those functions are running.  */
3609   if (! inhibit_pre_post_conversion)
3610     {
3611       coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
3612       coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
3613     }
       /* A translation table may be given directly or as a symbol whose
          property of the same name holds the table.  Anything that does
          not turn out to be a char-table is recorded as nil.  */
3614   val = Fplist_get (plist, Qtranslation_table_for_decode);
3615   if (SYMBOLP (val))
3616     val = Fget (val, Qtranslation_table_for_decode);
3617   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
3618   val = Fplist_get (plist, Qtranslation_table_for_encode);
3619   if (SYMBOLP (val))
3620     val = Fget (val, Qtranslation_table_for_encode);
3621   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
       /* The coding category is mandatory: it must be non-nil and carry
          an integer `coding-category-index' property.  */
3622   val = Fplist_get (plist, Qcoding_category);
3623   if (!NILP (val))
3624     {
3625       val = Fget (val, Qcoding_category_index);
3626       if (INTEGERP (val))
3627         coding->category_idx = XINT (val);
3628       else
3629         goto label_invalid_coding_system;
3630     }
3631   else
3632     goto label_invalid_coding_system;
3633
3634   /* If the coding system has non-nil `composition' property, enable
3635      composition handling.  */
3636   val = Fplist_get (plist, Qcomposition);
3637   if (!NILP (val))
3638     coding->composing = COMPOSITION_NO;
3639
       /* Dispatch on the numeric type in element[0].
          NOTE(review): CODING_TYPE is known not to be a symbol here, but
          it is not checked with INTEGERP before XFASTINT -- presumably
          the coding-system property is always well formed; confirm.  */
3640   switch (XFASTINT (coding_type))
3641     {
3642     case 0:
3643       coding->type = coding_type_emacs_mule;
3644       coding->common_flags
3645         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
           /* Both masks were already set just above, so the two tests
              below do not change common_flags.  */
3646       if (!NILP (coding->post_read_conversion))
3647         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
3648       if (!NILP (coding->pre_write_conversion))
3649         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
3650       break;
3651
3652     case 1:
3653       coding->type = coding_type_sjis;
3654       coding->common_flags
3655         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3656       break;
3657
3658     case 2:
3659       coding->type = coding_type_iso2022;
3660       coding->common_flags
3661         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3662       {
3663         Lisp_Object val, temp;
3664         Lisp_Object *flags;
3665         int i, charset, reg_bits = 0;
3666
             /* element[4] must be a vector of 32 elements: [0]..[3]
                describe the graphic registers, [4]..[16] are boolean
                flags collected into coding->flags.  */
3667         val = XVECTOR (coding_spec)->contents[4];
3668
3669         if (!VECTORP (val) || XVECTOR (val)->size != 32)
3670           goto label_invalid_coding_system;
3671
3672         flags = XVECTOR (val)->contents;
3673         coding->flags
3674           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
3675              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
3676              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
3677              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
3678              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
3679              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
3680              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
3681              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
3682              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
3683              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
3684              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3685              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
3686              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
3687              );
3688
3689         /* Invoke graphic register 0 to plane 0.  */
3690         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
3691         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
3692         CODING_SPEC_ISO_INVOCATION (coding, 1)
3693           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
3694         /* Not single shifting at first.  */
3695         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
3696         /* Beginning of buffer should also be regarded as bol. */
3697         CODING_SPEC_ISO_BOL (coding) = 1;
3698
             /* Start with revision number 255 for every charset, then
                take over the entries of Vcharset_revision_alist that name
                a known charset and hold an integer I with 0 <= I and
                I + '@' < 128.  */
3699         for (charset = 0; charset <= MAX_CHARSET; charset++)
3700           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
3701         val = Vcharset_revision_alist;
3702         while (CONSP (val))
3703           {
3704             charset = get_charset_id (Fcar_safe (XCAR (val)));
3705             if (charset >= 0
3706                 && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
3707                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
3708               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
3709             val = XCDR (val);
3710           }
3711
3712         /* Checks FLAGS[REG] (REG = 0, 1, 2, 3) and decide designations.
3713            FLAGS[REG] can be one of below:
3714                 integer CHARSET: CHARSET occupies register REG,
3715                 t: designate nothing to REG initially, but can be used
3716                   by any charsets,
3717                 list of integer, nil, or t: designate the first
3718                   element (if integer) to REG initially, the remaining
3719                   elements (if integer) is designated to REG on request,
3720                   if an element is t, REG can be used by any charsets,
3721                 nil: REG is never used.
                Besides an integer, a value accepted by get_charset_id is
                also taken as a charset.  REG_BITS collects, as bit REG,
                the registers that any charset may use.  */
3722         for (charset = 0; charset <= MAX_CHARSET; charset++)
3723           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3724             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
3725         for (i = 0; i < 4; i++)
3726           {
3727             if ((INTEGERP (flags[i])
3728                  && (charset = XINT (flags[i]), CHARSET_VALID_P (charset)))
3729                 || (charset = get_charset_id (flags[i])) >= 0)
3730               {
3731                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3732                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
3733               }
3734             else if (EQ (flags[i], Qt))
3735               {
3736                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3737                 reg_bits |= 1 << i;
3738                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3739               }
3740             else if (CONSP (flags[i]))
3741               {
3742                 Lisp_Object tail;
3743                 tail = flags[i];
3744
3745                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
                     /* The first element decides the initial designation.  */
3746                 if ((INTEGERP (XCAR (tail))
3747                      && (charset = XINT (XCAR (tail)),
3748                          CHARSET_VALID_P (charset)))
3749                     || (charset = get_charset_id (XCAR (tail))) >= 0)
3750                   {
3751                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3752                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
3753                   }
3754                 else
3755                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
                     /* The remaining elements only request register I.  */
3756                 tail = XCDR (tail);
3757                 while (CONSP (tail))
3758                   {
3759                     if ((INTEGERP (XCAR (tail))
3760                          && (charset = XINT (XCAR (tail)),
3761                              CHARSET_VALID_P (charset)))
3762                         || (charset = get_charset_id (XCAR (tail))) >= 0)
3763                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3764                         = i;
3765                     else if (EQ (XCAR (tail), Qt))
3766                       reg_bits |= 1 << i;
3767                     tail = XCDR (tail);
3768                   }
3769               }
3770             else
3771               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3772
3773             CODING_SPEC_ISO_DESIGNATION (coding, i)
3774               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3775           }
3776
3777         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3778           {
3779             /* REG 1 can be used only by locking shift in 7-bit env.  */
3780             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3781               reg_bits &= ~2;
3782             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3783               /* Without any shifting, only REG 0 and 1 can be used.  */
3784               reg_bits &= 3;
3785           }
3786
             /* Give every defined charset that has no explicit request a
                default register chosen among those still in REG_BITS.  */
3787         if (reg_bits)
3788           for (charset = 0; charset <= MAX_CHARSET; charset++)
3789             {
3790               if (CHARSET_DEFINED_P (charset)
3791                   && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3792                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
3793                 {
3794                   /* There exist some default graphic registers to be
3795                      used by CHARSET.  */
3796
3797                   /* We had better avoid designating a charset of
3798                      CHARS96 to REG 0 as far as possible.  */
3799                   if (CHARSET_CHARS (charset) == 96)
3800                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3801                       = (reg_bits & 2
3802                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3803                   else
3804                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3805                       = (reg_bits & 1
3806                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3807                 }
3808             }
3809       }
3810       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3811       coding->spec.iso2022.last_invalid_designation_register = -1;
3812       break;
3813
3814     case 3:
3815       coding->type = coding_type_big5;
3816       coding->common_flags
3817         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
           /* A nil element[4] selects BIG5-HKU, anything else BIG5-ETen.  */
3818       coding->flags
3819         = (NILP (XVECTOR (coding_spec)->contents[4])
3820            ? CODING_FLAG_BIG5_HKU
3821            : CODING_FLAG_BIG5_ETEN);
3822       break;
3823
3824     case 4:
3825       coding->type = coding_type_ccl;
3826       coding->common_flags
3827         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3828       {
             /* element[4] must be a cons; its car is handed to
                setup_ccl_program for the decoder and its cdr for the
                encoder.  A negative result from either call makes the
                coding system invalid.  */
3829         val = XVECTOR (coding_spec)->contents[4];
3830         if (! CONSP (val)
3831             || setup_ccl_program (&(coding->spec.ccl.decoder),
3832                                   XCAR (val)) < 0
3833             || setup_ccl_program (&(coding->spec.ccl.encoder),
3834                                   XCDR (val)) < 0)
3835           goto label_invalid_coding_system;
3836
             /* Build the table of valid codes from the `valid-codes'
                property: each element is either a single code or a cons
                (START . END) naming an inclusive range.  Elements outside
                0..255, or ranges with START > END, are silently
                ignored.  */
3837         bzero (coding->spec.ccl.valid_codes, 256);
3838         val = Fplist_get (plist, Qvalid_codes);
3839         if (CONSP (val))
3840           {
3841             Lisp_Object this;
3842
3843             for (; CONSP (val); val = XCDR (val))
3844               {
3845                 this = XCAR (val);
3846                 if (INTEGERP (this)
3847                     && XINT (this) >= 0 && XINT (this) < 256)
3848                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
3849                 else if (CONSP (this)
3850                          && INTEGERP (XCAR (this))
3851                          && INTEGERP (XCDR (this)))
3852                   {
3853                     int start = XINT (XCAR (this));
3854                     int end = XINT (XCDR (this));
3855
3856                     if (start >= 0 && start <= end && end < 256)
3857                       while (start <= end)
3858                         coding->spec.ccl.valid_codes[start++] = 1;
3859                   }
3860               }
3861           }
3862       }
3863       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3864       coding->spec.ccl.cr_carryover = 0;
3865       coding->spec.ccl.eight_bit_carryover[0] = 0;
3866       break;
3867
3868     case 5:
           /* Raw text: no conversion flag is added here, so only the
              flags chosen for the end-of-line format remain.  */
3869       coding->type = coding_type_raw_text;
3870       break;
3871
3872     default:
3873       goto label_invalid_coding_system;
3874     }
3875   return 0;
3876
3877  label_invalid_coding_system:
       /* Fall back to no conversion with LF end-of-line and without
          pre/post conversion functions.  coding->symbol keeps the value
          assigned at the top.  */
3878   coding->type = coding_type_no_conversion;
3879   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3880   coding->common_flags = 0;
3881   coding->eol_type = CODING_EOL_LF;
3882   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3883   return -1;
3884 }
3885
3886 /* Free memory blocks allocated for storing composition information.  */
3887
3888 void
3889 coding_free_composition_data (coding)
3890      struct coding_system *coding;
3891 {
3892   struct composition_data *cmp_data = coding->cmp_data, *next;
3893
3894   if (!cmp_data)
3895     return;
3896   /* Memory blocks are chained.  At first, rewind to the first, then,
3897      free blocks one by one.  */
3898   while (cmp_data->prev)
3899     cmp_data = cmp_data->prev;
3900   while (cmp_data)
3901     {
3902       next = cmp_data->next;
3903       xfree (cmp_data);
3904       cmp_data = next;
3905     }
3906   coding->cmp_data = NULL;
3907 }
3908
3909 /* Set `char_offset' member of all memory blocks pointed by
3910    coding->cmp_data to POS.  */
3911
3912 void
3913 coding_adjust_composition_offset (coding, pos)
3914      struct coding_system *coding;
3915      int pos;
3916 {
3917   struct composition_data *cmp_data;
3918
3919   for (cmp_data = coding->cmp_data; cmp_data; cmp_data = cmp_data->next)
3920     cmp_data->char_offset = pos;
3921 }
3922
3923 /* Setup raw-text or one of its subsidiaries in the structure
3924    coding_system CODING according to the already setup value eol_type
3925    in CODING.  CODING should be setup for some coding system in
3926    advance.  */
3927
3928 void
3929 setup_raw_text_coding_system (coding)
3930      struct coding_system *coding;
3931 {
3932   if (coding->type != coding_type_raw_text)
3933     {
3934       coding->symbol = Qraw_text;
3935       coding->type = coding_type_raw_text;
3936       if (coding->eol_type != CODING_EOL_UNDECIDED)
3937         {
3938           Lisp_Object subsidiaries;
3939           subsidiaries = Fget (Qraw_text, Qeol_type);
3940
3941           if (VECTORP (subsidiaries)
3942               && XVECTOR (subsidiaries)->size == 3)
3943             coding->symbol
3944               = XVECTOR (subsidiaries)->contents[coding->eol_type];
3945         }
3946       setup_coding_system (coding->symbol, coding);
3947     }
3948   return;
3949 }
3950
3951 /* Emacs has a mechanism to automatically detect a coding system if it
3952    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3953    it's impossible to distinguish some coding systems accurately
3954    because they use the same range of codes.  So, at first, coding
3955    systems are classified into the following categories:
3956
3957    o coding-category-emacs-mule
3958
3959         The category for a coding system which has the same code range
3960         as Emacs' internal format.  Assigned the coding-system (Lisp
3961         symbol) `emacs-mule' by default.
3962
3963    o coding-category-sjis
3964
3965         The category for a coding system which has the same code range
3966         as SJIS.  Assigned the coding-system (Lisp
3967         symbol) `japanese-shift-jis' by default.
3968
3969    o coding-category-iso-7
3970
3971         The category for a coding system which has the same code range
3972         as ISO2022 of 7-bit environment.  This doesn't use any locking
3973         shift and single shift functions.  This can encode/decode all
3974         charsets.  Assigned the coding-system (Lisp symbol)
3975         `iso-2022-7bit' by default.
3976
3977    o coding-category-iso-7-tight
3978
3979         Same as coding-category-iso-7 except that this can
3980         encode/decode only the specified charsets.
3981
3982    o coding-category-iso-8-1
3983
3984         The category for a coding system which has the same code range
3985         as ISO2022 of 8-bit environment and graphic plane 1 used only
3986         for DIMENSION1 charset.  This doesn't use any locking shift
3987         and single shift functions.  Assigned the coding-system (Lisp
3988         symbol) `iso-latin-1' by default.
3989
3990    o coding-category-iso-8-2
3991
3992         The category for a coding system which has the same code range
3993         as ISO2022 of 8-bit environment and graphic plane 1 used only
3994         for DIMENSION2 charset.  This doesn't use any locking shift
3995         and single shift functions.  Assigned the coding-system (Lisp
3996         symbol) `japanese-iso-8bit' by default.
3997
3998    o coding-category-iso-7-else
3999
4000         The category for a coding system which has the same code range
4001         as ISO2022 of 7-bit environment but uses locking shift or
4002         single shift functions.  Assigned the coding-system (Lisp
4003         symbol) `iso-2022-7bit-lock' by default.
4004
4005    o coding-category-iso-8-else
4006
4007         The category for a coding system which has the same code range
4008         as ISO2022 of 8-bit environment but uses locking shift or
4009         single shift functions.  Assigned the coding-system (Lisp
4010         symbol) `iso-2022-8bit-ss2' by default.
4011
4012    o coding-category-big5
4013
4014         The category for a coding system which has the same code range
4015         as BIG5.  Assigned the coding-system (Lisp symbol)
4016         `cn-big5' by default.
4017
4018    o coding-category-utf-8
4019
4020         The category for a coding system which has the same code range
4021         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
4022         symbol) `utf-8' by default.
4023
4024    o coding-category-utf-16-be
4025
4026         The category for a coding system in which a text has an
4027         Unicode signature (cf. Unicode Standard) in the order of BIG
4028         endian at the head.  Assigned the coding-system (Lisp symbol)
4029         `utf-16-be' by default.
4030
4031    o coding-category-utf-16-le
4032
4033         The category for a coding system in which a text has an
4034         Unicode signature (cf. Unicode Standard) in the order of
4035         LITTLE endian at the head.  Assigned the coding-system (Lisp
4036         symbol) `utf-16-le' by default.
4037
4038    o coding-category-ccl
4039
4040         The category for a coding system of which encoder/decoder is
4041         written in CCL programs.  The default value is nil, i.e., no
4042         coding system is assigned.
4043
4044    o coding-category-binary
4045
4046         The category for a coding system not categorized in any of the
4047         above.  Assigned the coding-system (Lisp symbol)
4048         `no-conversion' by default.
4049
4050    Each of them is a Lisp symbol and the value is an actual
4051    `coding-system' (this is also a Lisp symbol) assigned by a user.
4052    What Emacs does actually is to detect a category of coding system.
4053    Then, it uses a `coding-system' assigned to it.  If Emacs can't
4054    decide a single possible category, it selects a category of the
4055    highest priority.  Priorities of categories are also specified by a
4056    user in a Lisp variable `coding-category-list'.
4057
4058 */
4059
4060 static
4061 int ascii_skip_code[256];
     /* The table above is indexed by a byte value; detect_coding_mask
        skips a byte at the head of a text while the entry for it is
        nonzero.  The entries for ISO_CODE_SO, ISO_CODE_SI and
        ISO_CODE_ESC are rewritten on every call of detect_coding_mask;
        the other entries are presumably initialized elsewhere in this
        file -- confirm.  */
4062
4063 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
4064    If it detects possible coding systems, return an integer in which
4065    appropriate flag bits are set.  Flag bits are defined by macros
4066    CODING_CATEGORY_MASK_XXX in `coding.h'.  If PRIORITIES is non-NULL,
4067    it should point the table `coding_priorities'.  In that case, only
4068    the flag bit for a coding system of the highest priority is set in
4069    the returned value.  If MULTIBYTEP is nonzero, 8-bit codes of the
4070    range 0x80..0x9F are in multibyte form.
4071
4072    How many ASCII characters are at the head is returned as *SKIP.

        Return 0 if the whole text consists of bytes that are skipped.
        If PRIORITIES is NULL, the bits CODING_CATEGORY_MASK_RAW_TEXT and
        CODING_CATEGORY_MASK_BINARY are always set in a nonzero return
        value.  If PRIORITIES is non-NULL and no entry of it matches the
        detected categories, CODING_CATEGORY_MASK_RAW_TEXT is returned.  */
4073
4074 static int
4075 detect_coding_mask (source, src_bytes, priorities, skip, multibytep)
4076      unsigned char *source;
4077      int src_bytes, *priorities, *skip;
4078      int multibytep;
4079 {
4080   register unsigned char c;
4081   unsigned char *src = source, *src_end = source + src_bytes;
4082   unsigned int mask, utf16_examined_p, iso2022_examined_p;
4083   int i;
4084
4085   /* At first, skip all ASCII characters and control characters except
4086      for three ISO2022 specific control characters.  */
4087   ascii_skip_code[ISO_CODE_SO] = 0;
4088   ascii_skip_code[ISO_CODE_SI] = 0;
4089   ascii_skip_code[ISO_CODE_ESC] = 0;
4090
4091  label_loop_detect_coding:
4092   while (src < src_end && ascii_skip_code[*src]) src++;
4093   *skip = src - source;
4094
4095   if (src >= src_end)
4096     /* We found nothing other than ASCII.  There's nothing to do.  */
4097     return 0;
4098
4099   c = *src;
4100   /* The text seems to be encoded in some multilingual coding system.
4101      Now, try to find in which coding system the text is encoded.  */
4102   if (c < 0x80)
4103     {
4104       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
4105       /* C is an ISO2022 specific control code of C0.  */
4106       mask = detect_coding_iso2022 (src, src_end, multibytep);
4107       if (mask == 0)
4108         {
4109           /* No valid ISO2022 code follows C.  Try again.  From now on
                  the control code that failed (ESC alone, or SO and SI
                  together) is skipped like ordinary ASCII, so the scan
                  resumes at the next byte that is still of interest.  */
4110           src++;
4111           if (c == ISO_CODE_ESC)
4112             ascii_skip_code[ISO_CODE_ESC] = 1;
4113           else
4114             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
4115           goto label_loop_detect_coding;
4116         }
4117       if (priorities)
4118         {
               /* Return the first entry of PRIORITIES that shares a bit
                  with the detected ISO2022 categories.  */
4119           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
4120             {
4121               if (mask & priorities[i])
4122                 return priorities[i];
4123             }
4124           return CODING_CATEGORY_MASK_RAW_TEXT;
4125         }
4126     }
4127   else
4128     {
4129       int try;
4130
           /* In multibyte form an 8-bit control code is represented by
              LEADING_CODE_8_BIT_CONTROL followed by the code plus 0x20;
              recover the code itself.
              NOTE(review): src[1] is read without checking that it lies
              before SRC_END -- presumably multibyte text guarantees a
              byte after this leading code; confirm.  */
4131       if (multibytep && c == LEADING_CODE_8_BIT_CONTROL)
4132         c = src[1] - 0x20;
4133
4134       if (c < 0xA0)
4135         {
4136           /* C is the first byte of SJIS character code,
4137              or a leading-code of Emacs' internal format (emacs-mule),
4138              or the first byte of UTF-16.  */
4139           try = (CODING_CATEGORY_MASK_SJIS
4140                   | CODING_CATEGORY_MASK_EMACS_MULE
4141                   | CODING_CATEGORY_MASK_UTF_16_BE
4142                   | CODING_CATEGORY_MASK_UTF_16_LE);
4143
4144           /* Or, if C is a special latin extra code,
4145              or is an ISO2022 specific control code of C1 (SS2 or SS3),
4146              or is an ISO2022 control-sequence-introducer (CSI),
4147              we should also consider the possibility of ISO2022 codings.

                  NOTE(review): SRC still points at the byte C was read
                  from (it is not advanced after `c = *src'), and that
                  byte is >= 0x80 in this branch, so the comparisons of
                  *src with ']', '0', '1' and '2' below look as if they
                  can never succeed -- confirm whether the bytes after C
                  were meant to be examined.  */
4148           if ((VECTORP (Vlatin_extra_code_table)
4149                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
4150               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
4151               || (c == ISO_CODE_CSI
4152                   && (src < src_end
4153                       && (*src == ']'
4154                           || ((*src == '0' || *src == '1' || *src == '2')
4155                               && src + 1 < src_end
4156                               && src[1] == ']')))))
4157             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
4158                      | CODING_CATEGORY_MASK_ISO_8BIT);
4159         }
4160       else
4161         /* C is a character of ISO2022 in graphic plane right,
4162            or a SJIS's 1-byte character code (i.e. JISX0201),
4163            or the first byte of BIG5's 2-byte code,
4164            or the first byte of UTF-8/16.  */
4165         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
4166                 | CODING_CATEGORY_MASK_ISO_8BIT
4167                 | CODING_CATEGORY_MASK_SJIS
4168                 | CODING_CATEGORY_MASK_BIG5
4169                 | CODING_CATEGORY_MASK_UTF_8
4170                 | CODING_CATEGORY_MASK_UTF_16_BE
4171                 | CODING_CATEGORY_MASK_UTF_16_LE);
4172
4173       /* Or, we may have to consider the possibility of CCL.  */
4174       if (coding_system_table[CODING_CATEGORY_IDX_CCL]
4175           && (coding_system_table[CODING_CATEGORY_IDX_CCL]
4176               ->spec.ccl.valid_codes)[c])
4177         try |= CODING_CATEGORY_MASK_CCL;
4178
4179       mask = 0;
4180       utf16_examined_p = iso2022_examined_p = 0;
4181       if (priorities)
4182         {
               /* Run the detectors in priority order and stop at the
                  first entry of PRIORITIES whose category has been
                  detected so far.  The ISO2022 and UTF-16 detectors each
                  cover several categories, hence the flags that keep them
                  from being run twice.  */
4183           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
4184             {
4185               if (!iso2022_examined_p
4186                   && (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
4187                 {
4188                   mask |= detect_coding_iso2022 (src, src_end, multibytep);
4189                   iso2022_examined_p = 1;
4190                 }
4191               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
4192                 mask |= detect_coding_sjis (src, src_end, multibytep);
4193               else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8)
4194                 mask |= detect_coding_utf_8 (src, src_end, multibytep);
4195               else if (!utf16_examined_p
4196                        && (priorities[i] & try &
4197                            CODING_CATEGORY_MASK_UTF_16_BE_LE))
4198                 {
4199                   mask |= detect_coding_utf_16 (src, src_end, multibytep);
4200                   utf16_examined_p = 1;
4201                 }
4202               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
4203                 mask |= detect_coding_big5 (src, src_end, multibytep);
4204               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
4205                 mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4206               else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
4207                 mask |= detect_coding_ccl (src, src_end, multibytep);
4208               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
4209                 mask |= CODING_CATEGORY_MASK_RAW_TEXT;
4210               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
4211                 mask |= CODING_CATEGORY_MASK_BINARY;
4212               if (mask & priorities[i])
4213                 return priorities[i];
4214             }
4215           return CODING_CATEGORY_MASK_RAW_TEXT;
4216         }
           /* Without PRIORITIES, run every detector selected in TRY and
              accumulate all the categories they report.  */
4217       if (try & CODING_CATEGORY_MASK_ISO)
4218         mask |= detect_coding_iso2022 (src, src_end, multibytep);
4219       if (try & CODING_CATEGORY_MASK_SJIS)
4220         mask |= detect_coding_sjis (src, src_end, multibytep);
4221       if (try & CODING_CATEGORY_MASK_BIG5)
4222         mask |= detect_coding_big5 (src, src_end, multibytep);
4223       if (try & CODING_CATEGORY_MASK_UTF_8)
4224         mask |= detect_coding_utf_8 (src, src_end, multibytep);
4225       if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE)
4226         mask |= detect_coding_utf_16 (src, src_end, multibytep);
4227       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
4228         mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4229       if (try & CODING_CATEGORY_MASK_CCL)
4230         mask |= detect_coding_ccl (src, src_end, multibytep);
4231     }
4232   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
4233 }
4234
4235 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
4236    The information of the detected coding system is set in CODING.  */
4237
4238 void
4239 detect_coding (coding, src, src_bytes)
4240      struct coding_system *coding;
4241      const unsigned char *src;
4242      int src_bytes;
4243 {
4244   unsigned int idx;
4245   int skip, mask;
4246   Lisp_Object val;
4247
4248   val = Vcoding_category_list;
4249   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip,
4250                              coding->src_multibyte);
4251   coding->heading_ascii = skip;
4252
4253   if (!mask) return;
4254
4255   /* We found a single coding system of the highest priority in MASK.  */
4256   idx = 0;
4257   while (mask && ! (mask & 1)) mask >>= 1, idx++;
4258   if (! mask)
4259     idx = CODING_CATEGORY_IDX_RAW_TEXT;
4260
4261   val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[idx]);
4262
4263   if (coding->eol_type != CODING_EOL_UNDECIDED)
4264     {
4265       Lisp_Object tmp;
4266
4267       tmp = Fget (val, Qeol_type);
4268       if (VECTORP (tmp))
4269         val = XVECTOR (tmp)->contents[coding->eol_type];
4270     }
4271
4272   /* Setup this new coding system while preserving some slots.  */
4273   {
4274     int src_multibyte = coding->src_multibyte;
4275     int dst_multibyte = coding->dst_multibyte;
4276
4277     setup_coding_system (val, coding);
4278     coding->src_multibyte = src_multibyte;
4279     coding->dst_multibyte = dst_multibyte;
4280     coding->heading_ascii = skip;
4281   }
4282 }
4283
/* Detect how end-of-line of a text of length SRC_BYTES pointed by
   SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
   CODING_EOL_CR, CODING_EOL_INCONSISTENT (the end-of-lines examined
   are not all of the same kind), and CODING_EOL_UNDECIDED (no
   end-of-line was found).  At most MAX_EOL_CHECK_COUNT end-of-lines
   are examined, and the scan stops at the first inconsistency.

   How many non-eol characters are at the head is returned as *SKIP,
   i.e. the offset of the first end-of-line, or SRC_BYTES if the text
   contains none.  */

#define MAX_EOL_CHECK_COUNT 3

static int
detect_eol_type (source, src_bytes, skip)
     unsigned char *source;
     int src_bytes, *skip;
{
  unsigned char *src = source, *src_end = src + src_bytes;
  unsigned char c;
  int total = 0;                /* How many end-of-lines are found so far.  */
  int eol_type = CODING_EOL_UNDECIDED;
  int this_eol_type;

  /* Until an end-of-line shows up, the whole text counts as heading
     non-eol characters.  */
  *skip = src_bytes;

  while (src < src_end && total < MAX_EOL_CHECK_COUNT)
    {
      c = *src++;
      if (c != '\n' && c != '\r')
        continue;

      /* Record the offset of the first end-of-line only.  TOTAL, not
         the value of *SKIP, tells whether this is the first one,
         because 0 is a valid offset when the text begins with an
         end-of-line.  */
      if (total == 0)
        *skip = src - 1 - source;
      total++;

      if (c == '\n')
        this_eol_type = CODING_EOL_LF;
      else if (src >= src_end || *src != '\n')
        this_eol_type = CODING_EOL_CR;
      else
        /* CR followed by LF; consume the LF too.  */
        this_eol_type = CODING_EOL_CRLF, src++;

      if (eol_type == CODING_EOL_UNDECIDED)
        /* This is the first end-of-line.  */
        eol_type = this_eol_type;
      else if (eol_type != this_eol_type)
        {
          /* The found type is different from what found before.  */
          eol_type = CODING_EOL_INCONSISTENT;
          break;
        }
    }

  return eol_type;
}
4336
4337 /* Like detect_eol_type, but detect EOL type in 2-octet
4338    big-endian/little-endian format for coding systems utf-16-be and
4339    utf-16-le.  */
4340
4341 static int
4342 detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
4343      unsigned char *source;
4344      int src_bytes, *skip, big_endian_p;
4345 {
4346   unsigned char *src = source, *src_end = src + src_bytes;
4347   unsigned int c1, c2;
4348   int total = 0;                /* How many end-of-lines are found so far.  */
4349   int eol_type = CODING_EOL_UNDECIDED;
4350   int this_eol_type;
4351   int msb, lsb;
4352
4353   if (big_endian_p)
4354     msb = 0, lsb = 1;
4355   else
4356     msb = 1, lsb = 0;
4357
4358   *skip = 0;
4359
4360   while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT)
4361     {
4362       c1 = (src[msb] << 8) | (src[lsb]);
4363       src += 2;
4364
4365       if (c1 == '\n' || c1 == '\r')
4366         {
4367           if (*skip == 0)
4368             *skip = src - 2 - source;
4369           total++;
4370           if (c1 == '\n')
4371             {
4372               this_eol_type = CODING_EOL_LF;
4373             }
4374           else
4375             {
4376               if ((src + 1) >= src_end)
4377                 {
4378                   this_eol_type = CODING_EOL_CR;
4379                 }
4380               else
4381                 {
4382                   c2 = (src[msb] << 8) | (src[lsb]);
4383                   if (c2 == '\n')
4384                     this_eol_type = CODING_EOL_CRLF, src += 2;
4385                   else
4386                     this_eol_type = CODING_EOL_CR;
4387                 }
4388             }
4389
4390           if (eol_type == CODING_EOL_UNDECIDED)
4391             /* This is the first end-of-line.  */
4392             eol_type = this_eol_type;
4393           else if (eol_type != this_eol_type)
4394             {
4395               /* The found type is different from what found before.  */
4396               eol_type = CODING_EOL_INCONSISTENT;
4397               break;
4398             }
4399         }
4400     }
4401
4402   if (*skip == 0)
4403     *skip = src_end - source;
4404   return eol_type;
4405 }
4406
4407 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
4408    is encoded.  If it detects an appropriate format of end-of-line, it
4409    sets the information in *CODING.  */
4410
4411 void
4412 detect_eol (coding, src, src_bytes)
4413      struct coding_system *coding;
4414      const unsigned char *src;
4415      int src_bytes;
4416 {
4417   Lisp_Object val;
4418   int skip;
4419   int eol_type;
4420
4421   switch (coding->category_idx)
4422     {
4423     case CODING_CATEGORY_IDX_UTF_16_BE:
4424       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 1);
4425       break;
4426     case CODING_CATEGORY_IDX_UTF_16_LE:
4427       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 0);
4428       break;
4429     default:
4430       eol_type = detect_eol_type (src, src_bytes, &skip);
4431       break;
4432     }
4433
4434   if (coding->heading_ascii > skip)
4435     coding->heading_ascii = skip;
4436   else
4437     skip = coding->heading_ascii;
4438
4439   if (eol_type == CODING_EOL_UNDECIDED)
4440     return;
4441   if (eol_type == CODING_EOL_INCONSISTENT)
4442     {
4443 #if 0
4444       /* This code is suppressed until we find a better way to
4445          distinguish raw text file and binary file.  */
4446
4447       /* If we have already detected that the coding is raw-text, the
4448          coding should actually be no-conversion.  */
4449       if (coding->type == coding_type_raw_text)
4450         {
4451           setup_coding_system (Qno_conversion, coding);
4452           return;
4453         }
4454       /* Else, let's decode only text code anyway.  */
4455 #endif /* 0 */
4456       eol_type = CODING_EOL_LF;
4457     }
4458
4459   val = Fget (coding->symbol, Qeol_type);
4460   if (VECTORP (val) && XVECTOR (val)->size == 3)
4461     {
4462       int src_multibyte = coding->src_multibyte;
4463       int dst_multibyte = coding->dst_multibyte;
4464       struct composition_data *cmp_data = coding->cmp_data;
4465
4466       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
4467       coding->src_multibyte = src_multibyte;
4468       coding->dst_multibyte = dst_multibyte;
4469       coding->heading_ascii = skip;
4470       coding->cmp_data = cmp_data;
4471     }
4472 }
4473
/* Slack (in bytes) added on top of every conversion buffer size
   estimate.  */
#define CONVERSION_BUFFER_EXTRA_ROOM 256

/* Factor by which the size of a text may grow when decoded by
   CODING (a pointer to struct coding_system): 3 for ISO2022, the
   magnification declared by the decoder CCL program for CCL, and 2
   for everything else.  CODING is parenthesized so that any pointer
   expression may be passed.  */
#define DECODING_BUFFER_MAG(coding)                     \
  ((coding)->type == coding_type_iso2022                \
   ? 3                                                  \
   : ((coding)->type == coding_type_ccl                 \
      ? (coding)->spec.ccl.decoder.buf_magnification    \
      : 2))
4482
4483 /* Return maximum size (bytes) of a buffer enough for decoding
4484    SRC_BYTES of text encoded in CODING.  */
4485
4486 int
4487 decoding_buffer_size (coding, src_bytes)
4488      struct coding_system *coding;
4489      int src_bytes;
4490 {
4491   return (src_bytes * DECODING_BUFFER_MAG (coding)
4492           + CONVERSION_BUFFER_EXTRA_ROOM);
4493 }
4494
4495 /* Return maximum size (bytes) of a buffer enough for encoding
4496    SRC_BYTES of text to CODING.  */
4497
4498 int
4499 encoding_buffer_size (coding, src_bytes)
4500      struct coding_system *coding;
4501      int src_bytes;
4502 {
4503   int magnification;
4504
4505   if (coding->type == coding_type_ccl)
4506     magnification = coding->spec.ccl.encoder.buf_magnification;
4507   else if (CODING_REQUIRE_ENCODING (coding))
4508     magnification = 3;
4509   else
4510     magnification = 1;
4511
4512   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
4513 }
4514
/* Working buffer for code conversion.  */
struct conversion_buffer
{
  int size;                     /* Number of bytes allocated at DATA.  */
  int on_stack;                 /* 1 if allocated by alloca, 0 if by
                                   xmalloc.  */
  unsigned char *data;          /* The storage itself.  */
};

/* Don't use alloca for allocating memory space larger than this, lest
   we overflow their stack.  The value is parenthesized so that it
   can be used safely inside any expression.  */
#define MAX_ALLOCA (16*1024)

/* Allocate LEN bytes of memory for BUF (struct conversion_buffer).
   BUF must be an lvalue of the structure type (not a pointer).
   Requests shorter than MAX_ALLOCA are served by alloca, the others
   by xmalloc; BUF.on_stack records which.  This has to stay a macro
   because memory obtained with alloca belongs to the calling
   function.  LEN is evaluated more than once, so it must not have
   side effects.  */
#define allocate_conversion_buffer(buf, len)            \
  do {                                                  \
    if ((len) < MAX_ALLOCA)                             \
      {                                                 \
        (buf).data = (unsigned char *) alloca (len);    \
        (buf).on_stack = 1;                             \
      }                                                 \
    else                                                \
      {                                                 \
        (buf).data = (unsigned char *) xmalloc (len);   \
        (buf).on_stack = 0;                             \
      }                                                 \
    (buf).size = (len);                                 \
  } while (0)
4542
4543 /* Double the allocated memory for *BUF.  */
4544 static void
4545 extend_conversion_buffer (buf)
4546      struct conversion_buffer *buf;
4547 {
4548   if (buf->on_stack)
4549     {
4550       unsigned char *save = buf->data;
4551       buf->data = (unsigned char *) xmalloc (buf->size * 2);
4552       bcopy (save, buf->data, buf->size);
4553       buf->on_stack = 0;
4554     }
4555   else
4556     {
4557       buf->data = (unsigned char *) xrealloc (buf->data, buf->size * 2);
4558     }
4559   buf->size *= 2;
4560 }
4561
4562 /* Free the allocated memory for BUF if it is not on stack.  */
4563 static void
4564 free_conversion_buffer (buf)
4565      struct conversion_buffer *buf;
4566 {
4567   if (!buf->on_stack)
4568     xfree (buf->data);
4569 }
4570
4571 int
4572 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
4573      struct coding_system *coding;
4574      unsigned char *source, *destination;
4575      int src_bytes, dst_bytes, encodep;
4576 {
4577   struct ccl_program *ccl
4578     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
4579   unsigned char *dst = destination;
4580
4581   ccl->suppress_error = coding->suppress_error;
4582   ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4583   if (encodep)
4584     {
4585       /* On encoding, EOL format is converted within ccl_driver.  For
4586          that, setup proper information in the structure CCL.  */
4587       ccl->eol_type = coding->eol_type;
4588       if (ccl->eol_type ==CODING_EOL_UNDECIDED)
4589         ccl->eol_type = CODING_EOL_LF;
4590       ccl->cr_consumed = coding->spec.ccl.cr_carryover;
4591       ccl->eight_bit_control = coding->dst_multibyte;
4592     }
4593   else
4594     ccl->eight_bit_control = 1;
4595   ccl->multibyte = coding->src_multibyte;
4596   if (coding->spec.ccl.eight_bit_carryover[0] != 0)
4597     {
4598       /* Move carryover bytes to DESTINATION.  */
4599       unsigned char *p = coding->spec.ccl.eight_bit_carryover;
4600       while (*p)
4601         *dst++ = *p++;
4602       coding->spec.ccl.eight_bit_carryover[0] = 0;
4603       if (dst_bytes)
4604         dst_bytes -= dst - destination;
4605     }
4606
4607   coding->produced = (ccl_driver (ccl, source, dst, src_bytes, dst_bytes,
4608                                   &(coding->consumed))
4609                       + dst - destination);
4610
4611   if (encodep)
4612     {
4613       coding->produced_char = coding->produced;
4614       coding->spec.ccl.cr_carryover = ccl->cr_consumed;
4615     }
4616   else if (!ccl->eight_bit_control)
4617     {
4618       /* The produced bytes forms a valid multibyte sequence. */
4619       coding->produced_char
4620         = multibyte_chars_in_text (destination, coding->produced);
4621       coding->spec.ccl.eight_bit_carryover[0] = 0;
4622     }
4623   else
4624     {
4625       /* On decoding, the destination should always multibyte.  But,
4626          CCL program might have been generated an invalid multibyte
4627          sequence.  Here we make such a sequence valid as
4628          multibyte.  */
4629       int bytes
4630         = dst_bytes ? dst_bytes : source + coding->consumed - destination;
4631
4632       if ((coding->consumed < src_bytes
4633            || !ccl->last_block)
4634           && coding->produced >= 1
4635           && destination[coding->produced - 1] >= 0x80)
4636         {
4637           /* We should not convert the tailing 8-bit codes to
4638              multibyte form even if they doesn't form a valid
4639              multibyte sequence.  They may form a valid sequence in
4640              the next call.  */
4641           int carryover = 0;
4642
4643           if (destination[coding->produced - 1] < 0xA0)
4644             carryover = 1;
4645           else if (coding->produced >= 2)
4646             {
4647               if (destination[coding->produced - 2] >= 0x80)
4648                 {
4649                   if (destination[coding->produced - 2] < 0xA0)
4650                     carryover = 2;
4651                   else if (coding->produced >= 3
4652                            && destination[coding->produced - 3] >= 0x80
4653                            && destination[coding->produced - 3] < 0xA0)
4654                     carryover = 3;
4655                 }
4656             }
4657           if (carryover > 0)
4658             {
4659               BCOPY_SHORT (destination + coding->produced - carryover,
4660                            coding->spec.ccl.eight_bit_carryover,
4661                            carryover);
4662               coding->spec.ccl.eight_bit_carryover[carryover] = 0;
4663               coding->produced -= carryover;
4664             }
4665         }
4666       coding->produced = str_as_multibyte (destination, bytes,
4667                                            coding->produced,
4668                                            &(coding->produced_char));
4669     }
4670
4671   switch (ccl->status)
4672     {
4673     case CCL_STAT_SUSPEND_BY_SRC:
4674       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
4675       break;
4676     case CCL_STAT_SUSPEND_BY_DST:
4677       coding->result = CODING_FINISH_INSUFFICIENT_DST;
4678       break;
4679     case CCL_STAT_QUIT:
4680     case CCL_STAT_INVALID_CMD:
4681       coding->result = CODING_FINISH_INTERRUPT;
4682       break;
4683     default:
4684       coding->result = CODING_FINISH_NORMAL;
4685       break;
4686     }
4687   return coding->result;
4688 }
4689
4690 /* Decode EOL format of the text at PTR of BYTES length destructively
4691    according to CODING->eol_type.  This is called after the CCL
4692    program produced a decoded text at PTR.  If we do CRLF->LF
4693    conversion, update CODING->produced and CODING->produced_char.  */
4694
4695 static void
4696 decode_eol_post_ccl (coding, ptr, bytes)
4697      struct coding_system *coding;
4698      unsigned char *ptr;
4699      int bytes;
4700 {
4701   Lisp_Object val, saved_coding_symbol;
4702   unsigned char *pend = ptr + bytes;
4703   int dummy;
4704
4705   /* Remember the current coding system symbol.  We set it back when
4706      an inconsistent EOL is found so that `last-coding-system-used' is
4707      set to the coding system that doesn't specify EOL conversion.  */
4708   saved_coding_symbol = coding->symbol;
4709
4710   coding->spec.ccl.cr_carryover = 0;
4711   if (coding->eol_type == CODING_EOL_UNDECIDED)
4712     {
4713       /* Here, to avoid the call of setup_coding_system, we directly
4714          call detect_eol_type.  */
4715       coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
4716       if (coding->eol_type == CODING_EOL_INCONSISTENT)
4717         coding->eol_type = CODING_EOL_LF;
4718       if (coding->eol_type != CODING_EOL_UNDECIDED)
4719         {
4720           val = Fget (coding->symbol, Qeol_type);
4721           if (VECTORP (val) && XVECTOR (val)->size == 3)
4722             coding->symbol = XVECTOR (val)->contents[coding->eol_type];
4723         }
4724       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4725     }
4726
4727   if (coding->eol_type == CODING_EOL_LF
4728       || coding->eol_type == CODING_EOL_UNDECIDED)
4729     {
4730       /* We have nothing to do.  */
4731       ptr = pend;
4732     }
4733   else if (coding->eol_type == CODING_EOL_CRLF)
4734     {
4735       unsigned char *pstart = ptr, *p = ptr;
4736
4737       if (! (coding->mode & CODING_MODE_LAST_BLOCK)
4738           && *(pend - 1) == '\r')
4739         {
4740           /* If the last character is CR, we can't handle it here
4741              because LF will be in the not-yet-decoded source text.
4742              Record that the CR is not yet processed.  */
4743           coding->spec.ccl.cr_carryover = 1;
4744           coding->produced--;
4745           coding->produced_char--;
4746           pend--;
4747         }
4748       while (ptr < pend)
4749         {
4750           if (*ptr == '\r')
4751             {
4752               if (ptr + 1 < pend && *(ptr + 1) == '\n')
4753                 {
4754                   *p++ = '\n';
4755                   ptr += 2;
4756                 }
4757               else
4758                 {
4759                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4760                     goto undo_eol_conversion;
4761                   *p++ = *ptr++;
4762                 }
4763             }
4764           else if (*ptr == '\n'
4765                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4766             goto undo_eol_conversion;
4767           else
4768             *p++ = *ptr++;
4769           continue;
4770
4771         undo_eol_conversion:
4772           /* We have faced with inconsistent EOL format at PTR.
4773              Convert all LFs before PTR back to CRLFs.  */
4774           for (p--, ptr--; p >= pstart; p--)
4775             {
4776               if (*p == '\n')
4777                 *ptr-- = '\n', *ptr-- = '\r';
4778               else
4779                 *ptr-- = *p;
4780             }
4781           /*  If carryover is recorded, cancel it because we don't
4782               convert CRLF anymore.  */
4783           if (coding->spec.ccl.cr_carryover)
4784             {
4785               coding->spec.ccl.cr_carryover = 0;
4786               coding->produced++;
4787               coding->produced_char++;
4788               pend++;
4789             }
4790           p = ptr = pend;
4791           coding->eol_type = CODING_EOL_LF;
4792           coding->symbol = saved_coding_symbol;
4793         }
4794       if (p < pend)
4795         {
4796           /* As each two-byte sequence CRLF was converted to LF, (PEND
4797              - P) is the number of deleted characters.  */
4798           coding->produced -= pend - p;
4799           coding->produced_char -= pend - p;
4800         }
4801     }
4802   else                  /* i.e. coding->eol_type == CODING_EOL_CR */
4803     {
4804       unsigned char *p = ptr;
4805
4806       for (; ptr < pend; ptr++)
4807         {
4808           if (*ptr == '\r')
4809             *ptr = '\n';
4810           else if (*ptr == '\n'
4811                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4812             {
4813               for (; p < ptr; p++)
4814                 {
4815                   if (*p == '\n')
4816                     *p = '\r';
4817                 }
4818               ptr = pend;
4819               coding->eol_type = CODING_EOL_LF;
4820               coding->symbol = saved_coding_symbol;
4821             }
4822         }
4823     }
4824 }
4825
4826 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
4827    decoding, it may detect coding system and format of end-of-line if
4828    those are not yet decided.  The source should be unibyte, the
4829    result is multibyte if CODING->dst_multibyte is nonzero, else
4830    unibyte.  */
4831
4832 int
4833 decode_coding (coding, source, destination, src_bytes, dst_bytes)
4834      struct coding_system *coding;
4835      const unsigned char *source;
4836      unsigned char *destination;
4837      int src_bytes, dst_bytes;
4838 {
4839   int extra = 0;
4840
4841   if (coding->type == coding_type_undecided)
4842     detect_coding (coding, source, src_bytes);
4843
4844   if (coding->eol_type == CODING_EOL_UNDECIDED
4845       && coding->type != coding_type_ccl)
4846     {
4847       detect_eol (coding, source, src_bytes);
4848       /* We had better recover the original eol format if we
4849          encounter an inconsistent eol format while decoding.  */
4850       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4851     }
4852
4853   coding->produced = coding->produced_char = 0;
4854   coding->consumed = coding->consumed_char = 0;
4855   coding->errors = 0;
4856   coding->result = CODING_FINISH_NORMAL;
4857
4858   switch (coding->type)
4859     {
4860     case coding_type_sjis:
4861       decode_coding_sjis_big5 (coding, source, destination,
4862                                src_bytes, dst_bytes, 1);
4863       break;
4864
4865     case coding_type_iso2022:
4866       decode_coding_iso2022 (coding, source, destination,
4867                              src_bytes, dst_bytes);
4868       break;
4869
4870     case coding_type_big5:
4871       decode_coding_sjis_big5 (coding, source, destination,
4872                                src_bytes, dst_bytes, 0);
4873       break;
4874
4875     case coding_type_emacs_mule:
4876       decode_coding_emacs_mule (coding, source, destination,
4877                                 src_bytes, dst_bytes);
4878       break;
4879
4880     case coding_type_ccl:
4881       if (coding->spec.ccl.cr_carryover)
4882         {
4883           /* Put the CR which was not processed by the previous call
4884              of decode_eol_post_ccl in DESTINATION.  It will be
4885              decoded together with the following LF by the call to
4886              decode_eol_post_ccl below.  */
4887           *destination = '\r';
4888           coding->produced++;
4889           coding->produced_char++;
4890           dst_bytes--;
4891           extra = coding->spec.ccl.cr_carryover;
4892         }
4893       ccl_coding_driver (coding, source, destination + extra,
4894                          src_bytes, dst_bytes, 0);
4895       if (coding->eol_type != CODING_EOL_LF)
4896         {
4897           coding->produced += extra;
4898           coding->produced_char += extra;
4899           decode_eol_post_ccl (coding, destination, coding->produced);
4900         }
4901       break;
4902
4903     default:
4904       decode_eol (coding, source, destination, src_bytes, dst_bytes);
4905     }
4906
4907   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4908       && coding->mode & CODING_MODE_LAST_BLOCK
4909       && coding->consumed == src_bytes)
4910     coding->result = CODING_FINISH_NORMAL;
4911
4912   if (coding->mode & CODING_MODE_LAST_BLOCK
4913       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4914     {
4915       const unsigned char *src = source + coding->consumed;
4916       unsigned char *dst = destination + coding->produced;
4917
4918       src_bytes -= coding->consumed;
4919       coding->errors++;
4920       if (COMPOSING_P (coding))
4921         DECODE_COMPOSITION_END ('1');
4922       while (src_bytes--)
4923         {
4924           int c = *src++;
4925           dst += CHAR_STRING (c, dst);
4926           coding->produced_char++;
4927         }
4928       coding->consumed = coding->consumed_char = src - source;
4929       coding->produced = dst - destination;
4930       coding->result = CODING_FINISH_NORMAL;
4931     }
4932
4933   if (!coding->dst_multibyte)
4934     {
4935       coding->produced = str_as_unibyte (destination, coding->produced);
4936       coding->produced_char = coding->produced;
4937     }
4938
4939   return coding->result;
4940 }
4941
4942 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  The
4943    multibyteness of the source is CODING->src_multibyte, the
4944    multibyteness of the result is always unibyte.  */
4945
4946 int
4947 encode_coding (coding, source, destination, src_bytes, dst_bytes)
4948      struct coding_system *coding;
4949      const unsigned char *source;
4950      unsigned char *destination;
4951      int src_bytes, dst_bytes;
4952 {
4953   coding->produced = coding->produced_char = 0;
4954   coding->consumed = coding->consumed_char = 0;
4955   coding->errors = 0;
4956   coding->result = CODING_FINISH_NORMAL;
4957
4958   switch (coding->type)
4959     {
4960     case coding_type_sjis:
4961       encode_coding_sjis_big5 (coding, source, destination,
4962                                src_bytes, dst_bytes, 1);
4963       break;
4964
4965     case coding_type_iso2022:
4966       encode_coding_iso2022 (coding, source, destination,
4967                              src_bytes, dst_bytes);
4968       break;
4969
4970     case coding_type_big5:
4971       encode_coding_sjis_big5 (coding, source, destination,
4972                                src_bytes, dst_bytes, 0);
4973       break;
4974
4975     case coding_type_emacs_mule:
4976       encode_coding_emacs_mule (coding, source, destination,
4977                                 src_bytes, dst_bytes);
4978       break;
4979
4980     case coding_type_ccl:
4981       ccl_coding_driver (coding, source, destination,
4982                          src_bytes, dst_bytes, 1);
4983       break;
4984
4985     default:
4986       encode_eol (coding, source, destination, src_bytes, dst_bytes);
4987     }
4988
4989   if (coding->mode & CODING_MODE_LAST_BLOCK
4990       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4991     {
4992       const unsigned char *src = source + coding->consumed;
4993       unsigned char *dst = destination + coding->produced;
4994
4995       if (coding->type == coding_type_iso2022)
4996         ENCODE_RESET_PLANE_AND_REGISTER;
4997       if (COMPOSING_P (coding))
4998         *dst++ = ISO_CODE_ESC, *dst++ = '1';
4999       if (coding->consumed < src_bytes)
5000         {
5001           int len = src_bytes - coding->consumed;
5002
5003           BCOPY_SHORT (src, dst, len);
5004           if (coding->src_multibyte)
5005             len = str_as_unibyte (dst, len);
5006           dst += len;
5007           coding->consumed = src_bytes;
5008         }
5009       coding->produced = coding->produced_char = dst - destination;
5010       coding->result = CODING_FINISH_NORMAL;
5011     }
5012
5013   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
5014       && coding->consumed == src_bytes)
5015     coding->result = CODING_FINISH_NORMAL;
5016
5017   return coding->result;
5018 }
5019
5020 /* Scan text in the region between *BEG and *END (byte positions),
5021    skip characters which we don't have to decode by coding system
5022    CODING at the head and tail, then set *BEG and *END to the region
5023    of the text we actually have to convert.  The caller should move
5024    the gap out of the region in advance if the region is from a
5025    buffer.
5026
5027    If STR is not NULL, *BEG and *END are indices into STR.  */
5028
5029 static void
5030 shrink_decoding_region (beg, end, coding, str)
5031      int *beg, *end;
5032      struct coding_system *coding;
5033      unsigned char *str;
5034 {
5035   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
5036   int eol_conversion;
5037   Lisp_Object translation_table;
5038
5039   if (coding->type == coding_type_ccl
5040       || coding->type == coding_type_undecided
5041       || coding->eol_type != CODING_EOL_LF
5042       || !NILP (coding->post_read_conversion)
5043       || coding->composing != COMPOSITION_DISABLED)
5044     {
5045       /* We can't skip any data.  */
5046       return;
5047     }
5048   if (coding->type == coding_type_no_conversion
5049       || coding->type == coding_type_raw_text
5050       || coding->type == coding_type_emacs_mule)
5051     {
5052       /* We need no conversion, but don't have to skip any data here.
5053          Decoding routine handles them effectively anyway.  */
5054       return;
5055     }
5056
5057   translation_table = coding->translation_table_for_decode;
5058   if (NILP (translation_table) && !NILP (Venable_character_translation))
5059     translation_table = Vstandard_translation_table_for_decode;
5060   if (CHAR_TABLE_P (translation_table))
5061     {
5062       int i;
5063       for (i = 0; i < 128; i++)
5064         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
5065           break;
5066       if (i < 128)
5067         /* Some ASCII character should be translated.  We give up
5068            shrinking.  */
5069         return;
5070     }
5071
5072   if (coding->heading_ascii >= 0)
5073     /* Detection routine has already found how much we can skip at the
5074        head.  */
5075     *beg += coding->heading_ascii;
5076
5077   if (str)
5078     {
5079       begp_orig = begp = str + *beg;
5080       endp_orig = endp = str + *end;
5081     }
5082   else
5083     {
5084       begp_orig = begp = BYTE_POS_ADDR (*beg);
5085       endp_orig = endp = begp + *end - *beg;
5086     }
5087
5088   eol_conversion = (coding->eol_type == CODING_EOL_CR
5089                     || coding->eol_type == CODING_EOL_CRLF);
5090
5091   switch (coding->type)
5092     {
5093     case coding_type_sjis:
5094     case coding_type_big5:
5095       /* We can skip all ASCII characters at the head.  */
5096       if (coding->heading_ascii < 0)
5097         {
5098           if (eol_conversion)
5099             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
5100           else
5101             while (begp < endp && *begp < 0x80) begp++;
5102         }
5103       /* We can skip all ASCII characters at the tail except for the
5104          second byte of SJIS or BIG5 code.  */
5105       if (eol_conversion)
5106         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
5107       else
5108         while (begp < endp && endp[-1] < 0x80) endp--;
5109       /* Do not consider LF as ascii if preceded by CR, since that
5110          confuses eol decoding. */
5111       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
5112         endp++;
5113       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
5114         endp++;
5115       break;
5116
5117     case coding_type_iso2022:
5118       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
5119         /* We can't skip any data.  */
5120         break;
5121       if (coding->heading_ascii < 0)
5122         {
5123           /* We can skip all ASCII characters at the head except for a
5124              few control codes.  */
5125           while (begp < endp && (c = *begp) < 0x80
5126                  && c != ISO_CODE_CR && c != ISO_CODE_SO
5127                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
5128                  && (!eol_conversion || c != ISO_CODE_LF))
5129             begp++;
5130         }
5131       switch (coding->category_idx)
5132         {
5133         case CODING_CATEGORY_IDX_ISO_8_1:
5134         case CODING_CATEGORY_IDX_ISO_8_2:
5135           /* We can skip all ASCII characters at the tail.  */
5136           if (eol_conversion)
5137             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
5138           else
5139             while (begp < endp && endp[-1] < 0x80) endp--;
5140           /* Do not consider LF as ascii if preceded by CR, since that
5141              confuses eol decoding. */
5142           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
5143             endp++;
5144           break;
5145
5146         case CODING_CATEGORY_IDX_ISO_7:
5147         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
5148           {
5149             /* We can skip all characters at the tail except for 8-bit
5150                codes and ESC and the following 2-byte at the tail.  */
5151             unsigned char *eight_bit = NULL;
5152
5153             if (eol_conversion)
5154               while (begp < endp
5155                      && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
5156                 {
5157                   if (!eight_bit && c & 0x80) eight_bit = endp;
5158                   endp--;
5159                 }
5160             else
5161               while (begp < endp
5162                      && (c = endp[-1]) != ISO_CODE_ESC)
5163                 {
5164                   if (!eight_bit && c & 0x80) eight_bit = endp;
5165                   endp--;
5166                 }
5167             /* Do not consider LF as ascii if preceded by CR, since that
5168                confuses eol decoding. */
5169             if (begp < endp && endp < endp_orig
5170                 && endp[-1] == '\r' && endp[0] == '\n')
5171               endp++;
5172             if (begp < endp && endp[-1] == ISO_CODE_ESC)
5173               {
5174                 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
5175                   /* This is an ASCII designation sequence.  We can
5176                      surely skip the tail.  But, if we have
5177                      encountered an 8-bit code, skip only the codes
5178                      after that.  */
5179                   endp = eight_bit ? eight_bit : endp + 2;
5180                 else
5181                   /* Hmmm, we can't skip the tail.  */
5182                   endp = endp_orig;
5183               }
5184             else if (eight_bit)
5185               endp = eight_bit;
5186           }
5187         }
5188       break;
5189
5190     default:
5191       abort ();
5192     }
5193   *beg += begp - begp_orig;
5194   *end += endp - endp_orig;
5195   return;
5196 }
5197
5198 /* Like shrink_decoding_region but for encoding.  */
5199
5200 static void
5201 shrink_encoding_region (beg, end, coding, str)
5202      int *beg, *end;
5203      struct coding_system *coding;
5204      unsigned char *str;
5205 {
5206   unsigned char *begp_orig, *begp, *endp_orig, *endp;
5207   int eol_conversion;
5208   Lisp_Object translation_table;
5209
5210   if (coding->type == coding_type_ccl
5211       || coding->eol_type == CODING_EOL_CRLF
5212       || coding->eol_type == CODING_EOL_CR
5213       || (coding->cmp_data && coding->cmp_data->used > 0))
5214     {
5215       /* We can't skip any data.  */
5216       return;
5217     }
5218   if (coding->type == coding_type_no_conversion
5219       || coding->type == coding_type_raw_text
5220       || coding->type == coding_type_emacs_mule
5221       || coding->type == coding_type_undecided)
5222     {
5223       /* We need no conversion, but don't have to skip any data here.
5224          Encoding routine handles them effectively anyway.  */
5225       return;
5226     }
5227
5228   translation_table = coding->translation_table_for_encode;
5229   if (NILP (translation_table) && !NILP (Venable_character_translation))
5230     translation_table = Vstandard_translation_table_for_encode;
5231   if (CHAR_TABLE_P (translation_table))
5232     {
5233       int i;
5234       for (i = 0; i < 128; i++)
5235         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
5236           break;
5237       if (i < 128)
5238         /* Some ASCII character should be translated.  We give up
5239            shrinking.  */
5240         return;
5241     }
5242
5243   if (str)
5244     {
5245       begp_orig = begp = str + *beg;
5246       endp_orig = endp = str + *end;
5247     }
5248   else
5249     {
5250       begp_orig = begp = BYTE_POS_ADDR (*beg);
5251       endp_orig = endp = begp + *end - *beg;
5252     }
5253
5254   eol_conversion = (coding->eol_type == CODING_EOL_CR
5255                     || coding->eol_type == CODING_EOL_CRLF);
5256
5257   /* Here, we don't have to check coding->pre_write_conversion because
5258      the caller is expected to have handled it already.  */
5259   switch (coding->type)
5260     {
5261     case coding_type_iso2022:
5262       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
5263         /* We can't skip any data.  */
5264         break;
5265       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
5266         {
5267           unsigned char *bol = begp;
5268           while (begp < endp && *begp < 0x80)
5269             {
5270               begp++;
5271               if (begp[-1] == '\n')
5272                 bol = begp;
5273             }
5274           begp = bol;
5275           goto label_skip_tail;
5276         }
5277       /* fall down ... */
5278
5279     case coding_type_sjis:
5280     case coding_type_big5:
5281       /* We can skip all ASCII characters at the head and tail.  */
5282       if (eol_conversion)
5283         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
5284       else
5285         while (begp < endp && *begp < 0x80) begp++;
5286     label_skip_tail:
5287       if (eol_conversion)
5288         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
5289       else
5290         while (begp < endp && *(endp - 1) < 0x80) endp--;
5291       break;
5292
5293     default:
5294       abort ();
5295     }
5296
5297   *beg += begp - begp_orig;
5298   *end += endp - endp_orig;
5299   return;
5300 }
5301
/* Shrinking the conversion region carries some overhead of its own,
   so it is attempted only when the length of the region is greater
   than this value.  */
static int shrink_conversion_region_threshhold = 1024;

/* If the region *BEG..*END is longer than
   shrink_conversion_region_threshhold, narrow it by calling
   shrink_encoding_region (when ENCODEP is nonzero) or
   shrink_decoding_region (when ENCODEP is zero).  BEG and END are
   evaluated more than once.  */
#define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep)        \
  do {                                                                  \
    if (*(end) - *(beg) > shrink_conversion_region_threshhold)          \
      {                                                                 \
        if (!(encodep))                                                 \
          shrink_decoding_region (beg, end, coding, str);               \
        else                                                            \
          shrink_encoding_region (beg, end, coding, str);               \
      }                                                                 \
  } while (0)
5315
5316 static Lisp_Object
5317 code_convert_region_unwind (arg)
5318      Lisp_Object arg;
5319 {
5320   inhibit_pre_post_conversion = 0;
5321   Vlast_coding_system_used = arg;
5322   return Qnil;
5323 }
5324
5325 /* Store information about all compositions in the range FROM and TO
5326    of OBJ in memory blocks pointed by CODING->cmp_data.  OBJ is a
5327    buffer or a string, defaults to the current buffer.  */
5328
5329 void
5330 coding_save_composition (coding, from, to, obj)
5331      struct coding_system *coding;
5332      int from, to;
5333      Lisp_Object obj;
5334 {
5335   Lisp_Object prop;
5336   int start, end;
5337
5338   if (coding->composing == COMPOSITION_DISABLED)
5339     return;
5340   if (!coding->cmp_data)
5341     coding_allocate_composition_data (coding, from);
5342   if (!find_composition (from, to, &start, &end, &prop, obj)
5343       || end > to)
5344     return;
5345   if (start < from
5346       && (!find_composition (end, to, &start, &end, &prop, obj)
5347           || end > to))
5348     return;
5349   coding->composing = COMPOSITION_NO;
5350   do
5351     {
5352       if (COMPOSITION_VALID_P (start, end, prop))
5353         {
5354           enum composition_method method = COMPOSITION_METHOD (prop);
5355           if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
5356               >= COMPOSITION_DATA_SIZE)
5357             coding_allocate_composition_data (coding, from);
5358           /* For relative composition, we remember start and end
5359              positions, for the other compositions, we also remember
5360              components.  */
5361           CODING_ADD_COMPOSITION_START (coding, start - from, method);
5362           if (method != COMPOSITION_RELATIVE)
5363             {
5364               /* We must store a*/
5365               Lisp_Object val, ch;
5366
5367               val = COMPOSITION_COMPONENTS (prop);
5368               if (CONSP (val))
5369                 while (CONSP (val))
5370                   {
5371                     ch = XCAR (val), val = XCDR (val);
5372                     CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5373                   }
5374               else if (VECTORP (val) || STRINGP (val))
5375                 {
5376                   int len = (VECTORP (val)
5377                              ? XVECTOR (val)->size : SCHARS (val));
5378                   int i;
5379                   for (i = 0; i < len; i++)
5380                     {
5381                       ch = (STRINGP (val)
5382                             ? Faref (val, make_number (i))
5383                             : XVECTOR (val)->contents[i]);
5384                       CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5385                     }
5386                 }
5387               else              /* INTEGERP (val) */
5388                 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val));
5389             }
5390           CODING_ADD_COMPOSITION_END (coding, end - from);
5391         }
5392       start = end;
5393     }
5394   while (start < to
5395          && find_composition (start, to, &start, &end, &prop, obj)
5396          && end <= to);
5397
5398   /* Make coding->cmp_data point to the first memory block.  */
5399   while (coding->cmp_data->prev)
5400     coding->cmp_data = coding->cmp_data->prev;
5401   coding->cmp_data_start = 0;
5402 }
5403
5404 /* Reflect the saved information about compositions to OBJ.
5405    CODING->cmp_data points to a memory block for the information.  OBJ
5406    is a buffer or a string, defaults to the current buffer.  */
5407
5408 void
5409 coding_restore_composition (coding, obj)
5410      struct coding_system *coding;
5411      Lisp_Object obj;
5412 {
5413   struct composition_data *cmp_data = coding->cmp_data;
5414
5415   if (!cmp_data)
5416     return;
5417
5418   while (cmp_data->prev)
5419     cmp_data = cmp_data->prev;
5420
5421   while (cmp_data)
5422     {
5423       int i;
5424
5425       for (i = 0; i < cmp_data->used && cmp_data->data[i] > 0;
5426            i += cmp_data->data[i])
5427         {
5428           int *data = cmp_data->data + i;
5429           enum composition_method method = (enum composition_method) data[3];
5430           Lisp_Object components;
5431
5432           if (method == COMPOSITION_RELATIVE)
5433             components = Qnil;
5434           else
5435             {
5436               int len = data[0] - 4, j;
5437               Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5438
5439               if (method == COMPOSITION_WITH_RULE_ALTCHARS
5440                   && len % 2 == 0)
5441                 len --;
5442               for (j = 0; j < len; j++)
5443                 args[j] = make_number (data[4 + j]);
5444               components = (method == COMPOSITION_WITH_ALTCHARS
5445                             ? Fstring (len, args) : Fvector (len, args));
5446             }
5447           compose_text (data[1], data[2], components, Qnil, obj);
5448         }
5449       cmp_data = cmp_data->next;
5450     }
5451 }
5452
5453 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
5454    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
5455    coding system CODING, and return the status code of code conversion
5456    (currently, this value has no meaning).
5457
5458    How many characters (and bytes) are converted to how many
5459    characters (and bytes) are recorded in members of the structure
5460    CODING.
5461
5462    If REPLACE is nonzero, we do various things as if the original text
5463    is deleted and a new text is inserted.  See the comments in
5464    replace_range (insdel.c) to know what we are doing.
5465
5466    If REPLACE is zero, it is assumed that the source text is unibyte.
5467    Otherwise, it is assumed that the source text is multibyte.  */
5468
5469 int
5470 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
5471      int from, from_byte, to, to_byte, encodep, replace;
5472      struct coding_system *coding;
5473 {
5474   int len = to - from, len_byte = to_byte - from_byte;
5475   int nchars_del = 0, nbytes_del = 0;
5476   int require, inserted, inserted_byte;
5477   int head_skip, tail_skip, total_skip = 0;
5478   Lisp_Object saved_coding_symbol;
5479   int first = 1;
5480   unsigned char *src, *dst;
5481   Lisp_Object deletion;
5482   int orig_point = PT, orig_len = len;
5483   int prev_Z;
5484   int multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
5485
5486   deletion = Qnil;
5487   saved_coding_symbol = coding->symbol;
5488
5489   if (from < PT && PT < to)
5490     {
5491       TEMP_SET_PT_BOTH (from, from_byte);
5492       orig_point = from;
5493     }
5494
5495   if (replace)
5496     {
5497       int saved_from = from;
5498       int saved_inhibit_modification_hooks;
5499
5500       prepare_to_modify_buffer (from, to, &from);
5501       if (saved_from != from)
5502         {
5503           to = from + len;
5504           from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
5505           len_byte = to_byte - from_byte;
5506         }
5507
5508       /* The code conversion routine can not preserve text properties
5509          for now.  So, we must remove all text properties in the
5510          region.  Here, we must suppress all modification hooks.  */
5511       saved_inhibit_modification_hooks = inhibit_modification_hooks;
5512       inhibit_modification_hooks = 1;
5513       Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
5514       inhibit_modification_hooks = saved_inhibit_modification_hooks;
5515     }
5516
5517   if (! encodep && CODING_REQUIRE_DETECTION (coding))
5518     {
5519       /* We must detect encoding of text and eol format.  */
5520
5521       if (from < GPT && to > GPT)
5522         move_gap_both (from, from_byte);
5523       if (coding->type == coding_type_undecided)
5524         {
5525           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
5526           if (coding->type == coding_type_undecided)
5527             {
5528               /* It seems that the text contains only ASCII, but we
5529                  should not leave it undecided because the deeper
5530                  decoding routine (decode_coding) tries to detect the
5531                  encodings again in vain.  */
5532               coding->type = coding_type_emacs_mule;
5533               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
5534               /* As emacs-mule decoder will handle composition, we
5535                  need this setting to allocate coding->cmp_data
5536                  later.  */
5537               coding->composing = COMPOSITION_NO;
5538             }
5539         }
5540       if (coding->eol_type == CODING_EOL_UNDECIDED
5541           && coding->type != coding_type_ccl)
5542         {
5543           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
5544           if (coding->eol_type == CODING_EOL_UNDECIDED)
5545             coding->eol_type = CODING_EOL_LF;
5546           /* We had better recover the original eol format if we
5547              encounter an inconsistent eol format while decoding.  */
5548           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5549         }
5550     }
5551
5552   /* Now we convert the text.  */
5553
5554   /* For encoding, we must process pre-write-conversion in advance.  */
5555   if (! inhibit_pre_post_conversion
5556       && encodep
5557       && SYMBOLP (coding->pre_write_conversion)
5558       && ! NILP (Ffboundp (coding->pre_write_conversion)))
5559     {
5560       /* The function in pre-write-conversion may put a new text in a
5561          new buffer.  */
5562       struct buffer *prev = current_buffer;
5563       Lisp_Object new;
5564
5565       record_unwind_protect (code_convert_region_unwind,
5566                              Vlast_coding_system_used);
5567       /* We should not call any more pre-write/post-read-conversion
5568          functions while this pre-write-conversion is running.  */
5569       inhibit_pre_post_conversion = 1;
5570       call2 (coding->pre_write_conversion,
5571              make_number (from), make_number (to));
5572       inhibit_pre_post_conversion = 0;
5573       /* Discard the unwind protect.  */
5574       specpdl_ptr--;
5575
5576       if (current_buffer != prev)
5577         {
5578           len = ZV - BEGV;
5579           new = Fcurrent_buffer ();
5580           set_buffer_internal_1 (prev);
5581           del_range_2 (from, from_byte, to, to_byte, 0);
5582           TEMP_SET_PT_BOTH (from, from_byte);
5583           insert_from_buffer (XBUFFER (new), 1, len, 0);
5584           Fkill_buffer (new);
5585           if (orig_point >= to)
5586             orig_point += len - orig_len;
5587           else if (orig_point > from)
5588             orig_point = from;
5589           orig_len = len;
5590           to = from + len;
5591           from_byte = CHAR_TO_BYTE (from);
5592           to_byte = CHAR_TO_BYTE (to);
5593           len_byte = to_byte - from_byte;
5594           TEMP_SET_PT_BOTH (from, from_byte);
5595         }
5596     }
5597
5598   if (replace)
5599     {
5600       if (! EQ (current_buffer->undo_list, Qt))
5601         deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
5602       else
5603         {
5604           nchars_del = to - from;
5605           nbytes_del = to_byte - from_byte;
5606         }
5607     }
5608
5609   if (coding->composing != COMPOSITION_DISABLED)
5610     {
5611       if (encodep)
5612         coding_save_composition (coding, from, to, Fcurrent_buffer ());
5613       else
5614         coding_allocate_composition_data (coding, from);
5615     }
5616
5617   /* Try to skip the heading and tailing ASCIIs.  */
5618   if (coding->type != coding_type_ccl)
5619     {
5620       int from_byte_orig = from_byte, to_byte_orig = to_byte;
5621
5622       if (from < GPT && GPT < to)
5623         move_gap_both (from, from_byte);
5624       SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
5625       if (from_byte == to_byte
5626           && (encodep || NILP (coding->post_read_conversion))
5627           && ! CODING_REQUIRE_FLUSHING (coding))
5628         {
5629           coding->produced = len_byte;
5630           coding->produced_char = len;
5631           if (!replace)
5632             /* We must record and adjust for this new text now.  */
5633             adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
5634           return 0;
5635         }
5636
5637       head_skip = from_byte - from_byte_orig;
5638       tail_skip = to_byte_orig - to_byte;
5639       total_skip = head_skip + tail_skip;
5640       from += head_skip;
5641       to -= tail_skip;
5642       len -= total_skip; len_byte -= total_skip;
5643     }
5644
5645   /* For conversion, we must put the gap before the text in addition to
5646      making the gap larger for efficient decoding.  The required gap
5647      size starts from 2000 which is the magic number used in make_gap.
5648      But, after one batch of conversion, it will be incremented if we
5649      find that it is not enough .  */
5650   require = 2000;
5651
5652   if (GAP_SIZE  < require)
5653     make_gap (require - GAP_SIZE);
5654   move_gap_both (from, from_byte);
5655
5656   inserted = inserted_byte = 0;
5657
5658   GAP_SIZE += len_byte;
5659   ZV -= len;
5660   Z -= len;
5661   ZV_BYTE -= len_byte;
5662   Z_BYTE -= len_byte;
5663
5664   if (GPT - BEG < BEG_UNCHANGED)
5665     BEG_UNCHANGED = GPT - BEG;
5666   if (Z - GPT < END_UNCHANGED)
5667     END_UNCHANGED = Z - GPT;
5668
5669   if (!encodep && coding->src_multibyte)
5670     {
5671       /* Decoding routines expects that the source text is unibyte.
5672          We must convert 8-bit characters of multibyte form to
5673          unibyte.  */
5674       int len_byte_orig = len_byte;
5675       len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte);
5676       if (len_byte < len_byte_orig)
5677         safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte,
5678                     len_byte);
5679       coding->src_multibyte = 0;
5680     }
5681
5682   for (;;)
5683     {
5684       int result;
5685
5686       /* The buffer memory is now:
5687          +--------+converted-text+---------+-------original-text-------+---+
5688          |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
5689                   |<---------------------- GAP ----------------------->|  */
5690       src = GAP_END_ADDR - len_byte;
5691       dst = GPT_ADDR + inserted_byte;
5692
5693       if (encodep)
5694         result = encode_coding (coding, src, dst, len_byte, 0);
5695       else
5696         {
5697           if (coding->composing != COMPOSITION_DISABLED)
5698             coding->cmp_data->char_offset = from + inserted;
5699           result = decode_coding (coding, src, dst, len_byte, 0);
5700         }
5701
5702       /* The buffer memory is now:
5703          +--------+-------converted-text----+--+------original-text----+---+
5704          |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
5705                   |<---------------------- GAP ----------------------->|  */
5706
5707       inserted += coding->produced_char;
5708       inserted_byte += coding->produced;
5709       len_byte -= coding->consumed;
5710
5711       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5712         {
5713           coding_allocate_composition_data (coding, from + inserted);
5714           continue;
5715         }
5716
5717       src += coding->consumed;
5718       dst += coding->produced;
5719
5720       if (result == CODING_FINISH_NORMAL)
5721         {
5722           src += len_byte;
5723           break;
5724         }
5725       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
5726         {
5727           unsigned char *pend = dst, *p = pend - inserted_byte;
5728           Lisp_Object eol_type;
5729
5730           /* Encode LFs back to the original eol format (CR or CRLF).  */
5731           if (coding->eol_type == CODING_EOL_CR)
5732             {
5733               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
5734             }
5735           else
5736             {
5737               int count = 0;
5738
5739               while (p < pend) if (*p++ == '\n') count++;
5740               if (src - dst < count)
5741                 {
5742                   /* We don't have sufficient room for encoding LFs
5743                      back to CRLF.  We must record converted and
5744                      not-yet-converted text back to the buffer
5745                      content, enlarge the gap, then record them out of
5746                      the buffer contents again.  */
5747                   int add = len_byte + inserted_byte;
5748
5749                   GAP_SIZE -= add;
5750                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5751                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
5752                   make_gap (count - GAP_SIZE);
5753                   GAP_SIZE += add;
5754                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5755                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5756                   /* Don't forget to update SRC, DST, and PEND.  */
5757                   src = GAP_END_ADDR - len_byte;
5758                   dst = GPT_ADDR + inserted_byte;
5759                   pend = dst;
5760                 }
5761               inserted += count;
5762               inserted_byte += count;
5763               coding->produced += count;
5764               p = dst = pend + count;
5765               while (count)
5766                 {
5767                   *--p = *--pend;
5768                   if (*p == '\n') count--, *--p = '\r';
5769                 }
5770             }
5771
5772           /* Suppress eol-format conversion in the further conversion.  */
5773           coding->eol_type = CODING_EOL_LF;
5774
5775           /* Set the coding system symbol to that for Unix-like EOL.  */
5776           eol_type = Fget (saved_coding_symbol, Qeol_type);
5777           if (VECTORP (eol_type)
5778               && XVECTOR (eol_type)->size == 3
5779               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
5780             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
5781           else
5782             coding->symbol = saved_coding_symbol;
5783
5784           continue;
5785         }
5786       if (len_byte <= 0)
5787         {
5788           if (coding->type != coding_type_ccl
5789               || coding->mode & CODING_MODE_LAST_BLOCK)
5790             break;
5791           coding->mode |= CODING_MODE_LAST_BLOCK;
5792           continue;
5793         }
5794       if (result == CODING_FINISH_INSUFFICIENT_SRC)
5795         {
5796           /* The source text ends in invalid codes.  Let's just
5797              make them valid buffer contents, and finish conversion.  */
5798           if (multibyte_p)
5799             {
5800               unsigned char *start = dst;
5801
5802               inserted += len_byte;
5803               while (len_byte--)
5804                 {
5805                   int c = *src++;
5806                   dst += CHAR_STRING (c, dst);
5807                 }
5808
5809               inserted_byte += dst - start;
5810             }
5811           else
5812             {
5813               inserted += len_byte;
5814               inserted_byte += len_byte;
5815               while (len_byte--)
5816                 *dst++ = *src++;
5817             }
5818           break;
5819         }
5820       if (result == CODING_FINISH_INTERRUPT)
5821         {
5822           /* The conversion procedure was interrupted by a user.  */
5823           break;
5824         }
5825       /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST  */
5826       if (coding->consumed < 1)
5827         {
5828           /* It's quite strange to require more memory without
5829              consuming any bytes.  Perhaps CCL program bug.  */
5830           break;
5831         }
5832       if (first)
5833         {
5834           /* We have just done the first batch of conversion which was
5835              stopped because of insufficient gap.  Let's reconsider the
5836              required gap size (i.e. SRT - DST) now.
5837
5838              We have converted ORIG bytes (== coding->consumed) into
5839              NEW bytes (coding->produced).  To convert the remaining
5840              LEN bytes, we may need REQUIRE bytes of gap, where:
5841                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
5842                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
5843              Here, we are sure that NEW >= ORIG.  */
5844           float ratio;
5845
5846           if (coding->produced <= coding->consumed)
5847             {
5848               /* This happens because of CCL-based coding system with
5849                  eol-type CRLF.  */
5850               require = 0;
5851             }
5852           else
5853             {
5854               ratio = (coding->produced - coding->consumed) / coding->consumed;
5855               require = len_byte * ratio;
5856             }
5857           first = 0;
5858         }
5859       if ((src - dst) < (require + 2000))
5860         {
5861           /* See the comment above the previous call of make_gap.  */
5862           int add = len_byte + inserted_byte;
5863
5864           GAP_SIZE -= add;
5865           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5866           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5867           make_gap (require + 2000);
5868           GAP_SIZE += add;
5869           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5870           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5871         }
5872     }
5873   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
5874
5875   if (encodep && coding->dst_multibyte)
5876     {
5877       /* The output is unibyte.  We must convert 8-bit characters to
5878          multibyte form.  */
5879       if (inserted_byte * 2 > GAP_SIZE)
5880         {
5881           GAP_SIZE -= inserted_byte;
5882           ZV += inserted_byte; Z += inserted_byte;
5883           ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
5884           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5885           make_gap (inserted_byte - GAP_SIZE);
5886           GAP_SIZE += inserted_byte;
5887           ZV -= inserted_byte; Z -= inserted_byte;
5888           ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte;
5889           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5890         }
5891       inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
5892     }
5893
5894   /* If we shrank the conversion area, adjust it now.  */
5895   if (total_skip > 0)
5896     {
5897       if (tail_skip > 0)
5898         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
5899       inserted += total_skip; inserted_byte += total_skip;
5900       GAP_SIZE += total_skip;
5901       GPT -= head_skip; GPT_BYTE -= head_skip;
5902       ZV -= total_skip; ZV_BYTE -= total_skip;
5903       Z -= total_skip; Z_BYTE -= total_skip;
5904       from -= head_skip; from_byte -= head_skip;
5905       to += tail_skip; to_byte += tail_skip;
5906     }
5907
5908   prev_Z = Z;
5909   if (! EQ (current_buffer->undo_list, Qt))
5910     adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
5911   else
5912     adjust_after_replace_noundo (from, from_byte, nchars_del, nbytes_del,
5913                                  inserted, inserted_byte);
5914   inserted = Z - prev_Z;
5915
5916   if (!encodep && coding->cmp_data && coding->cmp_data->used)
5917     coding_restore_composition (coding, Fcurrent_buffer ());
5918   coding_free_composition_data (coding);
5919
5920   if (! inhibit_pre_post_conversion
5921       && ! encodep && ! NILP (coding->post_read_conversion))
5922     {
5923       Lisp_Object val;
5924       Lisp_Object saved_coding_system;
5925
5926       if (from != PT)
5927         TEMP_SET_PT_BOTH (from, from_byte);
5928       prev_Z = Z;
5929       record_unwind_protect (code_convert_region_unwind,
5930                              Vlast_coding_system_used);
5931       saved_coding_system = Vlast_coding_system_used;
5932       Vlast_coding_system_used = coding->symbol;
5933       /* We should not call any more pre-write/post-read-conversion
5934          functions while this post-read-conversion is running.  */
5935       inhibit_pre_post_conversion = 1;
5936       val = call1 (coding->post_read_conversion, make_number (inserted));
5937       inhibit_pre_post_conversion = 0;
5938       coding->symbol = Vlast_coding_system_used;
5939       Vlast_coding_system_used = saved_coding_system;
5940       /* Discard the unwind protect.  */
5941       specpdl_ptr--;
5942       CHECK_NUMBER (val);
5943       inserted += Z - prev_Z;
5944     }
5945
5946   if (orig_point >= from)
5947     {
5948       if (orig_point >= from + orig_len)
5949         orig_point += inserted - orig_len;
5950       else
5951         orig_point = from;
5952       TEMP_SET_PT (orig_point);
5953     }
5954
5955   if (replace)
5956     {
5957       signal_after_change (from, to - from, inserted);
5958       update_compositions (from, from + inserted, CHECK_BORDER);
5959     }
5960
5961   {
5962     coding->consumed = to_byte - from_byte;
5963     coding->consumed_char = to - from;
5964     coding->produced = inserted_byte;
5965     coding->produced_char = inserted;
5966   }
5967
5968   return 0;
5969 }
5970
5971 Lisp_Object
5972 run_pre_post_conversion_on_str (str, coding, encodep)
5973      Lisp_Object str;
5974      struct coding_system *coding;
5975      int encodep;
5976 {
5977   int count = SPECPDL_INDEX ();
5978   struct gcpro gcpro1, gcpro2;
5979   int multibyte = STRING_MULTIBYTE (str);
5980   Lisp_Object buffer;
5981   struct buffer *buf;
5982   Lisp_Object old_deactivate_mark;
5983
5984   record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
5985   record_unwind_protect (code_convert_region_unwind,
5986                          Vlast_coding_system_used);
5987   /* It is not crucial to specbind this.  */
5988   old_deactivate_mark = Vdeactivate_mark;
5989   GCPRO2 (str, old_deactivate_mark);
5990
5991   buffer = Fget_buffer_create (build_string (" *code-converting-work*"));
5992   buf = XBUFFER (buffer);
5993
5994   buf->directory = current_buffer->directory;
5995   buf->read_only = Qnil;
5996   buf->filename = Qnil;
5997   buf->undo_list = Qt;
5998   buf->overlays_before = Qnil;
5999   buf->overlays_after = Qnil;
6000
6001   set_buffer_internal (buf);
6002   /* We must insert the contents of STR as is without
6003      unibyte<->multibyte conversion.  For that, we adjust the
6004      multibyteness of the working buffer to that of STR.  */
6005   Ferase_buffer ();
6006   buf->enable_multibyte_characters = multibyte ? Qt : Qnil;
6007
6008   insert_from_string (str, 0, 0,
6009                       SCHARS (str), SBYTES (str), 0);
6010   UNGCPRO;
6011   inhibit_pre_post_conversion = 1;
6012   if (encodep)
6013     call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
6014   else
6015     {
6016       Vlast_coding_system_used = coding->symbol;
6017       TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
6018       call1 (coding->post_read_conversion, make_number (Z - BEG));
6019       coding->symbol = Vlast_coding_system_used;
6020     }
6021   inhibit_pre_post_conversion = 0;
6022   Vdeactivate_mark = old_deactivate_mark;
6023   str = make_buffer_string (BEG, Z, 1);
6024   return unbind_to (count, str);
6025 }
6026
6027 Lisp_Object
6028 decode_coding_string (str, coding, nocopy)
6029      Lisp_Object str;
6030      struct coding_system *coding;
6031      int nocopy;
6032 {
6033   int len;
6034   struct conversion_buffer buf;
6035   int from, to_byte;
6036   Lisp_Object saved_coding_symbol;
6037   int result;
6038   int require_decoding;
6039   int shrinked_bytes = 0;
6040   Lisp_Object newstr;
6041   int consumed, consumed_char, produced, produced_char;
6042
6043   from = 0;
6044   to_byte = SBYTES (str);
6045
6046   saved_coding_symbol = coding->symbol;
6047   coding->src_multibyte = STRING_MULTIBYTE (str);
6048   coding->dst_multibyte = 1;
6049   if (CODING_REQUIRE_DETECTION (coding))
6050     {
6051       /* See the comments in code_convert_region.  */
6052       if (coding->type == coding_type_undecided)
6053         {
6054           detect_coding (coding, SDATA (str), to_byte);
6055           if (coding->type == coding_type_undecided)
6056             {
6057               coding->type = coding_type_emacs_mule;
6058               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
6059               /* As emacs-mule decoder will handle composition, we
6060                  need this setting to allocate coding->cmp_data
6061                  later.  */
6062               coding->composing = COMPOSITION_NO;
6063             }
6064         }
6065       if (coding->eol_type == CODING_EOL_UNDECIDED
6066           && coding->type != coding_type_ccl)
6067         {
6068           saved_coding_symbol = coding->symbol;
6069           detect_eol (coding, SDATA (str), to_byte);
6070           if (coding->eol_type == CODING_EOL_UNDECIDED)
6071             coding->eol_type = CODING_EOL_LF;
6072           /* We had better recover the original eol format if we
6073              encounter an inconsistent eol format while decoding.  */
6074           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
6075         }
6076     }
6077
6078   if (coding->type == coding_type_no_conversion
6079       || coding->type == coding_type_raw_text)
6080     coding->dst_multibyte = 0;
6081
6082   require_decoding = CODING_REQUIRE_DECODING (coding);
6083
6084   if (STRING_MULTIBYTE (str))
6085     {
6086       /* Decoding routines expect the source text to be unibyte.  */
6087       str = Fstring_as_unibyte (str);
6088       to_byte = SBYTES (str);
6089       nocopy = 1;
6090       coding->src_multibyte = 0;
6091     }
6092
6093   /* Try to skip the heading and tailing ASCIIs.  */
6094   if (require_decoding && coding->type != coding_type_ccl)
6095     {
6096       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
6097                                 0);
6098       if (from == to_byte)
6099         require_decoding = 0;
6100       shrinked_bytes = from + (SBYTES (str) - to_byte);
6101     }
6102
6103   if (!require_decoding
6104       && !(SYMBOLP (coding->post_read_conversion)
6105            && !NILP (Ffboundp (coding->post_read_conversion))))
6106     {
6107       coding->consumed = SBYTES (str);
6108       coding->consumed_char = SCHARS (str);
6109       if (coding->dst_multibyte)
6110         {
6111           str = Fstring_as_multibyte (str);
6112           nocopy = 1;
6113         }
6114       coding->produced = SBYTES (str);
6115       coding->produced_char = SCHARS (str);
6116       return (nocopy ? str : Fcopy_sequence (str));
6117     }
6118
6119   if (coding->composing != COMPOSITION_DISABLED)
6120     coding_allocate_composition_data (coding, from);
6121   len = decoding_buffer_size (coding, to_byte - from);
6122   allocate_conversion_buffer (buf, len);
6123
6124   consumed = consumed_char = produced = produced_char = 0;
6125   while (1)
6126     {
6127       result = decode_coding (coding, SDATA (str) + from + consumed,
6128                               buf.data + produced, to_byte - from - consumed,
6129                               buf.size - produced);
6130       consumed += coding->consumed;
6131       consumed_char += coding->consumed_char;
6132       produced += coding->produced;
6133       produced_char += coding->produced_char;
6134       if (result == CODING_FINISH_NORMAL
6135           || (result == CODING_FINISH_INSUFFICIENT_SRC
6136               && coding->consumed == 0))
6137         break;
6138       if (result == CODING_FINISH_INSUFFICIENT_CMP)
6139         coding_allocate_composition_data (coding, from + produced_char);
6140       else if (result == CODING_FINISH_INSUFFICIENT_DST)
6141         extend_conversion_buffer (&buf);
6142       else if (result == CODING_FINISH_INCONSISTENT_EOL)
6143         {
6144           Lisp_Object eol_type;
6145
6146           /* Recover the original EOL format.  */
6147           if (coding->eol_type == CODING_EOL_CR)
6148             {
6149               unsigned char *p;
6150               for (p = buf.data; p < buf.data + produced; p++)
6151                 if (*p == '\n') *p = '\r';
6152             }
6153           else if (coding->eol_type == CODING_EOL_CRLF)
6154             {
6155               int num_eol = 0;
6156               unsigned char *p0, *p1;
6157               for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++)
6158                 if (*p0 == '\n') num_eol++;
6159               if (produced + num_eol >= buf.size)
6160                 extend_conversion_buffer (&buf);
6161               for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;)
6162                 {
6163                   *--p1 = *--p0;
6164                   if (*p0 == '\n') *--p1 = '\r';
6165                 }
6166               produced += num_eol;
6167               produced_char += num_eol;
6168             }
6169           /* Suppress eol-format conversion in the further conversion.  */
6170           coding->eol_type = CODING_EOL_LF;
6171
6172           /* Set the coding system symbol to that for Unix-like EOL.  */
6173           eol_type = Fget (saved_coding_symbol, Qeol_type);
6174           if (VECTORP (eol_type)
6175               && XVECTOR (eol_type)->size == 3
6176               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
6177             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
6178           else
6179             coding->symbol = saved_coding_symbol;
6180
6181
6182         }
6183     }
6184
6185   coding->consumed = consumed;
6186   coding->consumed_char = consumed_char;
6187   coding->produced = produced;
6188   coding->produced_char = produced_char;
6189
6190   if (coding->dst_multibyte)
6191     newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes,
6192                                            produced + shrinked_bytes);
6193   else
6194     newstr = make_uninit_string (produced + shrinked_bytes);
6195   if (from > 0)
6196     STRING_COPYIN (newstr, 0, SDATA (str), from);
6197   STRING_COPYIN (newstr, from, buf.data, produced);
6198   if (shrinked_bytes > from)
6199     STRING_COPYIN (newstr, from + produced,
6200                    SDATA (str) + to_byte,
6201                    shrinked_bytes - from);
6202   free_conversion_buffer (&buf);
6203
6204   if (coding->cmp_data && coding->cmp_data->used)
6205     coding_restore_composition (coding, newstr);
6206   coding_free_composition_data (coding);
6207
6208   if (SYMBOLP (coding->post_read_conversion)
6209       && !NILP (Ffboundp (coding->post_read_conversion)))
6210     newstr = run_pre_post_conversion_on_str (newstr, coding, 0);
6211
6212   return newstr;
6213 }
6214
6215 Lisp_Object
6216 encode_coding_string (str, coding, nocopy)
6217      Lisp_Object str;
6218      struct coding_system *coding;
6219      int nocopy;
6220 {
6221   int len;
6222   struct conversion_buffer buf;
6223   int from, to, to_byte;
6224   int result;
6225   int shrinked_bytes = 0;
6226   Lisp_Object newstr;
6227   int consumed, consumed_char, produced, produced_char;
6228
6229   if (SYMBOLP (coding->pre_write_conversion)
6230       && !NILP (Ffboundp (coding->pre_write_conversion)))
6231     str = run_pre_post_conversion_on_str (str, coding, 1);
6232
6233   from = 0;
6234   to = SCHARS (str);
6235   to_byte = SBYTES (str);
6236
6237   /* Encoding routines determine the multibyteness of the source text
6238      by coding->src_multibyte.  */
6239   coding->src_multibyte = STRING_MULTIBYTE (str);
6240   coding->dst_multibyte = 0;
6241   if (! CODING_REQUIRE_ENCODING (coding))
6242     {
6243       coding->consumed = SBYTES (str);
6244       coding->consumed_char = SCHARS (str);
6245       if (STRING_MULTIBYTE (str))
6246         {
6247           str = Fstring_as_unibyte (str);
6248           nocopy = 1;
6249         }
6250       coding->produced = SBYTES (str);
6251       coding->produced_char = SCHARS (str);
6252       return (nocopy ? str : Fcopy_sequence (str));
6253     }
6254
6255   if (coding->composing != COMPOSITION_DISABLED)
6256     coding_save_composition (coding, from, to, str);
6257
6258   /* Try to skip the heading and tailing ASCIIs.  */
6259   if (coding->type != coding_type_ccl)
6260     {
6261       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
6262                                 1);
6263       if (from == to_byte)
6264         return (nocopy ? str : Fcopy_sequence (str));
6265       shrinked_bytes = from + (SBYTES (str) - to_byte);
6266     }
6267
6268   len = encoding_buffer_size (coding, to_byte - from);
6269   allocate_conversion_buffer (buf, len);
6270
6271   consumed = consumed_char = produced = produced_char = 0;
6272   while (1)
6273     {
6274       result = encode_coding (coding, SDATA (str) + from + consumed,
6275                               buf.data + produced, to_byte - from - consumed,
6276                               buf.size - produced);
6277       consumed += coding->consumed;
6278       consumed_char += coding->consumed_char;
6279       produced += coding->produced;
6280       produced_char += coding->produced_char;
6281       if (result == CODING_FINISH_NORMAL
6282           || (result == CODING_FINISH_INSUFFICIENT_SRC
6283               && coding->consumed == 0))
6284         break;
6285       /* Now result should be CODING_FINISH_INSUFFICIENT_DST.  */
6286       extend_conversion_buffer (&buf);
6287     }
6288
6289   coding->consumed = consumed;
6290   coding->consumed_char = consumed_char;
6291   coding->produced = produced;
6292   coding->produced_char = produced_char;
6293
6294   newstr = make_uninit_string (produced + shrinked_bytes);
6295   if (from > 0)
6296     STRING_COPYIN (newstr, 0, SDATA (str), from);
6297   STRING_COPYIN (newstr, from, buf.data, produced);
6298   if (shrinked_bytes > from)
6299     STRING_COPYIN (newstr, from + produced,
6300                    SDATA (str) + to_byte,
6301                    shrinked_bytes - from);
6302
6303   free_conversion_buffer (&buf);
6304   coding_free_composition_data (coding);
6305
6306   return newstr;
6307 }
6308
6309 \f
6310 #ifdef emacs
6311 /*** 8. Emacs Lisp library functions ***/
6312
6313 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6314        doc: /* Return t if OBJECT is nil or a coding-system.
6315 See the documentation of `make-coding-system' for information
6316 about coding-system objects.  */)
6317      (obj)
6318      Lisp_Object obj;
6319 {
6320   if (NILP (obj))
6321     return Qt;
6322   if (!SYMBOLP (obj))
6323     return Qnil;
6324   /* Get coding-spec vector for OBJ.  */
6325   obj = Fget (obj, Qcoding_system);
6326   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
6327           ? Qt : Qnil);
6328 }
6329
6330 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6331        Sread_non_nil_coding_system, 1, 1, 0,
6332        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
6333      (prompt)
6334      Lisp_Object prompt;
6335 {
6336   Lisp_Object val;
6337   do
6338     {
6339       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6340                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6341     }
6342   while (SCHARS (val) == 0);
6343   return (Fintern (val, Qnil));
6344 }
6345
6346 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6347        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6348 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.  */)
6349      (prompt, default_coding_system)
6350      Lisp_Object prompt, default_coding_system;
6351 {
6352   Lisp_Object val;
6353   if (SYMBOLP (default_coding_system))
6354     default_coding_system = SYMBOL_NAME (default_coding_system);
6355   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6356                           Qt, Qnil, Qcoding_system_history,
6357                           default_coding_system, Qnil);
6358   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
6359 }
6360
6361 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6362        1, 1, 0,
6363        doc: /* Check validity of CODING-SYSTEM.
6364 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
6365 It is valid if it is a symbol with a non-nil `coding-system' property.
6366 The value of property should be a vector of length 5.  */)
6367      (coding_system)
6368      Lisp_Object coding_system;
6369 {
6370   CHECK_SYMBOL (coding_system);
6371   if (!NILP (Fcoding_system_p (coding_system)))
6372     return coding_system;
6373   while (1)
6374     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6375 }
6376 \f
6377 Lisp_Object
6378 detect_coding_system (src, src_bytes, highest, multibytep)
6379      const unsigned char *src;
6380      int src_bytes, highest;
6381      int multibytep;
6382 {
6383   int coding_mask, eol_type;
6384   Lisp_Object val, tmp;
6385   int dummy;
6386
6387   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy, multibytep);
6388   eol_type  = detect_eol_type (src, src_bytes, &dummy);
6389   if (eol_type == CODING_EOL_INCONSISTENT)
6390     eol_type = CODING_EOL_UNDECIDED;
6391
6392   if (!coding_mask)
6393     {
6394       val = Qundecided;
6395       if (eol_type != CODING_EOL_UNDECIDED)
6396         {
6397           Lisp_Object val2;
6398           val2 = Fget (Qundecided, Qeol_type);
6399           if (VECTORP (val2))
6400             val = XVECTOR (val2)->contents[eol_type];
6401         }
6402       return (highest ? val : Fcons (val, Qnil));
6403     }
6404
6405   /* At first, gather possible coding systems in VAL.  */
6406   val = Qnil;
6407   for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp))
6408     {
6409       Lisp_Object category_val, category_index;
6410
6411       category_index = Fget (XCAR (tmp), Qcoding_category_index);
6412       category_val = Fsymbol_value (XCAR (tmp));
6413       if (!NILP (category_val)
6414           && NATNUMP (category_index)
6415           && (coding_mask & (1 << XFASTINT (category_index))))
6416         {
6417           val = Fcons (category_val, val);
6418           if (highest)
6419             break;
6420         }
6421     }
6422   if (!highest)
6423     val = Fnreverse (val);
6424
6425   /* Then, replace the elements with subsidiary coding systems.  */
6426   for (tmp = val; CONSP (tmp); tmp = XCDR (tmp))
6427     {
6428       if (eol_type != CODING_EOL_UNDECIDED
6429           && eol_type != CODING_EOL_INCONSISTENT)
6430         {
6431           Lisp_Object eol;
6432           eol = Fget (XCAR (tmp), Qeol_type);
6433           if (VECTORP (eol))
6434             XSETCAR (tmp, XVECTOR (eol)->contents[eol_type]);
6435         }
6436     }
6437   return (highest ? XCAR (val) : val);
6438 }
6439
6440 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6441        2, 3, 0,
6442        doc: /* Detect how the byte sequence in the region is encoded.
6443 Return a list of possible coding systems used on decoding a byte
6444 sequence containing the bytes in the region between START and END when
6445 the coding system `undecided' is specified.  The list is ordered by
6446 priority decided in the current language environment.
6447
6448 If only ASCII characters are found, it returns a list of single element
6449 `undecided' or its subsidiary coding system according to a detected
6450 end-of-line format.
6451
6452 If optional argument HIGHEST is non-nil, return the coding system of
6453 highest priority.  */)
6454      (start, end, highest)
6455      Lisp_Object start, end, highest;
6456 {
6457   int from, to;
6458   int from_byte, to_byte;
6459   int include_anchor_byte = 0;
6460
6461   CHECK_NUMBER_COERCE_MARKER (start);
6462   CHECK_NUMBER_COERCE_MARKER (end);
6463
6464   validate_region (&start, &end);
6465   from = XINT (start), to = XINT (end);
6466   from_byte = CHAR_TO_BYTE (from);
6467   to_byte = CHAR_TO_BYTE (to);
6468
6469   if (from < GPT && to >= GPT)
6470     move_gap_both (to, to_byte);
6471   /* If we an anchor byte `\0' follows the region, we include it in
6472      the detecting source.  Then code detectors can handle the tailing
6473      byte sequence more accurately.
6474
6475      Fix me: This is not a perfect solution.  It is better that we
6476      add one more argument, say LAST_BLOCK, to all detect_coding_XXX.
6477   */
6478   if (to == Z || (to == GPT && GAP_SIZE > 0))
6479     include_anchor_byte = 1;
6480   return detect_coding_system (BYTE_POS_ADDR (from_byte),
6481                                to_byte - from_byte + include_anchor_byte,
6482                                !NILP (highest),
6483                                !NILP (current_buffer
6484                                       ->enable_multibyte_characters));
6485 }
6486
6487 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6488        1, 2, 0,
6489        doc: /* Detect how the byte sequence in STRING is encoded.
6490 Return a list of possible coding systems used on decoding a byte
6491 sequence containing the bytes in STRING when the coding system
6492 `undecided' is specified.  The list is ordered by priority decided in
6493 the current language environment.
6494
6495 If only ASCII characters are found, it returns a list of single element
6496 `undecided' or its subsidiary coding system according to a detected
6497 end-of-line format.
6498
6499 If optional argument HIGHEST is non-nil, return the coding system of
6500 highest priority.  */)
6501      (string, highest)
6502      Lisp_Object string, highest;
6503 {
6504   CHECK_STRING (string);
6505
6506   return detect_coding_system (SDATA (string),
6507                                /* "+ 1" is to include the anchor byte
6508                                   `\0'.  With this, code detectors can
6509                                   handle the tailing bytes more
6510                                   accurately.  */
6511                                SBYTES (string) + 1,
6512                                !NILP (highest),
6513                                STRING_MULTIBYTE (string));
6514 }
6515
6516 /*  Subroutine for Fsafe_coding_systems_region_internal.
6517
6518     Return a list of coding systems that safely encode the multibyte
6519     text between P and PEND.  SAFE_CODINGS, if non-nil, is an alist of
6520     possible coding systems.  If it is nil, it means that we have not
6521     yet found any coding systems.
6522
6523     WORK_TABLE is a copy of the char-table Vchar_coding_system_table.  An
6524     element of WORK_TABLE is set to t once the element is looked up.
6525
6526     If a non-ASCII single byte char is found, set
6527     *single_byte_char_found to 1.  */
6528
6529 static Lisp_Object
6530 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
6531      unsigned char *p, *pend;
6532      Lisp_Object safe_codings, work_table;
6533      int *single_byte_char_found;
6534 {
6535   int c, len;
6536   Lisp_Object val, ch;
6537   Lisp_Object prev, tail;
6538
6539   while (p < pend)
6540     {
6541       c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
6542       p += len;
6543       if (ASCII_BYTE_P (c))
6544         /* We can ignore ASCII characters here.  */
6545         continue;
6546       if (SINGLE_BYTE_CHAR_P (c))
6547         *single_byte_char_found = 1;
6548       if (NILP (safe_codings))
6549         /* Already all coding systems are excluded.  But, we can't
6550            terminate the loop here because non-ASCII single-byte char
6551            must be found.  */
6552         continue;
6553       /* Check the safe coding systems for C.  */
6554       ch = make_number (c);
6555       val = Faref (work_table, ch);
6556       if (EQ (val, Qt))
6557         /* This element was already checked.  Ignore it.  */
6558         continue;
6559       /* Remember that we checked this element.  */
6560       Faset (work_table, ch, Qt);
6561
6562       for (prev = tail = safe_codings; CONSP (tail); tail = XCDR (tail))
6563         {
6564           Lisp_Object elt, translation_table, hash_table, accept_latin_extra;
6565           int encodable;
6566
6567           elt = XCAR (tail);
6568           if (CONSP (XCDR (elt)))
6569             {
6570               /* This entry has this format now:
6571                  ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
6572                           ACCEPT-LATIN-EXTRA ) */
6573               val = XCDR (elt);
6574               encodable = ! NILP (Faref (XCAR (val), ch));
6575               if (! encodable)
6576                 {
6577                   val = XCDR (val);
6578                   translation_table = XCAR (val);
6579                   hash_table = XCAR (XCDR (val));
6580                   accept_latin_extra = XCAR (XCDR (XCDR (val)));
6581                 }
6582             }
6583           else
6584             {
6585               /* This entry has this format now: ( CODING . SAFE-CHARS) */
6586               encodable = ! NILP (Faref (XCDR (elt), ch));
6587               if (! encodable)
6588                 {
6589                   /* Transform the format to:
6590                      ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
6591                        ACCEPT-LATIN-EXTRA )  */
6592                   val = Fget (XCAR (elt), Qcoding_system);
6593                   translation_table
6594                     = Fplist_get (AREF (val, 3),
6595                                   Qtranslation_table_for_encode);
6596                   if (SYMBOLP (translation_table))
6597                     translation_table = Fget (translation_table,
6598                                               Qtranslation_table);
6599                   hash_table
6600                     = (CHAR_TABLE_P (translation_table)
6601                        ? XCHAR_TABLE (translation_table)->extras[1]
6602                        : Qnil);
6603                   accept_latin_extra
6604                     = ((EQ (AREF (val, 0), make_number (2))
6605                         && VECTORP (AREF (val, 4)))
6606                        ? AREF (AREF (val, 4), 16)
6607                        : Qnil);
6608                   XSETCAR (tail, list5 (XCAR (elt), XCDR (elt),
6609                                         translation_table, hash_table,
6610                                         accept_latin_extra));
6611                 }
6612             }
6613
6614           if (! encodable
6615               && ((CHAR_TABLE_P (translation_table)
6616                    && ! NILP (Faref (translation_table, ch)))
6617                   || (HASH_TABLE_P (hash_table)
6618                       && ! NILP (Fgethash (ch, hash_table, Qnil)))
6619                   || (SINGLE_BYTE_CHAR_P (c)
6620                       && ! NILP (accept_latin_extra)
6621                       && VECTORP (Vlatin_extra_code_table)
6622                       && ! NILP (AREF (Vlatin_extra_code_table, c)))))
6623             encodable = 1;
6624           if (encodable)
6625             prev = tail;
6626           else
6627             {
6628               /* Exclude this coding system from SAFE_CODINGS.  */
6629               if (EQ (tail, safe_codings))
6630                 safe_codings = XCDR (safe_codings);
6631               else
6632                 XSETCDR (prev, XCDR (tail));
6633             }
6634         }
6635     }
6636   return safe_codings;
6637 }
6638
6639 DEFUN ("find-coding-systems-region-internal",
6640        Ffind_coding_systems_region_internal,
6641        Sfind_coding_systems_region_internal, 2, 2, 0,
6642        doc: /* Internal use only.  */)
6643      (start, end)
6644      Lisp_Object start, end;
6645 {
6646   Lisp_Object work_table, safe_codings;
6647   int non_ascii_p = 0;
6648   int single_byte_char_found = 0;
6649   const unsigned char *p1, *p1end, *p2, *p2end, *p;
6650
6651   if (STRINGP (start))
6652     {
6653       if (!STRING_MULTIBYTE (start))
6654         return Qt;
6655       p1 = SDATA (start), p1end = p1 + SBYTES (start);
6656       p2 = p2end = p1end;
6657       if (SCHARS (start) != SBYTES (start))
6658         non_ascii_p = 1;
6659     }
6660   else
6661     {
6662       int from, to, stop;
6663
6664       CHECK_NUMBER_COERCE_MARKER (start);
6665       CHECK_NUMBER_COERCE_MARKER (end);
6666       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6667         args_out_of_range (start, end);
6668       if (NILP (current_buffer->enable_multibyte_characters))
6669         return Qt;
6670       from = CHAR_TO_BYTE (XINT (start));
6671       to = CHAR_TO_BYTE (XINT (end));
6672       stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
6673       p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
6674       if (stop == to)
6675         p2 = p2end = p1end;
6676       else
6677         p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
6678       if (XINT (end) - XINT (start) != to - from)
6679         non_ascii_p = 1;
6680     }
6681
6682   if (!non_ascii_p)
6683     {
6684       /* We are sure that the text contains no multibyte character.
6685          Check if it contains eight-bit-graphic.  */
6686       p = p1;
6687       for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
6688       if (p == p1end)
6689         {
6690           for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
6691           if (p == p2end)
6692             return Qt;
6693         }
6694     }
6695
6696   /* The text contains non-ASCII characters.  */
6697
6698   work_table = Fmake_char_table (Qchar_coding_system, Qnil);
6699   safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars));
6700
6701   safe_codings = find_safe_codings (p1, p1end, safe_codings, work_table,
6702                                     &single_byte_char_found);
6703   if (p2 < p2end)
6704     safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
6705                                       &single_byte_char_found);
6706   if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars)))
6707     safe_codings = Qt;
6708   else
6709     {
6710       /* Turn safe_codings to a list of coding systems... */
6711       Lisp_Object val;
6712
6713       if (single_byte_char_found)
6714         /* ... and append these for eight-bit chars.  */
6715         val = Fcons (Qraw_text,
6716                      Fcons (Qemacs_mule, Fcons (Qno_conversion, Qnil)));
6717       else
6718         /* ... and append generic coding systems.  */
6719         val = Fcopy_sequence (XCAR (Vcoding_system_safe_chars));
6720
6721       for (; CONSP (safe_codings); safe_codings = XCDR (safe_codings))
6722         val = Fcons (XCAR (XCAR (safe_codings)), val);
6723       safe_codings = val;
6724     }
6725
6726   return safe_codings;
6727 }
6728
6729
6730 /* Search from position POS for such characters that are unencodable
6731    accoding to SAFE_CHARS, and return a list of their positions.  P
6732    points where in the memory the character at POS exists.  Limit the
6733    search at PEND or when Nth unencodable characters are found.
6734
6735    If SAFE_CHARS is a char table, an element for an unencodable
6736    character is nil.
6737
6738    If SAFE_CHARS is nil, all non-ASCII characters are unencodable.
6739
6740    Otherwise, SAFE_CHARS is t, and only eight-bit-contrl and
6741    eight-bit-graphic characters are unencodable.  */
6742
6743 static Lisp_Object
6744 unencodable_char_position (safe_chars, pos, p, pend, n)
6745      Lisp_Object safe_chars;
6746      int pos;
6747      unsigned char *p, *pend;
6748      int n;
6749 {
6750   Lisp_Object pos_list;
6751
6752   pos_list = Qnil;
6753   while (p < pend)
6754     {
6755       int len;
6756       int c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, len);
6757
6758       if (c >= 128
6759           && (CHAR_TABLE_P (safe_chars)
6760               ? NILP (CHAR_TABLE_REF (safe_chars, c))
6761               : (NILP (safe_chars) || c < 256)))
6762         {
6763           pos_list = Fcons (make_number (pos), pos_list);
6764           if (--n <= 0)
6765             break;
6766         }
6767       pos++;
6768       p += len;
6769     }
6770   return Fnreverse (pos_list);
6771 }
6772
6773
6774 DEFUN ("unencodable-char-position", Funencodable_char_position,
6775        Sunencodable_char_position, 3, 5, 0,
6776        doc: /*
6777 Return position of first un-encodable character in a region.
6778 START and END specfiy the region and CODING-SYSTEM specifies the
6779 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
6780
6781 If optional 4th argument COUNT is non-nil, it specifies at most how
6782 many un-encodable characters to search.  In this case, the value is a
6783 list of positions.
6784
6785 If optional 5th argument STRING is non-nil, it is a string to search
6786 for un-encodable characters.  In that case, START and END are indexes
6787 to the string.  */)
6788      (start, end, coding_system, count, string)
6789      Lisp_Object start, end, coding_system, count, string;
6790 {
6791   int n;
6792   Lisp_Object safe_chars;
6793   struct coding_system coding;
6794   Lisp_Object positions;
6795   int from, to;
6796   unsigned char *p, *pend;
6797
6798   if (NILP (string))
6799     {
6800       validate_region (&start, &end);
6801       from = XINT (start);
6802       to = XINT (end);
6803       if (NILP (current_buffer->enable_multibyte_characters))
6804         return Qnil;
6805       p = CHAR_POS_ADDR (from);
6806       if (to == GPT)
6807         pend = GPT_ADDR;
6808       else
6809         pend = CHAR_POS_ADDR (to);
6810     }
6811   else
6812     {
6813       CHECK_STRING (string);
6814       CHECK_NATNUM (start);
6815       CHECK_NATNUM (end);
6816       from = XINT (start);
6817       to = XINT (end);
6818       if (from > to
6819           || to > SCHARS (string))
6820         args_out_of_range_3 (string, start, end);
6821       if (! STRING_MULTIBYTE (string))
6822         return Qnil;
6823       p = SDATA (string) + string_char_to_byte (string, from);
6824       pend = SDATA (string) + string_char_to_byte (string, to);
6825     }
6826
6827   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
6828
6829   if (NILP (count))
6830     n = 1;
6831   else
6832     {
6833       CHECK_NATNUM (count);
6834       n = XINT (count);
6835     }
6836
6837   if (coding.type == coding_type_no_conversion
6838       || coding.type == coding_type_raw_text)
6839     return Qnil;
6840
6841   if (coding.type == coding_type_undecided)
6842     safe_chars = Qnil;
6843   else
6844     safe_chars = coding_safe_chars (coding_system);
6845
6846   if (STRINGP (string)
6847       || from >= GPT || to <= GPT)
6848     positions = unencodable_char_position (safe_chars, from, p, pend, n);
6849   else
6850     {
6851       Lisp_Object args[2];
6852
6853       args[0] = unencodable_char_position (safe_chars, from, p, GPT_ADDR, n);
6854       n -= XINT (Flength (args[0]));
6855       if (n <= 0)
6856         positions = args[0];
6857       else
6858         {
6859           args[1] = unencodable_char_position (safe_chars, GPT, GAP_END_ADDR,
6860                                                pend, n);
6861           positions = Fappend (2, args);
6862         }
6863     }
6864
6865   return  (NILP (count) ? Fcar (positions) : positions);
6866 }
6867
6868
6869 Lisp_Object
6870 code_convert_region1 (start, end, coding_system, encodep)
6871      Lisp_Object start, end, coding_system;
6872      int encodep;
6873 {
6874   struct coding_system coding;
6875   int from, to;
6876
6877   CHECK_NUMBER_COERCE_MARKER (start);
6878   CHECK_NUMBER_COERCE_MARKER (end);
6879   CHECK_SYMBOL (coding_system);
6880
6881   validate_region (&start, &end);
6882   from = XFASTINT (start);
6883   to = XFASTINT (end);
6884
6885   if (NILP (coding_system))
6886     return make_number (to - from);
6887
6888   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6889     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
6890
6891   coding.mode |= CODING_MODE_LAST_BLOCK;
6892   coding.src_multibyte = coding.dst_multibyte
6893     = !NILP (current_buffer->enable_multibyte_characters);
6894   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
6895                        &coding, encodep, 1);
6896   Vlast_coding_system_used = coding.symbol;
6897   return make_number (coding.produced_char);
6898 }
6899
6900 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
6901        3, 3, "r\nzCoding system: ",
6902        doc: /* Decode the current region from the specified coding system.
6903 When called from a program, takes three arguments:
6904 START, END, and CODING-SYSTEM.  START and END are buffer positions.
6905 This function sets `last-coding-system-used' to the precise coding system
6906 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6907 not fully specified.)
6908 It returns the length of the decoded text.  */)
6909      (start, end, coding_system)
6910      Lisp_Object start, end, coding_system;
6911 {
6912   return code_convert_region1 (start, end, coding_system, 0);
6913 }
6914
6915 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
6916        3, 3, "r\nzCoding system: ",
6917        doc: /* Encode the current region into the specified coding system.
6918 When called from a program, takes three arguments:
6919 START, END, and CODING-SYSTEM.  START and END are buffer positions.
6920 This function sets `last-coding-system-used' to the precise coding system
6921 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6922 not fully specified.)
6923 It returns the length of the encoded text.  */)
6924      (start, end, coding_system)
6925      Lisp_Object start, end, coding_system;
6926 {
6927   return code_convert_region1 (start, end, coding_system, 1);
6928 }
6929
6930 Lisp_Object
6931 code_convert_string1 (string, coding_system, nocopy, encodep)
6932      Lisp_Object string, coding_system, nocopy;
6933      int encodep;
6934 {
6935   struct coding_system coding;
6936
6937   CHECK_STRING (string);
6938   CHECK_SYMBOL (coding_system);
6939
6940   if (NILP (coding_system))
6941     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
6942
6943   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6944     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
6945
6946   coding.mode |= CODING_MODE_LAST_BLOCK;
6947   string = (encodep
6948             ? encode_coding_string (string, &coding, !NILP (nocopy))
6949             : decode_coding_string (string, &coding, !NILP (nocopy)));
6950   Vlast_coding_system_used = coding.symbol;
6951
6952   return string;
6953 }
6954
6955 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
6956        2, 3, 0,
6957        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
6958 Optional arg NOCOPY non-nil means it is OK to return STRING itself
6959 if the decoding operation is trivial.
6960 This function sets `last-coding-system-used' to the precise coding system
6961 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6962 not fully specified.)  */)
6963      (string, coding_system, nocopy)
6964      Lisp_Object string, coding_system, nocopy;
6965 {
6966   return code_convert_string1 (string, coding_system, nocopy, 0);
6967 }
6968
6969 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
6970        2, 3, 0,
6971        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
6972 Optional arg NOCOPY non-nil means it is OK to return STRING itself
6973 if the encoding operation is trivial.
6974 This function sets `last-coding-system-used' to the precise coding system
6975 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6976 not fully specified.)  */)
6977      (string, coding_system, nocopy)
6978      Lisp_Object string, coding_system, nocopy;
6979 {
6980   return code_convert_string1 (string, coding_system, nocopy, 1);
6981 }
6982
6983 /* Encode or decode STRING according to CODING_SYSTEM.
6984    Do not set Vlast_coding_system_used.
6985
6986    This function is called only from macros DECODE_FILE and
6987    ENCODE_FILE, thus we ignore character composition.  */
6988
6989 Lisp_Object
6990 code_convert_string_norecord (string, coding_system, encodep)
6991      Lisp_Object string, coding_system;
6992      int encodep;
6993 {
6994   struct coding_system coding;
6995
6996   CHECK_STRING (string);
6997   CHECK_SYMBOL (coding_system);
6998
6999   if (NILP (coding_system))
7000     return string;
7001
7002   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
7003     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
7004
7005   coding.composing = COMPOSITION_DISABLED;
7006   coding.mode |= CODING_MODE_LAST_BLOCK;
7007   return (encodep
7008           ? encode_coding_string (string, &coding, 1)
7009           : decode_coding_string (string, &coding, 1));
7010 }
7011 \f
7012 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
7013        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7014 Return the corresponding character.  */)
7015      (code)
7016      Lisp_Object code;
7017 {
7018   unsigned char c1, c2, s1, s2;
7019   Lisp_Object val;
7020
7021   CHECK_NUMBER (code);
7022   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
7023   if (s1 == 0)
7024     {
7025       if (s2 < 0x80)
7026         XSETFASTINT (val, s2);
7027       else if (s2 >= 0xA0 || s2 <= 0xDF)
7028         XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0));
7029       else
7030         error ("Invalid Shift JIS code: %x", XFASTINT (code));
7031     }
7032   else
7033     {
7034       if ((s1 < 0x80 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF)
7035           || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
7036         error ("Invalid Shift JIS code: %x", XFASTINT (code));
7037       DECODE_SJIS (s1, s2, c1, c2);
7038       XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2));
7039     }
7040   return val;
7041 }
7042
7043 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
7044        doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7045 Return the corresponding code in SJIS.  */)
7046      (ch)
7047      Lisp_Object ch;
7048 {
7049   int charset, c1, c2, s1, s2;
7050   Lisp_Object val;
7051
7052   CHECK_NUMBER (ch);
7053   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
7054   if (charset == CHARSET_ASCII)
7055     {
7056       val = ch;
7057     }
7058   else if (charset == charset_jisx0208
7059            && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
7060     {
7061       ENCODE_SJIS (c1, c2, s1, s2);
7062       XSETFASTINT (val, (s1 << 8) | s2);
7063     }
7064   else if (charset == charset_katakana_jisx0201
7065            && c1 > 0x20 && c2 < 0xE0)
7066     {
7067       XSETFASTINT (val, c1 | 0x80);
7068     }
7069   else
7070     error ("Can't encode to shift_jis: %d", XFASTINT (ch));
7071   return val;
7072 }
7073
7074 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
7075        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7076 Return the corresponding character.  */)
7077      (code)
7078      Lisp_Object code;
7079 {
7080   int charset;
7081   unsigned char b1, b2, c1, c2;
7082   Lisp_Object val;
7083
7084   CHECK_NUMBER (code);
7085   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
7086   if (b1 == 0)
7087     {
7088       if (b2 >= 0x80)
7089         error ("Invalid BIG5 code: %x", XFASTINT (code));
7090       val = code;
7091     }
7092   else
7093     {
7094       if ((b1 < 0xA1 || b1 > 0xFE)
7095           || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
7096         error ("Invalid BIG5 code: %x", XFASTINT (code));
7097       DECODE_BIG5 (b1, b2, charset, c1, c2);
7098       XSETFASTINT (val, MAKE_CHAR (charset, c1, c2));
7099     }
7100   return val;
7101 }
7102
7103 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
7104        doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7105 Return the corresponding character code in Big5.  */)
7106      (ch)
7107      Lisp_Object ch;
7108 {
7109   int charset, c1, c2, b1, b2;
7110   Lisp_Object val;
7111
7112   CHECK_NUMBER (ch);
7113   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
7114   if (charset == CHARSET_ASCII)
7115     {
7116       val = ch;
7117     }
7118   else if ((charset == charset_big5_1
7119             && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
7120            || (charset == charset_big5_2
7121                && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
7122     {
7123       ENCODE_BIG5 (charset, c1, c2, b1, b2);
7124       XSETFASTINT (val, (b1 << 8) | b2);
7125     }
7126   else
7127     error ("Can't encode to Big5: %d", XFASTINT (ch));
7128   return val;
7129 }
7130 \f
7131 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
7132        Sset_terminal_coding_system_internal, 1, 1, 0,
7133        doc: /* Internal use only.  */)
7134      (coding_system)
7135      Lisp_Object coding_system;
7136 {
7137   CHECK_SYMBOL (coding_system);
7138   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
7139   /* We had better not send unsafe characters to terminal.  */
7140   terminal_coding.mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
7141   /* Character composition should be disabled.  */
7142   terminal_coding.composing = COMPOSITION_DISABLED;
7143   /* Error notification should be suppressed.  */
7144   terminal_coding.suppress_error = 1;
7145   terminal_coding.src_multibyte = 1;
7146   terminal_coding.dst_multibyte = 0;
7147   return Qnil;
7148 }
7149
7150 DEFUN ("set-safe-terminal-coding-system-internal", Fset_safe_terminal_coding_system_internal,
7151        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
7152        doc: /* Internal use only.  */)
7153      (coding_system)
7154      Lisp_Object coding_system;
7155 {
7156   CHECK_SYMBOL (coding_system);
7157   setup_coding_system (Fcheck_coding_system (coding_system),
7158                        &safe_terminal_coding);
7159   /* Character composition should be disabled.  */
7160   safe_terminal_coding.composing = COMPOSITION_DISABLED;
7161   /* Error notification should be suppressed.  */
7162   terminal_coding.suppress_error = 1;
7163   safe_terminal_coding.src_multibyte = 1;
7164   safe_terminal_coding.dst_multibyte = 0;
7165   return Qnil;
7166 }
7167
7168 DEFUN ("terminal-coding-system", Fterminal_coding_system,
7169        Sterminal_coding_system, 0, 0, 0,
7170        doc: /* Return coding system specified for terminal output.  */)
7171      ()
7172 {
7173   return terminal_coding.symbol;
7174 }
7175
7176 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
7177        Sset_keyboard_coding_system_internal, 1, 1, 0,
7178        doc: /* Internal use only.  */)
7179      (coding_system)
7180      Lisp_Object coding_system;
7181 {
7182   CHECK_SYMBOL (coding_system);
7183   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
7184   /* Character composition should be disabled.  */
7185   keyboard_coding.composing = COMPOSITION_DISABLED;
7186   return Qnil;
7187 }
7188
7189 DEFUN ("keyboard-coding-system", Fkeyboard_coding_system,
7190        Skeyboard_coding_system, 0, 0, 0,
7191        doc: /* Return coding system specified for decoding keyboard input.  */)
7192      ()
7193 {
7194   return keyboard_coding.symbol;
7195 }
7196
7197 \f
7198 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7199        Sfind_operation_coding_system,  1, MANY, 0,
7200        doc: /* Choose a coding system for an operation based on the target name.
7201 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7202 DECODING-SYSTEM is the coding system to use for decoding
7203 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7204 for encoding (in case OPERATION does encoding).
7205
7206 The first argument OPERATION specifies an I/O primitive:
7207   For file I/O, `insert-file-contents' or `write-region'.
7208   For process I/O, `call-process', `call-process-region', or `start-process'.
7209   For network I/O, `open-network-stream'.
7210
7211 The remaining arguments should be the same arguments that were passed
7212 to the primitive.  Depending on which primitive, one of those arguments
7213 is selected as the TARGET.  For example, if OPERATION does file I/O,
7214 whichever argument specifies the file name is TARGET.
7215
7216 TARGET has a meaning which depends on OPERATION:
7217   For file I/O, TARGET is a file name.
7218   For process I/O, TARGET is a process name.
7219   For network I/O, TARGET is a service name or a port number
7220
7221 This function looks up what specified for TARGET in,
7222 `file-coding-system-alist', `process-coding-system-alist',
7223 or `network-coding-system-alist' depending on OPERATION.
7224 They may specify a coding system, a cons of coding systems,
7225 or a function symbol to call.
7226 In the last case, we call the function with one argument,
7227 which is a list of all the arguments given to this function.
7228
7229 usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
7230      (nargs, args)
7231      int nargs;
7232      Lisp_Object *args;
7233 {
7234   Lisp_Object operation, target_idx, target, val;
7235   register Lisp_Object chain;
7236
7237   if (nargs < 2)
7238     error ("Too few arguments");
7239   operation = args[0];
7240   if (!SYMBOLP (operation)
7241       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
7242     error ("Invalid first argument");
7243   if (nargs < 1 + XINT (target_idx))
7244     error ("Too few arguments for operation: %s",
7245            SDATA (SYMBOL_NAME (operation)));
7246   /* For write-region, if the 6th argument (i.e. VISIT, the 5th
7247      argument to write-region) is string, it must be treated as a
7248      target file name.  */
7249   if (EQ (operation, Qwrite_region)
7250       && nargs > 5
7251       && STRINGP (args[5]))
7252     target_idx = make_number (4);
7253   target = args[XINT (target_idx) + 1];
7254   if (!(STRINGP (target)
7255         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
7256     error ("Invalid argument %d", XINT (target_idx) + 1);
7257
7258   chain = ((EQ (operation, Qinsert_file_contents)
7259             || EQ (operation, Qwrite_region))
7260            ? Vfile_coding_system_alist
7261            : (EQ (operation, Qopen_network_stream)
7262               ? Vnetwork_coding_system_alist
7263               : Vprocess_coding_system_alist));
7264   if (NILP (chain))
7265     return Qnil;
7266
7267   for (; CONSP (chain); chain = XCDR (chain))
7268     {
7269       Lisp_Object elt;
7270       elt = XCAR (chain);
7271
7272       if (CONSP (elt)
7273           && ((STRINGP (target)
7274                && STRINGP (XCAR (elt))
7275                && fast_string_match (XCAR (elt), target) >= 0)
7276               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
7277         {
7278           val = XCDR (elt);
7279           /* Here, if VAL is both a valid coding system and a valid
7280              function symbol, we return VAL as a coding system.  */
7281           if (CONSP (val))
7282             return val;
7283           if (! SYMBOLP (val))
7284             return Qnil;
7285           if (! NILP (Fcoding_system_p (val)))
7286             return Fcons (val, val);
7287           if (! NILP (Ffboundp (val)))
7288             {
7289               val = call1 (val, Flist (nargs, args));
7290               if (CONSP (val))
7291                 return val;
7292               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7293                 return Fcons (val, val);
7294             }
7295           return Qnil;
7296         }
7297     }
7298   return Qnil;
7299 }
7300
7301 DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
7302        Supdate_coding_systems_internal, 0, 0, 0,
7303        doc: /* Update internal database for ISO2022 and CCL based coding systems.
7304 When values of any coding categories are changed, you must
7305 call this function.  */)
7306      ()
7307 {
7308   int i;
7309
7310   for (i = CODING_CATEGORY_IDX_EMACS_MULE; i < CODING_CATEGORY_IDX_MAX; i++)
7311     {
7312       Lisp_Object val;
7313
7314       val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[i]);
7315       if (!NILP (val))
7316         {
7317           if (! coding_system_table[i])
7318             coding_system_table[i] = ((struct coding_system *)
7319                                       xmalloc (sizeof (struct coding_system)));
7320           setup_coding_system (val, coding_system_table[i]);
7321         }
7322       else if (coding_system_table[i])
7323         {
7324           xfree (coding_system_table[i]);
7325           coding_system_table[i] = NULL;
7326         }
7327     }
7328
7329   return Qnil;
7330 }
7331
7332 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
7333        Sset_coding_priority_internal, 0, 0, 0,
7334        doc: /* Update internal database for the current value of `coding-category-list'.
7335 This function is internal use only.  */)
7336      ()
7337 {
7338   int i = 0, idx;
7339   Lisp_Object val;
7340
7341   val = Vcoding_category_list;
7342
7343   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
7344     {
7345       if (! SYMBOLP (XCAR (val)))
7346         break;
7347       idx = XFASTINT (Fget (XCAR (val), Qcoding_category_index));
7348       if (idx >= CODING_CATEGORY_IDX_MAX)
7349         break;
7350       coding_priorities[i++] = (1 << idx);
7351       val = XCDR (val);
7352     }
7353   /* If coding-category-list is valid and contains all coding
7354      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
7355      the following code saves Emacs from crashing.  */
7356   while (i < CODING_CATEGORY_IDX_MAX)
7357     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
7358
7359   return Qnil;
7360 }
7361
7362 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7363        Sdefine_coding_system_internal, 1, 1, 0,
7364        doc: /* Register CODING-SYSTEM as a base coding system.
7365 This function is internal use only.  */)
7366      (coding_system)
7367      Lisp_Object coding_system;
7368 {
7369   Lisp_Object safe_chars, slot;
7370
7371   if (NILP (Fcheck_coding_system (coding_system)))
7372     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
7373   safe_chars = coding_safe_chars (coding_system);
7374   if (! EQ (safe_chars, Qt) && ! CHAR_TABLE_P (safe_chars))
7375     error ("No valid safe-chars property for %s",
7376            SDATA (SYMBOL_NAME (coding_system)));
7377   if (EQ (safe_chars, Qt))
7378     {
7379       if (NILP (Fmemq (coding_system, XCAR (Vcoding_system_safe_chars))))
7380         XSETCAR (Vcoding_system_safe_chars,
7381                  Fcons (coding_system, XCAR (Vcoding_system_safe_chars)));
7382     }
7383   else
7384     {
7385       slot = Fassq (coding_system, XCDR (Vcoding_system_safe_chars));
7386       if (NILP (slot))
7387         XSETCDR (Vcoding_system_safe_chars,
7388                  nconc2 (XCDR (Vcoding_system_safe_chars),
7389                          Fcons (Fcons (coding_system, safe_chars), Qnil)));
7390       else
7391         XSETCDR (slot, safe_chars);
7392     }
7393   return Qnil;
7394 }
7395
7396 #endif /* emacs */
7397
7398 \f
7399 /*** 9. Post-amble ***/
7400
7401 void
7402 init_coding_once ()
7403 {
7404   int i;
7405
7406   /* Emacs' internal format specific initialize routine.  */
7407   for (i = 0; i <= 0x20; i++)
7408     emacs_code_class[i] = EMACS_control_code;
7409   emacs_code_class[0x0A] = EMACS_linefeed_code;
7410   emacs_code_class[0x0D] = EMACS_carriage_return_code;
7411   for (i = 0x21 ; i < 0x7F; i++)
7412     emacs_code_class[i] = EMACS_ascii_code;
7413   emacs_code_class[0x7F] = EMACS_control_code;
7414   for (i = 0x80; i < 0xFF; i++)
7415     emacs_code_class[i] = EMACS_invalid_code;
7416   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
7417   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
7418   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
7419   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
7420
7421   /* ISO2022 specific initialize routine.  */
7422   for (i = 0; i < 0x20; i++)
7423     iso_code_class[i] = ISO_control_0;
7424   for (i = 0x21; i < 0x7F; i++)
7425     iso_code_class[i] = ISO_graphic_plane_0;
7426   for (i = 0x80; i < 0xA0; i++)
7427     iso_code_class[i] = ISO_control_1;
7428   for (i = 0xA1; i < 0xFF; i++)
7429     iso_code_class[i] = ISO_graphic_plane_1;
7430   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
7431   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
7432   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
7433   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
7434   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
7435   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
7436   iso_code_class[ISO_CODE_ESC] = ISO_escape;
7437   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
7438   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
7439   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
7440
7441   setup_coding_system (Qnil, &keyboard_coding);
7442   setup_coding_system (Qnil, &terminal_coding);
7443   setup_coding_system (Qnil, &safe_terminal_coding);
7444   setup_coding_system (Qnil, &default_buffer_file_coding);
7445
7446   bzero (coding_system_table, sizeof coding_system_table);
7447
7448   bzero (ascii_skip_code, sizeof ascii_skip_code);
7449   for (i = 0; i < 128; i++)
7450     ascii_skip_code[i] = 1;
7451
7452 #if defined (MSDOS) || defined (WINDOWSNT)
7453   system_eol_type = CODING_EOL_CRLF;
7454 #else
7455   system_eol_type = CODING_EOL_LF;
7456 #endif
7457
7458   inhibit_pre_post_conversion = 0;
7459 }
7460
7461 #ifdef emacs
7462
7463 void
7464 syms_of_coding ()
7465 {
7466   Qtarget_idx = intern ("target-idx");
7467   staticpro (&Qtarget_idx);
7468
7469   Qcoding_system_history = intern ("coding-system-history");
7470   staticpro (&Qcoding_system_history);
7471   Fset (Qcoding_system_history, Qnil);
7472
7473   /* Target FILENAME is the first argument.  */
7474   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
7475   /* Target FILENAME is the third argument.  */
7476   Fput (Qwrite_region, Qtarget_idx, make_number (2));
7477
7478   Qcall_process = intern ("call-process");
7479   staticpro (&Qcall_process);
7480   /* Target PROGRAM is the first argument.  */
7481   Fput (Qcall_process, Qtarget_idx, make_number (0));
7482
7483   Qcall_process_region = intern ("call-process-region");
7484   staticpro (&Qcall_process_region);
7485   /* Target PROGRAM is the third argument.  */
7486   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
7487
7488   Qstart_process = intern ("start-process");
7489   staticpro (&Qstart_process);
7490   /* Target PROGRAM is the third argument.  */
7491   Fput (Qstart_process, Qtarget_idx, make_number (2));
7492
7493   Qopen_network_stream = intern ("open-network-stream");
7494   staticpro (&Qopen_network_stream);
7495   /* Target SERVICE is the fourth argument.  */
7496   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
7497
7498   Qcoding_system = intern ("coding-system");
7499   staticpro (&Qcoding_system);
7500
7501   Qeol_type = intern ("eol-type");
7502   staticpro (&Qeol_type);
7503
7504   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
7505   staticpro (&Qbuffer_file_coding_system);
7506
7507   Qpost_read_conversion = intern ("post-read-conversion");
7508   staticpro (&Qpost_read_conversion);
7509
7510   Qpre_write_conversion = intern ("pre-write-conversion");
7511   staticpro (&Qpre_write_conversion);
7512
7513   Qno_conversion = intern ("no-conversion");
7514   staticpro (&Qno_conversion);
7515
7516   Qundecided = intern ("undecided");
7517   staticpro (&Qundecided);
7518
7519   Qcoding_system_p = intern ("coding-system-p");
7520   staticpro (&Qcoding_system_p);
7521
7522   Qcoding_system_error = intern ("coding-system-error");
7523   staticpro (&Qcoding_system_error);
7524
7525   Fput (Qcoding_system_error, Qerror_conditions,
7526         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
7527   Fput (Qcoding_system_error, Qerror_message,
7528         build_string ("Invalid coding system"));
7529
7530   Qcoding_category = intern ("coding-category");
7531   staticpro (&Qcoding_category);
7532   Qcoding_category_index = intern ("coding-category-index");
7533   staticpro (&Qcoding_category_index);
7534
7535   Vcoding_category_table
7536     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
7537   staticpro (&Vcoding_category_table);
7538   {
7539     int i;
7540     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
7541       {
7542         XVECTOR (Vcoding_category_table)->contents[i]
7543           = intern (coding_category_name[i]);
7544         Fput (XVECTOR (Vcoding_category_table)->contents[i],
7545               Qcoding_category_index, make_number (i));
7546       }
7547   }
7548
7549   Vcoding_system_safe_chars = Fcons (Qnil, Qnil);
7550   staticpro (&Vcoding_system_safe_chars);
7551
7552   Qtranslation_table = intern ("translation-table");
7553   staticpro (&Qtranslation_table);
7554   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
7555
7556   Qtranslation_table_id = intern ("translation-table-id");
7557   staticpro (&Qtranslation_table_id);
7558
7559   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
7560   staticpro (&Qtranslation_table_for_decode);
7561
7562   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
7563   staticpro (&Qtranslation_table_for_encode);
7564
7565   Qsafe_chars = intern ("safe-chars");
7566   staticpro (&Qsafe_chars);
7567
7568   Qchar_coding_system = intern ("char-coding-system");
7569   staticpro (&Qchar_coding_system);
7570
7571   /* Intern this now in case it isn't already done.
7572      Setting this variable twice is harmless.
7573      But don't staticpro it here--that is done in alloc.c.  */
7574   Qchar_table_extra_slots = intern ("char-table-extra-slots");
7575   Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
7576   Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (0));
7577
7578   Qvalid_codes = intern ("valid-codes");
7579   staticpro (&Qvalid_codes);
7580
7581   Qemacs_mule = intern ("emacs-mule");
7582   staticpro (&Qemacs_mule);
7583
7584   Qraw_text = intern ("raw-text");
7585   staticpro (&Qraw_text);
7586
7587   defsubr (&Scoding_system_p);
7588   defsubr (&Sread_coding_system);
7589   defsubr (&Sread_non_nil_coding_system);
7590   defsubr (&Scheck_coding_system);
7591   defsubr (&Sdetect_coding_region);
7592   defsubr (&Sdetect_coding_string);
7593   defsubr (&Sfind_coding_systems_region_internal);
7594   defsubr (&Sunencodable_char_position);
7595   defsubr (&Sdecode_coding_region);
7596   defsubr (&Sencode_coding_region);
7597   defsubr (&Sdecode_coding_string);
7598   defsubr (&Sencode_coding_string);
7599   defsubr (&Sdecode_sjis_char);
7600   defsubr (&Sencode_sjis_char);
7601   defsubr (&Sdecode_big5_char);
7602   defsubr (&Sencode_big5_char);
7603   defsubr (&Sset_terminal_coding_system_internal);
7604   defsubr (&Sset_safe_terminal_coding_system_internal);
7605   defsubr (&Sterminal_coding_system);
7606   defsubr (&Sset_keyboard_coding_system_internal);
7607   defsubr (&Skeyboard_coding_system);
7608   defsubr (&Sfind_operation_coding_system);
7609   defsubr (&Supdate_coding_systems_internal);
7610   defsubr (&Sset_coding_priority_internal);
7611   defsubr (&Sdefine_coding_system_internal);
7612
7613   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
7614                doc: /* List of coding systems.
7615
7616 Do not alter the value of this variable manually.  This variable should be
7617 updated by the functions `make-coding-system' and
7618 `define-coding-system-alias'.  */);
7619   Vcoding_system_list = Qnil;
7620
7621   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
7622                doc: /* Alist of coding system names.
7623 Each element is one element list of coding system name.
7624 This variable is given to `completing-read' as TABLE argument.
7625
7626 Do not alter the value of this variable manually.  This variable should be
7627 updated by the functions `make-coding-system' and
7628 `define-coding-system-alias'.  */);
7629   Vcoding_system_alist = Qnil;
7630
7631   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
7632                doc: /* List of coding-categories (symbols) ordered by priority.
7633
7634 On detecting a coding system, Emacs tries code detection algorithms
7635 associated with each coding-category one by one in this order.  When
7636 one algorithm agrees with a byte sequence of source text, the coding
7637 system bound to the corresponding coding-category is selected.  */);
7638   {
7639     int i;
7640
7641     Vcoding_category_list = Qnil;
7642     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
7643       Vcoding_category_list
7644         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
7645                  Vcoding_category_list);
7646   }
7647
7648   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
7649                doc: /* Specify the coding system for read operations.
7650 It is useful to bind this variable with `let', but do not set it globally.
7651 If the value is a coding system, it is used for decoding on read operation.
7652 If not, an appropriate element is used from one of the coding system alists:
7653 There are three such tables, `file-coding-system-alist',
7654 `process-coding-system-alist', and `network-coding-system-alist'.  */);
7655   Vcoding_system_for_read = Qnil;
7656
7657   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
7658                doc: /* Specify the coding system for write operations.
7659 Programs bind this variable with `let', but you should not set it globally.
7660 If the value is a coding system, it is used for encoding of output,
7661 when writing it to a file and when sending it to a file or subprocess.
7662
7663 If this does not specify a coding system, an appropriate element
7664 is used from one of the coding system alists:
7665 There are three such tables, `file-coding-system-alist',
7666 `process-coding-system-alist', and `network-coding-system-alist'.
7667 For output to files, if the above procedure does not specify a coding system,
7668 the value of `buffer-file-coding-system' is used.  */);
7669   Vcoding_system_for_write = Qnil;
7670
7671   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
7672                doc: /* Coding system used in the latest file or process I/O.
7673 Also set by `encode-coding-region', `decode-coding-region',
7674 `encode-coding-string' and `decode-coding-string'.  */);
7675   Vlast_coding_system_used = Qnil;
7676
7677   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
7678                doc: /* *Non-nil means always inhibit code conversion of end-of-line format.
7679 See info node `Coding Systems' and info node `Text and Binary' concerning
7680 such conversion.  */);
7681   inhibit_eol_conversion = 0;
7682
7683   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
7684                doc: /* Non-nil means process buffer inherits coding system of process output.
7685 Bind it to t if the process output is to be treated as if it were a file
7686 read from some filesystem.  */);
7687   inherit_process_coding_system = 0;
7688
7689   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
7690                doc: /* Alist to decide a coding system to use for a file I/O operation.
7691 The format is ((PATTERN . VAL) ...),
7692 where PATTERN is a regular expression matching a file name,
7693 VAL is a coding system, a cons of coding systems, or a function symbol.
7694 If VAL is a coding system, it is used for both decoding and encoding
7695 the file contents.
7696 If VAL is a cons of coding systems, the car part is used for decoding,
7697 and the cdr part is used for encoding.
7698 If VAL is a function symbol, the function must return a coding system
7699 or a cons of coding systems which are used as above.  The function gets
7700 the arguments with which `find-operation-coding-system' was called.
7701
7702 See also the function `find-operation-coding-system'
7703 and the variable `auto-coding-alist'.  */);
7704   Vfile_coding_system_alist = Qnil;
7705
7706   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
7707     doc: /* Alist to decide a coding system to use for a process I/O operation.
7708 The format is ((PATTERN . VAL) ...),
7709 where PATTERN is a regular expression matching a program name,
7710 VAL is a coding system, a cons of coding systems, or a function symbol.
7711 If VAL is a coding system, it is used for both decoding what received
7712 from the program and encoding what sent to the program.
7713 If VAL is a cons of coding systems, the car part is used for decoding,
7714 and the cdr part is used for encoding.
7715 If VAL is a function symbol, the function must return a coding system
7716 or a cons of coding systems which are used as above.
7717
7718 See also the function `find-operation-coding-system'.  */);
7719   Vprocess_coding_system_alist = Qnil;
7720
7721   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
7722     doc: /* Alist to decide a coding system to use for a network I/O operation.
7723 The format is ((PATTERN . VAL) ...),
7724 where PATTERN is a regular expression matching a network service name
7725 or is a port number to connect to,
7726 VAL is a coding system, a cons of coding systems, or a function symbol.
7727 If VAL is a coding system, it is used for both decoding what received
7728 from the network stream and encoding what sent to the network stream.
7729 If VAL is a cons of coding systems, the car part is used for decoding,
7730 and the cdr part is used for encoding.
7731 If VAL is a function symbol, the function must return a coding system
7732 or a cons of coding systems which are used as above.
7733
7734 See also the function `find-operation-coding-system'.  */);
7735   Vnetwork_coding_system_alist = Qnil;
7736
7737   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
7738                doc: /* Coding system to use with system messages.
7739 Also used for decoding keyboard input on X Window system.  */);
7740   Vlocale_coding_system = Qnil;
7741
7742   /* The eol mnemonics are reset in startup.el system-dependently.  */
7743   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
7744                doc: /* *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
7745   eol_mnemonic_unix = build_string (":");
7746
7747   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
7748                doc: /* *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
7749   eol_mnemonic_dos = build_string ("\\");
7750
7751   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
7752                doc: /* *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
7753   eol_mnemonic_mac = build_string ("/");
7754
7755   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
7756                doc: /* *String displayed in mode line when end-of-line format is not yet determined.  */);
7757   eol_mnemonic_undecided = build_string (":");
7758
7759   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
7760                doc: /* *Non-nil enables character translation while encoding and decoding.  */);
7761   Venable_character_translation = Qt;
7762
7763   DEFVAR_LISP ("standard-translation-table-for-decode",
7764                &Vstandard_translation_table_for_decode,
7765                doc: /* Table for translating characters while decoding.  */);
7766   Vstandard_translation_table_for_decode = Qnil;
7767
7768   DEFVAR_LISP ("standard-translation-table-for-encode",
7769                &Vstandard_translation_table_for_encode,
7770                doc: /* Table for translating characters while encoding.  */);
7771   Vstandard_translation_table_for_encode = Qnil;
7772
7773   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
7774                doc: /* Alist of charsets vs revision numbers.
7775 While encoding, if a charset (car part of an element) is found,
7776 designate it with the escape sequence identifying revision (cdr part of the element).  */);
7777   Vcharset_revision_alist = Qnil;
7778
7779   DEFVAR_LISP ("default-process-coding-system",
7780                &Vdefault_process_coding_system,
7781                doc: /* Cons of coding systems used for process I/O by default.
7782 The car part is used for decoding a process output,
7783 the cdr part is used for encoding a text to be sent to a process.  */);
7784   Vdefault_process_coding_system = Qnil;
7785
7786   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
7787                doc: /* Table of extra Latin codes in the range 128..159 (inclusive).
7788 This is a vector of length 256.
7789 If Nth element is non-nil, the existence of code N in a file
7790 \(or output of subprocess) doesn't prevent it to be detected as
7791 a coding system of ISO 2022 variant which has a flag
7792 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
7793 or reading output of a subprocess.
7794 Only 128th through 159th elements has a meaning.  */);
7795   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
7796
7797   DEFVAR_LISP ("select-safe-coding-system-function",
7798                &Vselect_safe_coding_system_function,
7799                doc: /* Function to call to select safe coding system for encoding a text.
7800
7801 If set, this function is called to force a user to select a proper
7802 coding system which can encode the text in the case that a default
7803 coding system used in each operation can't encode the text.
7804
7805 The default value is `select-safe-coding-system' (which see).  */);
7806   Vselect_safe_coding_system_function = Qnil;
7807
7808   DEFVAR_BOOL ("coding-system-require-warning",
7809                &coding_system_require_warning,
7810                doc: /* Internal use only.
7811 If non-nil, on writing a file, `select-safe-coding-system-function' is
7812 called even if `coding-system-for-write' is non-nil.  The command
7813 `universal-coding-system-argument' binds this variable to t temporarily.  */);
7814   coding_system_require_warning = 0;
7815
7816
7817   DEFVAR_BOOL ("inhibit-iso-escape-detection",
7818                &inhibit_iso_escape_detection,
7819                doc: /* If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
7820
7821 By default, on reading a file, Emacs tries to detect how the text is
7822 encoded.  This code detection is sensitive to escape sequences.  If
7823 the sequence is valid as ISO2022, the code is determined as one of
7824 the ISO2022 encodings, and the file is decoded by the corresponding
7825 coding system (e.g. `iso-2022-7bit').
7826
7827 However, there may be a case that you want to read escape sequences in
7828 a file as is.  In such a case, you can set this variable to non-nil.
7829 Then, as the code detection ignores any escape sequences, no file is
7830 detected as encoded in some ISO2022 encoding.  The result is that all
7831 escape sequences become visible in a buffer.
7832
7833 The default value is nil, and it is strongly recommended not to change
7834 it.  That is because many Emacs Lisp source files that contain
7835 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
7836 in Emacs's distribution, and they won't be decoded correctly on
7837 reading if you suppress escape sequence detection.
7838
7839 The other way to read escape sequences in a file without decoding is
7840 to explicitly specify some coding system that doesn't use ISO2022's
7841 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
7842   inhibit_iso_escape_detection = 0;
7843
7844   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
7845                doc: /* Char table for translating self-inserting characters.
7846 This is applied to the result of input methods, not their input.  See also
7847 `keyboard-translate-table'.  */);
7848     Vtranslation_table_for_input = Qnil;
7849 }
7850
7851 char *
7852 emacs_strerror (error_number)
7853      int error_number;
7854 {
7855   char *str;
7856
7857   synchronize_system_messages_locale ();
7858   str = strerror (error_number);
7859
7860   if (! NILP (Vlocale_coding_system))
7861     {
7862       Lisp_Object dec = code_convert_string_norecord (build_string (str),
7863                                                       Vlocale_coding_system,
7864                                                       0);
7865       str = (char *) SDATA (dec);
7866     }
7867
7868   return str;
7869 }
7870
7871 #endif /* emacs */
7872