src/coding.c

   1 /* Coding system handler (conversion, detection, etc.).
   2    Copyright (C) 1995,97,1998,2002,2003  Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4    Copyright (C) 2001,2002,2003  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2, or (at your option)
  11 any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs; see the file COPYING.  If not, write to
  20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  21 Boston, MA 02111-1307, USA.  */
  22
  23 /*** TABLE OF CONTENTS ***
  24
  25   0. General comments
  26   1. Preamble
  27   2. Emacs' internal format (emacs-mule) handlers
  28   3. ISO2022 handlers
  29   4. Shift-JIS and BIG5 handlers
  30   5. CCL handlers
  31   6. End-of-line handlers
  32   7. C library functions
  33   8. Emacs Lisp library functions
  34   9. Post-amble
  35
  36 */
  37
  38 /*** 0. General comments ***/
  39
  40
  41 /*** GENERAL NOTE on CODING SYSTEMS ***
  42
  43   A coding system is an encoding mechanism for one or more character
  44   sets.  Here's a list of coding systems which Emacs can handle.  When
  45   we say "decode", it means converting some other coding system to
  46   Emacs' internal format (emacs-mule), and when we say "encode",
  47   it means converting the coding system emacs-mule to some other
  48   coding system.
  49
  50   0. Emacs' internal format (emacs-mule)
  51
  52   Emacs itself holds a multi-lingual character in buffers and strings
  53   in a special format.  Details are described in section 2.
  54
  55   1. ISO2022
  56
  57   The most famous coding system for multiple character sets.  X's
  58   Compound Text, various EUCs (Extended Unix Code), and coding
  59   systems used in Internet communication such as ISO-2022-JP are
  60   all variants of ISO2022.  Details are described in section 3.
  61
  62   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  63
  64   A coding system to encode character sets: ASCII, JISX0201, and
  65   JISX0208.  Widely used for PC's in Japan.  Details are described in
  66   section 4.
  67
  68   3. BIG5
  69
  70   A coding system to encode the character sets ASCII and Big5.  Widely
  71   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  72   described in section 4.  In this file, when we write "BIG5"
  73   (all uppercase), we mean the coding system, and when we write
  74   "Big5" (capitalized), we mean the character set.
  75
  76   4. Raw text
  77
  78   A coding system for text containing random 8-bit code.  Emacs does
  79   no code conversion on such text except for end-of-line format.
  80
  81   5. Other
  82
  83   If a user wants to read/write text encoded in a coding system not
  84   listed above, he can supply a decoder and an encoder for it as CCL
  85   (Code Conversion Language) programs.  Emacs executes the CCL program
  86   while reading/writing.
  87
  88   Emacs represents a coding system by a Lisp symbol that has a property
  89   `coding-system'.  But, before actually using the coding system, the
  90   information about it is set in a structure of type `struct
  91   coding_system' for rapid processing.  See section 6 for more details.
  92
  93 */
  94
  95 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  96
  97   How end-of-line of text is encoded depends on the operating system.
  98   For instance, Unix's format is just one byte of `line-feed' code,
  99   whereas DOS's format is two-byte sequence of `carriage-return' and
 100   `line-feed' codes.  MacOS's format is usually one byte of
 101   `carriage-return'.
 102
 103   Since text character encoding and end-of-line encoding are
 104   independent, any coding system described above can have any
 105   end-of-line format.  So Emacs has information about end-of-line
 106   format in each coding-system.  See section 6 for more details.
 107
 108 */
 109
 110 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 111
 112   These functions check if a text between SRC and SRC_END is encoded
 113   in the coding system category XXX.  Each returns an integer value in
 114   which appropriate flag bits for the category XXX are set.  The flag
 115   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 116   template for these functions.  If MULTIBYTEP is nonzero, 8-bit codes
 117   of the range 0x80..0x9F are in multibyte form.  */
 118 #if 0
 119 int
 120 detect_coding_emacs_mule (src, src_end, multibytep)
 121      unsigned char *src, *src_end;
 122      int multibytep;
 123 {
 124   ...
 125 }
 126 #endif
 127
 128 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 129
 130   These functions decode SRC_BYTES length of unibyte text at SOURCE
 131   encoded in CODING to Emacs' internal format.  The resulting
 132   multibyte text goes to a place pointed to by DESTINATION, the length
 133   of which should not exceed DST_BYTES.
 134
 135   These functions set the information about original and decoded texts
 136   in the members `produced', `produced_char', `consumed', and
 137   `consumed_char' of the structure *CODING.  They also set the member
 138   `result' to one of CODING_FINISH_XXX indicating how the decoding
 139   finished.
 140
 141   DST_BYTES zero means that the source area and destination area are
 142   overlapped, which means that we can produce a decoded text until it
 143   reaches the head of the not-yet-decoded source text.
 144
 145   Below is a template for these functions.  */
 146 #if 0
 147 static void
 148 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 149      struct coding_system *coding;
 150      unsigned char *source, *destination;
 151      int src_bytes, dst_bytes;
 152 {
 153   ...
 154 }
 155 #endif
 156
 157 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 158
 159   These functions encode SRC_BYTES length text at SOURCE from Emacs'
 160   internal multibyte format to CODING.  The resulting unibyte text
 161   goes to a place pointed to by DESTINATION, the length of which
 162   should not exceed DST_BYTES.
 163
 164   These functions set the information about original and encoded texts
 165   in the members `produced', `produced_char', `consumed', and
 166   `consumed_char' of the structure *CODING.  They also set the member
 167   `result' to one of CODING_FINISH_XXX indicating how the encoding
 168   finished.
 169
 170   DST_BYTES zero means that the source area and destination area are
 171   overlapped, which means that we can produce encoded text until it
  172   reaches the head of the not-yet-encoded source text.
 173
 174   Below is a template for these functions.  */
 175 #if 0
 176 static void
 177 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 178      struct coding_system *coding;
 179      unsigned char *source, *destination;
 180      int src_bytes, dst_bytes;
 181 {
 182   ...
 183 }
 184 #endif
 185
 186 /*** COMMONLY USED MACROS ***/
 187
  188 /* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
  189    get one and two bytes from the source text respectively.
 190    If there are not enough bytes in the source, they jump to
 191    `label_end_of_loop'.  The caller should set variables `coding',
 192    `src' and `src_end' to appropriate pointer in advance.  These
 193    macros are called from decoding routines `decode_coding_XXX', thus
 194    it is assumed that the source text is unibyte.  */
 195
 196 #define ONE_MORE_BYTE(c1)                                       \
 197   do {                                                          \
 198     if (src >= src_end)                                         \
 199       {                                                         \
 200         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 201         goto label_end_of_loop;                                 \
 202       }                                                         \
 203     c1 = *src++;                                                \
 204   } while (0)
 205
 206 #define TWO_MORE_BYTES(c1, c2)                                  \
 207   do {                                                          \
 208     if (src + 1 >= src_end)                                     \
 209       {                                                         \
 210         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 211         goto label_end_of_loop;                                 \
 212       }                                                         \
 213     c1 = *src++;                                                \
 214     c2 = *src++;                                                \
 215   } while (0)
 216
 217
 218 /* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte
 219    form if MULTIBYTEP is nonzero.  */
 220
 221 #define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep)           \
 222   do {                                                          \
 223     if (src >= src_end)                                         \
 224       {                                                         \
 225         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 226         goto label_end_of_loop;                                 \
 227       }                                                         \
 228     c1 = *src++;                                                \
 229     if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL)         \
 230       c1 = *src++ - 0x20;                                       \
 231   } while (0)
 232
 233 /* Set C to the next character at the source text pointed by `src'.
 234    If there are not enough characters in the source, jump to
  235    `label_end_of_loop'.  The caller should set variables `coding',
 236    `src', `src_end', and `translation_table' to appropriate pointers
 237    in advance.  This macro is used in encoding routines
 238    `encode_coding_XXX', thus it assumes that the source text is in
 239    multibyte form except for 8-bit characters.  8-bit characters are
 240    in multibyte form if coding->src_multibyte is nonzero, else they
 241    are represented by a single byte.  */
 242
  243 #define ONE_MORE_CHAR(c)                                        \
  244   do {                                                          \
  245     int len = src_end - src;  /* Bytes left in the source.  */  \
  246     int bytes;  /* Byte length of the char read.  */            \
  247     if (len <= 0)  /* Source is exhausted.  */                  \
  248       {                                                         \
  249         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
  250         goto label_end_of_loop;                                 \
  251       }                                                         \
  252     if (coding->src_multibyte                                   \
  253         || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes))        \
  254       c = STRING_CHAR_AND_LENGTH (src, len, bytes);             \
  255     else                                                        \
  256       c = *src, bytes = 1;  /* Take one raw byte.  */           \
  257     if (!NILP (translation_table))                              \
  258       c = translate_char (translation_table, c, -1, 0, 0);      \
  259     src += bytes;  /* Step past the character.  */              \
  260   } while (0)
 261
 262
 263 /* Produce a multibyte form of character C to `dst'.  Jump to
 264    `label_end_of_loop' if there's not enough space at `dst'.
 265
 266    If we are now in the middle of a composition sequence, the decoded
 267    character may be ALTCHAR (for the current composition).  In that
 268    case, the character goes to coding->cmp_data->data instead of
 269    `dst'.
 270
 271    This macro is used in decoding routines.  */
 272
  273 #define EMIT_CHAR(c)                                                    \
  274   do {                                                                  \
  275     if (! COMPOSING_P (coding)                                          \
  276         || coding->composing == COMPOSITION_RELATIVE                    \
  277         || coding->composing == COMPOSITION_WITH_RULE)                  \
  278       {   /* Write C itself to `dst'.  */                               \
  279         int bytes = CHAR_BYTES (c);                                     \
  280         if ((dst + bytes) > (dst_bytes ? dst_end : src))                \
  281           {   /* C does not fit.  */                                    \
  282             coding->result = CODING_FINISH_INSUFFICIENT_DST;            \
  283             goto label_end_of_loop;                                     \
  284           }                                                             \
  285         dst += CHAR_STRING (c, dst);                                    \
  286         coding->produced_char++;                                        \
  287       }                                                                 \
  288     /* When composing, except relatively, record C as a component.  */  \
  289     if (COMPOSING_P (coding)                                            \
  290         && coding->composing != COMPOSITION_RELATIVE)                   \
  291       {                                                                 \
  292         CODING_ADD_COMPOSITION_COMPONENT (coding, c);                   \
  293         coding->composition_rule_follows                                \
  294           = coding->composing != COMPOSITION_WITH_ALTCHARS;             \
  295       }                                                                 \
  296   } while (0)
 297
 298
 299 #define EMIT_ONE_BYTE(c)                                        \
 300   do {                                                          \
 301     if (dst >= (dst_bytes ? dst_end : src))                     \
 302       {                                                         \
 303         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 304         goto label_end_of_loop;                                 \
 305       }                                                         \
 306     *dst++ = c;                                                 \
 307   } while (0)
 308
 309 #define EMIT_TWO_BYTES(c1, c2)                                  \
 310   do {                                                          \
 311     if (dst + 2 > (dst_bytes ? dst_end : src))                  \
 312       {                                                         \
 313         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 314         goto label_end_of_loop;                                 \
 315       }                                                         \
 316     *dst++ = c1, *dst++ = c2;                                   \
 317   } while (0)
 318
 319 #define EMIT_BYTES(from, to)                                    \
 320   do {                                                          \
 321     if (dst + (to - from) > (dst_bytes ? dst_end : src))        \
 322       {                                                         \
 323         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 324         goto label_end_of_loop;                                 \
 325       }                                                         \
 326     while (from < to)                                           \
 327       *dst++ = *from++;                                         \
 328   } while (0)
 329
 330 \f
 331 /*** 1. Preamble ***/
 332
 333 #ifdef emacs
 334 #include <config.h>
 335 #endif
 336
 337 #include <stdio.h>
 338
 339 #ifdef emacs
 340
 341 #include "lisp.h"
 342 #include "buffer.h"
 343 #include "charset.h"
 344 #include "composite.h"
 345 #include "ccl.h"
 346 #include "coding.h"
 347 #include "window.h"
 348 #include "intervals.h"
 349
 350 #else  /* not emacs */
 351
 352 #include "mulelib.h"
 353
 354 #endif /* not emacs */
 355
  356 Lisp_Object Qcoding_system, Qeol_type;  /* Qcoding_system: see coding_safe_chars.  */
  357 Lisp_Object Qbuffer_file_coding_system;
  358 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
  359 Lisp_Object Qno_conversion, Qundecided;
  360 Lisp_Object Qcoding_system_history;
  361 Lisp_Object Qsafe_chars;   /* Plist key looked up by coding_safe_chars.  */
  362 Lisp_Object Qvalid_codes;
  363
  364 extern Lisp_Object Qinsert_file_contents, Qwrite_region;  /* Declared only.  */
  365 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
  366 Lisp_Object Qstart_process, Qopen_network_stream;
  367 Lisp_Object Qtarget_idx;
  368
  369 /* If a symbol has this property, the property's value is evaluated
  370    to define the symbol as a coding system.  */
  371 Lisp_Object Qcoding_system_define_form;
  372
  373 Lisp_Object Vselect_safe_coding_system_function;  /* NOTE(review): undocumented here.  */
  374
  375 int coding_system_require_warning;  /* NOTE(review): undocumented here.  */
  376
  377 /* Mnemonic strings for the Unix, DOS, and Mac end-of-line formats.  */
  378 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
  379 /* Mnemonic string indicating that the end-of-line format is not yet
  380    decided.  */
  381 Lisp_Object eol_mnemonic_undecided;
  382
  383 /* End-of-line format chosen by the system: CODING_EOL_LF on Unix,
  384    CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
  385 int system_eol_type;
 386
  387 #ifdef emacs
  388
  389 /* Information about which coding system is safe for which chars.
  390    The value has the form (GENERIC-LIST . NON-GENERIC-ALIST).
  391
  392    GENERIC-LIST is a list of generic coding systems, i.e. those which
  393    can encode any character.
  394
  395    NON-GENERIC-ALIST is an alist mapping each non-generic coding
  396    system to the char table that contains its safe chars.  */
  397 Lisp_Object Vcoding_system_safe_chars;
  398
  399 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
  400
  401 Lisp_Object Qcoding_system_p, Qcoding_system_error;
  402
  403 /* The coding systems emacs-mule and raw-text convert only the
  404    end-of-line format.  */
  405 Lisp_Object Qemacs_mule, Qraw_text;
  406
  407 Lisp_Object Qutf_8;
  408
  409 /* Coding systems are passed between Emacs Lisp programs and internal
  410    C routines via the following three variables.  */
  411 /* Coding system for reading files and receiving data from a process.  */
  412 Lisp_Object Vcoding_system_for_read;
  413 /* Coding system for writing files and sending data to a process.  */
  414 Lisp_Object Vcoding_system_for_write;
  415 /* Coding system actually used in the latest I/O.  */
  416 Lisp_Object Vlast_coding_system_used;
  417
  418 /* A vector of length 256 which contains information about special
  419    Latin codes (especially for dealing with Microsoft codes).  */
  420 Lisp_Object Vlatin_extra_code_table;
  421
  422 /* Flag to inhibit code conversion of end-of-line format.  */
  423 int inhibit_eol_conversion;
  424
  425 /* Flag to inhibit ISO2022 escape sequence detection.  */
  426 int inhibit_iso_escape_detection;
  427
  428 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
  429 int inherit_process_coding_system;
  430
  431 /* Coding system to be used to encode text for terminal display.  */
  432 struct coding_system terminal_coding;
  433
  434 /* Coding system to be used to encode text for terminal display when
  435    the terminal coding system is nil.  */
  436 struct coding_system safe_terminal_coding;
  437
  438 /* Coding system of what is sent from the terminal keyboard.  */
  439 struct coding_system keyboard_coding;
  440
  441 /* Default coding system to be used to write a file.  */
  442 struct coding_system default_buffer_file_coding;
  443
  444 Lisp_Object Vfile_coding_system_alist;
  445 Lisp_Object Vprocess_coding_system_alist;
  446 Lisp_Object Vnetwork_coding_system_alist;
  447
  448 Lisp_Object Vlocale_coding_system;
  449
  450 #endif /* emacs */
 451
  452 Lisp_Object Qcoding_category, Qcoding_category_index;
  453
  454 /* List of symbols `coding-category-xxx' ordered by priority.  */
  455 Lisp_Object Vcoding_category_list;
  456
  457 /* Table of coding categories (Lisp symbols).  */
  458 Lisp_Object Vcoding_category_table;
  459
  460 /* Names of the `coding-category-xxx' symbols, one string per category.  */
  461 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {  /* presumably indexed by category index -- confirm */
  462   "coding-category-emacs-mule",
  463   "coding-category-sjis",
  464   "coding-category-iso-7",
  465   "coding-category-iso-7-tight",
  466   "coding-category-iso-8-1",
  467   "coding-category-iso-8-2",
  468   "coding-category-iso-7-else",
  469   "coding-category-iso-8-else",
  470   "coding-category-ccl",
  471   "coding-category-big5",
  472   "coding-category-utf-8",
  473   "coding-category-utf-16-be",
  474   "coding-category-utf-16-le",
  475   "coding-category-raw-text",
  476   "coding-category-binary"
  477 };
 478
  479 /* Table of pointers to the coding systems corresponding to each
  480    coding category.  */
  481 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
  482
  483 /* Table of coding category masks.  The Nth element is the mask of the
  484    coding category whose priority is N.  */
  485 static
  486 int coding_priorities[CODING_CATEGORY_IDX_MAX];
  487
  488 /* Flag telling whether translation tables are looked up during
  489    character code conversion.  */
  490 Lisp_Object Venable_character_translation;
  491 /* Standard translation table to look up on decoding (reading).  */
  492 Lisp_Object Vstandard_translation_table_for_decode;
  493 /* Standard translation table to look up on encoding (writing).  */
  494 Lisp_Object Vstandard_translation_table_for_encode;
  495
  496 Lisp_Object Qtranslation_table;
  497 Lisp_Object Qtranslation_table_id;
  498 Lisp_Object Qtranslation_table_for_decode;
  499 Lisp_Object Qtranslation_table_for_encode;
  500
  501 /* Alist of charsets vs revision number.  */
  502 Lisp_Object Vcharset_revision_alist;
  503
  504 /* Default coding systems used for process I/O.  */
  505 Lisp_Object Vdefault_process_coding_system;
  506
  507 /* Char table for translating Quail and self-inserting input.  */
  508 Lisp_Object Vtranslation_table_for_input;
  509
  510 /* Global flag telling that we can't call post-read-conversion and
  511    pre-write-conversion functions.  Usually the value is zero, but it
  512    is set to 1 temporarily while such functions are running.  This is
  513    to avoid infinitely recursive calls.  */
  514 static int inhibit_pre_post_conversion;
  515
  516 Lisp_Object Qchar_coding_system;
 517
 518 /* Return `safe-chars' property of CODING_SYSTEM (symbol).  Don't check
 519    its validity.  */
 520
 521 Lisp_Object
 522 coding_safe_chars (coding_system)
 523      Lisp_Object coding_system;
 524 {
 525   Lisp_Object coding_spec, plist, safe_chars;
 526
 527   coding_spec = Fget (coding_system, Qcoding_system);
 528   plist = XVECTOR (coding_spec)->contents[3];
 529   safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars);
 530   return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
 531 }
 532
 533 #define CODING_SAFE_CHAR_P(safe_chars, c) \
 534   (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c)))
 535
 536 \f
 537 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 538
 539 /* Emacs' internal format for representation of multiple character
 540    sets is a kind of multi-byte encoding, i.e. characters are
 541    represented by variable-length sequences of one-byte codes.
 542
 543    ASCII characters and control characters (e.g. `tab', `newline') are
 544    represented by one-byte sequences which are their ASCII codes, in
 545    the range 0x00 through 0x7F.
 546
 547    8-bit characters of the range 0x80..0x9F are represented by
 548    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
 549    code + 0x20).
 550
 551    8-bit characters of the range 0xA0..0xFF are represented by
 552    one-byte sequences which are their 8-bit code.
 553
 554    The other characters are represented by a sequence of `base
 555    leading-code', optional `extended leading-code', and one or two
 556    `position-code's.  The length of the sequence is determined by the
 557    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
 558    whereas extended leading-code and position-code take the range 0xA0
 559    through 0xFF.  See `charset.h' for more details about leading-code
 560    and position-code.
 561
 562    --- CODE RANGE of Emacs' internal format ---
 563    character set        range
 564    -------------        -----
 565    ascii                0x00..0x7F
 566    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
  567    eight-bit-graphic    0xA0..0xFF
 568    ELSE                 0x81..0x9D + [0xA0..0xFF]+
 569    ---------------------------------------------
 570
 571    As this is the internal character representation, the format is
 572    usually not used externally (i.e. in a file or in a data sent to a
 573    process).  But, it is possible to have a text externally in this
 574    format (i.e. by encoding by the coding system `emacs-mule').
 575
 576    In that case, a sequence of one-byte codes has a slightly different
 577    form.
 578
 579    Firstly, all characters in eight-bit-control are represented by
 580    one-byte sequences which are their 8-bit code.
 581
 582    Next, character composition data are represented by the byte
 583    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
 584    where,
 585         METHOD is 0xF0 plus one of composition method (enum
 586         composition_method),
 587
 588         BYTES is 0xA0 plus the byte length of these composition data,
 589
 590         CHARS is 0xA0 plus the number of characters composed by these
 591         data,
 592
  593         COMPONENTs are characters in multibyte form or composition
  594         rules encoded as two bytes of ASCII codes.
 595
 596    In addition, for backward compatibility, the following formats are
 597    also recognized as composition data on decoding.
 598
 599    0x80 MSEQ ...
 600    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
 601
 602    Here,
  603         MSEQ is a multibyte form but in this special format:
 604           ASCII: 0xA0 ASCII_CODE+0x80,
 605           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
 606         RULE is a one byte code of the range 0xA0..0xF0 that
 607         represents a composition rule.
 608   */
 609
 610 enum emacs_code_class_type emacs_code_class[256];
 611
 612 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 613    Check if a text is encoded in Emacs' internal format.  If it is,
 614    return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.  */
 615
 616 static int
 617 detect_coding_emacs_mule (src, src_end, multibytep)
 618       unsigned char *src, *src_end;
 619       int multibytep;
 620 {
 621   unsigned char c;
 622   int composing = 0;
 623   /* Dummy for ONE_MORE_BYTE.  */
 624   struct coding_system dummy_coding;
 625   struct coding_system *coding = &dummy_coding;
 626
 627   while (1)
 628     {
 629       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
 630
 631       if (composing)
 632         {
 633           if (c < 0xA0)
 634             composing = 0;
 635           else if (c == 0xA0)
 636             {
 637               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
 638               c &= 0x7F;
 639             }
 640           else
 641             c -= 0x20;
 642         }
 643
 644       if (c < 0x20)
 645         {
 646           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 647             return 0;
 648         }
 649       else if (c >= 0x80 && c < 0xA0)
 650         {
 651           if (c == 0x80)
 652             /* Old leading code for a composite character.  */
 653             composing = 1;
 654           else
 655             {
 656               unsigned char *src_base = src - 1;
 657               int bytes;
 658
 659               if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base,
 660                                                bytes))
 661                 return 0;
 662               src = src_base + bytes;
 663             }
 664         }
 665     }
 666  label_end_of_loop:
 667   return CODING_CATEGORY_MASK_EMACS_MULE;
 668 }
 669
 670
  671 /* Record the starting position START and METHOD of one composition.  */
  672
  673 #define CODING_ADD_COMPOSITION_START(coding, start, method)     \
  674   do {                                                          \
  675     struct composition_data *cmp_data = coding->cmp_data;       \
  676     int *data = cmp_data->data + cmp_data->used;                \
  677     coding->cmp_data_start = cmp_data->used;                    \
  678     data[0] = -1;  /* Length, set by ..._END.  */               \
  679     data[1] = cmp_data->char_offset + start;                    \
  680     data[3] = (int) method;  /* data[2] is set by ..._END.  */  \
  681     cmp_data->used += 4;  /* Four header slots.  */             \
  682   } while (0)
 683
  684 /* Finish the current composition: store its length and END.  */
  685
  686 #define CODING_ADD_COMPOSITION_END(coding, end)                 \
  687   do {                                                          \
  688     struct composition_data *cmp_data = coding->cmp_data;       \
  689     int *data = cmp_data->data + coding->cmp_data_start;        \
  690     data[0] = cmp_data->used - coding->cmp_data_start;          \
  691     data[2] = cmp_data->char_offset + end;                      \
  692   } while (0)
 693
  694 /* Append one COMPONENT (alternate character or composition rule).  */
  695
  696 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component)             \
  697   do {                                                                  \
  698     coding->cmp_data->data[coding->cmp_data->used++] = component;       \
  699     if (coding->cmp_data->used - coding->cmp_data_start                 \
  700         == COMPOSITION_DATA_MAX_BUNCH_LENGTH)                           \
  701       {   /* Record is full: close it and stop composing.  */           \
  702         CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
  703         coding->composing = COMPOSITION_NO;                             \
  704       }                                                                 \
  705   } while (0)
 706
 707
 708 /* Get one byte from a data pointed by SRC and increment SRC.  If SRC
 709    is not less than SRC_END, return -1 without incrementing Src.  */
 710
 711 #define SAFE_ONE_MORE_BYTE() (src >= src_end ? -1 : *src++)
 712
 713
 714 /* Decode a character represented as a component of composition
 715    sequence of Emacs 20 style at SRC.  Set C to that character, store
 716    its multibyte form sequence at P, and set P to the end of that
 717    sequence.  If no valid character is found, set C to -1.

        SRC, SRC_END (through SAFE_ONE_MORE_BYTE) and CODING are taken
        from the scope where the macro is expanded.  The accepted forms
        are:
          o 0xA0 followed by a byte B not less than 0xA0: the character
            is B - 0xA0, stored at P as a single byte.
          o a byte L for which BASE_LEADING_CODE_P (L - 0x20) holds: the
            leading code L - 0x20 is stored at P, followed by the
            remaining bytes of the sequence copied unchanged.
        A byte for which CHAR_HEAD_P holds yields C == -1.  If the
        source is already exhausted when the first byte is requested,
        C is left negative and nothing is stored.  */
 718
 719 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p)                \
 720   do {                                                          \
 721     int bytes;                                                  \
 722                                                                 \
 723     c = SAFE_ONE_MORE_BYTE ();                                  \
 724     if (c < 0)                                                  \
 725       break;                                                    \
 726     if (CHAR_HEAD_P (c))                                        \
 727       c = -1;                                                   \
 728     else if (c == 0xA0)                                         \
 729       {                                                         \
 730         c = SAFE_ONE_MORE_BYTE ();                              \
 731         if (c < 0xA0)                                           \
 732           c = -1;                                               \
 733         else                                                    \
 734           {                                                     \
 735             c -= 0xA0;                                          \
 736             *p++ = c;                                           \
 737           }                                                     \
 738       }                                                         \
 739     else if (BASE_LEADING_CODE_P (c - 0x20))                    \
 740       {                                                         \
             /* P0 remembers where this character starts in P.  */   \
 741         unsigned char *p0 = p;                                  \
 742                                                                 \
 743         c -= 0x20;                                              \
 744         *p++ = c;                                               \
 745         bytes = BYTES_BY_CHAR_HEAD (c);                         \
             /* Copy the bytes that follow the leading code; stop */ \
             /* early if the source runs out.                     */ \
 746         while (--bytes)                                         \
 747           {                                                     \
 748             c = SAFE_ONE_MORE_BYTE ();                          \
 749             if (c < 0)                                          \
 750               break;                                            \
 751             *p++ = c;                                           \
 752           }                                                     \
             /* Accept the copied bytes only if they form a valid */ \
             /* multibyte sequence (or the recovery case below).  */ \
 753         if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes)      \
 754             || (coding->flags /* We are recovering a file.  */  \
 755                 && p0[0] == LEADING_CODE_8_BIT_CONTROL          \
 756                 && ! CHAR_HEAD_P (p0[1])))                      \
 757           c = STRING_CHAR (p0, bytes);                          \
 758         else                                                    \
 759           c = -1;                                               \
 760       }                                                         \
 761     else                                                        \
 762       c = -1;                                                   \
 763   } while (0)
 764
 765
 766 /* Decode a composition rule represented as a component of composition
 767    sequence of Emacs 20 style at SRC.  Set C to the rule.  If no
 768    valid rule is found, set C to -1.

        The rule is one byte; after subtracting 0xA0 it must be in the
        range 0..80.  The quotient and remainder of that value divided
        by 9 are stored in GREF and NREF, which (like SRC and SRC_END)
        are variables of the scope where the macro is expanded, and C
        is set to COMPOSITION_ENCODE_RULE (GREF, NREF).  */
 769
 770 #define DECODE_EMACS_MULE_COMPOSITION_RULE(c)           \
 771   do {                                                  \
 772     c = SAFE_ONE_MORE_BYTE ();                          \
 773     c -= 0xA0;                                          \
 774     if (c < 0 || c >= 81)                               \
 775       c = -1;                                           \
 776     else                                                \
 777       {                                                 \
 778         gref = c / 9, nref = c % 9;                     \
 779         c = COMPOSITION_ENCODE_RULE (gref, nref);       \
 780       }                                                 \
 781   } while (0)
 782
 783
 784 /* Decode composition sequence encoded by `emacs-mule' at the source
 785    pointed by SRC.  SRC_END is the end of source.  Store information
 786    of the composition in CODING->cmp_data.
 787
 788    For backward compatibility, decode also a composition sequence of
 789    Emacs 20 style.  In that case, the composition sequence contains
 790    characters that should be extracted into a buffer or string.  Store
 791    those characters at *DESTINATION in multibyte form.
 792
 793    If we encounter an invalid byte sequence, return 0.
 794    If we encounter an insufficient source or destination, or
 795    insufficient space in CODING->cmp_data, return -1.
 796    Otherwise, return consumed bytes in the source.
 797
        When DST_BYTES is nonzero the destination is limited by DST_END;
        when it is zero the limit is the current source position.
        *DESTINATION is advanced only when Emacs 20 style characters are
        actually emitted.
 798 */
 799 static INLINE int
 800 decode_composition_emacs_mule (coding, src, src_end,
 801                                destination, dst_end, dst_bytes)
 802      struct coding_system *coding;
 803      unsigned char *src, *src_end, **destination, *dst_end;
 804      int dst_bytes;
 805 {
 806   unsigned char *dst = *destination;
 807   int method, data_len, nchars;
       /* SRC_BASE points at the byte that introduces the composition;
          SRC is advanced past it.  */
 808   unsigned char *src_base = src++;
 809   /* Store components of composition.  */
 810   int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH];
 811   int ncomponent;
 812   /* Store multibyte form of characters to be composed.  This is for
 813      Emacs 20 style composition sequence.  */
 814   unsigned char buf[MAX_COMPOSITION_COMPONENTS * MAX_MULTIBYTE_LENGTH];
 815   unsigned char *bufp = buf;
 816   int c, i, gref, nref;
 817
       /* Refuse to start unless CODING->cmp_data can still hold a
          composition of the maximum bunch length.  */
 818   if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
 819       >= COMPOSITION_DATA_SIZE)
 820     {
 821       coding->result = CODING_FINISH_INSUFFICIENT_CMP;
 822       return -1;
 823     }
 824
 825   ONE_MORE_BYTE (c);
 826   if (c - 0xF0 >= COMPOSITION_RELATIVE
 827            && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS)
 828     {
           /* Current format: 0xF0+METHOD, 0xA0+BYTES, 0xA0+CHARS, then
              the components.  */
 829       int with_rule;
 830
 831       method = c - 0xF0;
 832       with_rule = (method == COMPOSITION_WITH_RULE
 833                    || method == COMPOSITION_WITH_RULE_ALTCHARS);
 834       ONE_MORE_BYTE (c);
 835       data_len = c - 0xA0;
 836       if (data_len < 4
 837           || src_base + data_len > src_end)
 838         return 0;
 839       ONE_MORE_BYTE (c);
 840       nchars = c - 0xA0;
           /* The number of composed characters must be positive; test the
              decoded count, not the raw byte.  */
 841       if (nchars < 1)
 842         return 0;
 843       for (ncomponent = 0; src < src_base + data_len; ncomponent++)
 844         {
 845           /* If it is longer than this, it can't be valid.  */
 846           if (ncomponent >= COMPOSITION_DATA_MAX_BUNCH_LENGTH)
 847             return 0;
 848
               /* With a rule-based method, odd-numbered components are
                  rules stored as two bytes offset by 32.  */
 849           if (ncomponent % 2 && with_rule)
 850             {
 851               ONE_MORE_BYTE (gref);
 852               gref -= 32;
 853               ONE_MORE_BYTE (nref);
 854               nref -= 32;
 855               c = COMPOSITION_ENCODE_RULE (gref, nref);
 856             }
 857           else
 858             {
 859               int bytes;
 860               if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
 861                   || (coding->flags /* We are recovering a file.  */
 862                       && src[0] == LEADING_CODE_8_BIT_CONTROL
 863                       && ! CHAR_HEAD_P (src[1])))
 864                 c = STRING_CHAR (src, bytes);
 865               else
 866                 c = *src, bytes = 1;
 867               src += bytes;
 868             }
 869           component[ncomponent] = c;
 870         }
 871     }
 872   else
 873     {
 874       /* This may be an old Emacs 20 style format.  See the comment at
 875          the section 2 of this file.  */
           /* Find the end of the sequence: the next byte for which
              CHAR_HEAD_P holds.  */
 876       while (src < src_end && !CHAR_HEAD_P (*src)) src++;
 877       if (src == src_end
 878           && !(coding->mode & CODING_MODE_LAST_BLOCK))
 879         goto label_end_of_loop;
 880
           /* Restrict decoding to the sequence just delimited and restart
              right after the introducing byte.  */
 881       src_end = src;
 882       src = src_base + 1;
 883       if (c < 0xC0)
 884         {
 885           method = COMPOSITION_RELATIVE;
 886           for (ncomponent = 0; ncomponent < MAX_COMPOSITION_COMPONENTS;)
 887             {
 888               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 889               if (c < 0)
 890                 break;
 891               component[ncomponent++] = c;
 892             }
 893           if (ncomponent < 2)
 894             return 0;
 895           nchars = ncomponent;
 896         }
 897       else if (c == 0xFF)
 898         {
 899           method = COMPOSITION_WITH_RULE;
 900           src++;
 901           DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 902           if (c < 0)
 903             return 0;
 904           component[0] = c;
               /* Components alternate RULE, CHAR after the first CHAR.  */
 905           for (ncomponent = 1;
 906                ncomponent < MAX_COMPOSITION_COMPONENTS * 2 - 1;)
 907             {
 908               DECODE_EMACS_MULE_COMPOSITION_RULE (c);
 909               if (c < 0)
 910                 break;
 911               component[ncomponent++] = c;
 912               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 913               if (c < 0)
 914                 break;
 915               component[ncomponent++] = c;
 916             }
 917           if (ncomponent < 3)
 918             return 0;
 919           nchars = (ncomponent + 1) / 2;
 920         }
 921       else
 922         return 0;
 923     }
 924
       /* Record the composition only if any characters collected in BUF
          fit in the destination.  */
 925   if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src))
 926     {
 927       CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method);
 928       for (i = 0; i < ncomponent; i++)
 929         CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]);
 930       CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars);
 931       if (buf < bufp)
 932         {
 933           unsigned char *p = buf;
 934           EMIT_BYTES (p, bufp);
 935           *destination += bufp - buf;
 936           coding->produced_char += nchars;
 937         }
 938       return (src - src_base);
 939     }
 940  label_end_of_loop:
 941   return -1;
 942 }
 943
 944 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
     /* Decode SRC_BYTES bytes at SOURCE, encoded in emacs-mule, into
        DESTINATION.  When DST_BYTES is zero the destination limit is the
        current source position instead of DESTINATION + DST_BYTES.  On
        return CODING->consumed, CODING->consumed_char, CODING->produced
        and CODING->produced_char are set; CODING->result is set when
        decoding stops on an inconsistent end-of-line or on insufficient
        destination space.  */
 945
 946 static void
 947 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
 948      struct coding_system *coding;
 949      unsigned char *source, *destination;
 950      int src_bytes, dst_bytes;
 951 {
 952   unsigned char *src = source;
 953   unsigned char *src_end = source + src_bytes;
 954   unsigned char *dst = destination;
 955   unsigned char *dst_end = destination + dst_bytes;
 956   /* SRC_BASE remembers the start position in source in each loop.
 957      The loop will be exited when there's not enough source code, or
 958      when there's not enough destination area to produce a
 959      character.  */
 960   unsigned char *src_base;
 961
 962   coding->produced_char = 0;
 963   while ((src_base = src) < src_end)
 964     {
 965       unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
 966       int bytes;
 967
 968       if (*src == '\r')
 969         {
 970           int c = *src++;
 971
               /* A CR becomes a newline for CR-style text.  For CRLF-style
                  text it is dropped when followed by LF, and otherwise
                  kept as a CR with the following byte left unread.  */
 972           if (coding->eol_type == CODING_EOL_CR)
 973             c = '\n';
 974           else if (coding->eol_type == CODING_EOL_CRLF)
 975             {
 976               ONE_MORE_BYTE (c);
 977               if (c != '\n')
 978                 {
 979                   src--;
 980                   c = '\r';
 981                 }
 982             }
 983           *dst++ = c;
 984           coding->produced_char++;
 985           continue;
 986         }
 987       else if (*src == '\n')
 988         {
               /* A bare LF in CR or CRLF text is an inconsistency; stop
                  here if the mode asks for that.  */
 989           if ((coding->eol_type == CODING_EOL_CR
 990                || coding->eol_type == CODING_EOL_CRLF)
 991               && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
 992             {
 993               coding->result = CODING_FINISH_INCONSISTENT_EOL;
 994               goto label_end_of_loop;
 995             }
 996           *dst++ = *src++;
 997           coding->produced_char++;
 998           continue;
 999         }
1000       else if (*src == 0x80 && coding->cmp_data)
1001         {
1002           /* Start of composition data.  */
1003           int consumed  = decode_composition_emacs_mule (coding, src, src_end,
1004                                                          &dst, dst_end,
1005                                                          dst_bytes);
               /* Negative: stop decoding here.  Positive: that many source
                  bytes were handled.  Zero: not a valid composition, so
                  decode the 0x80 byte itself as a character.  */
1006           if (consumed < 0)
1007             goto label_end_of_loop;
1008           else if (consumed > 0)
1009             {
1010               src += consumed;
1011               continue;
1012             }
1013           bytes = CHAR_STRING (*src, tmp);
1014           p = tmp;
1015           src++;
1016         }
1017       else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
1018                || (coding->flags /* We are recovering a file.  */
1019                    && src[0] == LEADING_CODE_8_BIT_CONTROL
1020                    && ! CHAR_HEAD_P (src[1])))
1021         {
               /* A valid multibyte sequence is copied as it is.  */
1022           p = src;
1023           src += bytes;
1024         }
1025       else
1026         {
               /* Otherwise convert the single byte with CHAR_STRING and
                  copy the result from TMP.  */
1027           bytes = CHAR_STRING (*src, tmp);
1028           p = tmp;
1029           src++;
1030         }
1031       if (dst + bytes >= (dst_bytes ? dst_end : src))
1032         {
1033           coding->result = CODING_FINISH_INSUFFICIENT_DST;
1034           break;
1035         }
1036       while (bytes--) *dst++ = *p++;
1037       coding->produced_char++;
1038     }
1039  label_end_of_loop:
       /* SRC_BASE is the start of the character being handled when the
          loop ended, so a character that was not emitted is not counted
          as consumed.  The byte count is also stored as the character
          count.  */
1040   coding->consumed = coding->consumed_char = src_base - source;
1041   coding->produced = dst - destination;
1042 }
1043
1044
1045 /* Encode composition data stored at DATA into a special byte sequence
1046    starting by 0x80.  Update CODING->cmp_data_start and maybe
1047    CODING->cmp_data for the next call.

        As read here, DATA[0] is the number of entries of this
        composition, DATA[2] - DATA[1] is the number of composed
        characters, DATA[3] is the method and DATA[4] onward are the
        components.  The sequence built is
          0x80 0xF0+METHOD 0xA0+BYTES 0xA0+CHARS COMPONENT ...
        where a character component is written with CHAR_STRING and a
        rule is written as the two bytes 0x20+GREF, 0x20+NREF.

        DST, DST_END, DST_BYTES and SRC are variables of the scope where
        the macro is expanded.  If the destination is too small,
        CODING->result is set to CODING_FINISH_INSUFFICIENT_DST and
        control jumps to `label_end_of_loop' in that scope.  */
1048
1049 #define ENCODE_COMPOSITION_EMACS_MULE(coding, data)                     \
1050   do {                                                                  \
1051     unsigned char buf[1024], *p0 = buf, *p;                             \
1052     int len = data[0];                                                  \
1053     int i;                                                              \
1054                                                                         \
1055     buf[0] = 0x80;                                                      \
1056     buf[1] = 0xF0 + data[3];    /* METHOD */                            \
1057     buf[3] = 0xA0 + (data[2] - data[1]); /* COMPOSED-CHARS */           \
1058     p = buf + 4;                                                        \
1059     if (data[3] == COMPOSITION_WITH_RULE                                \
1060         || data[3] == COMPOSITION_WITH_RULE_ALTCHARS)                   \
1061       {                                                                 \
             /* First character, then (RULE, CHAR) pairs.  */                \
1062         p += CHAR_STRING (data[4], p);                                  \
1063         for (i = 5; i < len; i += 2)                                    \
1064           {                                                             \
1065             int gref, nref;                                             \
1066              COMPOSITION_DECODE_RULE (data[i], gref, nref);             \
1067             *p++ = 0x20 + gref;                                         \
1068             *p++ = 0x20 + nref;                                         \
1069             p += CHAR_STRING (data[i + 1], p);                          \
1070           }                                                             \
1071       }                                                                 \
1072     else                                                                \
1073       {                                                                 \
1074         for (i = 4; i < len; i++)                                       \
1075           p += CHAR_STRING (data[i], p);                                \
1076       }                                                                 \
         /* The length byte is filled in last, once P is known.  */          \
1077     buf[2] = 0xA0 + (p - buf);  /* COMPONENTS-BYTES */                  \
1078                                                                         \
1079     if (dst + (p - buf) + 4 > (dst_bytes ? dst_end : src))              \
1080       {                                                                 \
1081         coding->result = CODING_FINISH_INSUFFICIENT_DST;                \
1082         goto label_end_of_loop;                                         \
1083       }                                                                 \
1084     while (p0 < p)                                                      \
1085       *dst++ = *p0++;                                                   \
         /* Step past this composition; move to the next block of */         \
         /* composition data when this one is used up.            */         \
1086     coding->cmp_data_start += data[0];                                  \
1087     if (coding->cmp_data_start == coding->cmp_data->used                \
1088         && coding->cmp_data->next)                                      \
1089       {                                                                 \
1090         coding->cmp_data = coding->cmp_data->next;                      \
1091         coding->cmp_data_start = 0;                                     \
1092       }                                                                 \
1093   } while (0)
1094
1095
1096 static void encode_eol P_ ((struct coding_system *, const unsigned char *,
1097                             unsigned char *, int, int));
1098
     /* Encode SRC_BYTES bytes of text at SOURCE into emacs-mule at
        DESTINATION.  If CODING has no composition data, the work is
        handed to encode_eol.  Otherwise the text is processed character
        by character: a composition sequence is written before each
        character that starts a composition, and newlines are converted
        according to CODING->eol_type.  On return CODING->consumed,
        CODING->consumed_char, CODING->produced and CODING->produced_char
        are set.  */
1099 static void
1100 encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
1101      struct coding_system *coding;
1102      unsigned char *source, *destination;
1103      int src_bytes, dst_bytes;
1104 {
1105   unsigned char *src = source;
1106   unsigned char *src_end = source + src_bytes;
1107   unsigned char *dst = destination;
1108   unsigned char *dst_end = destination + dst_bytes;
1109   unsigned char *src_base;
1110   int c;
1111   int char_offset;
1112   int *data;
1113
       /* NOTE(review): TRANSLATION_TABLE is not referenced directly in
          this function; presumably ONE_MORE_CHAR uses it -- confirm.  */
1114   Lisp_Object translation_table;
1115
1116   translation_table = Qnil;
1117
1118   /* Optimization for the case that there's no composition.  */
1119   if (!coding->cmp_data || coding->cmp_data->used == 0)
1120     {
1121       encode_eol (coding, source, destination, src_bytes, dst_bytes);
1122       return;
1123     }
1124
1125   char_offset = coding->cmp_data->char_offset;
1126   data = coding->cmp_data->data + coding->cmp_data_start;
       /* NOTE(review): the loop has no exit of its own; presumably
          ONE_MORE_CHAR and the EMIT_* macros jump to label_end_of_loop
          when the source or the destination runs out -- confirm.  */
1127   while (1)
1128     {
1129       src_base = src;
1130
1131       /* If SRC starts a composition, encode the information about the
1132          composition in advance.  */
1133       if (coding->cmp_data_start < coding->cmp_data->used
1134           && char_offset + coding->consumed_char == data[1])
1135         {
1136           ENCODE_COMPOSITION_EMACS_MULE (coding, data);
               /* The macro may have advanced CODING->cmp_data, so reload
                  the values derived from it.  */
1137           char_offset = coding->cmp_data->char_offset;
1138           data = coding->cmp_data->data + coding->cmp_data_start;
1139         }
1140
1141       ONE_MORE_CHAR (c);
1142       if (c == '\n' && (coding->eol_type == CODING_EOL_CRLF
1143                         || coding->eol_type == CODING_EOL_CR))
1144         {
1145           if (coding->eol_type == CODING_EOL_CRLF)
1146             EMIT_TWO_BYTES ('\r', c);
1147           else
1148             EMIT_ONE_BYTE ('\r');
1149         }
1150       else if (SINGLE_BYTE_CHAR_P (c))
1151         {
1152           if (coding->flags && ! ASCII_BYTE_P (c))
1153             {
1154               /* As we are auto saving, retain the multibyte form for
1155                  8-bit chars.  */
1156               unsigned char buf[MAX_MULTIBYTE_LENGTH];
1157               int bytes = CHAR_STRING (c, buf);
1158
1159               if (bytes == 1)
1160                 EMIT_ONE_BYTE (buf[0]);
1161               else
1162                 EMIT_TWO_BYTES (buf[0], buf[1]);
1163             }
1164           else
1165             EMIT_ONE_BYTE (c);
1166         }
1167       else
             /* A multibyte character is copied from the source as it is.  */
1168         EMIT_BYTES (src_base, src);
1169       coding->consumed_char++;
1170     }
1171  label_end_of_loop:
1172   coding->consumed = src_base - source;
1173   coding->produced = coding->produced_char = dst - destination;
1174   return;
1175 }
1176
1177 \f
1178 /*** 3. ISO2022 handlers ***/
1179
1180 /* The following note describes the coding system ISO2022 briefly.
1181    Since the intention of this note is to help understand the
1182    functions in this file, some parts are NOT ACCURATE or are OVERLY
1183    SIMPLIFIED.  For thorough understanding, please refer to the
1184    original document of ISO2022.  This is equivalent to the standard
1185    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
1186
1187    ISO2022 provides many mechanisms to encode several character sets
1188    in 7-bit and 8-bit environments.  For 7-bit environments, all text
1189    is encoded using bytes less than 128.  This may make the encoded
1190    text a little bit longer, but the text passes more easily through
1191    several types of gateway, some of which strip off the MSB (Most
1192    Significant Bit).
1193
1194    There are two kinds of character sets: control character sets and
1195    graphic character sets.  The former contain control characters such
1196    as `newline' and `escape' to provide control functions (control
1197    functions are also provided by escape sequences).  The latter
1198    contain graphic characters such as 'A' and '-'.  Emacs recognizes
1199    two control character sets and many graphic character sets.
1200
1201    Graphic character sets are classified into one of the following
1202    four classes, according to the number of bytes (DIMENSION) and
1203    number of characters in one dimension (CHARS) of the set:
1204    - DIMENSION1_CHARS94
1205    - DIMENSION1_CHARS96
1206    - DIMENSION2_CHARS94
1207    - DIMENSION2_CHARS96
1208
1209    In addition, each character set is assigned an identification tag,
1210    unique for each set, called the "final character" (denoted as <F>
1211    hereafter).  The <F> of each character set is decided by ECMA(*)
1212    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
1213    (0x30..0x3F are for private use only).
1214
1215    Note (*): ECMA = European Computer Manufacturers Association
1216
1217    Here are examples of graphic character sets [NAME(<F>)]:
1218         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
1219         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
1220         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
1221         o DIMENSION2_CHARS96 -- none for the moment
1222
1223    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
1224         C0 [0x00..0x1F] -- control character plane 0
1225         GL [0x20..0x7F] -- graphic character plane 0
1226         C1 [0x80..0x9F] -- control character plane 1
1227         GR [0xA0..0xFF] -- graphic character plane 1
1228
1229    A control character set is directly designated and invoked to C0 or
1230    C1 by an escape sequence.  The most common case is that:
1231    - ISO646's  control character set is designated/invoked to C0, and
1232    - ISO6429's control character set is designated/invoked to C1,
1233    and usually these designations/invocations are omitted in encoded
1234    text.  In a 7-bit environment, only C0 can be used, and a control
1235    character for C1 is encoded by an appropriate escape sequence to
1236    fit into the environment.  All control characters for C1 are
1237    defined to have corresponding escape sequences.
1238
1239    A graphic character set is at first designated to one of four
1240    graphic registers (G0 through G3), then these graphic registers are
1241    invoked to GL or GR.  These designations and invocations can be
1242    done independently.  The most common case is that G0 is invoked to
1243    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
1244    these invocations and designations are omitted in encoded text.
1245    In a 7-bit environment, only GL can be used.
1246
1247    When a graphic character set of CHARS94 is invoked to GL, codes
1248    0x20 and 0x7F of the GL area work as control characters SPACE and
1249    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
1250    be used.
1251
1252    There are two ways of invocation: locking-shift and single-shift.
1253    With locking-shift, the invocation lasts until the next different
1254    invocation, whereas with single-shift, the invocation affects the
1255    following character only and doesn't affect the locking-shift
1256    state.  Invocations are done by the following control characters or
1257    escape sequences:
1258
1259    ----------------------------------------------------------------------
1260    abbrev  function                  cntrl escape seq   description
1261    ----------------------------------------------------------------------
1262    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
1263    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
1264    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
1265    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
1266    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
1267    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
1268    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
1269    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
1270    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
1271    ----------------------------------------------------------------------
1272    (*) These are not used by any known coding system.
1273
1274    Control characters for these functions are defined by macros
1275    ISO_CODE_XXX in `coding.h'.
1276
1277    Designations are done by the following escape sequences:
1278    ----------------------------------------------------------------------
1279    escape sequence      description
1280    ----------------------------------------------------------------------
1281    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
1282    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
1283    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
1284    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
1285    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
1286    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
1287    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
1288    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
1289    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
1290    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
1291    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
1292    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
1293    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
1294    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
1295    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
1296    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
1297    ----------------------------------------------------------------------
1298
1299    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
1300    of dimension 1, chars 94, and final character <F>, etc...
1301
1302    Note (*): Although these designations are not allowed in ISO2022,
1303    Emacs accepts them on decoding, and produces them on encoding
1304    CHARS96 character sets in a coding system which is characterized as
1305    7-bit environment, non-locking-shift, and non-single-shift.
1306
1307    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
1308    '(' can be omitted.  We refer to this as "short-form" hereafter.
1309
1310    Now you may notice that there are a lot of ways of encoding the
1311    same multilingual text in ISO2022.  Actually, there exist many
1312    coding systems such as Compound Text (used in X11's inter client
1313    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
1314    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
1315    localized platforms), and all of these are variants of ISO2022.
1316
1317    In addition to the above, Emacs handles two more kinds of escape
1318    sequences: ISO6429's direction specification and Emacs' private
1319    sequence for specifying character composition.
1320
1321    ISO6429's direction specification takes the following form:
1322         o CSI ']'      -- end of the current direction
1323         o CSI '0' ']'  -- end of the current direction
1324         o CSI '1' ']'  -- start of left-to-right text
1325         o CSI '2' ']'  -- start of right-to-left text
1326    The control character CSI (0x9B: control sequence introducer) is
1327    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
1328
1329    Character composition specification takes the following form:
1330         o ESC '0' -- start relative composition
1331         o ESC '1' -- end composition
1332         o ESC '2' -- start rule-base composition (*)
1333         o ESC '3' -- start relative composition with alternate chars  (**)
1334         o ESC '4' -- start rule-base composition with alternate chars  (**)
1335   Since these are not standard escape sequences of any ISO standard,
1336   the use of them with these meanings is restricted to Emacs only.
1337
1338   (*) This form is used only in Emacs 20.5 and older versions,
1339   but the newer versions can safely decode it.
1340   (**) This form is used only in Emacs 21.1 and newer versions,
1341   and the older versions can't decode it.
1342
1343   Here's a list of example usages of these composition escape
1344   sequences (categorized by `enum composition_method').
1345
1346   COMPOSITION_RELATIVE:
1347         ESC 0 CHAR [ CHAR ] ESC 1
1348   COMPOSITION_WITH_RULE:
1349         ESC 2 CHAR [ RULE CHAR ] ESC 1
1350   COMPOSITION_WITH_ALTCHARS:
1351         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
1352   COMPOSITION_WITH_RULE_ALTCHARS:
1353         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
1354
     /* Table of `enum iso_code_class_type' with 256 entries.
        NOTE(review): presumably indexed by byte value to give the
        ISO2022 class of each code; it is not initialized here --
        confirm where it is filled in.  */
1355 enum iso_code_class_type iso_code_class[256];
1356
     /* Nonzero if the coding system registered at coding_system_table[IDX]
        exists, can handle character C of CHARSET (CHARSET is
        CHARSET_ASCII, or C satisfies CODING_SAFE_CHAR_P for that coding
        system's safe chars), and has a requested designation for CHARSET.
        As a side effect the non-ASCII check assigns to SAFE_CHARS, a
        variable that must exist in the scope where the macro is
        expanded.  */
1357 #define CHARSET_OK(idx, charset, c)                                     \
1358   (coding_system_table[idx]                                             \
1359    && (charset == CHARSET_ASCII                                         \
1360        || (safe_chars = coding_safe_chars (coding_system_table[idx]->symbol), \
1361            CODING_SAFE_CHAR_P (safe_chars, c)))                         \
1362    && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
1363                                               charset)                  \
1364        != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
1365
     /* Nonzero if the initial designation for graphic register 1 of
        coding_system_table[IDX] is not negative.  The table entry is
        passed on without being tested for null here.  */
1366 #define SHIFT_OUT_OK(idx) \
1367   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
1368
     /* Nonzero unless composition is disabled for
        coding_system_table[IDX].  The table entry is dereferenced
        without being tested for null here.  */
1369 #define COMPOSITION_OK(idx)     \
1370   (coding_system_table[idx]->composing != COMPOSITION_DISABLED)
1371
1372 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1373    Check if a text is encoded in ISO2022.  If it is, return an
1374    integer in which the appropriate flag bits, any of:
1375         CODING_CATEGORY_MASK_ISO_7
1376         CODING_CATEGORY_MASK_ISO_7_TIGHT
1377         CODING_CATEGORY_MASK_ISO_8_1
1378         CODING_CATEGORY_MASK_ISO_8_2
1379         CODING_CATEGORY_MASK_ISO_7_ELSE
1380         CODING_CATEGORY_MASK_ISO_8_ELSE
1381    are set.  If a code which should never appear in ISO2022 is found,
1382    returns 0.  */
1383
1384 static int
1385 detect_coding_iso2022 (src, src_end, multibytep)
1386      unsigned char *src, *src_end;
1387      int multibytep;
1388 {
1389   int mask = CODING_CATEGORY_MASK_ISO;
1390   int mask_found = 0;
1391   int reg[4], shift_out = 0, single_shifting = 0;
1392   int c, c1, charset;
1393   /* Dummy for ONE_MORE_BYTE.  */
1394   struct coding_system dummy_coding;
1395   struct coding_system *coding = &dummy_coding;
1396   Lisp_Object safe_chars;
1397
1398   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
1399   while (mask && src < src_end)
1400     {
1401       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1402     retry:
1403       switch (c)
1404         {
1405         case ISO_CODE_ESC:
1406           if (inhibit_iso_escape_detection)
1407             break;
1408           single_shifting = 0;
1409           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1410           if (c >= '(' && c <= '/')
1411             {
1412               /* Designation sequence for a charset of dimension 1.  */
1413               ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1414               if (c1 < ' ' || c1 >= 0x80
1415                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
1416                 /* Invalid designation sequence.  Just ignore.  */
1417                 break;
1418               reg[(c - '(') % 4] = charset;
1419             }
1420           else if (c == '$')
1421             {
1422               /* Designation sequence for a charset of dimension 2.  */
1423               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1424               if (c >= '@' && c <= 'B')
1425                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
1426                 reg[0] = charset = iso_charset_table[1][0][c];
1427               else if (c >= '(' && c <= '/')
1428                 {
1429                   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1430                   if (c1 < ' ' || c1 >= 0x80
1431                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
1432                     /* Invalid designation sequence.  Just ignore.  */
1433                     break;
1434                   reg[(c - '(') % 4] = charset;
1435                 }
1436               else
1437                 /* Invalid designation sequence.  Just ignore.  */
1438                 break;
1439             }
1440           else if (c == 'N' || c == 'O')
1441             {
1442               /* ESC <Fe> for SS2 or SS3.  */
1443               mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
1444               break;
1445             }
1446           else if (c >= '0' && c <= '4')
1447             {
1448               /* ESC <Fp> for start/end composition.  */
1449               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7))
1450                 mask_found |= CODING_CATEGORY_MASK_ISO_7;
1451               else
1452                 mask &= ~CODING_CATEGORY_MASK_ISO_7;
1453               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT))
1454                 mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
1455               else
1456                 mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
1457               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_1))
1458                 mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1459               else
1460                 mask &= ~CODING_CATEGORY_MASK_ISO_8_1;
1461               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_2))
1462                 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1463               else
1464                 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1465               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_ELSE))
1466                 mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
1467               else
1468                 mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
1469               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_ELSE))
1470                 mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
1471               else
1472                 mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
1473               break;
1474             }
1475           else
1476             /* Invalid escape sequence.  Just ignore.  */
1477             break;
1478
1479           /* We found a valid designation sequence for CHARSET.  */
1480           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
1481           c = MAKE_CHAR (charset, 0, 0);
1482           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
1483             mask_found |= CODING_CATEGORY_MASK_ISO_7;
1484           else
1485             mask &= ~CODING_CATEGORY_MASK_ISO_7;
1486           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
1487             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
1488           else
1489             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
1490           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
1491             mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
1492           else
1493             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
1494           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
1495             mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
1496           else
1497             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
1498           break;
1499
1500         case ISO_CODE_SO:
1501           if (inhibit_iso_escape_detection)
1502             break;
1503           single_shifting = 0;
1504           if (shift_out == 0
1505               && (reg[1] >= 0
1506                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
1507                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
1508             {
1509               /* Locking shift out.  */
1510               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1511               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1512             }
1513           break;
1514
1515         case ISO_CODE_SI:
1516           if (inhibit_iso_escape_detection)
1517             break;
1518           single_shifting = 0;
1519           if (shift_out == 1)
1520             {
1521               /* Locking shift in.  */
1522               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1523               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1524             }
1525           break;
1526
1527         case ISO_CODE_CSI:
1528           single_shifting = 0;
1529         case ISO_CODE_SS2:
1530         case ISO_CODE_SS3:
1531           {
1532             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
1533
1534             if (inhibit_iso_escape_detection)
1535               break;
1536             if (c != ISO_CODE_CSI)
1537               {
1538                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1539                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1540                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1541                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1542                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1543                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1544                 single_shifting = 1;
1545               }
1546             if (VECTORP (Vlatin_extra_code_table)
1547                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1548               {
1549                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1550                     & CODING_FLAG_ISO_LATIN_EXTRA)
1551                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1552                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1553                     & CODING_FLAG_ISO_LATIN_EXTRA)
1554                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1555               }
1556             mask &= newmask;
1557             mask_found |= newmask;
1558           }
1559           break;
1560
1561         default:
1562           if (c < 0x80)
1563             {
1564               single_shifting = 0;
1565               break;
1566             }
1567           else if (c < 0xA0)
1568             {
1569               single_shifting = 0;
1570               if (VECTORP (Vlatin_extra_code_table)
1571                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1572                 {
1573                   int newmask = 0;
1574
1575                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1576                       & CODING_FLAG_ISO_LATIN_EXTRA)
1577                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1578                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1579                       & CODING_FLAG_ISO_LATIN_EXTRA)
1580                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1581                   mask &= newmask;
1582                   mask_found |= newmask;
1583                 }
1584               else
1585                 return 0;
1586             }
1587           else
1588             {
1589               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
1590                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
1591               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1592               /* Check the length of succeeding codes of the range
1593                  0xA0..0FF.  If the byte length is odd, we exclude
1594                  CODING_CATEGORY_MASK_ISO_8_2.  We can check this only
1595                  when we are not single shifting.  */
1596               if (!single_shifting
1597                   && mask & CODING_CATEGORY_MASK_ISO_8_2)
1598                 {
1599                   int i = 1;
1600
1601                   c = -1;
1602                   while (src < src_end)
1603                     {
1604                       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1605                       if (c < 0xA0)
1606                         break;
1607                       i++;
1608                     }
1609
1610                   if (i & 1 && src < src_end)
1611                     mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1612                   else
1613                     mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1614                   if (c >= 0)
1615                     /* This means that we have read one extra byte.  */
1616                     goto retry;
1617                 }
1618             }
1619           break;
1620         }
1621     }
1622  label_end_of_loop:
1623   return (mask & mask_found);
1624 }
1625
1626 /* Build the character whose charset is CHARSET and whose position
1627    codes are C1 (first) and C2 (second), and return its character
1628    code.  When the variable `translation_table' is nil the result is
1629    MAKE_CHAR (CHARSET, C1, C2); otherwise return the translated code
        that translate_char produces for that table.  */
1630
1631 #define DECODE_ISO_CHARACTER(charset, c1, c2)                  \
1632   (! NILP (translation_table)                                  \
1633    ? translate_char (translation_table, -1, charset, c1, c2)   \
1634    : MAKE_CHAR (charset, c1, c2))
1635
1636 /* Set designation state into CODING.
        Look up the charset identified by DIMENSION, CHARS and FINAL_CHAR
        through ISO_CHARSET_TABLE and, if it is acceptable, record it as
        the charset designated to register REG of CODING.  A charset is
        acceptable when it is non-negative and either CODING requests its
        designation to REG or the character MAKE_CHAR (charset, 0, 0) is
        safe according to `safe_chars'.  When CODING_MODE_DIRECTION is set
        in coding->mode, the reverse charset is stored instead if one
        exists.

        Control jumps to `label_invalid_code' when FINAL_CHAR is below '0'
        or not below 128, when the charset is not acceptable (REG is then
        remembered in last_invalid_designation_register), and when ASCII
        is designated to register 0 while that field is 0 (so that the
        sequence is inserted as is).  The field is reset to -1 in that
        last case and after every accepted designation.

        The expansion refers to `coding', `safe_chars' and
        `label_invalid_code' from the enclosing function, declares its own
        local `charset' and `c', and uses REG and FINAL_CHAR without
        wrapping them in parentheses.  */
1637 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
1638   do {                                                                     \
1639     int charset, c;                                                        \
1640                                                                            \
1641     if (final_char < '0' || final_char >= 128)                             \
1642       goto label_invalid_code;                                             \
1643     charset = ISO_CHARSET_TABLE (make_number (dimension),                  \
1644                                  make_number (chars),                      \
1645                                  make_number (final_char));                \
1646     c = MAKE_CHAR (charset, 0, 0);                                         \
1647     if (charset >= 0                                                       \
1648         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
1649             || CODING_SAFE_CHAR_P (safe_chars, c)))                        \
1650       {                                                                    \
1651         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
1652             && reg == 0                                                    \
1653             && charset == CHARSET_ASCII)                                   \
1654           {                                                                \
1655             /* We should insert this designation sequence as is so         \
1656                that it is surely written back to a file.  */               \
1657             coding->spec.iso2022.last_invalid_designation_register = -1;   \
1658             goto label_invalid_code;                                       \
1659           }                                                                \
1660         coding->spec.iso2022.last_invalid_designation_register = -1;       \
1661         if ((coding->mode & CODING_MODE_DIRECTION)                         \
1662             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
1663           charset = CHARSET_REVERSE_CHARSET (charset);                     \
1664         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
1665       }                                                                    \
1666     else                                                                   \
1667       {                                                                    \
1668         coding->spec.iso2022.last_invalid_designation_register = reg;      \
1669         goto label_invalid_code;                                           \
1670       }                                                                    \
1671   } while (0)
1672
1673 /* Obtain a fresh `struct composition_data' block with xmalloc and
1674    append it to the chain CODING already owns (coding->cmp_data).
        CHAR_OFFSET is stored in the new block, whose `used' count starts
        at zero.  CODING then points at the new block, with
        cmp_data_start cleared and composing set to COMPOSITION_NO.  */
1675
1676 void
1677 coding_allocate_composition_data (coding, char_offset)
1678      struct coding_system *coding;
1679      int char_offset;
1680 {
1681   struct composition_data *block;
1682
1683   block = (struct composition_data *) xmalloc (sizeof *block);
1684   block->next = NULL;
1685   block->prev = coding->cmp_data;
1686   block->used = 0;
1687   block->char_offset = char_offset;
       /* Hook the new block behind the previous last one, if any.  */
1688   if (block->prev != NULL)
1689     block->prev->next = block;
1690   coding->composing = COMPOSITION_NO;
1691   coding->cmp_data_start = 0;
1692   coding->cmp_data = block;
1693 }
1694
1695 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
1696    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
1697    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
1698    ESC 3 : altchar composition :  ESC 3 ALT ... ESC 0 CHAR ... ESC 1
1699    ESC 4 : alt&rule composition : ESC 4 ALT RULE .. ALT ESC 0 CHAR ... ESC 1

        C1 is the byte that followed ESC.  Three situations are handled:
        - coding->composing is COMPOSITION_DISABLED: ESC and C1 & 0x7f are
          stored at DST and coding->produced_char grows by two.
        - No composition is in progress: if coding->cmp_data is missing or
          lacks room for COMPOSITION_DATA_MAX_BUNCH_LENGTH more entries,
          coding->result becomes CODING_FINISH_INSUFFICIENT_CMP and
          control jumps to `label_end_of_loop'.  Otherwise
          coding->composing is set from C1 and the start is registered
          with CODING_ADD_COMPOSITION_START.
        - A composition is already in progress: the two ALTCHARS methods
          are switched to COMPOSITION_RELATIVE; other methods are left
          alone.

        The expansion refers to `coding', `dst' and `label_end_of_loop'
        from the enclosing function.
1700   */
1701
1702 #define DECODE_COMPOSITION_START(c1)                                       \
1703   do {                                                                     \
1704     if (coding->composing == COMPOSITION_DISABLED)                         \
1705       {                                                                    \
1706         *dst++ = ISO_CODE_ESC;                                             \
1707         *dst++ = c1 & 0x7f;                                                \
1708         coding->produced_char += 2;                                        \
1709       }                                                                    \
1710     else if (!COMPOSING_P (coding))                                        \
1711       {                                                                    \
1712         /* This is surely the start of a composition.  We must be sure     \
1713            that coding->cmp_data has enough space to store the             \
1714            information about the composition.  If not, terminate the       \
1715            current decoding loop, allocate one more memory block for       \
1716            coding->cmp_data in the caller, then start the decoding         \
1717            loop again.  We can't allocate memory here directly because     \
1718            it may cause buffer/string relocation.  */                      \
1719         if (!coding->cmp_data                                              \
1720             || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
1721                 >= COMPOSITION_DATA_SIZE))                                 \
1722           {                                                                \
1723             coding->result = CODING_FINISH_INSUFFICIENT_CMP;               \
1724             goto label_end_of_loop;                                        \
1725           }                                                                \
1726         coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE              \
1727                              : c1 == '2' ? COMPOSITION_WITH_RULE           \
1728                              : c1 == '3' ? COMPOSITION_WITH_ALTCHARS       \
1729                              : COMPOSITION_WITH_RULE_ALTCHARS);            \
1730         CODING_ADD_COMPOSITION_START (coding, coding->produced_char,       \
1731                                       coding->composing);                  \
1732         coding->composition_rule_follows = 0;                              \
1733       }                                                                    \
1734     else                                                                   \
1735       {                                                                    \
1736         /* We are already handling a composition.  If the method is        \
1737            the following two, the codes following the current escape       \
1738            sequence are actual characters stored in a buffer.  */          \
1739         if (coding->composing == COMPOSITION_WITH_ALTCHARS                 \
1740             || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)        \
1741           {                                                                \
1742             coding->composing = COMPOSITION_RELATIVE;                      \
1743             coding->composition_rule_follows = 0;                          \
1744           }                                                                \
1745       }                                                                    \
1746   } while (0)
1747
1748 /* Process ESC 1, the composition end sequence; C1 is the byte that
        followed ESC.  While a composition is in progress, register its
        end at coding->produced_char and set coding->composing to
        COMPOSITION_NO.  Otherwise store ESC and C1 at DST unchanged and
        count them as two produced characters.  The expansion refers to
        `coding' and `dst' from the enclosing function.  */
1749
1750 #define DECODE_COMPOSITION_END(c1)                                      \
1751   do {                                                                  \
1752     if (COMPOSING_P (coding))                                           \
1753       {                                                                 \
1754         CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
1755         coding->composing = COMPOSITION_NO;                             \
1756       }                                                                 \
1757     else                                                                \
1758       {                                                                 \
1759         *dst++ = ISO_CODE_ESC;                                          \
1760         *dst++ = c1;                                                    \
1761         coding->produced_char += 2;                                     \
1762       }                                                                 \
1763   } while (0)
1764
1765 /* Decode a composition rule from the byte C1 (and maybe one more byte
1766    from SRC) and store one encoded composition rule in
1767    coding->cmp_data.

        C1 is modified in place (32 is subtracted from it), so it must be
        an lvalue.  After the subtraction:
        - below 81 (old format): the two reference points are C1 / 9 and
          C1 % 9, with the value 4 replaced by 10 in either;
        - 81 up to 92 (new format): one more byte is fetched into `c2'
          with ONE_MORE_BYTE and the rule is encoded from C1 - 81 and
          c2 - 32;
        - anything else: the rule stays 0.
        The rule is then added with CODING_ADD_COMPOSITION_COMPONENT and
        coding->composition_rule_follows is cleared.

        The expansion refers to `coding' and `c2' from the enclosing
        function, plus whatever ONE_MORE_BYTE itself requires.  */
1768
1769 #define DECODE_COMPOSITION_RULE(c1)                                     \
1770   do {                                                                  \
1771     int rule = 0;                                                       \
1772     (c1) -= 32;                                                         \
1773     if (c1 < 81)                /* old format (before ver.21) */        \
1774       {                                                                 \
1775         int gref = (c1) / 9;                                            \
1776         int nref = (c1) % 9;                                            \
1777         if (gref == 4) gref = 10;                                       \
1778         if (nref == 4) nref = 10;                                       \
1779         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
1780       }                                                                 \
1781     else if (c1 < 93)           /* new format (after ver.21) */         \
1782       {                                                                 \
1783         ONE_MORE_BYTE (c2);                                             \
1784         rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);              \
1785       }                                                                 \
1786     CODING_ADD_COMPOSITION_COMPONENT (coding, rule);                    \
1787     coding->composition_rule_follows = 0;                               \
1788   } while (0)
1789
1790
1791 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1792
1793 static void
1794 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1795      struct coding_system *coding;
1796      unsigned char *source, *destination;
1797      int src_bytes, dst_bytes;
1798 {
1799   unsigned char *src = source;
1800   unsigned char *src_end = source + src_bytes;
1801   unsigned char *dst = destination;
1802   unsigned char *dst_end = destination + dst_bytes;
1803   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1804   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1805   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1806   /* SRC_BASE remembers the start position in source in each loop.
1807      The loop will be exited when there's not enough source code
1808      (within macro ONE_MORE_BYTE), or when there's not enough
1809      destination area to produce a character (within macro
1810      EMIT_CHAR).  */
1811   unsigned char *src_base;
1812   int c, charset;
1813   Lisp_Object translation_table;
1814   Lisp_Object safe_chars;
1815
1816   safe_chars = coding_safe_chars (coding->symbol);
1817
1818   if (NILP (Venable_character_translation))
1819     translation_table = Qnil;
1820   else
1821     {
1822       translation_table = coding->translation_table_for_decode;
1823       if (NILP (translation_table))
1824         translation_table = Vstandard_translation_table_for_decode;
1825     }
1826
1827   coding->result = CODING_FINISH_NORMAL;
1828
1829   while (1)
1830     {
1831       int c1, c2 = 0;
1832
1833       src_base = src;
1834       ONE_MORE_BYTE (c1);
1835
1836       /* We produce no character or one character.  */
1837       switch (iso_code_class [c1])
1838         {
1839         case ISO_0x20_or_0x7F:
1840           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1841             {
1842               DECODE_COMPOSITION_RULE (c1);
1843               continue;
1844             }
1845           if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
1846             {
1847               /* This is SPACE or DEL.  */
1848               charset = CHARSET_ASCII;
1849               break;
1850             }
1851           /* This is a graphic character, we fall down ...  */
1852
1853         case ISO_graphic_plane_0:
1854           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1855             {
1856               DECODE_COMPOSITION_RULE (c1);
1857               continue;
1858             }
1859           charset = charset0;
1860           break;
1861
1862         case ISO_0xA0_or_0xFF:
1863           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1864               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1865             goto label_invalid_code;
1866           /* This is a graphic character, we fall down ... */
1867
1868         case ISO_graphic_plane_1:
1869           if (charset1 < 0)
1870             goto label_invalid_code;
1871           charset = charset1;
1872           break;
1873
1874         case ISO_control_0:
1875           if (COMPOSING_P (coding))
1876             DECODE_COMPOSITION_END ('1');
1877
1878           /* All ISO2022 control characters in this class have the
1879              same representation in Emacs internal format.  */
1880           if (c1 == '\n'
1881               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1882               && (coding->eol_type == CODING_EOL_CR
1883                   || coding->eol_type == CODING_EOL_CRLF))
1884             {
1885               coding->result = CODING_FINISH_INCONSISTENT_EOL;
1886               goto label_end_of_loop;
1887             }
1888           charset = CHARSET_ASCII;
1889           break;
1890
1891         case ISO_control_1:
1892           if (COMPOSING_P (coding))
1893             DECODE_COMPOSITION_END ('1');
1894           goto label_invalid_code;
1895
1896         case ISO_carriage_return:
1897           if (COMPOSING_P (coding))
1898             DECODE_COMPOSITION_END ('1');
1899
1900           if (coding->eol_type == CODING_EOL_CR)
1901             c1 = '\n';
1902           else if (coding->eol_type == CODING_EOL_CRLF)
1903             {
1904               ONE_MORE_BYTE (c1);
1905               if (c1 != ISO_CODE_LF)
1906                 {
1907                   src--;
1908                   c1 = '\r';
1909                 }
1910             }
1911           charset = CHARSET_ASCII;
1912           break;
1913
1914         case ISO_shift_out:
1915           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1916               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1917             goto label_invalid_code;
1918           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1919           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1920           continue;
1921
1922         case ISO_shift_in:
1923           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1924             goto label_invalid_code;
1925           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1926           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1927           continue;
1928
1929         case ISO_single_shift_2_7:
1930         case ISO_single_shift_2:
1931           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1932             goto label_invalid_code;
1933           /* SS2 is handled as an escape sequence of ESC 'N' */
1934           c1 = 'N';
1935           goto label_escape_sequence;
1936
1937         case ISO_single_shift_3:
1938           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1939             goto label_invalid_code;
1940           /* SS3 is handled as an escape sequence of ESC 'O' */
1941           c1 = 'O';
1942           goto label_escape_sequence;
1943
1944         case ISO_control_sequence_introducer:
1945           /* CSI is handled as an escape sequence of ESC '[' ...  */
1946           c1 = '[';
1947           goto label_escape_sequence;
1948
1949         case ISO_escape:
1950           ONE_MORE_BYTE (c1);
1951         label_escape_sequence:
1952           /* Escape sequences handled by Emacs are invocation,
1953              designation, direction specification, and character
1954              composition specification.  */
1955           switch (c1)
1956             {
1957             case '&':           /* revision of following character set */
1958               ONE_MORE_BYTE (c1);
1959               if (!(c1 >= '@' && c1 <= '~'))
1960                 goto label_invalid_code;
1961               ONE_MORE_BYTE (c1);
1962               if (c1 != ISO_CODE_ESC)
1963                 goto label_invalid_code;
1964               ONE_MORE_BYTE (c1);
1965               goto label_escape_sequence;
1966
1967             case '$':           /* designation of 2-byte character set */
1968               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1969                 goto label_invalid_code;
1970               ONE_MORE_BYTE (c1);
1971               if (c1 >= '@' && c1 <= 'B')
1972                 {       /* designation of JISX0208.1978, GB2312.1980,
1973                            or JISX0208.1980 */
1974                   DECODE_DESIGNATION (0, 2, 94, c1);
1975                 }
1976               else if (c1 >= 0x28 && c1 <= 0x2B)
1977                 {       /* designation of DIMENSION2_CHARS94 character set */
1978                   ONE_MORE_BYTE (c2);
1979                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1980                 }
1981               else if (c1 >= 0x2C && c1 <= 0x2F)
1982                 {       /* designation of DIMENSION2_CHARS96 character set */
1983                   ONE_MORE_BYTE (c2);
1984                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1985                 }
1986               else
1987                 goto label_invalid_code;
1988               /* We must update these variables now.  */
1989               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1990               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1991               continue;
1992
1993             case 'n':           /* invocation of locking-shift-2 */
1994               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1995                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1996                 goto label_invalid_code;
1997               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1998               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1999               continue;
2000
2001             case 'o':           /* invocation of locking-shift-3 */
2002               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
2003                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
2004                 goto label_invalid_code;
2005               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
2006               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
2007               continue;
2008
2009             case 'N':           /* invocation of single-shift-2 */
2010               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2011                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
2012                 goto label_invalid_code;
2013               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
2014               ONE_MORE_BYTE (c1);
2015               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2016                 goto label_invalid_code;
2017               break;
2018
2019             case 'O':           /* invocation of single-shift-3 */
2020               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2021                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
2022                 goto label_invalid_code;
2023               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
2024               ONE_MORE_BYTE (c1);
2025               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2026                 goto label_invalid_code;
2027               break;
2028
2029             case '0': case '2': case '3': case '4': /* start composition */
2030               DECODE_COMPOSITION_START (c1);
2031               continue;
2032
2033             case '1':           /* end composition */
2034               DECODE_COMPOSITION_END (c1);
2035               continue;
2036
2037             case '[':           /* specification of direction */
2038               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
2039                 goto label_invalid_code;
2040               /* For the moment, nested direction is not supported.
2041                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
2042                  left-to-right, and nonzero means right-to-left.  */
2043               ONE_MORE_BYTE (c1);
2044               switch (c1)
2045                 {
2046                 case ']':       /* end of the current direction */
2047                   coding->mode &= ~CODING_MODE_DIRECTION;
2048
2049                 case '0':       /* end of the current direction */
2050                 case '1':       /* start of left-to-right direction */
2051                   ONE_MORE_BYTE (c1);
2052                   if (c1 == ']')
2053                     coding->mode &= ~CODING_MODE_DIRECTION;
2054                   else
2055                     goto label_invalid_code;
2056                   break;
2057
2058                 case '2':       /* start of right-to-left direction */
2059                   ONE_MORE_BYTE (c1);
2060                   if (c1 == ']')
2061                     coding->mode |= CODING_MODE_DIRECTION;
2062                   else
2063                     goto label_invalid_code;
2064                   break;
2065
2066                 default:
2067                   goto label_invalid_code;
2068                 }
2069               continue;
2070
2071             case '%':
2072               if (COMPOSING_P (coding))
2073                 DECODE_COMPOSITION_END ('1');
2074               ONE_MORE_BYTE (c1);
2075               if (c1 == '/')
2076                 {
2077                   /* CTEXT extended segment:
2078                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
2079                      We keep these bytes as is for the moment.
2080                      They may be decoded by post-read-conversion.  */
2081                   int dim, M, L;
2082                   int size, required;
2083                   int produced_chars;
2084
2085                   ONE_MORE_BYTE (dim);
2086                   ONE_MORE_BYTE (M);
2087                   ONE_MORE_BYTE (L);
2088                   size = ((M - 128) * 128) + (L - 128);
2089                   required = 8 + size * 2;
2090                   if (dst + required > (dst_bytes ? dst_end : src))
2091                     goto label_end_of_loop;
2092                   *dst++ = ISO_CODE_ESC;
2093                   *dst++ = '%';
2094                   *dst++ = '/';
2095                   *dst++ = dim;
2096                   produced_chars = 4;
2097                   dst += CHAR_STRING (M, dst), produced_chars++;
2098                   dst += CHAR_STRING (L, dst), produced_chars++;
2099                   while (size-- > 0)
2100                     {
2101                       ONE_MORE_BYTE (c1);
2102                       dst += CHAR_STRING (c1, dst), produced_chars++;
2103                     }
2104                   coding->produced_char += produced_chars;
2105                 }
2106               else if (c1 == 'G')
2107                 {
2108                   unsigned char *d = dst;
2109                   int produced_chars;
2110
2111                   /* XFree86 extension for embedding UTF-8 in CTEXT:
2112                      ESC % G --UTF-8-BYTES-- ESC % @
2113                      We keep these bytes as is for the moment.
2114                      They may be decoded by post-read-conversion.  */
2115                   if (d + 6 > (dst_bytes ? dst_end : src))
2116                     goto label_end_of_loop;
2117                   *d++ = ISO_CODE_ESC;
2118                   *d++ = '%';
2119                   *d++ = 'G';
2120                   produced_chars = 3;
2121                   while (d + 1 < (dst_bytes ? dst_end : src))
2122                     {
2123                       ONE_MORE_BYTE (c1);
2124                       if (c1 == ISO_CODE_ESC
2125                           && src + 1 < src_end
2126                           && src[0] == '%'
2127                           && src[1] == '@')
2128                         break;
2129                       d += CHAR_STRING (c1, d), produced_chars++;
2130                     }
2131                   if (d + 3 > (dst_bytes ? dst_end : src))
2132                     goto label_end_of_loop;
2133                   *d++ = ISO_CODE_ESC;
2134                   *d++ = '%';
2135                   *d++ = '@';
2136                   dst = d;
2137                   coding->produced_char += produced_chars + 3;
2138                 }
2139               else
2140                 goto label_invalid_code;
2141               continue;
2142
2143             default:
2144               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
2145                 goto label_invalid_code;
2146               if (c1 >= 0x28 && c1 <= 0x2B)
2147                 {       /* designation of DIMENSION1_CHARS94 character set */
2148                   ONE_MORE_BYTE (c2);
2149                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
2150                 }
2151               else if (c1 >= 0x2C && c1 <= 0x2F)
2152                 {       /* designation of DIMENSION1_CHARS96 character set */
2153                   ONE_MORE_BYTE (c2);
2154                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
2155                 }
2156               else
2157                 goto label_invalid_code;
2158               /* We must update these variables now.  */
2159               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
2160               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
2161               continue;
2162             }
2163         }
2164
2165       /* Now we know CHARSET and 1st position code C1 of a character.
2166          Produce a multibyte sequence for that character while getting
2167          2nd position code C2 if necessary.  */
2168       if (CHARSET_DIMENSION (charset) == 2)
2169         {
2170           ONE_MORE_BYTE (c2);
2171           if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0)
2172             /* C2 is not in a valid range.  */
2173             goto label_invalid_code;
2174         }
2175       c = DECODE_ISO_CHARACTER (charset, c1, c2);
2176       EMIT_CHAR (c);
2177       continue;
2178
2179     label_invalid_code:
2180       coding->errors++;
2181       if (COMPOSING_P (coding))
2182         DECODE_COMPOSITION_END ('1');
2183       src = src_base;
2184       c = *src++;
2185       EMIT_CHAR (c);
2186     }
2187
2188  label_end_of_loop:
2189   coding->consumed = coding->consumed_char = src_base - source;
2190   coding->produced = dst - destination;
2191   return;
2192 }
2193
2194
2195 /* ISO2022 encoding stuff.  */
2196
2197 /*
2198    It is not enough to say just "ISO2022" on encoding, we have to
2199    specify more details.  In Emacs, each ISO2022 coding system
2200    variant has the following specifications:
2201         1. Initial designation to G0 through G3.
2202         2. Allows short-form designation?
2203         3. ASCII should be designated to G0 before control characters?
2204         4. ASCII should be designated to G0 at end of line?
2205         5. 7-bit environment or 8-bit environment?
2206         6. Use locking-shift?
2207         7. Use Single-shift?
2208    And the following two are only for Japanese:
        8. Use ASCII in place of JISX0201-1976-Roman?
2210         9. Use JISX0208-1983 in place of JISX0208-1978?
2211    These specifications are encoded in `coding->flags' as flag bits
2212    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
2213    details.
2214 */
2215
/* Emit at DST (advancing it) the escape sequence that designates
   CHARSET to graphic register REG, and record CHARSET as the current
   designation of REG in CODING.

   If the revision number CODING holds for CHARSET is below 255, the
   sequence is preceded by `ESC & <'@' + revision>'.  For a
   multi-byte 94-charset designated to register 0 whose <final-char>
   is '@', 'A', or 'B', the intermediate character is omitted (short
   form) when CODING has CODING_FLAG_ISO_SHORT_FORM set.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    char *intermediates_94 = "()*+";                                    \
    char *intermediates_96 = ",-./";                                    \
    unsigned char designation_final = CHARSET_ISO_FINAL_CHAR (charset); \
    int designation_revision                                            \
      = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset);               \
                                                                        \
    if (designation_revision < 255)                                     \
      {                                                                 \
        /* Announce the revision of CHARSET first.  */                  \
        *dst++ = ISO_CODE_ESC;                                          \
        *dst++ = '&';                                                   \
        *dst++ = '@' + designation_revision;                            \
      }                                                                 \
    *dst++ = ISO_CODE_ESC;                                              \
    /* Multi-byte charsets are marked by `$'.  */                       \
    if (CHARSET_DIMENSION (charset) != 1)                               \
      *dst++ = '$';                                                     \
    if (CHARSET_CHARS (charset) != 94)                                  \
      *dst++ = (unsigned char) (intermediates_96[reg]);                 \
    else if (CHARSET_DIMENSION (charset) == 1                           \
             || ! (coding->flags & CODING_FLAG_ISO_SHORT_FORM)          \
             || reg != 0                                                \
             || designation_final < '@' || designation_final > 'B')     \
      *dst++ = (unsigned char) (intermediates_94[reg]);                 \
    *dst++ = designation_final;                                         \
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
  } while (0)
2258
2259 /* The following two macros produce codes (control character or escape
2260    sequence) for ISO2022 single-shift functions (single-shift-2 and
2261    single-shift-3).  */
2262
/* Emit single-shift-2 at DST: `ESC N' when CODING has
   CODING_FLAG_ISO_SEVEN_BITS set, otherwise the one-byte control code
   ISO_CODE_SS2.  Afterwards CODING is marked as single-shifting.  */
#define ENCODE_SINGLE_SHIFT_2                           \
  do {                                                  \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)) \
      *dst++ = ISO_CODE_SS2;                            \
    else                                                \
      {                                                 \
        *dst++ = ISO_CODE_ESC;                          \
        *dst++ = 'N';                                   \
      }                                                 \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
  } while (0)
2271
/* Emit single-shift-3 at DST: `ESC O' when CODING has
   CODING_FLAG_ISO_SEVEN_BITS set, otherwise the one-byte control code
   ISO_CODE_SS3.  Afterwards CODING is marked as single-shifting.  */
#define ENCODE_SINGLE_SHIFT_3                           \
  do {                                                  \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)) \
      *dst++ = ISO_CODE_SS3;                            \
    else                                                \
      {                                                 \
        *dst++ = ISO_CODE_ESC;                          \
        *dst++ = 'O';                                   \
      }                                                 \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
  } while (0)
2280
2281 /* The following four macros produce codes (control character or
2282    escape sequence) for ISO2022 locking-shift functions (shift-in,
2283    shift-out, locking-shift-2, and locking-shift-3).  */
2284
/* Shift-in: record graphic register 0 as invoked to graphic plane 0
   and emit ISO_CODE_SI at DST.  */
#define ENCODE_SHIFT_IN                         \
  do {                                          \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
    *dst = ISO_CODE_SI;                         \
    dst++;                                      \
  } while (0)
2290
/* Shift-out: record graphic register 1 as invoked to graphic plane 0
   and emit ISO_CODE_SO at DST.  */
#define ENCODE_SHIFT_OUT                        \
  do {                                          \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
    *dst = ISO_CODE_SO;                         \
    dst++;                                      \
  } while (0)
2296
/* Locking-shift-2: emit `ESC n' at DST and record graphic register 2
   as invoked to graphic plane 0.  */
#define ENCODE_LOCKING_SHIFT_2                  \
  do {                                          \
    *dst++ = ISO_CODE_ESC;                      \
    *dst++ = 'n';                               \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \
  } while (0)
2302
/* Locking-shift-3: emit `ESC o' at DST and record graphic register 3
   as invoked to graphic plane 0.  */
#define ENCODE_LOCKING_SHIFT_3                  \
  do {                                          \
    *dst++ = ISO_CODE_ESC;                      \
    *dst++ = 'o';                               \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \
  } while (0)
2308
/* Produce codes for a DIMENSION1 character whose character set is
   CHARSET and whose position-code is C1.  Designation and invocation
   sequences are also produced in advance if necessary.

   The byte is emitted at DST with the 8th bit cleared when CHARSET is
   on graphic plane 0 (or when single-shifting in a 7-bit
   environment), and with the 8th bit set when CHARSET is on graphic
   plane 1 (or when single-shifting otherwise).  A pending
   single-shift is cleared once the byte has been emitted.

   CHARSET and C1 are parenthesized wherever they are combined with
   operators, so arbitrary expressions may be passed; both may be
   evaluated more than once.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          *dst++ = (c1) & 0x7F;                                         \
        else                                                            \
          *dst++ = (c1) | 0x80;                                         \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))    \
      {                                                                 \
        *dst++ = (c1) & 0x7F;                                           \
        break;                                                          \
      }                                                                 \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))    \
      {                                                                 \
        *dst++ = (c1) | 0x80;                                           \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation ((charset), coding, dst);     \
  } while (1)
2341
/* Produce codes for a DIMENSION2 character whose character set is
   CHARSET and whose position-codes are C1 and C2.  Designation and
   invocation codes are also produced in advance if necessary.

   Both bytes are emitted at DST with the 8th bit cleared when CHARSET
   is on graphic plane 0 (or when single-shifting in a 7-bit
   environment), and with the 8th bit set when CHARSET is on graphic
   plane 1 (or when single-shifting otherwise).  A pending
   single-shift is cleared once the bytes have been emitted.

   CHARSET, C1 and C2 are parenthesized wherever they are combined
   with operators, so arbitrary expressions may be passed; they may be
   evaluated more than once.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          *dst++ = (c1) & 0x7F, *dst++ = (c2) & 0x7F;                   \
        else                                                            \
          *dst++ = (c1) | 0x80, *dst++ = (c2) | 0x80;                   \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))    \
      {                                                                 \
        *dst++ = (c1) & 0x7F, *dst++ = (c2) & 0x7F;                     \
        break;                                                          \
      }                                                                 \
    else if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))    \
      {                                                                 \
        *dst++ = (c1) | 0x80, *dst++ = (c2) | 0x80;                     \
        break;                                                          \
      }                                                                 \
    else                                                                \
      /* Since CHARSET is not yet invoked to any graphic planes, we     \
         must invoke it, or, at first, designate it to some graphic     \
         register.  Then repeat the loop to actually produce the        \
         character.  */                                                 \
      dst = encode_invocation_designation ((charset), coding, dst);     \
  } while (1)
2374
/* Encode character C at DST.  C is split by SPLIT_CHAR into its
   charset and position codes.  If that charset is not defined, the
   first position code, and the second one when it is non-negative,
   are emitted as they are.  Otherwise the character is handed to
   ENCODE_ISO_CHARACTER_DIMENSION1 or ENCODE_ISO_CHARACTER_DIMENSION2
   according to the dimension of the charset, after these
   substitutions:
     ASCII -> charset_latin_jisx0201, if CODING_FLAG_ISO_USE_ROMAN;
     charset_jisx0208 -> charset_jisx0208_1978, if
       CODING_FLAG_ISO_USE_OLDJIS.  */
#define ENCODE_ISO_CHARACTER(c)                                         \
  do {                                                                  \
    int charset, c1, c2;                                                \
                                                                        \
    SPLIT_CHAR (c, charset, c1, c2);                                    \
    if (! CHARSET_DEFINED_P (charset))                                  \
      {                                                                 \
        *dst++ = c1;                                                    \
        if (c2 >= 0)                                                    \
          *dst++ = c2;                                                  \
      }                                                                 \
    else if (CHARSET_DIMENSION (charset) != 1)                          \
      {                                                                 \
        if ((coding->flags & CODING_FLAG_ISO_USE_OLDJIS)                \
            && charset == charset_jisx0208)                             \
          charset = charset_jisx0208_1978;                              \
        ENCODE_ISO_CHARACTER_DIMENSION2 (charset, c1, c2);              \
      }                                                                 \
    else                                                                \
      {                                                                 \
        if ((coding->flags & CODING_FLAG_ISO_USE_ROMAN)                 \
            && charset == CHARSET_ASCII)                                \
          charset = charset_latin_jisx0201;                             \
        ENCODE_ISO_CHARACTER_DIMENSION1 (charset, c1);                  \
      }                                                                 \
  } while (0)
2404
2405
/* Do not encode character C itself; encode
   CODING_REPLACEMENT_CHARACTER in its place, twice if the width of
   C's charset is greater than 1 and once otherwise.  */

#define ENCODE_UNSAFE_CHARACTER(c)                                      \
  do {                                                                  \
    int unsafe_count = CHARSET_WIDTH (CHAR_CHARSET (c)) > 1 ? 2 : 1;    \
                                                                        \
    while (unsafe_count-- > 0)                                          \
      ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER);              \
  } while (0)
2414
2415
2416 /* Produce designation and invocation codes at a place pointed by DST
2417    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
2418    Return new DST.  */
2419
2420 unsigned char *
2421 encode_invocation_designation (charset, coding, dst)
2422      int charset;
2423      struct coding_system *coding;
2424      unsigned char *dst;
2425 {
2426   int reg;                      /* graphic register number */
2427
2428   /* At first, check designations.  */
2429   for (reg = 0; reg < 4; reg++)
2430     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
2431       break;
2432
2433   if (reg >= 4)
2434     {
2435       /* CHARSET is not yet designated to any graphic registers.  */
2436       /* At first check the requested designation.  */
2437       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2438       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
2439         /* Since CHARSET requests no special designation, designate it
2440            to graphic register 0.  */
2441         reg = 0;
2442
2443       ENCODE_DESIGNATION (charset, reg, coding);
2444     }
2445
2446   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
2447       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
2448     {
2449       /* Since the graphic register REG is not invoked to any graphic
2450          planes, invoke it to graphic plane 0.  */
2451       switch (reg)
2452         {
2453         case 0:                 /* graphic register 0 */
2454           ENCODE_SHIFT_IN;
2455           break;
2456
2457         case 1:                 /* graphic register 1 */
2458           ENCODE_SHIFT_OUT;
2459           break;
2460
2461         case 2:                 /* graphic register 2 */
2462           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2463             ENCODE_SINGLE_SHIFT_2;
2464           else
2465             ENCODE_LOCKING_SHIFT_2;
2466           break;
2467
2468         case 3:                 /* graphic register 3 */
2469           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2470             ENCODE_SINGLE_SHIFT_3;
2471           else
2472             ENCODE_LOCKING_SHIFT_3;
2473           break;
2474         }
2475     }
2476
2477   return dst;
2478 }
2479
/* Emit at DST the two bytes standing for the encoded composition rule
   RULE.  COMPOSITION_DECODE_RULE splits RULE into two values; the
   first byte is 32 + 81 plus the first value, the second byte is 32
   plus the second value.  */

#define ENCODE_COMPOSITION_RULE(rule)                   \
  do {                                                  \
    int rule_gref, rule_nref;                           \
                                                        \
    COMPOSITION_DECODE_RULE (rule, rule_gref, rule_nref); \
    dst[0] = 32 + 81 + rule_gref;                       \
    dst[1] = 32 + rule_nref;                            \
    dst += 2;                                           \
  } while (0)
2489
/* Emit at DST the escape sequence opening a composition and remember
   the composition method, taken from DATA[3], in CODING->composing.
   The sequence is `ESC 0' for COMPOSITION_RELATIVE, `ESC 3' for
   COMPOSITION_WITH_ALTCHARS, and `ESC 4' for any other method.  In
   the latter two cases CODING->cmp_data_index is set to
   CODING->cmp_data_start + 4 and CODING->composition_rule_follows is
   cleared.  DATA points to an array of integers describing the
   composition; see the comment in coding.h for its format.  */

#define ENCODE_COMPOSITION_START(coding, data)                          \
  do {                                                                  \
    coding->composing = data[3];                                        \
    *dst++ = ISO_CODE_ESC;                                              \
    if (coding->composing != COMPOSITION_RELATIVE)                      \
      {                                                                 \
        if (coding->composing == COMPOSITION_WITH_ALTCHARS)             \
          *dst++ = '3';                                                 \
        else                                                            \
          *dst++ = '4';                                                 \
        coding->composition_rule_follows = 0;                           \
        coding->cmp_data_index = coding->cmp_data_start + 4;            \
      }                                                                 \
    else                                                                \
      *dst++ = '0';                                                     \
  } while (0)
2509
/* Emit `ESC 1' at DST to close the current composition.
   CODING->composing becomes COMPOSITION_NO and CODING->cmp_data_start
   is advanced by DATA[0].  If that reaches the used length of the
   current composition data block and a next block exists, CODING
   moves on to the start of the next block.  */

#define ENCODE_COMPOSITION_END(coding, data)                    \
  do {                                                          \
    *dst++ = ISO_CODE_ESC;                                      \
    *dst++ = '1';                                               \
    coding->composing = COMPOSITION_NO;                         \
    coding->cmp_data_start += data[0];                          \
    if (coding->cmp_data->next                                  \
        && coding->cmp_data_start == coding->cmp_data->used)    \
      {                                                         \
        coding->cmp_data_start = 0;                             \
        coding->cmp_data = coding->cmp_data->next;              \
      }                                                         \
  } while (0)
2525
/* Emit the composition start sequence `ESC 0' at DST and switch
   CODING->composing to COMPOSITION_RELATIVE.  This does not open a
   new composition: it marks that the components (alternate chars and
   composition rules) of the current one have just been produced and
   that the actual text follows in SRC.  */

#define ENCODE_COMPOSITION_FAKE_START(coding)   \
  do {                                          \
    coding->composing = COMPOSITION_RELATIVE;   \
    dst[0] = ISO_CODE_ESC;                      \
    dst[1] = '0';                               \
    dst += 2;                                   \
  } while (0)
2537
2538 /* The following three macros produce codes for indicating direction
2539    of text.  */
/* Emit the control sequence introducer at DST: the two bytes `ESC ['
   when CODING has CODING_FLAG_ISO_SEVEN_BITS set, otherwise the
   single byte ISO_CODE_CSI.  The flag is tested as a bit, as the
   single-shift macros do; comparing the whole flag word for equality
   would pick the 8-bit code whenever any other flag is set too.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
  do {                                                  \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
      *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
    else                                                \
      *dst++ = ISO_CODE_CSI;                            \
  } while (0)
2547
/* Emit at DST the sequence that starts right-to-left text: the
   control sequence introducer followed by `2 ]'.  Written as a
   do-while block so that it can be used as a single statement;
   ENCODE_CONTROL_SEQUENCE_INTRODUCER takes no argument and is itself
   a statement, so it cannot be chained with the comma operator.  */
#define ENCODE_DIRECTION_R2L                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '2';                               \
    *dst++ = ']';                               \
  } while (0)
2550
/* Emit at DST the sequence that ends the current direction, i.e.
   returns to left-to-right text: the control sequence introducer
   followed by `0 ]'.  Written as a do-while block so that it can be
   used as a single statement; ENCODE_CONTROL_SEQUENCE_INTRODUCER
   takes no argument and is itself a statement, so it cannot be
   chained with the comma operator.  */
#define ENCODE_DIRECTION_L2R                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '0';                               \
    *dst++ = ']';                               \
  } while (0)
2553
/* Bring graphic plane 0 and the graphic registers back to their
   initial state, emitting the necessary codes at DST: a shift-in if
   something other than register 0 is invoked to graphic plane 0, then
   a designation sequence for every register whose initial charset is
   non-negative and differs from its current one.  */
#define ENCODE_RESET_PLANE_AND_REGISTER                                     \
  do {                                                                      \
    int reg = 0;                                                            \
                                                                            \
    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
      ENCODE_SHIFT_IN;                                                      \
    while (reg < 4)                                                         \
      {                                                                     \
        if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0          \
            && (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)           \
                != CODING_SPEC_ISO_DESIGNATION (coding, reg)))              \
          ENCODE_DESIGNATION                                                \
            (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg,        \
             coding);                                                       \
        reg++;                                                              \
      }                                                                     \
  } while (0)
2568
2569 /* Produce designation sequences of charsets in the line started from
2570    SRC to a place pointed by DST, and return updated DST.
2571
2572    If the current block ends before any end-of-line, we may fail to
2573    find all the necessary designations.  */
2574
2575 static unsigned char *
2576 encode_designation_at_bol (coding, translation_table, src, src_end, dst)
2577      struct coding_system *coding;
2578      Lisp_Object translation_table;
2579      unsigned char *src, *src_end, *dst;
2580 {
2581   int charset, c, found = 0, reg;
2582   /* Table of charsets to be designated to each graphic register.  */
2583   int r[4];
2584
2585   for (reg = 0; reg < 4; reg++)
2586     r[reg] = -1;
2587
2588   while (found < 4)
2589     {
2590       ONE_MORE_CHAR (c);
2591       if (c == '\n')
2592         break;
2593
2594       charset = CHAR_CHARSET (c);
2595       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2596       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
2597         {
2598           found++;
2599           r[reg] = charset;
2600         }
2601     }
2602
2603  label_end_of_loop:
2604   if (found)
2605     {
2606       for (reg = 0; reg < 4; reg++)
2607         if (r[reg] >= 0
2608             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
2609           ENCODE_DESIGNATION (r[reg], reg, coding);
2610     }
2611
2612   return dst;
2613 }
2614
2615 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
2616
2617 static void
2618 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
2619      struct coding_system *coding;
2620      unsigned char *source, *destination;
2621      int src_bytes, dst_bytes;
2622 {
2623   unsigned char *src = source;
2624   unsigned char *src_end = source + src_bytes;
2625   unsigned char *dst = destination;
2626   unsigned char *dst_end = destination + dst_bytes;
2627   /* Since the maximum bytes produced by each loop is 20, we subtract 19
2628      from DST_END to assure overflow checking is necessary only at the
2629      head of loop.  */
2630   unsigned char *adjusted_dst_end = dst_end - 19;
2631   /* SRC_BASE remembers the start position in source in each loop.
2632      The loop will be exited when there's not enough source text to
2633      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2634      there's not enough destination area to produce encoded codes
2635      (within macro EMIT_BYTES).  */
2636   unsigned char *src_base;
2637   int c;
2638   Lisp_Object translation_table;
2639   Lisp_Object safe_chars;
2640
2641   if (coding->flags & CODING_FLAG_ISO_SAFE)
2642     coding->mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
2643
2644   safe_chars = coding_safe_chars (coding->symbol);
2645
2646   if (NILP (Venable_character_translation))
2647     translation_table = Qnil;
2648   else
2649     {
2650       translation_table = coding->translation_table_for_encode;
2651       if (NILP (translation_table))
2652         translation_table = Vstandard_translation_table_for_encode;
2653     }
2654
2655   coding->consumed_char = 0;
2656   coding->errors = 0;
2657   while (1)
2658     {
2659       src_base = src;
2660
2661       if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
2662         {
2663           coding->result = CODING_FINISH_INSUFFICIENT_DST;
2664           break;
2665         }
2666
2667       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
2668           && CODING_SPEC_ISO_BOL (coding))
2669         {
2670           /* We have to produce designation sequences if any now.  */
2671           dst = encode_designation_at_bol (coding, translation_table,
2672                                            src, src_end, dst);
2673           CODING_SPEC_ISO_BOL (coding) = 0;
2674         }
2675
2676       /* Check composition start and end.  */
2677       if (coding->composing != COMPOSITION_DISABLED
2678           && coding->cmp_data_start < coding->cmp_data->used)
2679         {
2680           struct composition_data *cmp_data = coding->cmp_data;
2681           int *data = cmp_data->data + coding->cmp_data_start;
2682           int this_pos = cmp_data->char_offset + coding->consumed_char;
2683
2684           if (coding->composing == COMPOSITION_RELATIVE)
2685             {
2686               if (this_pos == data[2])
2687                 {
2688                   ENCODE_COMPOSITION_END (coding, data);
2689                   cmp_data = coding->cmp_data;
2690                   data = cmp_data->data + coding->cmp_data_start;
2691                 }
2692             }
2693           else if (COMPOSING_P (coding))
2694             {
2695               /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHAR  */
2696               if (coding->cmp_data_index == coding->cmp_data_start + data[0])
2697                 /* We have consumed components of the composition.
2698                    What follows in SRC is the composition's base
2699                    text.  */
2700                 ENCODE_COMPOSITION_FAKE_START (coding);
2701               else
2702                 {
2703                   int c = cmp_data->data[coding->cmp_data_index++];
2704                   if (coding->composition_rule_follows)
2705                     {
2706                       ENCODE_COMPOSITION_RULE (c);
2707                       coding->composition_rule_follows = 0;
2708                     }
2709                   else
2710                     {
2711                       if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
2712                           && ! CODING_SAFE_CHAR_P (safe_chars, c))
2713                         ENCODE_UNSAFE_CHARACTER (c);
2714                       else
2715                         ENCODE_ISO_CHARACTER (c);
2716                       if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
2717                         coding->composition_rule_follows = 1;
2718                     }
2719                   continue;
2720                 }
2721             }
2722           if (!COMPOSING_P (coding))
2723             {
2724               if (this_pos == data[1])
2725                 {
2726                   ENCODE_COMPOSITION_START (coding, data);
2727                   continue;
2728                 }
2729             }
2730         }
2731
2732       ONE_MORE_CHAR (c);
2733
2734       /* Now encode the character C.  */
2735       if (c < 0x20 || c == 0x7F)
2736         {
2737           if (c == '\r')
2738             {
2739               if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2740                 {
2741                   if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2742                     ENCODE_RESET_PLANE_AND_REGISTER;
2743                   *dst++ = c;
2744                   continue;
2745                 }
2746               /* fall down to treat '\r' as '\n' ...  */
2747               c = '\n';
2748             }
2749           if (c == '\n')
2750             {
2751               if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
2752                 ENCODE_RESET_PLANE_AND_REGISTER;
2753               if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
2754                 bcopy (coding->spec.iso2022.initial_designation,
2755                        coding->spec.iso2022.current_designation,
2756                        sizeof coding->spec.iso2022.initial_designation);
2757               if (coding->eol_type == CODING_EOL_LF
2758                   || coding->eol_type == CODING_EOL_UNDECIDED)
2759                 *dst++ = ISO_CODE_LF;
2760               else if (coding->eol_type == CODING_EOL_CRLF)
2761                 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
2762               else
2763                 *dst++ = ISO_CODE_CR;
2764               CODING_SPEC_ISO_BOL (coding) = 1;
2765             }
2766           else
2767             {
2768               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2769                 ENCODE_RESET_PLANE_AND_REGISTER;
2770               *dst++ = c;
2771             }
2772         }
2773       else if (ASCII_BYTE_P (c))
2774         ENCODE_ISO_CHARACTER (c);
2775       else if (SINGLE_BYTE_CHAR_P (c))
2776         {
2777           *dst++ = c;
2778           coding->errors++;
2779         }
2780       else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
2781                && ! CODING_SAFE_CHAR_P (safe_chars, c))
2782         ENCODE_UNSAFE_CHARACTER (c);
2783       else
2784         ENCODE_ISO_CHARACTER (c);
2785
2786       coding->consumed_char++;
2787     }
2788
2789  label_end_of_loop:
2790   coding->consumed = src_base - source;
2791   coding->produced = coding->produced_char = dst - destination;
2792 }
2793
2794 \f
2795 /*** 4. SJIS and BIG5 handlers ***/
2796
2797 /* Although SJIS and BIG5 are not ISO coding systems, they are used
2798    quite widely.  So, for the moment, Emacs supports them in the bare
2799    C code.  But, in the future, they may be supported only by CCL.  */
2800
2801 /* SJIS is a coding system encoding three character sets: ASCII, right
2802    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
2803    as is.  A character of charset katakana-jisx0201 is encoded by
2804    "position-code + 0x80".  A character of charset japanese-jisx0208
2805    is encoded in 2-byte but two position-codes are divided and shifted
2806    so that it fits in the range below.
2807
2808    --- CODE RANGE of SJIS ---
2809    (character set)      (range)
2810    ASCII                0x00 .. 0x7F
2811    KATAKANA-JISX0201    0xA1 .. 0xDF
2812    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
2813             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
2814    -------------------------------
2815
2816 */
2817
2818 /* BIG5 is a coding system encoding two character sets: ASCII and
2819    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
2820    character set and is encoded in two bytes.
2821
2822    --- CODE RANGE of BIG5 ---
2823    (character set)      (range)
2824    ASCII                0x00 .. 0x7F
2825    Big5 (1st byte)      0xA1 .. 0xFE
2826         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2827    --------------------------
2828
2829    Since the number of characters in Big5 is larger than maximum
2830    characters in Emacs' charset (96x96), it can't be handled as one
2831    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2832    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2833    contains frequently used characters and the latter contains less
2834    frequently used characters.  */
2835
2836 /* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
2837    are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
2838    C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
2839    format.  CHARSET is `charset_big5_1' or `charset_big5_2'.  */
2840
/* Number of Big5 characters which have the same code in 1st byte:
   94 second bytes in 0xA1..0xFE plus 63 second bytes in 0x40..0x7E.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* Convert the BIG5 byte pair B1, B2 into CHARSET (set to
   `charset_big5_1' or `charset_big5_2') and the position codes C1, C2.
   The linear offset is computed into a temporary before any output is
   stored, so C1/C2 may be the same lvalues as B1/B2.  All arguments
   are parenthesized so that expressions can be passed safely; B1 and
   B2 are evaluated more than once.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = (((b1) - 0xA1) * BIG5_SAME_ROW                                  \
         + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));                         \
    if ((b1) < 0xC9)                                                    \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
      }                                                                 \
    (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
    (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
  } while (0)

/* Inverse of DECODE_BIG5: convert CHARSET and the position codes C1,
   C2 into the BIG5 byte pair B1, B2.  B1/B2 may be the same lvalues as
   C1/C2.  All arguments are parenthesized so that expressions can be
   passed safely; B2 is evaluated more than once.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int temp = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);  \
    if ((charset) == charset_big5_2)                                    \
      temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
    (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
    (b2) = temp % BIG5_SAME_ROW;                                        \
    /* Offsets 0..62 map to 0x40..0x7E, 63..156 to 0xA1..0xFE.  */      \
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
  } while (0)
2868
2869 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2870    Check if a text is encoded in SJIS.  If it is, return
2871    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2872
2873 static int
2874 detect_coding_sjis (src, src_end, multibytep)
2875      unsigned char *src, *src_end;
2876      int multibytep;
2877 {
2878   int c;
2879   /* Dummy for ONE_MORE_BYTE.  */
2880   struct coding_system dummy_coding;
2881   struct coding_system *coding = &dummy_coding;
2882
2883   while (1)
2884     {
2885       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2886       if (c < 0x80)
2887         continue;
2888       if (c == 0x80 || c == 0xA0 || c > 0xEF)
2889         return 0;
2890       if (c <= 0x9F || c >= 0xE0)
2891         {
2892           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2893           if (c < 0x40 || c == 0x7F || c > 0xFC)
2894             return 0;
2895         }
2896     }
2897  label_end_of_loop:
2898   return CODING_CATEGORY_MASK_SJIS;
2899 }
2900
2901 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2902    Check if a text is encoded in BIG5.  If it is, return
2903    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2904
2905 static int
2906 detect_coding_big5 (src, src_end, multibytep)
2907      unsigned char *src, *src_end;
2908      int multibytep;
2909 {
2910   int c;
2911   /* Dummy for ONE_MORE_BYTE.  */
2912   struct coding_system dummy_coding;
2913   struct coding_system *coding = &dummy_coding;
2914
2915   while (1)
2916     {
2917       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2918       if (c < 0x80)
2919         continue;
2920       if (c < 0xA1 || c > 0xFE)
2921         return 0;
2922       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2923       if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE)
2924         return 0;
2925     }
2926  label_end_of_loop:
2927   return CODING_CATEGORY_MASK_BIG5;
2928 }
2929
2930 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2931    Check if a text is encoded in UTF-8.  If it is, return
2932    CODING_CATEGORY_MASK_UTF_8, else return 0.  */
2933
2934 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
2935 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
2936 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
2937 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
2938 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
2939 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
2940 #define UTF_8_6_OCTET_LEADING_P(c) (((c) & 0xFE) == 0xFC)
2941
2942 static int
2943 detect_coding_utf_8 (src, src_end, multibytep)
2944      unsigned char *src, *src_end;
2945      int multibytep;
2946 {
2947   unsigned char c;
2948   int seq_maybe_bytes;
2949   /* Dummy for ONE_MORE_BYTE.  */
2950   struct coding_system dummy_coding;
2951   struct coding_system *coding = &dummy_coding;
2952
2953   while (1)
2954     {
2955       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2956       if (UTF_8_1_OCTET_P (c))
2957         continue;
2958       else if (UTF_8_2_OCTET_LEADING_P (c))
2959         seq_maybe_bytes = 1;
2960       else if (UTF_8_3_OCTET_LEADING_P (c))
2961         seq_maybe_bytes = 2;
2962       else if (UTF_8_4_OCTET_LEADING_P (c))
2963         seq_maybe_bytes = 3;
2964       else if (UTF_8_5_OCTET_LEADING_P (c))
2965         seq_maybe_bytes = 4;
2966       else if (UTF_8_6_OCTET_LEADING_P (c))
2967         seq_maybe_bytes = 5;
2968       else
2969         return 0;
2970
2971       do
2972         {
2973           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2974           if (!UTF_8_EXTRA_OCTET_P (c))
2975             return 0;
2976           seq_maybe_bytes--;
2977         }
2978       while (seq_maybe_bytes > 0);
2979     }
2980
2981  label_end_of_loop:
2982   return CODING_CATEGORY_MASK_UTF_8;
2983 }
2984
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
   Check if a text is encoded in UTF-16 by looking at its first two
   bytes.  If they form a Big Endian byte order mark (0xFE 0xFF),
   return CODING_CATEGORY_MASK_UTF_16_BE; if they form a Little Endian
   one (0xFF 0xFE), return CODING_CATEGORY_MASK_UTF_16_LE; else
   return 0.  */
2990
/* Nonzero if the 16-bit code unit VAL is 0xFFFE or 0xFFFF.  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF))

/* Nonzero if VAL is a high (leading) surrogate, i.e. in the range
   0xD800..0xDBFF.  The top six bits select the range, hence the mask
   0xFC00.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if VAL is a low (trailing) surrogate, i.e. in the range
   0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
3000
3001 static int
3002 detect_coding_utf_16 (src, src_end, multibytep)
3003      unsigned char *src, *src_end;
3004      int multibytep;
3005 {
3006   unsigned char c1, c2;
3007   /* Dummy for ONE_MORE_BYTE_CHECK_MULTIBYTE.  */
3008   struct coding_system dummy_coding;
3009   struct coding_system *coding = &dummy_coding;
3010
3011   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
3012   ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep);
3013
3014   if ((c1 == 0xFF) && (c2 == 0xFE))
3015     return CODING_CATEGORY_MASK_UTF_16_LE;
3016   else if ((c1 == 0xFE) && (c2 == 0xFF))
3017     return CODING_CATEGORY_MASK_UTF_16_BE;
3018
3019  label_end_of_loop:
3020   return 0;
3021 }
3022
/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
   If SJIS_P is 1, decode SJIS text, else decode BIG5 text.  */
3025
3026 static void
3027 decode_coding_sjis_big5 (coding, source, destination,
3028                          src_bytes, dst_bytes, sjis_p)
3029      struct coding_system *coding;
3030      unsigned char *source, *destination;
3031      int src_bytes, dst_bytes;
3032      int sjis_p;
3033 {
3034   unsigned char *src = source;
3035   unsigned char *src_end = source + src_bytes;
3036   unsigned char *dst = destination;
3037   unsigned char *dst_end = destination + dst_bytes;
3038   /* SRC_BASE remembers the start position in source in each loop.
3039      The loop will be exited when there's not enough source code
3040      (within macro ONE_MORE_BYTE), or when there's not enough
3041      destination area to produce a character (within macro
3042      EMIT_CHAR).  */
3043   unsigned char *src_base;
3044   Lisp_Object translation_table;
3045
3046   if (NILP (Venable_character_translation))
3047     translation_table = Qnil;
3048   else
3049     {
3050       translation_table = coding->translation_table_for_decode;
3051       if (NILP (translation_table))
3052         translation_table = Vstandard_translation_table_for_decode;
3053     }
3054
3055   coding->produced_char = 0;
3056   while (1)
3057     {
3058       int c, charset, c1, c2 = 0;
3059
3060       src_base = src;
3061       ONE_MORE_BYTE (c1);
3062
3063       if (c1 < 0x80)
3064         {
3065           charset = CHARSET_ASCII;
3066           if (c1 < 0x20)
3067             {
3068               if (c1 == '\r')
3069                 {
3070                   if (coding->eol_type == CODING_EOL_CRLF)
3071                     {
3072                       ONE_MORE_BYTE (c2);
3073                       if (c2 == '\n')
3074                         c1 = c2;
3075                       else
3076                         /* To process C2 again, SRC is subtracted by 1.  */
3077                         src--;
3078                     }
3079                   else if (coding->eol_type == CODING_EOL_CR)
3080                     c1 = '\n';
3081                 }
3082               else if (c1 == '\n'
3083                        && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3084                        && (coding->eol_type == CODING_EOL_CR
3085                            || coding->eol_type == CODING_EOL_CRLF))
3086                 {
3087                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
3088                   goto label_end_of_loop;
3089                 }
3090             }
3091         }
3092       else
3093         {
3094           if (sjis_p)
3095             {
3096               if (c1 == 0x80 || c1 == 0xA0 || c1 > 0xEF)
3097                 goto label_invalid_code;
3098               if (c1 <= 0x9F || c1 >= 0xE0)
3099                 {
3100                   /* SJIS -> JISX0208 */
3101                   ONE_MORE_BYTE (c2);
3102                   if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
3103                     goto label_invalid_code;
3104                   DECODE_SJIS (c1, c2, c1, c2);
3105                   charset = charset_jisx0208;
3106                 }
3107               else
3108                 /* SJIS -> JISX0201-Kana */
3109                 charset = charset_katakana_jisx0201;
3110             }
3111           else
3112             {
3113               /* BIG5 -> Big5 */
3114               if (c1 < 0xA0 || c1 > 0xFE)
3115                 goto label_invalid_code;
3116               ONE_MORE_BYTE (c2);
3117               if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
3118                 goto label_invalid_code;
3119               DECODE_BIG5 (c1, c2, charset, c1, c2);
3120             }
3121         }
3122
3123       c = DECODE_ISO_CHARACTER (charset, c1, c2);
3124       EMIT_CHAR (c);
3125       continue;
3126
3127     label_invalid_code:
3128       coding->errors++;
3129       src = src_base;
3130       c = *src++;
3131       EMIT_CHAR (c);
3132     }
3133
3134  label_end_of_loop:
3135   coding->consumed = coding->consumed_char = src_base - source;
3136   coding->produced = dst - destination;
3137   return;
3138 }
3139
3140 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
3141    This function can encode charsets `ascii', `katakana-jisx0201',
3142    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
3143    are sure that all these charsets are registered as official charset
3144    (i.e. do not have extended leading-codes).  Characters of other
3145    charsets are produced without any encoding.  If SJIS_P is 1, encode
3146    SJIS text, else encode BIG5 text.  */
3147
3148 static void
3149 encode_coding_sjis_big5 (coding, source, destination,
3150                          src_bytes, dst_bytes, sjis_p)
3151      struct coding_system *coding;
3152      unsigned char *source, *destination;
3153      int src_bytes, dst_bytes;
3154      int sjis_p;
3155 {
3156   unsigned char *src = source;
3157   unsigned char *src_end = source + src_bytes;
3158   unsigned char *dst = destination;
3159   unsigned char *dst_end = destination + dst_bytes;
3160   /* SRC_BASE remembers the start position in source in each loop.
3161      The loop will be exited when there's not enough source text to
3162      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3163      there's not enough destination area to produce encoded codes
3164      (within macro EMIT_BYTES).  */
3165   unsigned char *src_base;
3166   Lisp_Object translation_table;
3167
3168   if (NILP (Venable_character_translation))
3169     translation_table = Qnil;
3170   else
3171     {
3172       translation_table = coding->translation_table_for_encode;
3173       if (NILP (translation_table))
3174         translation_table = Vstandard_translation_table_for_encode;
3175     }
3176
3177   while (1)
3178     {
3179       int c, charset, c1, c2;
3180
3181       src_base = src;
3182       ONE_MORE_CHAR (c);
3183
3184       /* Now encode the character C.  */
3185       if (SINGLE_BYTE_CHAR_P (c))
3186         {
3187           switch (c)
3188             {
3189             case '\r':
3190               if (!(coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
3191                 {
3192                   EMIT_ONE_BYTE (c);
3193                   break;
3194                 }
3195               c = '\n';
3196             case '\n':
3197               if (coding->eol_type == CODING_EOL_CRLF)
3198                 {
3199                   EMIT_TWO_BYTES ('\r', c);
3200                   break;
3201                 }
3202               else if (coding->eol_type == CODING_EOL_CR)
3203                 c = '\r';
3204             default:
3205               EMIT_ONE_BYTE (c);
3206             }
3207         }
3208       else
3209         {
3210           SPLIT_CHAR (c, charset, c1, c2);
3211           if (sjis_p)
3212             {
3213               if (charset == charset_jisx0208
3214                   || charset == charset_jisx0208_1978)
3215                 {
3216                   ENCODE_SJIS (c1, c2, c1, c2);
3217                   EMIT_TWO_BYTES (c1, c2);
3218                 }
3219               else if (charset == charset_katakana_jisx0201)
3220                 EMIT_ONE_BYTE (c1 | 0x80);
3221               else if (charset == charset_latin_jisx0201)
3222                 EMIT_ONE_BYTE (c1);
3223               else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
3224                 {
3225                   EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3226                   if (CHARSET_WIDTH (charset) > 1)
3227                     EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3228                 }
3229               else
3230                 /* There's no way other than producing the internal
3231                    codes as is.  */
3232                 EMIT_BYTES (src_base, src);
3233             }
3234           else
3235             {
3236               if (charset == charset_big5_1 || charset == charset_big5_2)
3237                 {
3238                   ENCODE_BIG5 (charset, c1, c2, c1, c2);
3239                   EMIT_TWO_BYTES (c1, c2);
3240                 }
3241               else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
3242                 {
3243                   EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3244                   if (CHARSET_WIDTH (charset) > 1)
3245                     EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3246                 }
3247               else
3248                 /* There's no way other than producing the internal
3249                    codes as is.  */
3250                 EMIT_BYTES (src_base, src);
3251             }
3252         }
3253       coding->consumed_char++;
3254     }
3255
3256  label_end_of_loop:
3257   coding->consumed = src_base - source;
3258   coding->produced = coding->produced_char = dst - destination;
3259 }
3260
3261 \f
3262 /*** 5. CCL handlers ***/
3263
3264 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3265    Check if a text is encoded in a coding system of which
3266    encoder/decoder are written in CCL program.  If it is, return
3267    CODING_CATEGORY_MASK_CCL, else return 0.  */
3268
3269 static int
3270 detect_coding_ccl (src, src_end, multibytep)
3271      unsigned char *src, *src_end;
3272      int multibytep;
3273 {
3274   unsigned char *valid;
3275   int c;
3276   /* Dummy for ONE_MORE_BYTE.  */
3277   struct coding_system dummy_coding;
3278   struct coding_system *coding = &dummy_coding;
3279
3280   /* No coding system is assigned to coding-category-ccl.  */
3281   if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
3282     return 0;
3283
3284   valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
3285   while (1)
3286     {
3287       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
3288       if (! valid[c])
3289         return 0;
3290     }
3291  label_end_of_loop:
3292   return CODING_CATEGORY_MASK_CCL;
3293 }
3294
3295 \f
3296 /*** 6. End-of-line handlers ***/
3297
3298 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3299
3300 static void
3301 decode_eol (coding, source, destination, src_bytes, dst_bytes)
3302      struct coding_system *coding;
3303      unsigned char *source, *destination;
3304      int src_bytes, dst_bytes;
3305 {
3306   unsigned char *src = source;
3307   unsigned char *dst = destination;
3308   unsigned char *src_end = src + src_bytes;
3309   unsigned char *dst_end = dst + dst_bytes;
3310   Lisp_Object translation_table;
3311   /* SRC_BASE remembers the start position in source in each loop.
3312      The loop will be exited when there's not enough source code
3313      (within macro ONE_MORE_BYTE), or when there's not enough
3314      destination area to produce a character (within macro
3315      EMIT_CHAR).  */
3316   unsigned char *src_base;
3317   int c;
3318
3319   translation_table = Qnil;
3320   switch (coding->eol_type)
3321     {
3322     case CODING_EOL_CRLF:
3323       while (1)
3324         {
3325           src_base = src;
3326           ONE_MORE_BYTE (c);
3327           if (c == '\r')
3328             {
3329               ONE_MORE_BYTE (c);
3330               if (c != '\n')
3331                 {
3332                   src--;
3333                   c = '\r';
3334                 }
3335             }
3336           else if (c == '\n'
3337                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
3338             {
3339               coding->result = CODING_FINISH_INCONSISTENT_EOL;
3340               goto label_end_of_loop;
3341             }
3342           EMIT_CHAR (c);
3343         }
3344       break;
3345
3346     case CODING_EOL_CR:
3347       while (1)
3348         {
3349           src_base = src;
3350           ONE_MORE_BYTE (c);
3351           if (c == '\n')
3352             {
3353               if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3354                 {
3355                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
3356                   goto label_end_of_loop;
3357                 }
3358             }
3359           else if (c == '\r')
3360             c = '\n';
3361           EMIT_CHAR (c);
3362         }
3363       break;
3364
3365     default:                    /* no need for EOL handling */
3366       while (1)
3367         {
3368           src_base = src;
3369           ONE_MORE_BYTE (c);
3370           EMIT_CHAR (c);
3371         }
3372     }
3373
3374  label_end_of_loop:
3375   coding->consumed = coding->consumed_char = src_base - source;
3376   coding->produced = dst - destination;
3377   return;
3378 }
3379
/* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
   the format of end-of-line according to `coding->eol_type'.  It also
   converts multibyte form 8-bit characters to unibyte if
   CODING->src_multibyte is nonzero.  If `coding->mode &
   CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
   also means end-of-line.  */
3386
3387 static void
3388 encode_eol (coding, source, destination, src_bytes, dst_bytes)
3389      struct coding_system *coding;
3390      const unsigned char *source;
3391      unsigned char *destination;
3392      int src_bytes, dst_bytes;
3393 {
3394   const unsigned char *src = source;
3395   unsigned char *dst = destination;
3396   const unsigned char *src_end = src + src_bytes;
3397   unsigned char *dst_end = dst + dst_bytes;
3398   Lisp_Object translation_table;
3399   /* SRC_BASE remembers the start position in source in each loop.
3400      The loop will be exited when there's not enough source text to
3401      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3402      there's not enough destination area to produce encoded codes
3403      (within macro EMIT_BYTES).  */
3404   const unsigned char *src_base;
3405   unsigned char *tmp;
3406   int c;
3407   int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
3408
3409   translation_table = Qnil;
3410   if (coding->src_multibyte
3411       && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL)
3412     {
3413       src_end--;
3414       src_bytes--;
3415       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
3416     }
3417
3418   if (coding->eol_type == CODING_EOL_CRLF)
3419     {
3420       while (src < src_end)
3421         {
3422           src_base = src;
3423           c = *src++;
3424           if (c >= 0x20)
3425             EMIT_ONE_BYTE (c);
3426           else if (c == '\n' || (c == '\r' && selective_display))
3427             EMIT_TWO_BYTES ('\r', '\n');
3428           else
3429             EMIT_ONE_BYTE (c);
3430         }
3431       src_base = src;
3432     label_end_of_loop:
3433       ;
3434     }
3435   else
3436     {
3437       if (!dst_bytes || src_bytes <= dst_bytes)
3438         {
3439           safe_bcopy (src, dst, src_bytes);
3440           src_base = src_end;
3441           dst += src_bytes;
3442         }
3443       else
3444         {
3445           if (coding->src_multibyte
3446               && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL)
3447             dst_bytes--;
3448           safe_bcopy (src, dst, dst_bytes);
3449           src_base = src + dst_bytes;
3450           dst = destination + dst_bytes;
3451           coding->result = CODING_FINISH_INSUFFICIENT_DST;
3452         }
3453       if (coding->eol_type == CODING_EOL_CR)
3454         {
3455           for (tmp = destination; tmp < dst; tmp++)
3456             if (*tmp == '\n') *tmp = '\r';
3457         }
3458       else if (selective_display)
3459         {
3460           for (tmp = destination; tmp < dst; tmp++)
3461             if (*tmp == '\r') *tmp = '\n';
3462         }
3463     }
3464   if (coding->src_multibyte)
3465     dst = destination + str_as_unibyte (destination, dst - destination);
3466
3467   coding->consumed = src_base - source;
3468   coding->produced = dst - destination;
3469   coding->produced_char = coding->produced;
3470 }
3471
3472 \f
3473 /*** 7. C library functions ***/
3474
3475 /* In Emacs Lisp, a coding system is represented by a Lisp symbol which
3476    has a property `coding-system'.  The value of this property is a
3477    vector of length 5 (called the coding-vector).  Among elements of
3478    this vector, the first (element[0]) and the fifth (element[4])
3479    carry important information for decoding/encoding.  Before
3480    decoding/encoding, this information should be set in fields of a
3481    structure of type `coding_system'.
3482
3483    The value of the property `coding-system' can be a symbol of another
3484    subsidiary coding-system.  In that case, Emacs gets coding-vector
3485    from that symbol.
3486
3487    `element[0]' contains information to be set in `coding->type'.  The
3488    value and its meaning is as follows:
3489
3490    0 -- coding_type_emacs_mule
3491    1 -- coding_type_sjis
3492    2 -- coding_type_iso2022
3493    3 -- coding_type_big5
3494    4 -- coding_type_ccl encoder/decoder written in CCL
3495    nil -- coding_type_no_conversion
3496    t -- coding_type_undecided (automatic conversion on decoding,
3497                                no-conversion on encoding)
3498
3499    `element[4]' contains information to be set in `coding->flags' and
3500    `coding->spec'.  The meaning varies by `coding->type'.
3501
3502    If `coding->type' is `coding_type_iso2022', element[4] is a vector
3503    of length 32 (of which the first 13 sub-elements are used now).
3504    Meanings of these sub-elements are:
3505
3506    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
3507         If the value is an integer of valid charset, the charset is
3508         assumed to be designated to graphic register N initially.
3509
3510         If the value is minus, it is a minus value of charset which
3511         reserves graphic register N, which means that the charset is
3512         not designated initially but should be designated to graphic
3513         register N just before encoding a character in that charset.
3514
3515         If the value is nil, graphic register N is never used on
3516         encoding.
3517
3518    sub-element[N] where N is 4 through 11: to be set in `coding->flags'
3519         Each value takes t or nil.  See the section ISO2022 of
3520         `coding.h' for more information.
3521
3522    If `coding->type' is `coding_type_big5', element[4] is t to denote
3523    BIG5-ETen or nil to denote BIG5-HKU.
3524
3525    If `coding->type' takes the other value, element[4] is ignored.
3526
3527    Emacs Lisp's coding systems also carry information about format of
3528    end-of-line in a value of property `eol-type'.  If the value is
3529    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
3530    means CODING_EOL_CR.  If it is not integer, it should be a vector
3531    of subsidiary coding systems of which property `eol-type' has one
3532    of the above values.
3533
3534 */
3535
3536 /* Extract information for decoding/encoding from the Lisp symbol
3537    CODING_SYSTEM and set it in CODING (which is zero-cleared first).
3538    If CODING_SYSTEM is invalid, CODING is set up so that no
3539    conversion is necessary and -1 is returned; otherwise return 0.  */
3540
3541 int
3542 setup_coding_system (coding_system, coding)
3543      Lisp_Object coding_system;
3544      struct coding_system *coding;
3545 {
3546   Lisp_Object coding_spec, coding_type, eol_type, plist;
3547   Lisp_Object val;
3548
3549   /* At first, zero clear all members.  */
3550   bzero (coding, sizeof (struct coding_system));
3551
3552   /* Initialize some fields required for all kinds of coding systems.  */
3553   coding->symbol = coding_system;
3554   coding->heading_ascii = -1;
3555   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
3556   coding->composing = COMPOSITION_DISABLED;
3557   coding->cmp_data = NULL;
3558
3559   if (NILP (coding_system))
3560     goto label_invalid_coding_system;
3561
3562   coding_spec = Fget (coding_system, Qcoding_system);
3563
       /* The spec must be a vector of 5 elements whose element[3], used
          below as the property list, is a cons.  */
3564   if (!VECTORP (coding_spec)
3565       || XVECTOR (coding_spec)->size != 5
3566       || !CONSP (XVECTOR (coding_spec)->contents[3]))
3567     goto label_invalid_coding_system;
3568
       /* Decide the end-of-line format.  A vector value means that the
          format is yet to be detected, 1 means CRLF, and 2 means CR.
          Anything else, including the nil forced by
          inhibit_eol_conversion, selects LF.  */
3569   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
3570   if (VECTORP (eol_type))
3571     {
3572       coding->eol_type = CODING_EOL_UNDECIDED;
3573       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
3574     }
3575   else if (XFASTINT (eol_type) == 1)
3576     {
3577       coding->eol_type = CODING_EOL_CRLF;
3578       coding->common_flags
3579         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3580     }
3581   else if (XFASTINT (eol_type) == 2)
3582     {
3583       coding->eol_type = CODING_EOL_CR;
3584       coding->common_flags
3585         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3586     }
3587   else
3588     coding->eol_type = CODING_EOL_LF;
3589
3590   coding_type = XVECTOR (coding_spec)->contents[0];
3591   /* Try short cut.  A symbol here means `undecided' (if it is t) or
          no conversion (any other symbol), and the rest of the spec is
          not examined.  */
3592   if (SYMBOLP (coding_type))
3593     {
3594       if (EQ (coding_type, Qt))
3595         {
3596           coding->type = coding_type_undecided;
3597           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
3598         }
3599       else
3600         coding->type = coding_type_no_conversion;
3601       /* Initialize this member.  Anything other than
3602          CODING_CATEGORY_IDX_UTF_16_BE and
3603          CODING_CATEGORY_IDX_UTF_16_LE is ok because they have
3604          special treatment in detect_eol.  */
3605       coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
3606
3607       return 0;
3608     }
3609
3610   /* Get values of coding system properties:
3611      `post-read-conversion', `pre-write-conversion',
3612      `translation-table-for-decode', `translation-table-for-encode'.  */
3613   plist = XVECTOR (coding_spec)->contents[3];
3614   /* Pre & post conversion functions should be disabled if
3615      inhibit_pre_post_conversion is nonzero.  This is the case that a
3616      code conversion function is called while those functions are running.  */
3617   if (! inhibit_pre_post_conversion)
3618     {
3619       coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
3620       coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
3621     }
       /* A translation table may be given directly or as a symbol whose
          property holds it.  A value that is not a char-table is
          replaced by nil.  */
3622   val = Fplist_get (plist, Qtranslation_table_for_decode);
3623   if (SYMBOLP (val))
3624     val = Fget (val, Qtranslation_table_for_decode);
3625   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
3626   val = Fplist_get (plist, Qtranslation_table_for_encode);
3627   if (SYMBOLP (val))
3628     val = Fget (val, Qtranslation_table_for_encode);
3629   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
       /* The Qcoding_category property is mandatory, and its value must
          have an integer Qcoding_category_index property.  Otherwise
          the coding system is treated as invalid.  */
3630   val = Fplist_get (plist, Qcoding_category);
3631   if (!NILP (val))
3632     {
3633       val = Fget (val, Qcoding_category_index);
3634       if (INTEGERP (val))
3635         coding->category_idx = XINT (val);
3636       else
3637         goto label_invalid_coding_system;
3638     }
3639   else
3640     goto label_invalid_coding_system;
3641
3642   /* If the coding system has non-nil `composition' property, enable
3643      composition handling.  */
3644   val = Fplist_get (plist, Qcomposition);
3645   if (!NILP (val))
3646     coding->composing = COMPOSITION_NO;
3647
       /* Element[0] of the spec selects the type.  NOTE(review): it is
          known not to be a symbol here, but it is not checked with
          INTEGERP before XFASTINT is applied -- confirm that this is
          safe for malformed specs.  */
3648   switch (XFASTINT (coding_type))
3649     {
3650     case 0:
3651       coding->type = coding_type_emacs_mule;
3652       coding->common_flags
3653         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
           /* NOTE(review): both masks were just set above, so the two
              conditional ORs below leave common_flags unchanged.  */
3654       if (!NILP (coding->post_read_conversion))
3655         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
3656       if (!NILP (coding->pre_write_conversion))
3657         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
3658       break;
3659
3660     case 1:
3661       coding->type = coding_type_sjis;
3662       coding->common_flags
3663         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3664       break;
3665
3666     case 2:
3667       coding->type = coding_type_iso2022;
3668       coding->common_flags
3669         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3670       {
3671         Lisp_Object val, temp;
3672         Lisp_Object *flags;
3673         int i, charset, reg_bits = 0;
3674
3675         val = XVECTOR (coding_spec)->contents[4];
3676
             /* Element[4] of the spec must be a vector of 32 elements.  */
3677         if (!VECTORP (val) || XVECTOR (val)->size != 32)
3678           goto label_invalid_coding_system;
3679
             /* Each non-nil element from FLAGS[4] through FLAGS[16]
                turns on one bit of coding->flags.  */
3680         flags = XVECTOR (val)->contents;
3681         coding->flags
3682           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
3683              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
3684              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
3685              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
3686              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
3687              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
3688              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
3689              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
3690              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
3691              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
3692              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3693              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
3694              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
3695              );
3696
3697         /* Invoke graphic register 0 to plane 0.  */
3698         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
3699         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
3700         CODING_SPEC_ISO_INVOCATION (coding, 1)
3701           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
3702         /* Not single shifting at first.  */
3703         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
3704         /* Beginning of buffer should also be regarded as bol. */
3705         CODING_SPEC_ISO_BOL (coding) = 1;
3706
             /* Start with 255 as the revision number of every charset,
                then take the numbers given in Vcharset_revision_alist.
                An entry is used only if get_charset_id accepts its car
                and its cdr is an integer I with 0 <= I and
                I + '@' < 128.  */
3707         for (charset = 0; charset <= MAX_CHARSET; charset++)
3708           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
3709         val = Vcharset_revision_alist;
3710         while (CONSP (val))
3711           {
3712             charset = get_charset_id (Fcar_safe (XCAR (val)));
3713             if (charset >= 0
3714                 && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
3715                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
3716               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
3717             val = XCDR (val);
3718           }
3719
3720         /* Check FLAGS[REG] (REG = 0, 1, 2, 3) and decide designations.
3721            FLAGS[REG] can be one of below:
3722                 integer CHARSET: CHARSET occupies register REG,
3723                 t: designate nothing to REG initially, but can be used
3724                   by any charsets,
3725                 list of integer, nil, or t: designate the first
3726                   element (if integer) to REG initially, the remaining
3727                   elements (if integer) are designated to REG on request,
3728                   if an element is t, REG can be used by any charsets,
3729                 nil: REG is never used.
                Where an integer CHARSET is expected, a value that
                get_charset_id maps to a charset is accepted too.  */
3730         for (charset = 0; charset <= MAX_CHARSET; charset++)
3731           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3732             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
3733         for (i = 0; i < 4; i++)
3734           {
3735             if ((INTEGERP (flags[i])
3736                  && (charset = XINT (flags[i]), CHARSET_VALID_P (charset)))
3737                 || (charset = get_charset_id (flags[i])) >= 0)
3738               {
3739                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3740                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
3741               }
3742             else if (EQ (flags[i], Qt))
3743               {
3744                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3745                 reg_bits |= 1 << i;
3746                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3747               }
3748             else if (CONSP (flags[i]))
3749               {
3750                 Lisp_Object tail;
3751                 tail = flags[i];
3752
3753                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3754                 if ((INTEGERP (XCAR (tail))
3755                      && (charset = XINT (XCAR (tail)),
3756                          CHARSET_VALID_P (charset)))
3757                     || (charset = get_charset_id (XCAR (tail))) >= 0)
3758                   {
3759                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3760                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
3761                   }
3762                 else
3763                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3764                 tail = XCDR (tail);
3765                 while (CONSP (tail))
3766                   {
3767                     if ((INTEGERP (XCAR (tail))
3768                          && (charset = XINT (XCAR (tail)),
3769                              CHARSET_VALID_P (charset)))
3770                         || (charset = get_charset_id (XCAR (tail))) >= 0)
3771                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3772                         = i;
3773                     else if (EQ (XCAR (tail), Qt))
3774                       reg_bits |= 1 << i;
3775                     tail = XCDR (tail);
3776                   }
3777               }
3778             else
3779               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3780
3781             CODING_SPEC_ISO_DESIGNATION (coding, i)
3782               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3783           }
3784
             /* REG_BITS now has bit I set for each register I that FLAGS
                marks with t, i.e. one that any charset may use.  Unless
                locking shift is available, drop the registers that
                cannot be reached.  */
3785         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3786           {
3787             /* REG 1 can be used only by locking shift in 7-bit env.  */
3788             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3789               reg_bits &= ~2;
3790             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3791               /* Without any shifting, only REG 0 and 1 can be used.  */
3792               reg_bits &= 3;
3793           }
3794
3795         if (reg_bits)
3796           for (charset = 0; charset <= MAX_CHARSET; charset++)
3797             {
3798               if (CHARSET_DEFINED_P (charset)
3799                   && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3800                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
3801                 {
3802                   /* There exist some default graphic registers to be
3803                      used by CHARSET.  */
3804
3805                   /* We had better avoid designating a charset of
3806                      CHARS96 to REG 0 as far as possible.  */
3807                   if (CHARSET_CHARS (charset) == 96)
3808                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3809                       = (reg_bits & 2
3810                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3811                   else
3812                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3813                       = (reg_bits & 1
3814                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3815                 }
3816             }
3817       }
3818       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3819       coding->spec.iso2022.last_invalid_designation_register = -1;
3820       break;
3821
3822     case 3:
3823       coding->type = coding_type_big5;
3824       coding->common_flags
3825         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
           /* Element[4] of the spec: nil selects BIG5-HKU, anything else
              BIG5-ETen.  */
3826       coding->flags
3827         = (NILP (XVECTOR (coding_spec)->contents[4])
3828            ? CODING_FLAG_BIG5_HKU
3829            : CODING_FLAG_BIG5_ETEN);
3830       break;
3831
3832     case 4:
3833       coding->type = coding_type_ccl;
3834       coding->common_flags
3835         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3836       {
             /* Element[4] of the spec must be a cons whose car sets up
                the decoder and whose cdr sets up the encoder.  */
3837         val = XVECTOR (coding_spec)->contents[4];
3838         if (! CONSP (val)
3839             || setup_ccl_program (&(coding->spec.ccl.decoder),
3840                                   XCAR (val)) < 0
3841             || setup_ccl_program (&(coding->spec.ccl.encoder),
3842                                   XCDR (val)) < 0)
3843           goto label_invalid_coding_system;
3844
             /* Mark the byte values listed in the Qvalid_codes property.
                An element is either an integer or a cons (START . END)
                giving an inclusive range.  Elements that are malformed
                or outside 0..255 are ignored.  */
3845         bzero (coding->spec.ccl.valid_codes, 256);
3846         val = Fplist_get (plist, Qvalid_codes);
3847         if (CONSP (val))
3848           {
3849             Lisp_Object this;
3850
3851             for (; CONSP (val); val = XCDR (val))
3852               {
3853                 this = XCAR (val);
3854                 if (INTEGERP (this)
3855                     && XINT (this) >= 0 && XINT (this) < 256)
3856                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
3857                 else if (CONSP (this)
3858                          && INTEGERP (XCAR (this))
3859                          && INTEGERP (XCDR (this)))
3860                   {
3861                     int start = XINT (XCAR (this));
3862                     int end = XINT (XCDR (this));
3863
3864                     if (start >= 0 && start <= end && end < 256)
3865                       while (start <= end)
3866                         coding->spec.ccl.valid_codes[start++] = 1;
3867                   }
3868               }
3869           }
3870       }
3871       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3872       coding->spec.ccl.cr_carryover = 0;
3873       coding->spec.ccl.eight_bit_carryover[0] = 0;
3874       break;
3875
3876     case 5:
3877       coding->type = coding_type_raw_text;
3878       break;
3879
3880     default:
3881       goto label_invalid_coding_system;
3882     }
3883   return 0;
3884
       /* Fall back to a setup that does no conversion at all.  */
3885  label_invalid_coding_system:
3886   coding->type = coding_type_no_conversion;
3887   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3888   coding->common_flags = 0;
3889   coding->eol_type = CODING_EOL_LF;
3890   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3891   return -1;
3892 }
3893
3894 /* Release all memory blocks that hold CODING's composition data.  */
3895
3896 void
3897 coding_free_composition_data (coding)
3898      struct coding_system *coding;
3899 {
3900   struct composition_data *block = coding->cmp_data;
3901   struct composition_data *following;
3902
3903   if (block == NULL)
3904     return;
3905   /* The blocks are chained in both directions.  Follow the `prev'
3906      links back to the first block, then free forward along `next'.  */
3907   for (; block->prev; block = block->prev)
3908     ;
3909   for (; block != NULL; block = following)
3910     {
3911       following = block->next;
3912       xfree (block);
3913     }
3914   coding->cmp_data = NULL;
3915 }
3916
3917 /* Store POS in the `char_offset' member of every memory block
3918    reachable from coding->cmp_data by following `next' links.  */
3919
3920 void
3921 coding_adjust_composition_offset (coding, pos)
3922      struct coding_system *coding;
3923      int pos;
3924 {
3925   struct composition_data *block = coding->cmp_data;
3926
3927   while (block)
3928     block->char_offset = pos, block = block->next;
3929 }
3930
3931 /* Turn CODING into raw-text, or into the subsidiary of raw-text that
3932    matches the end-of-line format already recorded in CODING's
3933    eol_type.  CODING must have been set up for some coding system
3934    beforehand.  Nothing is done if CODING is already raw-text.  */
3935
3936 void
3937 setup_raw_text_coding_system (coding)
3938      struct coding_system *coding;
3939 {
3940   Lisp_Object eol_variants;
3941
3942   if (coding->type == coding_type_raw_text)
3943     return;
3944
3945   coding->symbol = Qraw_text;
3946   coding->type = coding_type_raw_text;
3947   if (coding->eol_type != CODING_EOL_UNDECIDED)
3948     {
3949       /* When raw-text's `eol-type' property is a vector of three
3950          subsidiaries, pick the one indexed by the known eol_type.  */
3951       eol_variants = Fget (Qraw_text, Qeol_type);
3952       if (VECTORP (eol_variants) && XVECTOR (eol_variants)->size == 3)
3953         coding->symbol
3954           = XVECTOR (eol_variants)->contents[coding->eol_type];
3955     }
3956   setup_coding_system (coding->symbol, coding);
3957 }
3958
3959 /* Emacs has a mechanism to automatically detect a coding system if it
3960    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3961    it's impossible to distinguish some coding systems accurately
3962    because they use the same range of codes.  So, at first, coding
3963    systems are categorized into the following categories:
3964
3965    o coding-category-emacs-mule
3966
3967         The category for a coding system which has the same code range
3968         as Emacs' internal format.  Assigned the coding-system (Lisp
3969         symbol) `emacs-mule' by default.
3970
3971    o coding-category-sjis
3972
3973         The category for a coding system which has the same code range
3974         as SJIS.  Assigned the coding-system (Lisp
3975         symbol) `japanese-shift-jis' by default.
3976
3977    o coding-category-iso-7
3978
3979         The category for a coding system which has the same code range
3980         as ISO2022 of 7-bit environment.  This doesn't use any locking
3981         shift and single shift functions.  This can encode/decode all
3982         charsets.  Assigned the coding-system (Lisp symbol)
3983         `iso-2022-7bit' by default.
3984
3985    o coding-category-iso-7-tight
3986
3987         Same as coding-category-iso-7 except that this can
3988         encode/decode only the specified charsets.
3989
3990    o coding-category-iso-8-1
3991
3992         The category for a coding system which has the same code range
3993         as ISO2022 of 8-bit environment and graphic plane 1 used only
3994         for DIMENSION1 charset.  This doesn't use any locking shift
3995         and single shift functions.  Assigned the coding-system (Lisp
3996         symbol) `iso-latin-1' by default.
3997
3998    o coding-category-iso-8-2
3999
4000         The category for a coding system which has the same code range
4001         as ISO2022 of 8-bit environment and graphic plane 1 used only
4002         for DIMENSION2 charset.  This doesn't use any locking shift
4003         and single shift functions.  Assigned the coding-system (Lisp
4004         symbol) `japanese-iso-8bit' by default.
4005
4006    o coding-category-iso-7-else
4007
4008         The category for a coding system which has the same code range
4009         as ISO2022 of 7-bit environment but uses locking shift or
4010         single shift functions.  Assigned the coding-system (Lisp
4011         symbol) `iso-2022-7bit-lock' by default.
4012
4013    o coding-category-iso-8-else
4014
4015         The category for a coding system which has the same code range
4016         as ISO2022 of 8-bit environment but uses locking shift or
4017         single shift functions.  Assigned the coding-system (Lisp
4018         symbol) `iso-2022-8bit-ss2' by default.
4019
4020    o coding-category-big5
4021
4022         The category for a coding system which has the same code range
4023         as BIG5.  Assigned the coding-system (Lisp symbol)
4024         `cn-big5' by default.
4025
4026    o coding-category-utf-8
4027
4028         The category for a coding system which has the same code range
4029         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
4030         symbol) `utf-8' by default.
4031
4032    o coding-category-utf-16-be
4033
4034         The category for a coding system in which a text has an
4035         Unicode signature (cf. Unicode Standard) in the order of BIG
4036         endian at the head.  Assigned the coding-system (Lisp symbol)
4037         `utf-16-be' by default.
4038
4039    o coding-category-utf-16-le
4040
4041         The category for a coding system in which a text has an
4042         Unicode signature (cf. Unicode Standard) in the order of
4043         LITTLE endian at the head.  Assigned the coding-system (Lisp
4044         symbol) `utf-16-le' by default.
4045
4046    o coding-category-ccl
4047
4048         The category for a coding system of which encoder/decoder is
4049         written in CCL programs.  The default value is nil, i.e., no
4050         coding system is assigned.
4051
4052    o coding-category-binary
4053
4054         The category for a coding system not categorized in any of the
4055         above.  Assigned the coding-system (Lisp symbol)
4056         `no-conversion' by default.
4057
4058    Each of them is a Lisp symbol and the value is an actual
4059    `coding-system' (this is also a Lisp symbol) assigned by a user.
4060    What Emacs does actually is to detect a category of coding system.
4061    Then, it uses a `coding-system' assigned to it.  If Emacs can't
4062    decide a single possible category, it selects a category of the
4063    highest priority.  Priorities of categories are also specified by a
4064    user in a Lisp variable `coding-category-list'.
4065
4066 */
4067
     /* Table indexed by a byte value.  detect_coding_mask skips the
        bytes whose entry is nonzero while scanning the head of a text,
        and rewrites the entries for ISO_CODE_SO, ISO_CODE_SI, and
        ISO_CODE_ESC on each call.  NOTE(review): the other entries are
        presumably initialized elsewhere in this file -- confirm.  */
4068 static
4069 int ascii_skip_code[256];
4070
4071 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
4072    If it detects possible coding systems, return an integer in which
4073    appropriate flag bits are set.  Flag bits are defined by macros
4074    CODING_CATEGORY_MASK_XXX in `coding.h'.  If PRIORITIES is non-NULL,
4075    it should point to the table `coding_priorities'.  In that case, only
4076    the flag bit for a coding system of the highest priority is set in
4077    the returned value.  If MULTIBYTEP is nonzero, 8-bit codes of the
4078    range 0x80..0x9F are in multibyte form.  Return 0 if the text has
        nothing but the codes that are skipped as ASCII.
4079
4080    How many ASCII characters are at the head is returned as *SKIP.  */
4081
4082 static int
4083 detect_coding_mask (source, src_bytes, priorities, skip, multibytep)
4084      unsigned char *source;
4085      int src_bytes, *priorities, *skip;
4086      int multibytep;
4087 {
4088   register unsigned char c;
4089   unsigned char *src = source, *src_end = source + src_bytes;
4090   unsigned int mask, utf16_examined_p, iso2022_examined_p;
4091   int i;
4092
4093   /* At first, skip all ASCII characters and control characters except
4094      for three ISO2022 specific control characters.  */
4095   ascii_skip_code[ISO_CODE_SO] = 0;
4096   ascii_skip_code[ISO_CODE_SI] = 0;
4097   ascii_skip_code[ISO_CODE_ESC] = 0;
4098
4099  label_loop_detect_coding:
4100   while (src < src_end && ascii_skip_code[*src]) src++;
4101   *skip = src - source;
4102
4103   if (src >= src_end)
4104     /* We found nothing other than ASCII.  There's nothing to do.  */
4105     return 0;
4106
4107   c = *src;
4108   /* The text seems to be encoded in some multilingual coding system.
4109      Now, try to find in which coding system the text is encoded.  */
4110   if (c < 0x80)
4111     {
4112       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
4113       /* C is an ISO2022 specific control code of C0.  */
4114       mask = detect_coding_iso2022 (src, src_end, multibytep);
4115       if (mask == 0)
4116         {
4117           /* No valid ISO2022 code follows C.  Try again, this time
                  skipping this kind of control code like ASCII so that
                  the scan does not stop at it any more.  */
4118           src++;
4119           if (c == ISO_CODE_ESC)
4120             ascii_skip_code[ISO_CODE_ESC] = 1;
4121           else
4122             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
4123           goto label_loop_detect_coding;
4124         }
           /* With PRIORITIES, return the first entry of the table that
              shares a bit with MASK, or raw-text if there is none.  */
4125       if (priorities)
4126         {
4127           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
4128             {
4129               if (mask & priorities[i])
4130                 return priorities[i];
4131             }
4132           return CODING_CATEGORY_MASK_RAW_TEXT;
4133         }
4134     }
4135   else
4136     {
4137       int try;
4138
           /* Presumably this recovers the 8-bit control code that follows
              its leading code in multibyte form.  NOTE(review): src[1] is
              read without checking that it lies before SRC_END --
              confirm that callers guarantee this.  */
4139       if (multibytep && c == LEADING_CODE_8_BIT_CONTROL)
4140         c = src[1] - 0x20;
4141
4142       if (c < 0xA0)
4143         {
4144           /* C is the first byte of SJIS character code,
4145              or a leading-code of Emacs' internal format (emacs-mule),
4146              or the first byte of UTF-16.  */
4147           try = (CODING_CATEGORY_MASK_SJIS
4148                   | CODING_CATEGORY_MASK_EMACS_MULE
4149                   | CODING_CATEGORY_MASK_UTF_16_BE
4150                   | CODING_CATEGORY_MASK_UTF_16_LE);
4151
4152           /* Or, if C is a special latin extra code,
4153              or is an ISO2022 specific control code of C1 (SS2 or SS3),
4154              or is an ISO2022 control-sequence-introducer (CSI),
4155              we should also consider the possibility of ISO2022 codings.
                  NOTE(review): SRC has not been advanced past C here, so
                  *SRC in the CSI test below is the byte C was read from,
                  not the byte that follows it -- confirm whether the
                  following byte was intended.  */
4156           if ((VECTORP (Vlatin_extra_code_table)
4157                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
4158               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
4159               || (c == ISO_CODE_CSI
4160                   && (src < src_end
4161                       && (*src == ']'
4162                           || ((*src == '0' || *src == '1' || *src == '2')
4163                               && src + 1 < src_end
4164                               && src[1] == ']')))))
4165             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
4166                      | CODING_CATEGORY_MASK_ISO_8BIT);
4167         }
4168       else
4169         /* C is a character of ISO2022 in graphic plane right,
4170            or a SJIS's 1-byte character code (i.e. JISX0201),
4171            or the first byte of BIG5's 2-byte code,
4172            or the first byte of UTF-8/16.  */
4173         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
4174                 | CODING_CATEGORY_MASK_ISO_8BIT
4175                 | CODING_CATEGORY_MASK_SJIS
4176                 | CODING_CATEGORY_MASK_BIG5
4177                 | CODING_CATEGORY_MASK_UTF_8
4178                 | CODING_CATEGORY_MASK_UTF_16_BE
4179                 | CODING_CATEGORY_MASK_UTF_16_LE);
4180
4181       /* Or, we may have to consider the possibility of CCL.  */
4182       if (coding_system_table[CODING_CATEGORY_IDX_CCL]
4183           && (coding_system_table[CODING_CATEGORY_IDX_CCL]
4184               ->spec.ccl.valid_codes)[c])
4185         try |= CODING_CATEGORY_MASK_CCL;
4186
4187       mask = 0;
4188       utf16_examined_p = iso2022_examined_p = 0;
4189       if (priorities)
4190         {
               /* Run the detectors in the order of PRIORITIES and stop at
                  the first entry whose bit ends up in MASK.  The ISO2022
                  and the UTF-16 detectors are each run at most once.  */
4191           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
4192             {
4193               if (!iso2022_examined_p
4194                   && (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
4195                 {
4196                   mask |= detect_coding_iso2022 (src, src_end, multibytep);
4197                   iso2022_examined_p = 1;
4198                 }
4199               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
4200                 mask |= detect_coding_sjis (src, src_end, multibytep);
4201               else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8)
4202                 mask |= detect_coding_utf_8 (src, src_end, multibytep);
4203               else if (!utf16_examined_p
4204                        && (priorities[i] & try &
4205                            CODING_CATEGORY_MASK_UTF_16_BE_LE))
4206                 {
4207                   mask |= detect_coding_utf_16 (src, src_end, multibytep);
4208                   utf16_examined_p = 1;
4209                 }
4210               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
4211                 mask |= detect_coding_big5 (src, src_end, multibytep);
4212               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
4213                 mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4214               else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
4215                 mask |= detect_coding_ccl (src, src_end, multibytep);
4216               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
4217                 mask |= CODING_CATEGORY_MASK_RAW_TEXT;
4218               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
4219                 mask |= CODING_CATEGORY_MASK_BINARY;
4220               if (mask & priorities[i])
4221                 return priorities[i];
4222             }
4223           return CODING_CATEGORY_MASK_RAW_TEXT;
4224         }
           /* Without PRIORITIES, run every detector selected by TRY and
              collect all the bits.  */
4225       if (try & CODING_CATEGORY_MASK_ISO)
4226         mask |= detect_coding_iso2022 (src, src_end, multibytep);
4227       if (try & CODING_CATEGORY_MASK_SJIS)
4228         mask |= detect_coding_sjis (src, src_end, multibytep);
4229       if (try & CODING_CATEGORY_MASK_BIG5)
4230         mask |= detect_coding_big5 (src, src_end, multibytep);
4231       if (try & CODING_CATEGORY_MASK_UTF_8)
4232         mask |= detect_coding_utf_8 (src, src_end, multibytep);
4233       if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE)
4234         mask |= detect_coding_utf_16 (src, src_end, multibytep);
4235       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
4236         mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4237       if (try & CODING_CATEGORY_MASK_CCL)
4238         mask |= detect_coding_ccl (src, src_end, multibytep);
4239     }
       /* This point is reached only without PRIORITIES.  Raw-text and
          binary are always added to the detected categories.  */
4240   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
4241 }
4242
4243 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded,
4244    and set CODING up for the coding system found.  If no category is
4245    detected, only the heading_ascii member of CODING is updated.  */
4246
4247 void
4248 detect_coding (coding, src, src_bytes)
4249      struct coding_system *coding;
4250      const unsigned char *src;
4251      int src_bytes;
4252 {
4253   Lisp_Object detected, subsidiaries;
4254   unsigned int slot;
4255   int head_len, found;
4256   int saved_src_multibyte, saved_dst_multibyte;
4257
4258   detected = Vcoding_category_list;
4259   found = detect_coding_mask (src, src_bytes, coding_priorities, &head_len,
4260                               coding->src_multibyte);
4261   coding->heading_ascii = head_len;
4262   if (found == 0)
4263     return;
4264
4265   /* FOUND holds the flag bit of the single category of the highest
4266      priority.  Convert it to the position of that bit.  */
4267   for (slot = 0; found && ! (found & 1); slot++)
4268     found >>= 1;
4269   if (found == 0)
4270     slot = CODING_CATEGORY_IDX_RAW_TEXT;
4271   detected = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[slot]);
4272
4273   /* If the end-of-line format is already known and DETECTED has
4274      subsidiaries, use the subsidiary for that format.  */
4275   if (coding->eol_type != CODING_EOL_UNDECIDED)
4276     {
4277       subsidiaries = Fget (detected, Qeol_type);
4278       if (VECTORP (subsidiaries))
4279         detected = XVECTOR (subsidiaries)->contents[coding->eol_type];
4280     }
4281
4282   /* Setting up the new coding system clears CODING, so save the
4283      slots to be preserved and restore them afterwards.  */
4284   saved_src_multibyte = coding->src_multibyte;
4285   saved_dst_multibyte = coding->dst_multibyte;
4286   setup_coding_system (detected, coding);
4287   coding->src_multibyte = saved_src_multibyte;
4288   coding->dst_multibyte = saved_dst_multibyte;
4289   coding->heading_ascii = head_len;
4290 }
4291
4292 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
4293    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
4294    CODING_EOL_CR, CODING_EOL_INCONSISTENT, and CODING_EOL_UNDECIDED.
4295    The number of bytes preceding the first end-of-line (SRC_BYTES if
4296    there is none) is returned as *SKIP.  */
4297
4298 #define MAX_EOL_CHECK_COUNT 3
4299
4300 static int
4301 detect_eol_type (source, src_bytes, skip)
4302      unsigned char *source;
4303      int src_bytes, *skip;
4304 {
4305   unsigned char *src = source, *src_end = src + src_bytes;
4306   unsigned char c;
4307   int total = 0;                /* How many end-of-lines are found so far.  */
4308   int eol_type = CODING_EOL_UNDECIDED;
4309   int this_eol_type;
4310
4311   /* Until an end-of-line is seen, the whole text counts as the head.  */
4312   *skip = src_bytes;
4313   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
4314     {
4315       c = *src++;
4316       if (c == '\n' || c == '\r')
4317         {
4318           /* Test TOTAL rather than *SKIP to spot the first
4319              end-of-line: offset 0 is a valid value of *SKIP.  */
4320           if (total == 0)
4321             *skip = src - 1 - source;
4322           total++;
4323           if (c == '\n')
4324             this_eol_type = CODING_EOL_LF;
4325           else if (src >= src_end || *src != '\n')
4326             this_eol_type = CODING_EOL_CR;
4327           else
4328             this_eol_type = CODING_EOL_CRLF, src++;
4329
4330           if (eol_type == CODING_EOL_UNDECIDED)
4331             /* This is the first end-of-line.  */
4332             eol_type = this_eol_type;
4333           else if (eol_type != this_eol_type)
4334             {
4335               /* The found type is different from what found before.  */
4336               eol_type = CODING_EOL_INCONSISTENT;
4337               break;
4338             }
4339         }
4340     }
4341
4342   return eol_type;
4343 }
4344
4345 /* Like detect_eol_type, but detect EOL type in 2-octet
4346    big-endian/little-endian format for coding systems utf-16-be and
4347    utf-16-le.  */
4348
4349 static int
4350 detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
4351      unsigned char *source;
4352      int src_bytes, *skip, big_endian_p;
4353 {
4354   unsigned char *src = source, *src_end = src + src_bytes;
4355   unsigned int c1, c2;
4356   int total = 0;                /* How many end-of-lines are found so far.  */
4357   int eol_type = CODING_EOL_UNDECIDED;
4358   int this_eol_type;
4359   int msb, lsb;
4360
4361   if (big_endian_p)
4362     msb = 0, lsb = 1;
4363   else
4364     msb = 1, lsb = 0;
4365
4366   *skip = 0;
4367
4368   while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT)
4369     {
4370       c1 = (src[msb] << 8) | (src[lsb]);
4371       src += 2;
4372
4373       if (c1 == '\n' || c1 == '\r')
4374         {
4375           if (*skip == 0)
4376             *skip = src - 2 - source;
4377           total++;
4378           if (c1 == '\n')
4379             {
4380               this_eol_type = CODING_EOL_LF;
4381             }
4382           else
4383             {
4384               if ((src + 1) >= src_end)
4385                 {
4386                   this_eol_type = CODING_EOL_CR;
4387                 }
4388               else
4389                 {
4390                   c2 = (src[msb] << 8) | (src[lsb]);
4391                   if (c2 == '\n')
4392                     this_eol_type = CODING_EOL_CRLF, src += 2;
4393                   else
4394                     this_eol_type = CODING_EOL_CR;
4395                 }
4396             }
4397
4398           if (eol_type == CODING_EOL_UNDECIDED)
4399             /* This is the first end-of-line.  */
4400             eol_type = this_eol_type;
4401           else if (eol_type != this_eol_type)
4402             {
4403               /* The found type is different from what found before.  */
4404               eol_type = CODING_EOL_INCONSISTENT;
4405               break;
4406             }
4407         }
4408     }
4409
4410   if (*skip == 0)
4411     *skip = src_end - source;
4412   return eol_type;
4413 }
4414
4415 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
4416    is encoded.  If it detects an appropriate format of end-of-line, it
4417    sets the information in *CODING.  */
4418
4419 void
4420 detect_eol (coding, src, src_bytes)
4421      struct coding_system *coding;
4422      const unsigned char *src;
4423      int src_bytes;
4424 {
4425   Lisp_Object val;
4426   int skip;
4427   int eol_type;
4428
4429   switch (coding->category_idx)
4430     {
4431     case CODING_CATEGORY_IDX_UTF_16_BE:
4432       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 1);
4433       break;
4434     case CODING_CATEGORY_IDX_UTF_16_LE:
4435       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 0);
4436       break;
4437     default:
4438       eol_type = detect_eol_type (src, src_bytes, &skip);
4439       break;
4440     }
4441
4442   if (coding->heading_ascii > skip)
4443     coding->heading_ascii = skip;
4444   else
4445     skip = coding->heading_ascii;
4446
4447   if (eol_type == CODING_EOL_UNDECIDED)
4448     return;
4449   if (eol_type == CODING_EOL_INCONSISTENT)
4450     {
4451 #if 0
4452       /* This code is suppressed until we find a better way to
4453          distinguish raw text file and binary file.  */
4454
4455       /* If we have already detected that the coding is raw-text, the
4456          coding should actually be no-conversion.  */
4457       if (coding->type == coding_type_raw_text)
4458         {
4459           setup_coding_system (Qno_conversion, coding);
4460           return;
4461         }
4462       /* Else, let's decode only text code anyway.  */
4463 #endif /* 0 */
4464       eol_type = CODING_EOL_LF;
4465     }
4466
4467   val = Fget (coding->symbol, Qeol_type);
4468   if (VECTORP (val) && XVECTOR (val)->size == 3)
4469     {
4470       int src_multibyte = coding->src_multibyte;
4471       int dst_multibyte = coding->dst_multibyte;
4472       struct composition_data *cmp_data = coding->cmp_data;
4473
4474       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
4475       coding->src_multibyte = src_multibyte;
4476       coding->dst_multibyte = dst_multibyte;
4477       coding->heading_ascii = skip;
4478       coding->cmp_data = cmp_data;
4479     }
4480 }
4481
/* Number of bytes added to every conversion buffer size estimate on
   top of the magnified source size.  */
#define CONVERSION_BUFFER_EXTRA_ROOM 256

/* Factor by which the source size is multiplied to estimate the size
   of a decoding buffer for CODING (a pointer to struct
   coding_system): 3 for ISO 2022, the CCL decoder's buf_magnification
   for CCL based coding systems, and 2 for everything else.

   CODING is parenthesized so that any pointer expression (e.g. `p + 1'
   or `&cs') can be passed; it may be evaluated more than once.  */
#define DECODING_BUFFER_MAG(coding)                     \
  ((coding)->type == coding_type_iso2022                \
   ? 3                                                  \
   : ((coding)->type == coding_type_ccl                 \
      ? (coding)->spec.ccl.decoder.buf_magnification    \
      : 2))
4490
4491 /* Return maximum size (bytes) of a buffer enough for decoding
4492    SRC_BYTES of text encoded in CODING.  */
4493
4494 int
4495 decoding_buffer_size (coding, src_bytes)
4496      struct coding_system *coding;
4497      int src_bytes;
4498 {
4499   return (src_bytes * DECODING_BUFFER_MAG (coding)
4500           + CONVERSION_BUFFER_EXTRA_ROOM);
4501 }
4502
4503 /* Return maximum size (bytes) of a buffer enough for encoding
4504    SRC_BYTES of text to CODING.  */
4505
4506 int
4507 encoding_buffer_size (coding, src_bytes)
4508      struct coding_system *coding;
4509      int src_bytes;
4510 {
4511   int magnification;
4512
4513   if (coding->type == coding_type_ccl)
4514     {
4515       magnification = coding->spec.ccl.encoder.buf_magnification;
4516       if (coding->eol_type == CODING_EOL_CRLF)
4517         magnification *= 2;
4518     }
4519   else if (CODING_REQUIRE_ENCODING (coding))
4520     magnification = 3;
4521   else
4522     magnification = 1;
4523
4524   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
4525 }
4526
/* Working buffer for code conversion.  */
struct conversion_buffer
{
  int size;                     /* Number of bytes allocated at DATA.  */
  int on_stack;                 /* 1 if allocated by alloca.  */
  unsigned char *data;          /* The buffer itself.  */
};

/* Don't use alloca for allocating memory space larger than this, lest
   we overflow their stack.  Parenthesized so that the macro can be
   used safely inside any expression.  */
#define MAX_ALLOCA (16*1024)

/* Allocate LEN bytes of memory for BUF (struct conversion_buffer, not
   a pointer to one).  Requests smaller than MAX_ALLOCA are served by
   alloca, so the memory belongs to the calling function's stack
   frame; larger ones are served by xmalloc and must be released with
   free_conversion_buffer.

   BUF and LEN are parenthesized so that arbitrary expressions may be
   passed, but both are evaluated more than once; don't pass
   arguments with side effects.  */
#define allocate_conversion_buffer(buf, len)            \
  do {                                                  \
    if ((len) < MAX_ALLOCA)                             \
      {                                                 \
        (buf).data = (unsigned char *) alloca (len);    \
        (buf).on_stack = 1;                             \
      }                                                 \
    else                                                \
      {                                                 \
        (buf).data = (unsigned char *) xmalloc (len);   \
        (buf).on_stack = 0;                             \
      }                                                 \
    (buf).size = (len);                                 \
  } while (0)
4554
4555 /* Double the allocated memory for *BUF.  */
4556 static void
4557 extend_conversion_buffer (buf)
4558      struct conversion_buffer *buf;
4559 {
4560   if (buf->on_stack)
4561     {
4562       unsigned char *save = buf->data;
4563       buf->data = (unsigned char *) xmalloc (buf->size * 2);
4564       bcopy (save, buf->data, buf->size);
4565       buf->on_stack = 0;
4566     }
4567   else
4568     {
4569       buf->data = (unsigned char *) xrealloc (buf->data, buf->size * 2);
4570     }
4571   buf->size *= 2;
4572 }
4573
4574 /* Free the allocated memory for BUF if it is not on stack.  */
4575 static void
4576 free_conversion_buffer (buf)
4577      struct conversion_buffer *buf;
4578 {
4579   if (!buf->on_stack)
4580     xfree (buf->data);
4581 }
4582
4583 int
4584 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
4585      struct coding_system *coding;
4586      unsigned char *source, *destination;
4587      int src_bytes, dst_bytes, encodep;
4588 {
4589   struct ccl_program *ccl
4590     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
4591   unsigned char *dst = destination;
4592
4593   ccl->suppress_error = coding->suppress_error;
4594   ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4595   if (encodep)
4596     {
4597       /* On encoding, EOL format is converted within ccl_driver.  For
4598          that, setup proper information in the structure CCL.  */
4599       ccl->eol_type = coding->eol_type;
4600       if (ccl->eol_type ==CODING_EOL_UNDECIDED)
4601         ccl->eol_type = CODING_EOL_LF;
4602       ccl->cr_consumed = coding->spec.ccl.cr_carryover;
4603       ccl->eight_bit_control = coding->dst_multibyte;
4604     }
4605   else
4606     ccl->eight_bit_control = 1;
4607   ccl->multibyte = coding->src_multibyte;
4608   if (coding->spec.ccl.eight_bit_carryover[0] != 0)
4609     {
4610       /* Move carryover bytes to DESTINATION.  */
4611       unsigned char *p = coding->spec.ccl.eight_bit_carryover;
4612       while (*p)
4613         *dst++ = *p++;
4614       coding->spec.ccl.eight_bit_carryover[0] = 0;
4615       if (dst_bytes)
4616         dst_bytes -= dst - destination;
4617     }
4618
4619   coding->produced = (ccl_driver (ccl, source, dst, src_bytes, dst_bytes,
4620                                   &(coding->consumed))
4621                       + dst - destination);
4622
4623   if (encodep)
4624     {
4625       coding->produced_char = coding->produced;
4626       coding->spec.ccl.cr_carryover = ccl->cr_consumed;
4627     }
4628   else if (!ccl->eight_bit_control)
4629     {
4630       /* The produced bytes forms a valid multibyte sequence. */
4631       coding->produced_char
4632         = multibyte_chars_in_text (destination, coding->produced);
4633       coding->spec.ccl.eight_bit_carryover[0] = 0;
4634     }
4635   else
4636     {
4637       /* On decoding, the destination should always multibyte.  But,
4638          CCL program might have been generated an invalid multibyte
4639          sequence.  Here we make such a sequence valid as
4640          multibyte.  */
4641       int bytes
4642         = dst_bytes ? dst_bytes : source + coding->consumed - destination;
4643
4644       if ((coding->consumed < src_bytes
4645            || !ccl->last_block)
4646           && coding->produced >= 1
4647           && destination[coding->produced - 1] >= 0x80)
4648         {
4649           /* We should not convert the tailing 8-bit codes to
4650              multibyte form even if they doesn't form a valid
4651              multibyte sequence.  They may form a valid sequence in
4652              the next call.  */
4653           int carryover = 0;
4654
4655           if (destination[coding->produced - 1] < 0xA0)
4656             carryover = 1;
4657           else if (coding->produced >= 2)
4658             {
4659               if (destination[coding->produced - 2] >= 0x80)
4660                 {
4661                   if (destination[coding->produced - 2] < 0xA0)
4662                     carryover = 2;
4663                   else if (coding->produced >= 3
4664                            && destination[coding->produced - 3] >= 0x80
4665                            && destination[coding->produced - 3] < 0xA0)
4666                     carryover = 3;
4667                 }
4668             }
4669           if (carryover > 0)
4670             {
4671               BCOPY_SHORT (destination + coding->produced - carryover,
4672                            coding->spec.ccl.eight_bit_carryover,
4673                            carryover);
4674               coding->spec.ccl.eight_bit_carryover[carryover] = 0;
4675               coding->produced -= carryover;
4676             }
4677         }
4678       coding->produced = str_as_multibyte (destination, bytes,
4679                                            coding->produced,
4680                                            &(coding->produced_char));
4681     }
4682
4683   switch (ccl->status)
4684     {
4685     case CCL_STAT_SUSPEND_BY_SRC:
4686       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
4687       break;
4688     case CCL_STAT_SUSPEND_BY_DST:
4689       coding->result = CODING_FINISH_INSUFFICIENT_DST;
4690       break;
4691     case CCL_STAT_QUIT:
4692     case CCL_STAT_INVALID_CMD:
4693       coding->result = CODING_FINISH_INTERRUPT;
4694       break;
4695     default:
4696       coding->result = CODING_FINISH_NORMAL;
4697       break;
4698     }
4699   return coding->result;
4700 }
4701
4702 /* Decode EOL format of the text at PTR of BYTES length destructively
4703    according to CODING->eol_type.  This is called after the CCL
4704    program produced a decoded text at PTR.  If we do CRLF->LF
4705    conversion, update CODING->produced and CODING->produced_char.  */
4706
4707 static void
4708 decode_eol_post_ccl (coding, ptr, bytes)
4709      struct coding_system *coding;
4710      unsigned char *ptr;
4711      int bytes;
4712 {
4713   Lisp_Object val, saved_coding_symbol;
4714   unsigned char *pend = ptr + bytes;
4715   int dummy;
4716
4717   /* Remember the current coding system symbol.  We set it back when
4718      an inconsistent EOL is found so that `last-coding-system-used' is
4719      set to the coding system that doesn't specify EOL conversion.  */
4720   saved_coding_symbol = coding->symbol;
4721
4722   coding->spec.ccl.cr_carryover = 0;
4723   if (coding->eol_type == CODING_EOL_UNDECIDED)
4724     {
4725       /* Here, to avoid the call of setup_coding_system, we directly
4726          call detect_eol_type.  */
4727       coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
4728       if (coding->eol_type == CODING_EOL_INCONSISTENT)
4729         coding->eol_type = CODING_EOL_LF;
4730       if (coding->eol_type != CODING_EOL_UNDECIDED)
4731         {
4732           val = Fget (coding->symbol, Qeol_type);
4733           if (VECTORP (val) && XVECTOR (val)->size == 3)
4734             coding->symbol = XVECTOR (val)->contents[coding->eol_type];
4735         }
4736       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4737     }
4738
4739   if (coding->eol_type == CODING_EOL_LF
4740       || coding->eol_type == CODING_EOL_UNDECIDED)
4741     {
4742       /* We have nothing to do.  */
4743       ptr = pend;
4744     }
4745   else if (coding->eol_type == CODING_EOL_CRLF)
4746     {
4747       unsigned char *pstart = ptr, *p = ptr;
4748
4749       if (! (coding->mode & CODING_MODE_LAST_BLOCK)
4750           && *(pend - 1) == '\r')
4751         {
4752           /* If the last character is CR, we can't handle it here
4753              because LF will be in the not-yet-decoded source text.
4754              Record that the CR is not yet processed.  */
4755           coding->spec.ccl.cr_carryover = 1;
4756           coding->produced--;
4757           coding->produced_char--;
4758           pend--;
4759         }
4760       while (ptr < pend)
4761         {
4762           if (*ptr == '\r')
4763             {
4764               if (ptr + 1 < pend && *(ptr + 1) == '\n')
4765                 {
4766                   *p++ = '\n';
4767                   ptr += 2;
4768                 }
4769               else
4770                 {
4771                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4772                     goto undo_eol_conversion;
4773                   *p++ = *ptr++;
4774                 }
4775             }
4776           else if (*ptr == '\n'
4777                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4778             goto undo_eol_conversion;
4779           else
4780             *p++ = *ptr++;
4781           continue;
4782
4783         undo_eol_conversion:
4784           /* We have faced with inconsistent EOL format at PTR.
4785              Convert all LFs before PTR back to CRLFs.  */
4786           for (p--, ptr--; p >= pstart; p--)
4787             {
4788               if (*p == '\n')
4789                 *ptr-- = '\n', *ptr-- = '\r';
4790               else
4791                 *ptr-- = *p;
4792             }
4793           /*  If carryover is recorded, cancel it because we don't
4794               convert CRLF anymore.  */
4795           if (coding->spec.ccl.cr_carryover)
4796             {
4797               coding->spec.ccl.cr_carryover = 0;
4798               coding->produced++;
4799               coding->produced_char++;
4800               pend++;
4801             }
4802           p = ptr = pend;
4803           coding->eol_type = CODING_EOL_LF;
4804           coding->symbol = saved_coding_symbol;
4805         }
4806       if (p < pend)
4807         {
4808           /* As each two-byte sequence CRLF was converted to LF, (PEND
4809              - P) is the number of deleted characters.  */
4810           coding->produced -= pend - p;
4811           coding->produced_char -= pend - p;
4812         }
4813     }
4814   else                  /* i.e. coding->eol_type == CODING_EOL_CR */
4815     {
4816       unsigned char *p = ptr;
4817
4818       for (; ptr < pend; ptr++)
4819         {
4820           if (*ptr == '\r')
4821             *ptr = '\n';
4822           else if (*ptr == '\n'
4823                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4824             {
4825               for (; p < ptr; p++)
4826                 {
4827                   if (*p == '\n')
4828                     *p = '\r';
4829                 }
4830               ptr = pend;
4831               coding->eol_type = CODING_EOL_LF;
4832               coding->symbol = saved_coding_symbol;
4833             }
4834         }
4835     }
4836 }
4837
4838 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
4839    decoding, it may detect coding system and format of end-of-line if
4840    those are not yet decided.  The source should be unibyte, the
4841    result is multibyte if CODING->dst_multibyte is nonzero, else
4842    unibyte.  */
4843
4844 int
4845 decode_coding (coding, source, destination, src_bytes, dst_bytes)
4846      struct coding_system *coding;
4847      const unsigned char *source;
4848      unsigned char *destination;
4849      int src_bytes, dst_bytes;
4850 {
4851   int extra = 0;
4852
4853   if (coding->type == coding_type_undecided)
4854     detect_coding (coding, source, src_bytes);
4855
4856   if (coding->eol_type == CODING_EOL_UNDECIDED
4857       && coding->type != coding_type_ccl)
4858     {
4859       detect_eol (coding, source, src_bytes);
4860       /* We had better recover the original eol format if we
4861          encounter an inconsistent eol format while decoding.  */
4862       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4863     }
4864
4865   coding->produced = coding->produced_char = 0;
4866   coding->consumed = coding->consumed_char = 0;
4867   coding->errors = 0;
4868   coding->result = CODING_FINISH_NORMAL;
4869
4870   switch (coding->type)
4871     {
4872     case coding_type_sjis:
4873       decode_coding_sjis_big5 (coding, source, destination,
4874                                src_bytes, dst_bytes, 1);
4875       break;
4876
4877     case coding_type_iso2022:
4878       decode_coding_iso2022 (coding, source, destination,
4879                              src_bytes, dst_bytes);
4880       break;
4881
4882     case coding_type_big5:
4883       decode_coding_sjis_big5 (coding, source, destination,
4884                                src_bytes, dst_bytes, 0);
4885       break;
4886
4887     case coding_type_emacs_mule:
4888       decode_coding_emacs_mule (coding, source, destination,
4889                                 src_bytes, dst_bytes);
4890       break;
4891
4892     case coding_type_ccl:
4893       if (coding->spec.ccl.cr_carryover)
4894         {
4895           /* Put the CR which was not processed by the previous call
4896              of decode_eol_post_ccl in DESTINATION.  It will be
4897              decoded together with the following LF by the call to
4898              decode_eol_post_ccl below.  */
4899           *destination = '\r';
4900           coding->produced++;
4901           coding->produced_char++;
4902           dst_bytes--;
4903           extra = coding->spec.ccl.cr_carryover;
4904         }
4905       ccl_coding_driver (coding, source, destination + extra,
4906                          src_bytes, dst_bytes, 0);
4907       if (coding->eol_type != CODING_EOL_LF)
4908         {
4909           coding->produced += extra;
4910           coding->produced_char += extra;
4911           decode_eol_post_ccl (coding, destination, coding->produced);
4912         }
4913       break;
4914
4915     default:
4916       decode_eol (coding, source, destination, src_bytes, dst_bytes);
4917     }
4918
4919   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4920       && coding->mode & CODING_MODE_LAST_BLOCK
4921       && coding->consumed == src_bytes)
4922     coding->result = CODING_FINISH_NORMAL;
4923
4924   if (coding->mode & CODING_MODE_LAST_BLOCK
4925       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4926     {
4927       const unsigned char *src = source + coding->consumed;
4928       unsigned char *dst = destination + coding->produced;
4929
4930       src_bytes -= coding->consumed;
4931       coding->errors++;
4932       if (COMPOSING_P (coding))
4933         DECODE_COMPOSITION_END ('1');
4934       while (src_bytes--)
4935         {
4936           int c = *src++;
4937           dst += CHAR_STRING (c, dst);
4938           coding->produced_char++;
4939         }
4940       coding->consumed = coding->consumed_char = src - source;
4941       coding->produced = dst - destination;
4942       coding->result = CODING_FINISH_NORMAL;
4943     }
4944
4945   if (!coding->dst_multibyte)
4946     {
4947       coding->produced = str_as_unibyte (destination, coding->produced);
4948       coding->produced_char = coding->produced;
4949     }
4950
4951   return coding->result;
4952 }
4953
4954 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  The
4955    multibyteness of the source is CODING->src_multibyte, the
4956    multibyteness of the result is always unibyte.  */
4957
4958 int
4959 encode_coding (coding, source, destination, src_bytes, dst_bytes)
4960      struct coding_system *coding;
4961      const unsigned char *source;
4962      unsigned char *destination;
4963      int src_bytes, dst_bytes;
4964 {
4965   coding->produced = coding->produced_char = 0;
4966   coding->consumed = coding->consumed_char = 0;
4967   coding->errors = 0;
4968   coding->result = CODING_FINISH_NORMAL;
4969
4970   switch (coding->type)
4971     {
4972     case coding_type_sjis:
4973       encode_coding_sjis_big5 (coding, source, destination,
4974                                src_bytes, dst_bytes, 1);
4975       break;
4976
4977     case coding_type_iso2022:
4978       encode_coding_iso2022 (coding, source, destination,
4979                              src_bytes, dst_bytes);
4980       break;
4981
4982     case coding_type_big5:
4983       encode_coding_sjis_big5 (coding, source, destination,
4984                                src_bytes, dst_bytes, 0);
4985       break;
4986
4987     case coding_type_emacs_mule:
4988       encode_coding_emacs_mule (coding, source, destination,
4989                                 src_bytes, dst_bytes);
4990       break;
4991
4992     case coding_type_ccl:
4993       ccl_coding_driver (coding, source, destination,
4994                          src_bytes, dst_bytes, 1);
4995       break;
4996
4997     default:
4998       encode_eol (coding, source, destination, src_bytes, dst_bytes);
4999     }
5000
5001   if (coding->mode & CODING_MODE_LAST_BLOCK
5002       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
5003     {
5004       const unsigned char *src = source + coding->consumed;
5005       unsigned char *dst = destination + coding->produced;
5006
5007       if (coding->type == coding_type_iso2022)
5008         ENCODE_RESET_PLANE_AND_REGISTER;
5009       if (COMPOSING_P (coding))
5010         *dst++ = ISO_CODE_ESC, *dst++ = '1';
5011       if (coding->consumed < src_bytes)
5012         {
5013           int len = src_bytes - coding->consumed;
5014
5015           BCOPY_SHORT (src, dst, len);
5016           if (coding->src_multibyte)
5017             len = str_as_unibyte (dst, len);
5018           dst += len;
5019           coding->consumed = src_bytes;
5020         }
5021       coding->produced = coding->produced_char = dst - destination;
5022       coding->result = CODING_FINISH_NORMAL;
5023     }
5024
5025   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
5026       && coding->consumed == src_bytes)
5027     coding->result = CODING_FINISH_NORMAL;
5028
5029   return coding->result;
5030 }
5031
5032 /* Scan text in the region between *BEG and *END (byte positions),
5033    skip characters which we don't have to decode by coding system
5034    CODING at the head and tail, then set *BEG and *END to the region
5035    of the text we actually have to convert.  The caller should move
5036    the gap out of the region in advance if the region is from a
5037    buffer.
5038
5039    If STR is not NULL, *BEG and *END are indices into STR.  */
5040
5041 static void
5042 shrink_decoding_region (beg, end, coding, str)
5043      int *beg, *end;
5044      struct coding_system *coding;
5045      unsigned char *str;
5046 {
5047   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
5048   int eol_conversion;
5049   Lisp_Object translation_table;
5050
5051   if (coding->type == coding_type_ccl
5052       || coding->type == coding_type_undecided
5053       || coding->eol_type != CODING_EOL_LF
5054       || !NILP (coding->post_read_conversion)
5055       || coding->composing != COMPOSITION_DISABLED)
5056     {
5057       /* We can't skip any data.  */
5058       return;
5059     }
5060   if (coding->type == coding_type_no_conversion
5061       || coding->type == coding_type_raw_text
5062       || coding->type == coding_type_emacs_mule)
5063     {
5064       /* We need no conversion, but don't have to skip any data here.
5065          Decoding routine handles them effectively anyway.  */
5066       return;
5067     }
5068
5069   translation_table = coding->translation_table_for_decode;
5070   if (NILP (translation_table) && !NILP (Venable_character_translation))
5071     translation_table = Vstandard_translation_table_for_decode;
5072   if (CHAR_TABLE_P (translation_table))
5073     {
5074       int i;
5075       for (i = 0; i < 128; i++)
5076         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
5077           break;
5078       if (i < 128)
5079         /* Some ASCII character should be translated.  We give up
5080            shrinking.  */
5081         return;
5082     }
5083
5084   if (coding->heading_ascii >= 0)
5085     /* Detection routine has already found how much we can skip at the
5086        head.  */
5087     *beg += coding->heading_ascii;
5088
5089   if (str)
5090     {
5091       begp_orig = begp = str + *beg;
5092       endp_orig = endp = str + *end;
5093     }
5094   else
5095     {
5096       begp_orig = begp = BYTE_POS_ADDR (*beg);
5097       endp_orig = endp = begp + *end - *beg;
5098     }
5099
5100   eol_conversion = (coding->eol_type == CODING_EOL_CR
5101                     || coding->eol_type == CODING_EOL_CRLF);
5102
5103   switch (coding->type)
5104     {
5105     case coding_type_sjis:
5106     case coding_type_big5:
5107       /* We can skip all ASCII characters at the head.  */
5108       if (coding->heading_ascii < 0)
5109         {
5110           if (eol_conversion)
5111             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
5112           else
5113             while (begp < endp && *begp < 0x80) begp++;
5114         }
5115       /* We can skip all ASCII characters at the tail except for the
5116          second byte of SJIS or BIG5 code.  */
5117       if (eol_conversion)
5118         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
5119       else
5120         while (begp < endp && endp[-1] < 0x80) endp--;
5121       /* Do not consider LF as ascii if preceded by CR, since that
5122          confuses eol decoding. */
5123       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
5124         endp++;
5125       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
5126         endp++;
5127       break;
5128
5129     case coding_type_iso2022:
5130       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
5131         /* We can't skip any data.  */
5132         break;
5133       if (coding->heading_ascii < 0)
5134         {
5135           /* We can skip all ASCII characters at the head except for a
5136              few control codes.  */
5137           while (begp < endp && (c = *begp) < 0x80
5138                  && c != ISO_CODE_CR && c != ISO_CODE_SO
5139                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
5140                  && (!eol_conversion || c != ISO_CODE_LF))
5141             begp++;
5142         }
5143       switch (coding->category_idx)
5144         {
5145         case CODING_CATEGORY_IDX_ISO_8_1:
5146         case CODING_CATEGORY_IDX_ISO_8_2:
5147           /* We can skip all ASCII characters at the tail.  */
5148           if (eol_conversion)
5149             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
5150           else
5151             while (begp < endp && endp[-1] < 0x80) endp--;
5152           /* Do not consider LF as ascii if preceded by CR, since that
5153              confuses eol decoding. */
5154           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
5155             endp++;
5156           break;
5157
5158         case CODING_CATEGORY_IDX_ISO_7:
5159         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
5160           {
5161             /* We can skip all characters at the tail except for 8-bit
5162                codes and ESC and the following 2-byte at the tail.  */
5163             unsigned char *eight_bit = NULL;
5164
5165             if (eol_conversion)
5166               while (begp < endp
5167                      && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
5168                 {
5169                   if (!eight_bit && c & 0x80) eight_bit = endp;
5170                   endp--;
5171                 }
5172             else
5173               while (begp < endp
5174                      && (c = endp[-1]) != ISO_CODE_ESC)
5175                 {
5176                   if (!eight_bit && c & 0x80) eight_bit = endp;
5177                   endp--;
5178                 }
5179             /* Do not consider LF as ascii if preceded by CR, since that
5180                confuses eol decoding. */
5181             if (begp < endp && endp < endp_orig
5182                 && endp[-1] == '\r' && endp[0] == '\n')
5183               endp++;
5184             if (begp < endp && endp[-1] == ISO_CODE_ESC)
5185               {
5186                 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
5187                   /* This is an ASCII designation sequence.  We can
5188                      surely skip the tail.  But, if we have
5189                      encountered an 8-bit code, skip only the codes
5190                      after that.  */
5191                   endp = eight_bit ? eight_bit : endp + 2;
5192                 else
5193                   /* Hmmm, we can't skip the tail.  */
5194                   endp = endp_orig;
5195               }
5196             else if (eight_bit)
5197               endp = eight_bit;
5198           }
5199         }
5200       break;
5201
5202     default:
5203       abort ();
5204     }
5205   *beg += begp - begp_orig;
5206   *end += endp - endp_orig;
5207   return;
5208 }
5209
5210 /* Like shrink_decoding_region but for encoding.  */
5211
5212 static void
5213 shrink_encoding_region (beg, end, coding, str)
5214      int *beg, *end;
5215      struct coding_system *coding;
5216      unsigned char *str;
5217 {
5218   unsigned char *begp_orig, *begp, *endp_orig, *endp;
5219   int eol_conversion;
5220   Lisp_Object translation_table;
5221
5222   if (coding->type == coding_type_ccl
5223       || coding->eol_type == CODING_EOL_CRLF
5224       || coding->eol_type == CODING_EOL_CR
5225       || (coding->cmp_data && coding->cmp_data->used > 0))
5226     {
5227       /* We can't skip any data.  */
5228       return;
5229     }
5230   if (coding->type == coding_type_no_conversion
5231       || coding->type == coding_type_raw_text
5232       || coding->type == coding_type_emacs_mule
5233       || coding->type == coding_type_undecided)
5234     {
5235       /* We need no conversion, but don't have to skip any data here.
5236          Encoding routine handles them effectively anyway.  */
5237       return;
5238     }
5239
5240   translation_table = coding->translation_table_for_encode;
5241   if (NILP (translation_table) && !NILP (Venable_character_translation))
5242     translation_table = Vstandard_translation_table_for_encode;
5243   if (CHAR_TABLE_P (translation_table))
5244     {
5245       int i;
5246       for (i = 0; i < 128; i++)
5247         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
5248           break;
5249       if (i < 128)
5250         /* Some ASCII character should be translated.  We give up
5251            shrinking.  */
5252         return;
5253     }
5254
5255   if (str)
5256     {
5257       begp_orig = begp = str + *beg;
5258       endp_orig = endp = str + *end;
5259     }
5260   else
5261     {
5262       begp_orig = begp = BYTE_POS_ADDR (*beg);
5263       endp_orig = endp = begp + *end - *beg;
5264     }
5265
5266   eol_conversion = (coding->eol_type == CODING_EOL_CR
5267                     || coding->eol_type == CODING_EOL_CRLF);
5268
5269   /* Here, we don't have to check coding->pre_write_conversion because
5270      the caller is expected to have handled it already.  */
5271   switch (coding->type)
5272     {
5273     case coding_type_iso2022:
5274       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
5275         /* We can't skip any data.  */
5276         break;
5277       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
5278         {
5279           unsigned char *bol = begp;
5280           while (begp < endp && *begp < 0x80)
5281             {
5282               begp++;
5283               if (begp[-1] == '\n')
5284                 bol = begp;
5285             }
5286           begp = bol;
5287           goto label_skip_tail;
5288         }
5289       /* fall down ... */
5290
5291     case coding_type_sjis:
5292     case coding_type_big5:
5293       /* We can skip all ASCII characters at the head and tail.  */
5294       if (eol_conversion)
5295         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
5296       else
5297         while (begp < endp && *begp < 0x80) begp++;
5298     label_skip_tail:
5299       if (eol_conversion)
5300         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
5301       else
5302         while (begp < endp && *(endp - 1) < 0x80) endp--;
5303       break;
5304
5305     default:
5306       abort ();
5307     }
5308
5309   *beg += begp - begp_orig;
5310   *end += endp - endp_orig;
5311   return;
5312 }
5313
/* Shrinking the conversion region has some overhead of its own, so
   it is attempted only when the region is longer than this many
   bytes.  */
static int shrink_conversion_region_threshhold = 1024;

/* Shrink the byte range *BEG .. *END in place, using
   shrink_encoding_region if ENCODEP is nonzero and
   shrink_decoding_region otherwise.  Do nothing when the range is not
   longer than shrink_conversion_region_threshhold.  */
#define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep)        \
  do {                                                                  \
    if (*(end) - *(beg) <= shrink_conversion_region_threshhold)         \
      break;                                                            \
    if (encodep)                                                        \
      shrink_encoding_region (beg, end, coding, str);                   \
    else                                                                \
      shrink_decoding_region (beg, end, coding, str);                   \
  } while (0)
5327
5328 static Lisp_Object
5329 code_convert_region_unwind (arg)
5330      Lisp_Object arg;
5331 {
5332   inhibit_pre_post_conversion = 0;
5333   Vlast_coding_system_used = arg;
5334   return Qnil;
5335 }
5336
5337 /* Store information about all compositions in the range FROM and TO
5338    of OBJ in memory blocks pointed by CODING->cmp_data.  OBJ is a
5339    buffer or a string, defaults to the current buffer.  */
5340
5341 void
5342 coding_save_composition (coding, from, to, obj)
5343      struct coding_system *coding;
5344      int from, to;
5345      Lisp_Object obj;
5346 {
5347   Lisp_Object prop;
5348   int start, end;
5349
5350   if (coding->composing == COMPOSITION_DISABLED)
5351     return;
5352   if (!coding->cmp_data)
5353     coding_allocate_composition_data (coding, from);
5354   if (!find_composition (from, to, &start, &end, &prop, obj)
5355       || end > to)
5356     return;
5357   if (start < from
5358       && (!find_composition (end, to, &start, &end, &prop, obj)
5359           || end > to))
5360     return;
5361   coding->composing = COMPOSITION_NO;
5362   do
5363     {
5364       if (COMPOSITION_VALID_P (start, end, prop))
5365         {
5366           enum composition_method method = COMPOSITION_METHOD (prop);
5367           if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
5368               >= COMPOSITION_DATA_SIZE)
5369             coding_allocate_composition_data (coding, from);
5370           /* For relative composition, we remember start and end
5371              positions, for the other compositions, we also remember
5372              components.  */
5373           CODING_ADD_COMPOSITION_START (coding, start - from, method);
5374           if (method != COMPOSITION_RELATIVE)
5375             {
5376               /* We must store a*/
5377               Lisp_Object val, ch;
5378
5379               val = COMPOSITION_COMPONENTS (prop);
5380               if (CONSP (val))
5381                 while (CONSP (val))
5382                   {
5383                     ch = XCAR (val), val = XCDR (val);
5384                     CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5385                   }
5386               else if (VECTORP (val) || STRINGP (val))
5387                 {
5388                   int len = (VECTORP (val)
5389                              ? XVECTOR (val)->size : SCHARS (val));
5390                   int i;
5391                   for (i = 0; i < len; i++)
5392                     {
5393                       ch = (STRINGP (val)
5394                             ? Faref (val, make_number (i))
5395                             : XVECTOR (val)->contents[i]);
5396                       CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5397                     }
5398                 }
5399               else              /* INTEGERP (val) */
5400                 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val));
5401             }
5402           CODING_ADD_COMPOSITION_END (coding, end - from);
5403         }
5404       start = end;
5405     }
5406   while (start < to
5407          && find_composition (start, to, &start, &end, &prop, obj)
5408          && end <= to);
5409
5410   /* Make coding->cmp_data point to the first memory block.  */
5411   while (coding->cmp_data->prev)
5412     coding->cmp_data = coding->cmp_data->prev;
5413   coding->cmp_data_start = 0;
5414 }
5415
5416 /* Reflect the saved information about compositions to OBJ.
5417    CODING->cmp_data points to a memory block for the information.  OBJ
5418    is a buffer or a string, defaults to the current buffer.  */
5419
5420 void
5421 coding_restore_composition (coding, obj)
5422      struct coding_system *coding;
5423      Lisp_Object obj;
5424 {
5425   struct composition_data *cmp_data = coding->cmp_data;
5426
5427   if (!cmp_data)
5428     return;
5429
5430   while (cmp_data->prev)
5431     cmp_data = cmp_data->prev;
5432
5433   while (cmp_data)
5434     {
5435       int i;
5436
5437       for (i = 0; i < cmp_data->used && cmp_data->data[i] > 0;
5438            i += cmp_data->data[i])
5439         {
5440           int *data = cmp_data->data + i;
5441           enum composition_method method = (enum composition_method) data[3];
5442           Lisp_Object components;
5443
5444           if (data[0] < 0 || i + data[0] > cmp_data->used)
5445             /* Invalid composition data.  */
5446             break;
5447
5448           if (method == COMPOSITION_RELATIVE)
5449             components = Qnil;
5450           else
5451             {
5452               int len = data[0] - 4, j;
5453               Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5454
5455               if (method == COMPOSITION_WITH_RULE_ALTCHARS
5456                   && len % 2 == 0)
5457                 len --;
5458               for (j = 0; j < len; j++)
5459                 args[j] = make_number (data[4 + j]);
5460               components = (method == COMPOSITION_WITH_ALTCHARS
5461                             ? Fstring (len, args)
5462                             : Fvector (len, args));
5463             }
5464           compose_text (data[1], data[2], components, Qnil, obj);
5465         }
5466       cmp_data = cmp_data->next;
5467     }
5468 }
5469
5470 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
5471    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
5472    coding system CODING, and return the status code of code conversion
5473    (currently, this value has no meaning).
5474
5475    How many characters (and bytes) are converted to how many
5476    characters (and bytes) are recorded in members of the structure
5477    CODING.
5478
5479    If REPLACE is nonzero, we do various things as if the original text
5480    is deleted and a new text is inserted.  See the comments in
5481    replace_range (insdel.c) to know what we are doing.
5482
5483    If REPLACE is zero, it is assumed that the source text is unibyte.
5484    Otherwise, it is assumed that the source text is multibyte.  */
5485
5486 int
5487 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
5488      int from, from_byte, to, to_byte, encodep, replace;
5489      struct coding_system *coding;
5490 {
5491   int len = to - from, len_byte = to_byte - from_byte;
5492   int nchars_del = 0, nbytes_del = 0;
5493   int require, inserted, inserted_byte;
5494   int head_skip, tail_skip, total_skip = 0;
5495   Lisp_Object saved_coding_symbol;
5496   int first = 1;
5497   unsigned char *src, *dst;
5498   Lisp_Object deletion;
5499   int orig_point = PT, orig_len = len;
5500   int prev_Z;
5501   int multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
5502
5503   deletion = Qnil;
5504   saved_coding_symbol = coding->symbol;
5505
5506   if (from < PT && PT < to)
5507     {
5508       TEMP_SET_PT_BOTH (from, from_byte);
5509       orig_point = from;
5510     }
5511
5512   if (replace)
5513     {
5514       int saved_from = from;
5515       int saved_inhibit_modification_hooks;
5516
5517       prepare_to_modify_buffer (from, to, &from);
5518       if (saved_from != from)
5519         {
5520           to = from + len;
5521           from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
5522           len_byte = to_byte - from_byte;
5523         }
5524
5525       /* The code conversion routine can not preserve text properties
5526          for now.  So, we must remove all text properties in the
5527          region.  Here, we must suppress all modification hooks.  */
5528       saved_inhibit_modification_hooks = inhibit_modification_hooks;
5529       inhibit_modification_hooks = 1;
5530       Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
5531       inhibit_modification_hooks = saved_inhibit_modification_hooks;
5532     }
5533
5534   if (! encodep && CODING_REQUIRE_DETECTION (coding))
5535     {
5536       /* We must detect encoding of text and eol format.  */
5537
5538       if (from < GPT && to > GPT)
5539         move_gap_both (from, from_byte);
5540       if (coding->type == coding_type_undecided)
5541         {
5542           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
5543           if (coding->type == coding_type_undecided)
5544             {
5545               /* It seems that the text contains only ASCII, but we
5546                  should not leave it undecided because the deeper
5547                  decoding routine (decode_coding) tries to detect the
5548                  encodings again in vain.  */
5549               coding->type = coding_type_emacs_mule;
5550               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
5551               /* As emacs-mule decoder will handle composition, we
5552                  need this setting to allocate coding->cmp_data
5553                  later.  */
5554               coding->composing = COMPOSITION_NO;
5555             }
5556         }
5557       if (coding->eol_type == CODING_EOL_UNDECIDED
5558           && coding->type != coding_type_ccl)
5559         {
5560           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
5561           if (coding->eol_type == CODING_EOL_UNDECIDED)
5562             coding->eol_type = CODING_EOL_LF;
5563           /* We had better recover the original eol format if we
5564              encounter an inconsistent eol format while decoding.  */
5565           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5566         }
5567     }
5568
5569   /* Now we convert the text.  */
5570
5571   /* For encoding, we must process pre-write-conversion in advance.  */
5572   if (! inhibit_pre_post_conversion
5573       && encodep
5574       && SYMBOLP (coding->pre_write_conversion)
5575       && ! NILP (Ffboundp (coding->pre_write_conversion)))
5576     {
5577       /* The function in pre-write-conversion may put a new text in a
5578          new buffer.  */
5579       struct buffer *prev = current_buffer;
5580       Lisp_Object new;
5581
5582       record_unwind_protect (code_convert_region_unwind,
5583                              Vlast_coding_system_used);
5584       /* We should not call any more pre-write/post-read-conversion
5585          functions while this pre-write-conversion is running.  */
5586       inhibit_pre_post_conversion = 1;
5587       call2 (coding->pre_write_conversion,
5588              make_number (from), make_number (to));
5589       inhibit_pre_post_conversion = 0;
5590       /* Discard the unwind protect.  */
5591       specpdl_ptr--;
5592
5593       if (current_buffer != prev)
5594         {
5595           len = ZV - BEGV;
5596           new = Fcurrent_buffer ();
5597           set_buffer_internal_1 (prev);
5598           del_range_2 (from, from_byte, to, to_byte, 0);
5599           TEMP_SET_PT_BOTH (from, from_byte);
5600           insert_from_buffer (XBUFFER (new), 1, len, 0);
5601           Fkill_buffer (new);
5602           if (orig_point >= to)
5603             orig_point += len - orig_len;
5604           else if (orig_point > from)
5605             orig_point = from;
5606           orig_len = len;
5607           to = from + len;
5608           from_byte = CHAR_TO_BYTE (from);
5609           to_byte = CHAR_TO_BYTE (to);
5610           len_byte = to_byte - from_byte;
5611           TEMP_SET_PT_BOTH (from, from_byte);
5612         }
5613     }
5614
5615   if (replace)
5616     {
5617       if (! EQ (current_buffer->undo_list, Qt))
5618         deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
5619       else
5620         {
5621           nchars_del = to - from;
5622           nbytes_del = to_byte - from_byte;
5623         }
5624     }
5625
5626   if (coding->composing != COMPOSITION_DISABLED)
5627     {
5628       if (encodep)
5629         coding_save_composition (coding, from, to, Fcurrent_buffer ());
5630       else
5631         coding_allocate_composition_data (coding, from);
5632     }
5633
5634   /* Try to skip the heading and tailing ASCIIs.  */
5635   if (coding->type != coding_type_ccl)
5636     {
5637       int from_byte_orig = from_byte, to_byte_orig = to_byte;
5638
5639       if (from < GPT && GPT < to)
5640         move_gap_both (from, from_byte);
5641       SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
5642       if (from_byte == to_byte
5643           && (encodep || NILP (coding->post_read_conversion))
5644           && ! CODING_REQUIRE_FLUSHING (coding))
5645         {
5646           coding->produced = len_byte;
5647           coding->produced_char = len;
5648           if (!replace)
5649             /* We must record and adjust for this new text now.  */
5650             adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
5651           return 0;
5652         }
5653
5654       head_skip = from_byte - from_byte_orig;
5655       tail_skip = to_byte_orig - to_byte;
5656       total_skip = head_skip + tail_skip;
5657       from += head_skip;
5658       to -= tail_skip;
5659       len -= total_skip; len_byte -= total_skip;
5660     }
5661
5662   /* For conversion, we must put the gap before the text in addition to
5663      making the gap larger for efficient decoding.  The required gap
5664      size starts from 2000 which is the magic number used in make_gap.
5665      But, after one batch of conversion, it will be incremented if we
5666      find that it is not enough .  */
5667   require = 2000;
5668
5669   if (GAP_SIZE  < require)
5670     make_gap (require - GAP_SIZE);
5671   move_gap_both (from, from_byte);
5672
5673   inserted = inserted_byte = 0;
5674
5675   GAP_SIZE += len_byte;
5676   ZV -= len;
5677   Z -= len;
5678   ZV_BYTE -= len_byte;
5679   Z_BYTE -= len_byte;
5680
5681   if (GPT - BEG < BEG_UNCHANGED)
5682     BEG_UNCHANGED = GPT - BEG;
5683   if (Z - GPT < END_UNCHANGED)
5684     END_UNCHANGED = Z - GPT;
5685
5686   if (!encodep && coding->src_multibyte)
5687     {
5688       /* Decoding routines expects that the source text is unibyte.
5689          We must convert 8-bit characters of multibyte form to
5690          unibyte.  */
5691       int len_byte_orig = len_byte;
5692       len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte);
5693       if (len_byte < len_byte_orig)
5694         safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte,
5695                     len_byte);
5696       coding->src_multibyte = 0;
5697     }
5698
5699   for (;;)
5700     {
5701       int result;
5702
5703       /* The buffer memory is now:
5704          +--------+converted-text+---------+-------original-text-------+---+
5705          |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
5706                   |<---------------------- GAP ----------------------->|  */
5707       src = GAP_END_ADDR - len_byte;
5708       dst = GPT_ADDR + inserted_byte;
5709
5710       if (encodep)
5711         result = encode_coding (coding, src, dst, len_byte, 0);
5712       else
5713         {
5714           if (coding->composing != COMPOSITION_DISABLED)
5715             coding->cmp_data->char_offset = from + inserted;
5716           result = decode_coding (coding, src, dst, len_byte, 0);
5717         }
5718
5719       /* The buffer memory is now:
5720          +--------+-------converted-text----+--+------original-text----+---+
5721          |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
5722                   |<---------------------- GAP ----------------------->|  */
5723
5724       inserted += coding->produced_char;
5725       inserted_byte += coding->produced;
5726       len_byte -= coding->consumed;
5727
5728       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5729         {
5730           coding_allocate_composition_data (coding, from + inserted);
5731           continue;
5732         }
5733
5734       src += coding->consumed;
5735       dst += coding->produced;
5736
5737       if (result == CODING_FINISH_NORMAL)
5738         {
5739           src += len_byte;
5740           break;
5741         }
5742       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
5743         {
5744           unsigned char *pend = dst, *p = pend - inserted_byte;
5745           Lisp_Object eol_type;
5746
5747           /* Encode LFs back to the original eol format (CR or CRLF).  */
5748           if (coding->eol_type == CODING_EOL_CR)
5749             {
5750               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
5751             }
5752           else
5753             {
5754               int count = 0;
5755
5756               while (p < pend) if (*p++ == '\n') count++;
5757               if (src - dst < count)
5758                 {
5759                   /* We don't have sufficient room for encoding LFs
5760                      back to CRLF.  We must record converted and
5761                      not-yet-converted text back to the buffer
5762                      content, enlarge the gap, then record them out of
5763                      the buffer contents again.  */
5764                   int add = len_byte + inserted_byte;
5765
5766                   GAP_SIZE -= add;
5767                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5768                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
5769                   make_gap (count - GAP_SIZE);
5770                   GAP_SIZE += add;
5771                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5772                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5773                   /* Don't forget to update SRC, DST, and PEND.  */
5774                   src = GAP_END_ADDR - len_byte;
5775                   dst = GPT_ADDR + inserted_byte;
5776                   pend = dst;
5777                 }
5778               inserted += count;
5779               inserted_byte += count;
5780               coding->produced += count;
5781               p = dst = pend + count;
5782               while (count)
5783                 {
5784                   *--p = *--pend;
5785                   if (*p == '\n') count--, *--p = '\r';
5786                 }
5787             }
5788
5789           /* Suppress eol-format conversion in the further conversion.  */
5790           coding->eol_type = CODING_EOL_LF;
5791
5792           /* Set the coding system symbol to that for Unix-like EOL.  */
5793           eol_type = Fget (saved_coding_symbol, Qeol_type);
5794           if (VECTORP (eol_type)
5795               && XVECTOR (eol_type)->size == 3
5796               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
5797             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
5798           else
5799             coding->symbol = saved_coding_symbol;
5800
5801           continue;
5802         }
5803       if (len_byte <= 0)
5804         {
5805           if (coding->type != coding_type_ccl
5806               || coding->mode & CODING_MODE_LAST_BLOCK)
5807             break;
5808           coding->mode |= CODING_MODE_LAST_BLOCK;
5809           continue;
5810         }
5811       if (result == CODING_FINISH_INSUFFICIENT_SRC)
5812         {
5813           /* The source text ends in invalid codes.  Let's just
5814              make them valid buffer contents, and finish conversion.  */
5815           if (multibyte_p)
5816             {
5817               unsigned char *start = dst;
5818
5819               inserted += len_byte;
5820               while (len_byte--)
5821                 {
5822                   int c = *src++;
5823                   dst += CHAR_STRING (c, dst);
5824                 }
5825
5826               inserted_byte += dst - start;
5827             }
5828           else
5829             {
5830               inserted += len_byte;
5831               inserted_byte += len_byte;
5832               while (len_byte--)
5833                 *dst++ = *src++;
5834             }
5835           break;
5836         }
5837       if (result == CODING_FINISH_INTERRUPT)
5838         {
5839           /* The conversion procedure was interrupted by a user.  */
5840           break;
5841         }
5842       /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST  */
5843       if (coding->consumed < 1)
5844         {
5845           /* It's quite strange to require more memory without
5846              consuming any bytes.  Perhaps CCL program bug.  */
5847           break;
5848         }
5849       if (first)
5850         {
5851           /* We have just done the first batch of conversion which was
5852              stopped because of insufficient gap.  Let's reconsider the
5853              required gap size (i.e. SRT - DST) now.
5854
5855              We have converted ORIG bytes (== coding->consumed) into
5856              NEW bytes (coding->produced).  To convert the remaining
5857              LEN bytes, we may need REQUIRE bytes of gap, where:
5858                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
5859                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
5860              Here, we are sure that NEW >= ORIG.  */
5861           float ratio;
5862
5863           if (coding->produced <= coding->consumed)
5864             {
5865               /* This happens because of CCL-based coding system with
5866                  eol-type CRLF.  */
5867               require = 0;
5868             }
5869           else
5870             {
5871               ratio = (coding->produced - coding->consumed) / coding->consumed;
5872               require = len_byte * ratio;
5873             }
5874           first = 0;
5875         }
5876       if ((src - dst) < (require + 2000))
5877         {
5878           /* See the comment above the previous call of make_gap.  */
5879           int add = len_byte + inserted_byte;
5880
5881           GAP_SIZE -= add;
5882           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5883           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5884           make_gap (require + 2000);
5885           GAP_SIZE += add;
5886           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5887           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5888         }
5889     }
5890   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
5891
5892   if (encodep && coding->dst_multibyte)
5893     {
5894       /* The output is unibyte.  We must convert 8-bit characters to
5895          multibyte form.  */
5896       if (inserted_byte * 2 > GAP_SIZE)
5897         {
5898           GAP_SIZE -= inserted_byte;
5899           ZV += inserted_byte; Z += inserted_byte;
5900           ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
5901           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5902           make_gap (inserted_byte - GAP_SIZE);
5903           GAP_SIZE += inserted_byte;
5904           ZV -= inserted_byte; Z -= inserted_byte;
5905           ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte;
5906           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5907         }
5908       inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
5909     }
5910
5911   /* If we shrank the conversion area, adjust it now.  */
5912   if (total_skip > 0)
5913     {
5914       if (tail_skip > 0)
5915         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
5916       inserted += total_skip; inserted_byte += total_skip;
5917       GAP_SIZE += total_skip;
5918       GPT -= head_skip; GPT_BYTE -= head_skip;
5919       ZV -= total_skip; ZV_BYTE -= total_skip;
5920       Z -= total_skip; Z_BYTE -= total_skip;
5921       from -= head_skip; from_byte -= head_skip;
5922       to += tail_skip; to_byte += tail_skip;
5923     }
5924
5925   prev_Z = Z;
5926   if (! EQ (current_buffer->undo_list, Qt))
5927     adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
5928   else
5929     adjust_after_replace_noundo (from, from_byte, nchars_del, nbytes_del,
5930                                  inserted, inserted_byte);
5931   inserted = Z - prev_Z;
5932
5933   if (!encodep && coding->cmp_data && coding->cmp_data->used)
5934     coding_restore_composition (coding, Fcurrent_buffer ());
5935   coding_free_composition_data (coding);
5936
5937   if (! inhibit_pre_post_conversion
5938       && ! encodep && ! NILP (coding->post_read_conversion))
5939     {
5940       Lisp_Object val;
5941       Lisp_Object saved_coding_system;
5942
5943       if (from != PT)
5944         TEMP_SET_PT_BOTH (from, from_byte);
5945       prev_Z = Z;
5946       record_unwind_protect (code_convert_region_unwind,
5947                              Vlast_coding_system_used);
5948       saved_coding_system = Vlast_coding_system_used;
5949       Vlast_coding_system_used = coding->symbol;
5950       /* We should not call any more pre-write/post-read-conversion
5951          functions while this post-read-conversion is running.  */
5952       inhibit_pre_post_conversion = 1;
5953       val = call1 (coding->post_read_conversion, make_number (inserted));
5954       inhibit_pre_post_conversion = 0;
5955       coding->symbol = Vlast_coding_system_used;
5956       Vlast_coding_system_used = saved_coding_system;
5957       /* Discard the unwind protect.  */
5958       specpdl_ptr--;
5959       CHECK_NUMBER (val);
5960       inserted += Z - prev_Z;
5961     }
5962
5963   if (orig_point >= from)
5964     {
5965       if (orig_point >= from + orig_len)
5966         orig_point += inserted - orig_len;
5967       else
5968         orig_point = from;
5969       TEMP_SET_PT (orig_point);
5970     }
5971
5972   if (replace)
5973     {
5974       signal_after_change (from, to - from, inserted);
5975       update_compositions (from, from + inserted, CHECK_BORDER);
5976     }
5977
5978   {
5979     coding->consumed = to_byte - from_byte;
5980     coding->consumed_char = to - from;
5981     coding->produced = inserted_byte;
5982     coding->produced_char = inserted;
5983   }
5984
5985   return 0;
5986 }
5987
5988 Lisp_Object
5989 run_pre_post_conversion_on_str (str, coding, encodep)
5990      Lisp_Object str;
5991      struct coding_system *coding;
5992      int encodep;
5993 {
5994   int count = SPECPDL_INDEX ();
5995   struct gcpro gcpro1, gcpro2;
5996   int multibyte = STRING_MULTIBYTE (str);
5997   Lisp_Object buffer;
5998   struct buffer *buf;
5999   Lisp_Object old_deactivate_mark;
6000
6001   record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
6002   record_unwind_protect (code_convert_region_unwind,
6003                          Vlast_coding_system_used);
6004   /* It is not crucial to specbind this.  */
6005   old_deactivate_mark = Vdeactivate_mark;
6006   GCPRO2 (str, old_deactivate_mark);
6007
6008   buffer = Fget_buffer_create (build_string (" *code-converting-work*"));
6009   buf = XBUFFER (buffer);
6010
6011   delete_all_overlays (buf);
6012   buf->directory = current_buffer->directory;
6013   buf->read_only = Qnil;
6014   buf->filename = Qnil;
6015   buf->undo_list = Qt;
6016   eassert (buf->overlays_before == NULL);
6017   eassert (buf->overlays_after == NULL);
6018
6019   set_buffer_internal (buf);
6020   /* We must insert the contents of STR as is without
6021      unibyte<->multibyte conversion.  For that, we adjust the
6022      multibyteness of the working buffer to that of STR.  */
6023   Ferase_buffer ();
6024   buf->enable_multibyte_characters = multibyte ? Qt : Qnil;
6025
6026   insert_from_string (str, 0, 0,
6027                       SCHARS (str), SBYTES (str), 0);
6028   UNGCPRO;
6029   inhibit_pre_post_conversion = 1;
6030   if (encodep)
6031     call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
6032   else
6033     {
6034       Vlast_coding_system_used = coding->symbol;
6035       TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
6036       call1 (coding->post_read_conversion, make_number (Z - BEG));
6037       coding->symbol = Vlast_coding_system_used;
6038     }
6039   inhibit_pre_post_conversion = 0;
6040   Vdeactivate_mark = old_deactivate_mark;
6041   str = make_buffer_string (BEG, Z, 1);
6042   return unbind_to (count, str);
6043 }
6044
6045 Lisp_Object
6046 decode_coding_string (str, coding, nocopy)
6047      Lisp_Object str;
6048      struct coding_system *coding;
6049      int nocopy;
6050 {
6051   int len;
6052   struct conversion_buffer buf;
6053   int from, to_byte;
6054   Lisp_Object saved_coding_symbol;
6055   int result;
6056   int require_decoding;
6057   int shrinked_bytes = 0;
6058   Lisp_Object newstr;
6059   int consumed, consumed_char, produced, produced_char;
6060
6061   from = 0;
6062   to_byte = SBYTES (str);
6063
6064   saved_coding_symbol = coding->symbol;
6065   coding->src_multibyte = STRING_MULTIBYTE (str);
6066   coding->dst_multibyte = 1;
6067   if (CODING_REQUIRE_DETECTION (coding))
6068     {
6069       /* See the comments in code_convert_region.  */
6070       if (coding->type == coding_type_undecided)
6071         {
6072           detect_coding (coding, SDATA (str), to_byte);
6073           if (coding->type == coding_type_undecided)
6074             {
6075               coding->type = coding_type_emacs_mule;
6076               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
6077               /* As emacs-mule decoder will handle composition, we
6078                  need this setting to allocate coding->cmp_data
6079                  later.  */
6080               coding->composing = COMPOSITION_NO;
6081             }
6082         }
6083       if (coding->eol_type == CODING_EOL_UNDECIDED
6084           && coding->type != coding_type_ccl)
6085         {
6086           saved_coding_symbol = coding->symbol;
6087           detect_eol (coding, SDATA (str), to_byte);
6088           if (coding->eol_type == CODING_EOL_UNDECIDED)
6089             coding->eol_type = CODING_EOL_LF;
6090           /* We had better recover the original eol format if we
6091              encounter an inconsistent eol format while decoding.  */
6092           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
6093         }
6094     }
6095
6096   if (coding->type == coding_type_no_conversion
6097       || coding->type == coding_type_raw_text)
6098     coding->dst_multibyte = 0;
6099
6100   require_decoding = CODING_REQUIRE_DECODING (coding);
6101
6102   if (STRING_MULTIBYTE (str))
6103     {
6104       /* Decoding routines expect the source text to be unibyte.  */
6105       str = Fstring_as_unibyte (str);
6106       to_byte = SBYTES (str);
6107       nocopy = 1;
6108       coding->src_multibyte = 0;
6109     }
6110
6111   /* Try to skip the heading and tailing ASCIIs.  */
6112   if (require_decoding && coding->type != coding_type_ccl)
6113     {
6114       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
6115                                 0);
6116       if (from == to_byte)
6117         require_decoding = 0;
6118       shrinked_bytes = from + (SBYTES (str) - to_byte);
6119     }
6120
6121   if (!require_decoding
6122       && !(SYMBOLP (coding->post_read_conversion)
6123            && !NILP (Ffboundp (coding->post_read_conversion))))
6124     {
6125       coding->consumed = SBYTES (str);
6126       coding->consumed_char = SCHARS (str);
6127       if (coding->dst_multibyte)
6128         {
6129           str = Fstring_as_multibyte (str);
6130           nocopy = 1;
6131         }
6132       coding->produced = SBYTES (str);
6133       coding->produced_char = SCHARS (str);
6134       return (nocopy ? str : Fcopy_sequence (str));
6135     }
6136
6137   if (coding->composing != COMPOSITION_DISABLED)
6138     coding_allocate_composition_data (coding, from);
6139   len = decoding_buffer_size (coding, to_byte - from);
6140   allocate_conversion_buffer (buf, len);
6141
6142   consumed = consumed_char = produced = produced_char = 0;
6143   while (1)
6144     {
6145       result = decode_coding (coding, SDATA (str) + from + consumed,
6146                               buf.data + produced, to_byte - from - consumed,
6147                               buf.size - produced);
6148       consumed += coding->consumed;
6149       consumed_char += coding->consumed_char;
6150       produced += coding->produced;
6151       produced_char += coding->produced_char;
6152       if (result == CODING_FINISH_NORMAL
6153           || (result == CODING_FINISH_INSUFFICIENT_SRC
6154               && coding->consumed == 0))
6155         break;
6156       if (result == CODING_FINISH_INSUFFICIENT_CMP)
6157         coding_allocate_composition_data (coding, from + produced_char);
6158       else if (result == CODING_FINISH_INSUFFICIENT_DST)
6159         extend_conversion_buffer (&buf);
6160       else if (result == CODING_FINISH_INCONSISTENT_EOL)
6161         {
6162           Lisp_Object eol_type;
6163
6164           /* Recover the original EOL format.  */
6165           if (coding->eol_type == CODING_EOL_CR)
6166             {
6167               unsigned char *p;
6168               for (p = buf.data; p < buf.data + produced; p++)
6169                 if (*p == '\n') *p = '\r';
6170             }
6171           else if (coding->eol_type == CODING_EOL_CRLF)
6172             {
6173               int num_eol = 0;
6174               unsigned char *p0, *p1;
6175               for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++)
6176                 if (*p0 == '\n') num_eol++;
6177               if (produced + num_eol >= buf.size)
6178                 extend_conversion_buffer (&buf);
6179               for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;)
6180                 {
6181                   *--p1 = *--p0;
6182                   if (*p0 == '\n') *--p1 = '\r';
6183                 }
6184               produced += num_eol;
6185               produced_char += num_eol;
6186             }
6187           /* Suppress eol-format conversion in the further conversion.  */
6188           coding->eol_type = CODING_EOL_LF;
6189
6190           /* Set the coding system symbol to that for Unix-like EOL.  */
6191           eol_type = Fget (saved_coding_symbol, Qeol_type);
6192           if (VECTORP (eol_type)
6193               && XVECTOR (eol_type)->size == 3
6194               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
6195             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
6196           else
6197             coding->symbol = saved_coding_symbol;
6198
6199
6200         }
6201     }
6202
6203   coding->consumed = consumed;
6204   coding->consumed_char = consumed_char;
6205   coding->produced = produced;
6206   coding->produced_char = produced_char;
6207
6208   if (coding->dst_multibyte)
6209     newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes,
6210                                            produced + shrinked_bytes);
6211   else
6212     newstr = make_uninit_string (produced + shrinked_bytes);
6213   if (from > 0)
6214     STRING_COPYIN (newstr, 0, SDATA (str), from);
6215   STRING_COPYIN (newstr, from, buf.data, produced);
6216   if (shrinked_bytes > from)
6217     STRING_COPYIN (newstr, from + produced,
6218                    SDATA (str) + to_byte,
6219                    shrinked_bytes - from);
6220   free_conversion_buffer (&buf);
6221
6222   if (coding->cmp_data && coding->cmp_data->used)
6223     coding_restore_composition (coding, newstr);
6224   coding_free_composition_data (coding);
6225
6226   if (SYMBOLP (coding->post_read_conversion)
6227       && !NILP (Ffboundp (coding->post_read_conversion)))
6228     newstr = run_pre_post_conversion_on_str (newstr, coding, 0);
6229
6230   return newstr;
6231 }
6232
6233 Lisp_Object
6234 encode_coding_string (str, coding, nocopy)
6235      Lisp_Object str;
6236      struct coding_system *coding;
6237      int nocopy;
6238 {
6239   int len;
6240   struct conversion_buffer buf;
6241   int from, to, to_byte;
6242   int result;
6243   int shrinked_bytes = 0;
6244   Lisp_Object newstr;
6245   int consumed, consumed_char, produced, produced_char;
6246
6247   if (SYMBOLP (coding->pre_write_conversion)
6248       && !NILP (Ffboundp (coding->pre_write_conversion)))
6249     str = run_pre_post_conversion_on_str (str, coding, 1);
6250
6251   from = 0;
6252   to = SCHARS (str);
6253   to_byte = SBYTES (str);
6254
6255   /* Encoding routines determine the multibyteness of the source text
6256      by coding->src_multibyte.  */
6257   coding->src_multibyte = STRING_MULTIBYTE (str);
6258   coding->dst_multibyte = 0;
6259   if (! CODING_REQUIRE_ENCODING (coding))
6260     {
6261       coding->consumed = SBYTES (str);
6262       coding->consumed_char = SCHARS (str);
6263       if (STRING_MULTIBYTE (str))
6264         {
6265           str = Fstring_as_unibyte (str);
6266           nocopy = 1;
6267         }
6268       coding->produced = SBYTES (str);
6269       coding->produced_char = SCHARS (str);
6270       return (nocopy ? str : Fcopy_sequence (str));
6271     }
6272
6273   if (coding->composing != COMPOSITION_DISABLED)
6274     coding_save_composition (coding, from, to, str);
6275
6276   /* Try to skip the heading and tailing ASCIIs.  */
6277   if (coding->type != coding_type_ccl)
6278     {
6279       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
6280                                 1);
6281       if (from == to_byte)
6282         return (nocopy ? str : Fcopy_sequence (str));
6283       shrinked_bytes = from + (SBYTES (str) - to_byte);
6284     }
6285
6286   len = encoding_buffer_size (coding, to_byte - from);
6287   allocate_conversion_buffer (buf, len);
6288
6289   consumed = consumed_char = produced = produced_char = 0;
6290   while (1)
6291     {
6292       result = encode_coding (coding, SDATA (str) + from + consumed,
6293                               buf.data + produced, to_byte - from - consumed,
6294                               buf.size - produced);
6295       consumed += coding->consumed;
6296       consumed_char += coding->consumed_char;
6297       produced += coding->produced;
6298       produced_char += coding->produced_char;
6299       if (result == CODING_FINISH_NORMAL
6300           || (result == CODING_FINISH_INSUFFICIENT_SRC
6301               && coding->consumed == 0))
6302         break;
6303       /* Now result should be CODING_FINISH_INSUFFICIENT_DST.  */
6304       extend_conversion_buffer (&buf);
6305     }
6306
6307   coding->consumed = consumed;
6308   coding->consumed_char = consumed_char;
6309   coding->produced = produced;
6310   coding->produced_char = produced_char;
6311
6312   newstr = make_uninit_string (produced + shrinked_bytes);
6313   if (from > 0)
6314     STRING_COPYIN (newstr, 0, SDATA (str), from);
6315   STRING_COPYIN (newstr, from, buf.data, produced);
6316   if (shrinked_bytes > from)
6317     STRING_COPYIN (newstr, from + produced,
6318                    SDATA (str) + to_byte,
6319                    shrinked_bytes - from);
6320
6321   free_conversion_buffer (&buf);
6322   coding_free_composition_data (coding);
6323
6324   return newstr;
6325 }
6326
6327 \f
6328 #ifdef emacs
6329 /*** 8. Emacs Lisp library functions ***/
6330
6331 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6332        doc: /* Return t if OBJECT is nil or a coding-system.
6333 See the documentation of `make-coding-system' for information
6334 about coding-system objects.  */)
6335      (obj)
6336      Lisp_Object obj;
6337 {
6338   if (NILP (obj))
6339     return Qt;
6340   if (!SYMBOLP (obj))
6341     return Qnil;
6342   if (! NILP (Fget (obj, Qcoding_system_define_form)))
6343     return Qt;
6344   /* Get coding-spec vector for OBJ.  */
6345   obj = Fget (obj, Qcoding_system);
6346   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
6347           ? Qt : Qnil);
6348 }
6349
6350 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6351        Sread_non_nil_coding_system, 1, 1, 0,
6352        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
6353      (prompt)
6354      Lisp_Object prompt;
6355 {
6356   Lisp_Object val;
6357   do
6358     {
6359       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6360                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6361     }
6362   while (SCHARS (val) == 0);
6363   return (Fintern (val, Qnil));
6364 }
6365
6366 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6367        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6368 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.  */)
6369      (prompt, default_coding_system)
6370      Lisp_Object prompt, default_coding_system;
6371 {
6372   Lisp_Object val;
6373   if (SYMBOLP (default_coding_system))
6374     default_coding_system = SYMBOL_NAME (default_coding_system);
6375   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6376                           Qt, Qnil, Qcoding_system_history,
6377                           default_coding_system, Qnil);
6378   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
6379 }
6380
6381 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6382        1, 1, 0,
6383        doc: /* Check validity of CODING-SYSTEM.
6384 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
6385 It is valid if it is a symbol with a non-nil `coding-system' property.
6386 The value of property should be a vector of length 5.  */)
6387      (coding_system)
6388      Lisp_Object coding_system;
6389 {
6390   Lisp_Object define_form;
6391
6392   define_form = Fget (coding_system, Qcoding_system_define_form);
6393   if (! NILP (define_form))
6394     {
6395       Fput (coding_system, Qcoding_system_define_form, Qnil);
6396       safe_eval (define_form);
6397     }
6398   if (!NILP (Fcoding_system_p (coding_system)))
6399     return coding_system;
6400   while (1)
6401     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6402 }
6403 \f
6404 Lisp_Object
6405 detect_coding_system (src, src_bytes, highest, multibytep)
6406      const unsigned char *src;
6407      int src_bytes, highest;
6408      int multibytep;
6409 {
6410   int coding_mask, eol_type;
6411   Lisp_Object val, tmp;
6412   int dummy;
6413
6414   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy, multibytep);
6415   eol_type  = detect_eol_type (src, src_bytes, &dummy);
6416   if (eol_type == CODING_EOL_INCONSISTENT)
6417     eol_type = CODING_EOL_UNDECIDED;
6418
6419   if (!coding_mask)
6420     {
6421       val = Qundecided;
6422       if (eol_type != CODING_EOL_UNDECIDED)
6423         {
6424           Lisp_Object val2;
6425           val2 = Fget (Qundecided, Qeol_type);
6426           if (VECTORP (val2))
6427             val = XVECTOR (val2)->contents[eol_type];
6428         }
6429       return (highest ? val : Fcons (val, Qnil));
6430     }
6431
6432   /* At first, gather possible coding systems in VAL.  */
6433   val = Qnil;
6434   for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp))
6435     {
6436       Lisp_Object category_val, category_index;
6437
6438       category_index = Fget (XCAR (tmp), Qcoding_category_index);
6439       category_val = Fsymbol_value (XCAR (tmp));
6440       if (!NILP (category_val)
6441           && NATNUMP (category_index)
6442           && (coding_mask & (1 << XFASTINT (category_index))))
6443         {
6444           val = Fcons (category_val, val);
6445           if (highest)
6446             break;
6447         }
6448     }
6449   if (!highest)
6450     val = Fnreverse (val);
6451
6452   /* Then, replace the elements with subsidiary coding systems.  */
6453   for (tmp = val; CONSP (tmp); tmp = XCDR (tmp))
6454     {
6455       if (eol_type != CODING_EOL_UNDECIDED
6456           && eol_type != CODING_EOL_INCONSISTENT)
6457         {
6458           Lisp_Object eol;
6459           eol = Fget (XCAR (tmp), Qeol_type);
6460           if (VECTORP (eol))
6461             XSETCAR (tmp, XVECTOR (eol)->contents[eol_type]);
6462         }
6463     }
6464   return (highest ? XCAR (val) : val);
6465 }
6466
6467 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6468        2, 3, 0,
6469        doc: /* Detect how the byte sequence in the region is encoded.
6470 Return a list of possible coding systems used on decoding a byte
6471 sequence containing the bytes in the region between START and END when
6472 the coding system `undecided' is specified.  The list is ordered by
6473 priority decided in the current language environment.
6474
6475 If only ASCII characters are found, it returns a list of single element
6476 `undecided' or its subsidiary coding system according to a detected
6477 end-of-line format.
6478
6479 If optional argument HIGHEST is non-nil, return the coding system of
6480 highest priority.  */)
6481      (start, end, highest)
6482      Lisp_Object start, end, highest;
6483 {
6484   int from, to;
6485   int from_byte, to_byte;
6486   int include_anchor_byte = 0;
6487
6488   CHECK_NUMBER_COERCE_MARKER (start);
6489   CHECK_NUMBER_COERCE_MARKER (end);
6490
6491   validate_region (&start, &end);
6492   from = XINT (start), to = XINT (end);
6493   from_byte = CHAR_TO_BYTE (from);
6494   to_byte = CHAR_TO_BYTE (to);
6495
6496   if (from < GPT && to >= GPT)
6497     move_gap_both (to, to_byte);
6498   /* If we an anchor byte `\0' follows the region, we include it in
6499      the detecting source.  Then code detectors can handle the tailing
6500      byte sequence more accurately.
6501
6502      Fix me: This is not a perfect solution.  It is better that we
6503      add one more argument, say LAST_BLOCK, to all detect_coding_XXX.
6504   */
6505   if (to == Z || (to == GPT && GAP_SIZE > 0))
6506     include_anchor_byte = 1;
6507   return detect_coding_system (BYTE_POS_ADDR (from_byte),
6508                                to_byte - from_byte + include_anchor_byte,
6509                                !NILP (highest),
6510                                !NILP (current_buffer
6511                                       ->enable_multibyte_characters));
6512 }
6513
6514 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6515        1, 2, 0,
6516        doc: /* Detect how the byte sequence in STRING is encoded.
6517 Return a list of possible coding systems used on decoding a byte
6518 sequence containing the bytes in STRING when the coding system
6519 `undecided' is specified.  The list is ordered by priority decided in
6520 the current language environment.
6521
6522 If only ASCII characters are found, it returns a list of single element
6523 `undecided' or its subsidiary coding system according to a detected
6524 end-of-line format.
6525
6526 If optional argument HIGHEST is non-nil, return the coding system of
6527 highest priority.  */)
6528      (string, highest)
6529      Lisp_Object string, highest;
6530 {
6531   CHECK_STRING (string);
6532
6533   return detect_coding_system (SDATA (string),
6534                                /* "+ 1" is to include the anchor byte
6535                                   `\0'.  With this, code detectors can
6536                                   handle the tailing bytes more
6537                                   accurately.  */
6538                                SBYTES (string) + 1,
6539                                !NILP (highest),
6540                                STRING_MULTIBYTE (string));
6541 }
6542
6543 /*  Subroutine for Fsafe_coding_systems_region_internal.
6544
6545     Return a list of coding systems that safely encode the multibyte
6546     text between P and PEND.  SAFE_CODINGS, if non-nil, is an alist of
6547     possible coding systems.  If it is nil, it means that we have not
6548     yet found any coding systems.
6549
6550     WORK_TABLE is a copy of the char-table Vchar_coding_system_table.  An
6551     element of WORK_TABLE is set to t once the element is looked up.
6552
6553     If a non-ASCII single byte char is found, set
6554     *single_byte_char_found to 1.  */
6555
6556 static Lisp_Object
6557 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
6558      unsigned char *p, *pend;
6559      Lisp_Object safe_codings, work_table;
6560      int *single_byte_char_found;
6561 {
6562   int c, len;
6563   Lisp_Object val, ch;
6564   Lisp_Object prev, tail;
6565
6566   while (p < pend)
6567     {
6568       c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
6569       p += len;
6570       if (ASCII_BYTE_P (c))
6571         /* We can ignore ASCII characters here.  */
6572         continue;
6573       if (SINGLE_BYTE_CHAR_P (c))
6574         *single_byte_char_found = 1;
6575       if (NILP (safe_codings))
6576         /* Already all coding systems are excluded.  But, we can't
6577            terminate the loop here because non-ASCII single-byte char
6578            must be found.  */
6579         continue;
6580       /* Check the safe coding systems for C.  */
6581       ch = make_number (c);
6582       val = Faref (work_table, ch);
6583       if (EQ (val, Qt))
6584         /* This element was already checked.  Ignore it.  */
6585         continue;
6586       /* Remember that we checked this element.  */
6587       Faset (work_table, ch, Qt);
6588
6589       for (prev = tail = safe_codings; CONSP (tail); tail = XCDR (tail))
6590         {
6591           Lisp_Object elt, translation_table, hash_table, accept_latin_extra;
6592           int encodable;
6593
6594           elt = XCAR (tail);
6595           if (CONSP (XCDR (elt)))
6596             {
6597               /* This entry has this format now:
6598                  ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
6599                           ACCEPT-LATIN-EXTRA ) */
6600               val = XCDR (elt);
6601               encodable = ! NILP (Faref (XCAR (val), ch));
6602               if (! encodable)
6603                 {
6604                   val = XCDR (val);
6605                   translation_table = XCAR (val);
6606                   hash_table = XCAR (XCDR (val));
6607                   accept_latin_extra = XCAR (XCDR (XCDR (val)));
6608                 }
6609             }
6610           else
6611             {
6612               /* This entry has this format now: ( CODING . SAFE-CHARS) */
6613               encodable = ! NILP (Faref (XCDR (elt), ch));
6614               if (! encodable)
6615                 {
6616                   /* Transform the format to:
6617                      ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
6618                        ACCEPT-LATIN-EXTRA )  */
6619                   val = Fget (XCAR (elt), Qcoding_system);
6620                   translation_table
6621                     = Fplist_get (AREF (val, 3),
6622                                   Qtranslation_table_for_encode);
6623                   if (SYMBOLP (translation_table))
6624                     translation_table = Fget (translation_table,
6625                                               Qtranslation_table);
6626                   hash_table
6627                     = (CHAR_TABLE_P (translation_table)
6628                        ? XCHAR_TABLE (translation_table)->extras[1]
6629                        : Qnil);
6630                   accept_latin_extra
6631                     = ((EQ (AREF (val, 0), make_number (2))
6632                         && VECTORP (AREF (val, 4)))
6633                        ? AREF (AREF (val, 4), 16)
6634                        : Qnil);
6635                   XSETCAR (tail, list5 (XCAR (elt), XCDR (elt),
6636                                         translation_table, hash_table,
6637                                         accept_latin_extra));
6638                 }
6639             }
6640
6641           if (! encodable
6642               && ((CHAR_TABLE_P (translation_table)
6643                    && ! NILP (Faref (translation_table, ch)))
6644                   || (HASH_TABLE_P (hash_table)
6645                       && ! NILP (Fgethash (ch, hash_table, Qnil)))
6646                   || (SINGLE_BYTE_CHAR_P (c)
6647                       && ! NILP (accept_latin_extra)
6648                       && VECTORP (Vlatin_extra_code_table)
6649                       && ! NILP (AREF (Vlatin_extra_code_table, c)))))
6650             encodable = 1;
6651           if (encodable)
6652             prev = tail;
6653           else
6654             {
6655               /* Exclude this coding system from SAFE_CODINGS.  */
6656               if (EQ (tail, safe_codings))
6657                 safe_codings = XCDR (safe_codings);
6658               else
6659                 XSETCDR (prev, XCDR (tail));
6660             }
6661         }
6662     }
6663   return safe_codings;
6664 }
6665
6666 DEFUN ("find-coding-systems-region-internal",
6667        Ffind_coding_systems_region_internal,
6668        Sfind_coding_systems_region_internal, 2, 2, 0,
6669        doc: /* Internal use only.  */)
6670      (start, end)
6671      Lisp_Object start, end;
6672 {
6673   Lisp_Object work_table, safe_codings;
6674   int non_ascii_p = 0;
6675   int single_byte_char_found = 0;
6676   const unsigned char *p1, *p1end, *p2, *p2end, *p;
6677
6678   if (STRINGP (start))
6679     {
6680       if (!STRING_MULTIBYTE (start))
6681         return Qt;
6682       p1 = SDATA (start), p1end = p1 + SBYTES (start);
6683       p2 = p2end = p1end;
6684       if (SCHARS (start) != SBYTES (start))
6685         non_ascii_p = 1;
6686     }
6687   else
6688     {
6689       int from, to, stop;
6690
6691       CHECK_NUMBER_COERCE_MARKER (start);
6692       CHECK_NUMBER_COERCE_MARKER (end);
6693       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6694         args_out_of_range (start, end);
6695       if (NILP (current_buffer->enable_multibyte_characters))
6696         return Qt;
6697       from = CHAR_TO_BYTE (XINT (start));
6698       to = CHAR_TO_BYTE (XINT (end));
6699       stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
6700       p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
6701       if (stop == to)
6702         p2 = p2end = p1end;
6703       else
6704         p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
6705       if (XINT (end) - XINT (start) != to - from)
6706         non_ascii_p = 1;
6707     }
6708
6709   if (!non_ascii_p)
6710     {
6711       /* We are sure that the text contains no multibyte character.
6712          Check if it contains eight-bit-graphic.  */
6713       p = p1;
6714       for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
6715       if (p == p1end)
6716         {
6717           for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
6718           if (p == p2end)
6719             return Qt;
6720         }
6721     }
6722
6723   /* The text contains non-ASCII characters.  */
6724
6725   work_table = Fmake_char_table (Qchar_coding_system, Qnil);
6726   safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars));
6727
6728   safe_codings = find_safe_codings (p1, p1end, safe_codings, work_table,
6729                                     &single_byte_char_found);
6730   if (p2 < p2end)
6731     safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
6732                                       &single_byte_char_found);
6733   if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars)))
6734     safe_codings = Qt;
6735   else
6736     {
6737       /* Turn safe_codings to a list of coding systems... */
6738       Lisp_Object val;
6739
6740       if (single_byte_char_found)
6741         /* ... and append these for eight-bit chars.  */
6742         val = Fcons (Qraw_text,
6743                      Fcons (Qemacs_mule, Fcons (Qno_conversion, Qnil)));
6744       else
6745         /* ... and append generic coding systems.  */
6746         val = Fcopy_sequence (XCAR (Vcoding_system_safe_chars));
6747
6748       for (; CONSP (safe_codings); safe_codings = XCDR (safe_codings))
6749         val = Fcons (XCAR (XCAR (safe_codings)), val);
6750       safe_codings = val;
6751     }
6752
6753   return safe_codings;
6754 }
6755
6756
6757 /* Search from position POS for such characters that are unencodable
6758    accoding to SAFE_CHARS, and return a list of their positions.  P
6759    points where in the memory the character at POS exists.  Limit the
6760    search at PEND or when Nth unencodable characters are found.
6761
6762    If SAFE_CHARS is a char table, an element for an unencodable
6763    character is nil.
6764
6765    If SAFE_CHARS is nil, all non-ASCII characters are unencodable.
6766
6767    Otherwise, SAFE_CHARS is t, and only eight-bit-contrl and
6768    eight-bit-graphic characters are unencodable.  */
6769
6770 static Lisp_Object
6771 unencodable_char_position (safe_chars, pos, p, pend, n)
6772      Lisp_Object safe_chars;
6773      int pos;
6774      unsigned char *p, *pend;
6775      int n;
6776 {
6777   Lisp_Object pos_list;
6778
6779   pos_list = Qnil;
6780   while (p < pend)
6781     {
6782       int len;
6783       int c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, len);
6784
6785       if (c >= 128
6786           && (CHAR_TABLE_P (safe_chars)
6787               ? NILP (CHAR_TABLE_REF (safe_chars, c))
6788               : (NILP (safe_chars) || c < 256)))
6789         {
6790           pos_list = Fcons (make_number (pos), pos_list);
6791           if (--n <= 0)
6792             break;
6793         }
6794       pos++;
6795       p += len;
6796     }
6797   return Fnreverse (pos_list);
6798 }
6799
6800
6801 DEFUN ("unencodable-char-position", Funencodable_char_position,
6802        Sunencodable_char_position, 3, 5, 0,
6803        doc: /*
6804 Return position of first un-encodable character in a region.
6805 START and END specfiy the region and CODING-SYSTEM specifies the
6806 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
6807
6808 If optional 4th argument COUNT is non-nil, it specifies at most how
6809 many un-encodable characters to search.  In this case, the value is a
6810 list of positions.
6811
6812 If optional 5th argument STRING is non-nil, it is a string to search
6813 for un-encodable characters.  In that case, START and END are indexes
6814 to the string.  */)
6815      (start, end, coding_system, count, string)
6816      Lisp_Object start, end, coding_system, count, string;
6817 {
6818   int n;
6819   Lisp_Object safe_chars;
6820   struct coding_system coding;
6821   Lisp_Object positions;
6822   int from, to;
6823   unsigned char *p, *pend;
6824
6825   if (NILP (string))
6826     {
6827       validate_region (&start, &end);
6828       from = XINT (start);
6829       to = XINT (end);
6830       if (NILP (current_buffer->enable_multibyte_characters))
6831         return Qnil;
6832       p = CHAR_POS_ADDR (from);
6833       if (to == GPT)
6834         pend = GPT_ADDR;
6835       else
6836         pend = CHAR_POS_ADDR (to);
6837     }
6838   else
6839     {
6840       CHECK_STRING (string);
6841       CHECK_NATNUM (start);
6842       CHECK_NATNUM (end);
6843       from = XINT (start);
6844       to = XINT (end);
6845       if (from > to
6846           || to > SCHARS (string))
6847         args_out_of_range_3 (string, start, end);
6848       if (! STRING_MULTIBYTE (string))
6849         return Qnil;
6850       p = SDATA (string) + string_char_to_byte (string, from);
6851       pend = SDATA (string) + string_char_to_byte (string, to);
6852     }
6853
6854   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
6855
6856   if (NILP (count))
6857     n = 1;
6858   else
6859     {
6860       CHECK_NATNUM (count);
6861       n = XINT (count);
6862     }
6863
6864   if (coding.type == coding_type_no_conversion
6865       || coding.type == coding_type_raw_text)
6866     return Qnil;
6867
6868   if (coding.type == coding_type_undecided)
6869     safe_chars = Qnil;
6870   else
6871     safe_chars = coding_safe_chars (coding_system);
6872
6873   if (STRINGP (string)
6874       || from >= GPT || to <= GPT)
6875     positions = unencodable_char_position (safe_chars, from, p, pend, n);
6876   else
6877     {
6878       Lisp_Object args[2];
6879
6880       args[0] = unencodable_char_position (safe_chars, from, p, GPT_ADDR, n);
6881       n -= XINT (Flength (args[0]));
6882       if (n <= 0)
6883         positions = args[0];
6884       else
6885         {
6886           args[1] = unencodable_char_position (safe_chars, GPT, GAP_END_ADDR,
6887                                                pend, n);
6888           positions = Fappend (2, args);
6889         }
6890     }
6891
6892   return  (NILP (count) ? Fcar (positions) : positions);
6893 }
6894
6895
6896 Lisp_Object
6897 code_convert_region1 (start, end, coding_system, encodep)
6898      Lisp_Object start, end, coding_system;
6899      int encodep;
6900 {
6901   struct coding_system coding;
6902   int from, to;
6903
6904   CHECK_NUMBER_COERCE_MARKER (start);
6905   CHECK_NUMBER_COERCE_MARKER (end);
6906   CHECK_SYMBOL (coding_system);
6907
6908   validate_region (&start, &end);
6909   from = XFASTINT (start);
6910   to = XFASTINT (end);
6911
6912   if (NILP (coding_system))
6913     return make_number (to - from);
6914
6915   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6916     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
6917
6918   coding.mode |= CODING_MODE_LAST_BLOCK;
6919   coding.src_multibyte = coding.dst_multibyte
6920     = !NILP (current_buffer->enable_multibyte_characters);
6921   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
6922                        &coding, encodep, 1);
6923   Vlast_coding_system_used = coding.symbol;
6924   return make_number (coding.produced_char);
6925 }
6926
6927 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
6928        3, 3, "r\nzCoding system: ",
6929        doc: /* Decode the current region from the specified coding system.
6930 When called from a program, takes three arguments:
6931 START, END, and CODING-SYSTEM.  START and END are buffer positions.
6932 This function sets `last-coding-system-used' to the precise coding system
6933 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6934 not fully specified.)
6935 It returns the length of the decoded text.  */)
6936      (start, end, coding_system)
6937      Lisp_Object start, end, coding_system;
6938 {
6939   return code_convert_region1 (start, end, coding_system, 0);
6940 }
6941
6942 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
6943        3, 3, "r\nzCoding system: ",
6944        doc: /* Encode the current region into the specified coding system.
6945 When called from a program, takes three arguments:
6946 START, END, and CODING-SYSTEM.  START and END are buffer positions.
6947 This function sets `last-coding-system-used' to the precise coding system
6948 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6949 not fully specified.)
6950 It returns the length of the encoded text.  */)
6951      (start, end, coding_system)
6952      Lisp_Object start, end, coding_system;
6953 {
6954   return code_convert_region1 (start, end, coding_system, 1);
6955 }
6956
6957 Lisp_Object
6958 code_convert_string1 (string, coding_system, nocopy, encodep)
6959      Lisp_Object string, coding_system, nocopy;
6960      int encodep;
6961 {
6962   struct coding_system coding;
6963
6964   CHECK_STRING (string);
6965   CHECK_SYMBOL (coding_system);
6966
6967   if (NILP (coding_system))
6968     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
6969
6970   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6971     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
6972
6973   coding.mode |= CODING_MODE_LAST_BLOCK;
6974   string = (encodep
6975             ? encode_coding_string (string, &coding, !NILP (nocopy))
6976             : decode_coding_string (string, &coding, !NILP (nocopy)));
6977   Vlast_coding_system_used = coding.symbol;
6978
6979   return string;
6980 }
6981
6982 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
6983        2, 3, 0,
6984        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
6985 Optional arg NOCOPY non-nil means it is OK to return STRING itself
6986 if the decoding operation is trivial.
6987 This function sets `last-coding-system-used' to the precise coding system
6988 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6989 not fully specified.)  */)
6990      (string, coding_system, nocopy)
6991      Lisp_Object string, coding_system, nocopy;
6992 {
6993   return code_convert_string1 (string, coding_system, nocopy, 0);
6994 }
6995
6996 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
6997        2, 3, 0,
6998        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
6999 Optional arg NOCOPY non-nil means it is OK to return STRING itself
7000 if the encoding operation is trivial.
7001 This function sets `last-coding-system-used' to the precise coding system
7002 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7003 not fully specified.)  */)
7004      (string, coding_system, nocopy)
7005      Lisp_Object string, coding_system, nocopy;
7006 {
7007   return code_convert_string1 (string, coding_system, nocopy, 1);
7008 }
7009
7010 /* Encode or decode STRING according to CODING_SYSTEM.
7011    Do not set Vlast_coding_system_used.
7012
7013    This function is called only from macros DECODE_FILE and
7014    ENCODE_FILE, thus we ignore character composition.  */
7015
7016 Lisp_Object
7017 code_convert_string_norecord (string, coding_system, encodep)
7018      Lisp_Object string, coding_system;
7019      int encodep;
7020 {
7021   struct coding_system coding;
7022
7023   CHECK_STRING (string);
7024   CHECK_SYMBOL (coding_system);
7025
7026   if (NILP (coding_system))
7027     return string;
7028
7029   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
7030     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
7031
7032   coding.composing = COMPOSITION_DISABLED;
7033   coding.mode |= CODING_MODE_LAST_BLOCK;
7034   return (encodep
7035           ? encode_coding_string (string, &coding, 1)
7036           : decode_coding_string (string, &coding, 1));
7037 }
7038 \f
7039 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
7040        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7041 Return the corresponding character.  */)
7042      (code)
7043      Lisp_Object code;
7044 {
7045   unsigned char c1, c2, s1, s2;
7046   Lisp_Object val;
7047
7048   CHECK_NUMBER (code);
7049   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
7050   if (s1 == 0)
7051     {
7052       if (s2 < 0x80)
7053         XSETFASTINT (val, s2);
7054       else if (s2 >= 0xA0 || s2 <= 0xDF)
7055         XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0));
7056       else
7057         error ("Invalid Shift JIS code: %x", XFASTINT (code));
7058     }
7059   else
7060     {
7061       if ((s1 < 0x80 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF)
7062           || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
7063         error ("Invalid Shift JIS code: %x", XFASTINT (code));
7064       DECODE_SJIS (s1, s2, c1, c2);
7065       XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2));
7066     }
7067   return val;
7068 }
7069
7070 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
7071        doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7072 Return the corresponding code in SJIS.  */)
7073      (ch)
7074      Lisp_Object ch;
7075 {
7076   int charset, c1, c2, s1, s2;
7077   Lisp_Object val;
7078
7079   CHECK_NUMBER (ch);
7080   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
7081   if (charset == CHARSET_ASCII)
7082     {
7083       val = ch;
7084     }
7085   else if (charset == charset_jisx0208
7086            && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
7087     {
7088       ENCODE_SJIS (c1, c2, s1, s2);
7089       XSETFASTINT (val, (s1 << 8) | s2);
7090     }
7091   else if (charset == charset_katakana_jisx0201
7092            && c1 > 0x20 && c2 < 0xE0)
7093     {
7094       XSETFASTINT (val, c1 | 0x80);
7095     }
7096   else
7097     error ("Can't encode to shift_jis: %d", XFASTINT (ch));
7098   return val;
7099 }
7100
7101 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
7102        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7103 Return the corresponding character.  */)
7104      (code)
7105      Lisp_Object code;
7106 {
7107   int charset;
7108   unsigned char b1, b2, c1, c2;
7109   Lisp_Object val;
7110
7111   CHECK_NUMBER (code);
7112   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
7113   if (b1 == 0)
7114     {
7115       if (b2 >= 0x80)
7116         error ("Invalid BIG5 code: %x", XFASTINT (code));
7117       val = code;
7118     }
7119   else
7120     {
7121       if ((b1 < 0xA1 || b1 > 0xFE)
7122           || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
7123         error ("Invalid BIG5 code: %x", XFASTINT (code));
7124       DECODE_BIG5 (b1, b2, charset, c1, c2);
7125       XSETFASTINT (val, MAKE_CHAR (charset, c1, c2));
7126     }
7127   return val;
7128 }
7129
7130 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
7131        doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7132 Return the corresponding character code in Big5.  */)
7133      (ch)
7134      Lisp_Object ch;
7135 {
7136   int charset, c1, c2, b1, b2;
7137   Lisp_Object val;
7138
7139   CHECK_NUMBER (ch);
7140   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
7141   if (charset == CHARSET_ASCII)
7142     {
7143       val = ch;
7144     }
7145   else if ((charset == charset_big5_1
7146             && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
7147            || (charset == charset_big5_2
7148                && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
7149     {
7150       ENCODE_BIG5 (charset, c1, c2, b1, b2);
7151       XSETFASTINT (val, (b1 << 8) | b2);
7152     }
7153   else
7154     error ("Can't encode to Big5: %d", XFASTINT (ch));
7155   return val;
7156 }
7157 \f
7158 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
7159        Sset_terminal_coding_system_internal, 1, 1, 0,
7160        doc: /* Internal use only.  */)
7161      (coding_system)
7162      Lisp_Object coding_system;
7163 {
7164   CHECK_SYMBOL (coding_system);
7165   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
7166   /* We had better not send unsafe characters to terminal.  */
7167   terminal_coding.mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
7168   /* Character composition should be disabled.  */
7169   terminal_coding.composing = COMPOSITION_DISABLED;
7170   /* Error notification should be suppressed.  */
7171   terminal_coding.suppress_error = 1;
7172   terminal_coding.src_multibyte = 1;
7173   terminal_coding.dst_multibyte = 0;
7174   return Qnil;
7175 }
7176
7177 DEFUN ("set-safe-terminal-coding-system-internal", Fset_safe_terminal_coding_system_internal,
7178        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
7179        doc: /* Internal use only.  */)
7180      (coding_system)
7181      Lisp_Object coding_system;
7182 {
7183   CHECK_SYMBOL (coding_system);
7184   setup_coding_system (Fcheck_coding_system (coding_system),
7185                        &safe_terminal_coding);
7186   /* Character composition should be disabled.  */
7187   safe_terminal_coding.composing = COMPOSITION_DISABLED;
7188   /* Error notification should be suppressed.  */
7189   terminal_coding.suppress_error = 1;
7190   safe_terminal_coding.src_multibyte = 1;
7191   safe_terminal_coding.dst_multibyte = 0;
7192   return Qnil;
7193 }
7194
7195 DEFUN ("terminal-coding-system", Fterminal_coding_system,
7196        Sterminal_coding_system, 0, 0, 0,
7197        doc: /* Return coding system specified for terminal output.  */)
7198      ()
7199 {
7200   return terminal_coding.symbol;
7201 }
7202
7203 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
7204        Sset_keyboard_coding_system_internal, 1, 1, 0,
7205        doc: /* Internal use only.  */)
7206      (coding_system)
7207      Lisp_Object coding_system;
7208 {
7209   CHECK_SYMBOL (coding_system);
7210   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
7211   /* Character composition should be disabled.  */
7212   keyboard_coding.composing = COMPOSITION_DISABLED;
7213   return Qnil;
7214 }
7215
7216 DEFUN ("keyboard-coding-system", Fkeyboard_coding_system,
7217        Skeyboard_coding_system, 0, 0, 0,
7218        doc: /* Return coding system specified for decoding keyboard input.  */)
7219      ()
7220 {
7221   return keyboard_coding.symbol;
7222 }
7223
7224 \f
7225 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7226        Sfind_operation_coding_system,  1, MANY, 0,
7227        doc: /* Choose a coding system for an operation based on the target name.
7228 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7229 DECODING-SYSTEM is the coding system to use for decoding
7230 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7231 for encoding (in case OPERATION does encoding).
7232
7233 The first argument OPERATION specifies an I/O primitive:
7234   For file I/O, `insert-file-contents' or `write-region'.
7235   For process I/O, `call-process', `call-process-region', or `start-process'.
7236   For network I/O, `open-network-stream'.
7237
7238 The remaining arguments should be the same arguments that were passed
7239 to the primitive.  Depending on which primitive, one of those arguments
7240 is selected as the TARGET.  For example, if OPERATION does file I/O,
7241 whichever argument specifies the file name is TARGET.
7242
7243 TARGET has a meaning which depends on OPERATION:
7244   For file I/O, TARGET is a file name.
7245   For process I/O, TARGET is a process name.
7246   For network I/O, TARGET is a service name or a port number
7247
7248 This function looks up what specified for TARGET in,
7249 `file-coding-system-alist', `process-coding-system-alist',
7250 or `network-coding-system-alist' depending on OPERATION.
7251 They may specify a coding system, a cons of coding systems,
7252 or a function symbol to call.
7253 In the last case, we call the function with one argument,
7254 which is a list of all the arguments given to this function.
7255
7256 usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
7257      (nargs, args)
7258      int nargs;
7259      Lisp_Object *args;
7260 {
7261   Lisp_Object operation, target_idx, target, val;
7262   register Lisp_Object chain;
7263
7264   if (nargs < 2)
7265     error ("Too few arguments");
7266   operation = args[0];
7267   if (!SYMBOLP (operation)
7268       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
7269     error ("Invalid first argument");
7270   if (nargs < 1 + XINT (target_idx))
7271     error ("Too few arguments for operation: %s",
7272            SDATA (SYMBOL_NAME (operation)));
7273   /* For write-region, if the 6th argument (i.e. VISIT, the 5th
7274      argument to write-region) is string, it must be treated as a
7275      target file name.  */
7276   if (EQ (operation, Qwrite_region)
7277       && nargs > 5
7278       && STRINGP (args[5]))
7279     target_idx = make_number (4);
7280   target = args[XINT (target_idx) + 1];
7281   if (!(STRINGP (target)
7282         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
7283     error ("Invalid argument %d", XINT (target_idx) + 1);
7284
7285   chain = ((EQ (operation, Qinsert_file_contents)
7286             || EQ (operation, Qwrite_region))
7287            ? Vfile_coding_system_alist
7288            : (EQ (operation, Qopen_network_stream)
7289               ? Vnetwork_coding_system_alist
7290               : Vprocess_coding_system_alist));
7291   if (NILP (chain))
7292     return Qnil;
7293
7294   for (; CONSP (chain); chain = XCDR (chain))
7295     {
7296       Lisp_Object elt;
7297       elt = XCAR (chain);
7298
7299       if (CONSP (elt)
7300           && ((STRINGP (target)
7301                && STRINGP (XCAR (elt))
7302                && fast_string_match (XCAR (elt), target) >= 0)
7303               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
7304         {
7305           val = XCDR (elt);
7306           /* Here, if VAL is both a valid coding system and a valid
7307              function symbol, we return VAL as a coding system.  */
7308           if (CONSP (val))
7309             return val;
7310           if (! SYMBOLP (val))
7311             return Qnil;
7312           if (! NILP (Fcoding_system_p (val)))
7313             return Fcons (val, val);
7314           if (! NILP (Ffboundp (val)))
7315             {
7316               val = call1 (val, Flist (nargs, args));
7317               if (CONSP (val))
7318                 return val;
7319               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7320                 return Fcons (val, val);
7321             }
7322           return Qnil;
7323         }
7324     }
7325   return Qnil;
7326 }
7327
7328 DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
7329        Supdate_coding_systems_internal, 0, 0, 0,
7330        doc: /* Update internal database for ISO2022 and CCL based coding systems.
7331 When values of any coding categories are changed, you must
7332 call this function.  */)
7333      ()
7334 {
7335   int i;
7336
7337   for (i = CODING_CATEGORY_IDX_EMACS_MULE; i < CODING_CATEGORY_IDX_MAX; i++)
7338     {
7339       Lisp_Object val;
7340
7341       val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[i]);
7342       if (!NILP (val))
7343         {
7344           if (! coding_system_table[i])
7345             coding_system_table[i] = ((struct coding_system *)
7346                                       xmalloc (sizeof (struct coding_system)));
7347           setup_coding_system (val, coding_system_table[i]);
7348         }
7349       else if (coding_system_table[i])
7350         {
7351           xfree (coding_system_table[i]);
7352           coding_system_table[i] = NULL;
7353         }
7354     }
7355
7356   return Qnil;
7357 }
7358
7359 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
7360        Sset_coding_priority_internal, 0, 0, 0,
7361        doc: /* Update internal database for the current value of `coding-category-list'.
7362 This function is internal use only.  */)
7363      ()
7364 {
7365   int i = 0, idx;
7366   Lisp_Object val;
7367
7368   val = Vcoding_category_list;
7369
7370   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
7371     {
7372       if (! SYMBOLP (XCAR (val)))
7373         break;
7374       idx = XFASTINT (Fget (XCAR (val), Qcoding_category_index));
7375       if (idx >= CODING_CATEGORY_IDX_MAX)
7376         break;
7377       coding_priorities[i++] = (1 << idx);
7378       val = XCDR (val);
7379     }
7380   /* If coding-category-list is valid and contains all coding
7381      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
7382      the following code saves Emacs from crashing.  */
7383   while (i < CODING_CATEGORY_IDX_MAX)
7384     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
7385
7386   return Qnil;
7387 }
7388
7389 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7390        Sdefine_coding_system_internal, 1, 1, 0,
7391        doc: /* Register CODING-SYSTEM as a base coding system.
7392 This function is internal use only.  */)
7393      (coding_system)
7394      Lisp_Object coding_system;
7395 {
7396   Lisp_Object safe_chars, slot;
7397
7398   if (NILP (Fcheck_coding_system (coding_system)))
7399     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
7400   safe_chars = coding_safe_chars (coding_system);
7401   if (! EQ (safe_chars, Qt) && ! CHAR_TABLE_P (safe_chars))
7402     error ("No valid safe-chars property for %s",
7403            SDATA (SYMBOL_NAME (coding_system)));
7404   if (EQ (safe_chars, Qt))
7405     {
7406       if (NILP (Fmemq (coding_system, XCAR (Vcoding_system_safe_chars))))
7407         XSETCAR (Vcoding_system_safe_chars,
7408                  Fcons (coding_system, XCAR (Vcoding_system_safe_chars)));
7409     }
7410   else
7411     {
7412       slot = Fassq (coding_system, XCDR (Vcoding_system_safe_chars));
7413       if (NILP (slot))
7414         XSETCDR (Vcoding_system_safe_chars,
7415                  nconc2 (XCDR (Vcoding_system_safe_chars),
7416                          Fcons (Fcons (coding_system, safe_chars), Qnil)));
7417       else
7418         XSETCDR (slot, safe_chars);
7419     }
7420   return Qnil;
7421 }
7422
7423 #endif /* emacs */
7424
7425 \f
7426 /*** 9. Post-amble ***/
7427
7428 void
7429 init_coding_once ()
7430 {
7431   int i;
7432
7433   /* Emacs' internal format specific initialize routine.  */
7434   for (i = 0; i <= 0x20; i++)
7435     emacs_code_class[i] = EMACS_control_code;
7436   emacs_code_class[0x0A] = EMACS_linefeed_code;
7437   emacs_code_class[0x0D] = EMACS_carriage_return_code;
7438   for (i = 0x21 ; i < 0x7F; i++)
7439     emacs_code_class[i] = EMACS_ascii_code;
7440   emacs_code_class[0x7F] = EMACS_control_code;
7441   for (i = 0x80; i < 0xFF; i++)
7442     emacs_code_class[i] = EMACS_invalid_code;
7443   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
7444   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
7445   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
7446   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
7447
7448   /* ISO2022 specific initialize routine.  */
7449   for (i = 0; i < 0x20; i++)
7450     iso_code_class[i] = ISO_control_0;
7451   for (i = 0x21; i < 0x7F; i++)
7452     iso_code_class[i] = ISO_graphic_plane_0;
7453   for (i = 0x80; i < 0xA0; i++)
7454     iso_code_class[i] = ISO_control_1;
7455   for (i = 0xA1; i < 0xFF; i++)
7456     iso_code_class[i] = ISO_graphic_plane_1;
7457   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
7458   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
7459   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
7460   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
7461   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
7462   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
7463   iso_code_class[ISO_CODE_ESC] = ISO_escape;
7464   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
7465   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
7466   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
7467
7468   setup_coding_system (Qnil, &keyboard_coding);
7469   setup_coding_system (Qnil, &terminal_coding);
7470   setup_coding_system (Qnil, &safe_terminal_coding);
7471   setup_coding_system (Qnil, &default_buffer_file_coding);
7472
7473   bzero (coding_system_table, sizeof coding_system_table);
7474
7475   bzero (ascii_skip_code, sizeof ascii_skip_code);
7476   for (i = 0; i < 128; i++)
7477     ascii_skip_code[i] = 1;
7478
7479 #if defined (MSDOS) || defined (WINDOWSNT)
7480   system_eol_type = CODING_EOL_CRLF;
7481 #else
7482   system_eol_type = CODING_EOL_LF;
7483 #endif
7484
7485   inhibit_pre_post_conversion = 0;
7486 }
7487
7488 #ifdef emacs
7489
7490 void
7491 syms_of_coding ()
7492 {
7493   Qtarget_idx = intern ("target-idx");
7494   staticpro (&Qtarget_idx);
7495
7496   Qcoding_system_history = intern ("coding-system-history");
7497   staticpro (&Qcoding_system_history);
7498   Fset (Qcoding_system_history, Qnil);
7499
7500   /* Target FILENAME is the first argument.  */
7501   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
7502   /* Target FILENAME is the third argument.  */
7503   Fput (Qwrite_region, Qtarget_idx, make_number (2));
7504
7505   Qcall_process = intern ("call-process");
7506   staticpro (&Qcall_process);
7507   /* Target PROGRAM is the first argument.  */
7508   Fput (Qcall_process, Qtarget_idx, make_number (0));
7509
7510   Qcall_process_region = intern ("call-process-region");
7511   staticpro (&Qcall_process_region);
7512   /* Target PROGRAM is the third argument.  */
7513   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
7514
7515   Qstart_process = intern ("start-process");
7516   staticpro (&Qstart_process);
7517   /* Target PROGRAM is the third argument.  */
7518   Fput (Qstart_process, Qtarget_idx, make_number (2));
7519
7520   Qopen_network_stream = intern ("open-network-stream");
7521   staticpro (&Qopen_network_stream);
7522   /* Target SERVICE is the fourth argument.  */
7523   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
7524
7525   Qcoding_system = intern ("coding-system");
7526   staticpro (&Qcoding_system);
7527
7528   Qeol_type = intern ("eol-type");
7529   staticpro (&Qeol_type);
7530
7531   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
7532   staticpro (&Qbuffer_file_coding_system);
7533
7534   Qpost_read_conversion = intern ("post-read-conversion");
7535   staticpro (&Qpost_read_conversion);
7536
7537   Qpre_write_conversion = intern ("pre-write-conversion");
7538   staticpro (&Qpre_write_conversion);
7539
7540   Qno_conversion = intern ("no-conversion");
7541   staticpro (&Qno_conversion);
7542
7543   Qundecided = intern ("undecided");
7544   staticpro (&Qundecided);
7545
7546   Qcoding_system_p = intern ("coding-system-p");
7547   staticpro (&Qcoding_system_p);
7548
7549   Qcoding_system_error = intern ("coding-system-error");
7550   staticpro (&Qcoding_system_error);
7551
7552   Fput (Qcoding_system_error, Qerror_conditions,
7553         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
7554   Fput (Qcoding_system_error, Qerror_message,
7555         build_string ("Invalid coding system"));
7556
7557   Qcoding_category = intern ("coding-category");
7558   staticpro (&Qcoding_category);
7559   Qcoding_category_index = intern ("coding-category-index");
7560   staticpro (&Qcoding_category_index);
7561
7562   Vcoding_category_table
7563     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
7564   staticpro (&Vcoding_category_table);
7565   {
7566     int i;
7567     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
7568       {
7569         XVECTOR (Vcoding_category_table)->contents[i]
7570           = intern (coding_category_name[i]);
7571         Fput (XVECTOR (Vcoding_category_table)->contents[i],
7572               Qcoding_category_index, make_number (i));
7573       }
7574   }
7575
7576   Vcoding_system_safe_chars = Fcons (Qnil, Qnil);
7577   staticpro (&Vcoding_system_safe_chars);
7578
7579   Qtranslation_table = intern ("translation-table");
7580   staticpro (&Qtranslation_table);
7581   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
7582
7583   Qtranslation_table_id = intern ("translation-table-id");
7584   staticpro (&Qtranslation_table_id);
7585
7586   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
7587   staticpro (&Qtranslation_table_for_decode);
7588
7589   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
7590   staticpro (&Qtranslation_table_for_encode);
7591
7592   Qsafe_chars = intern ("safe-chars");
7593   staticpro (&Qsafe_chars);
7594
7595   Qchar_coding_system = intern ("char-coding-system");
7596   staticpro (&Qchar_coding_system);
7597
7598   /* Intern this now in case it isn't already done.
7599      Setting this variable twice is harmless.
7600      But don't staticpro it here--that is done in alloc.c.  */
7601   Qchar_table_extra_slots = intern ("char-table-extra-slots");
7602   Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
7603   Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (0));
7604
7605   Qvalid_codes = intern ("valid-codes");
7606   staticpro (&Qvalid_codes);
7607
7608   Qemacs_mule = intern ("emacs-mule");
7609   staticpro (&Qemacs_mule);
7610
7611   Qraw_text = intern ("raw-text");
7612   staticpro (&Qraw_text);
7613
7614   Qutf_8 = intern ("utf-8");
7615   staticpro (&Qutf_8);
7616
7617   Qcoding_system_define_form = intern ("coding-system-define-form");
7618   staticpro (&Qcoding_system_define_form);
7619
7620   defsubr (&Scoding_system_p);
7621   defsubr (&Sread_coding_system);
7622   defsubr (&Sread_non_nil_coding_system);
7623   defsubr (&Scheck_coding_system);
7624   defsubr (&Sdetect_coding_region);
7625   defsubr (&Sdetect_coding_string);
7626   defsubr (&Sfind_coding_systems_region_internal);
7627   defsubr (&Sunencodable_char_position);
7628   defsubr (&Sdecode_coding_region);
7629   defsubr (&Sencode_coding_region);
7630   defsubr (&Sdecode_coding_string);
7631   defsubr (&Sencode_coding_string);
7632   defsubr (&Sdecode_sjis_char);
7633   defsubr (&Sencode_sjis_char);
7634   defsubr (&Sdecode_big5_char);
7635   defsubr (&Sencode_big5_char);
7636   defsubr (&Sset_terminal_coding_system_internal);
7637   defsubr (&Sset_safe_terminal_coding_system_internal);
7638   defsubr (&Sterminal_coding_system);
7639   defsubr (&Sset_keyboard_coding_system_internal);
7640   defsubr (&Skeyboard_coding_system);
7641   defsubr (&Sfind_operation_coding_system);
7642   defsubr (&Supdate_coding_systems_internal);
7643   defsubr (&Sset_coding_priority_internal);
7644   defsubr (&Sdefine_coding_system_internal);
7645
7646   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
7647                doc: /* List of coding systems.
7648
7649 Do not alter the value of this variable manually.  This variable should be
7650 updated by the functions `make-coding-system' and
7651 `define-coding-system-alias'.  */);
7652   Vcoding_system_list = Qnil;
7653
7654   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
7655                doc: /* Alist of coding system names.
7656 Each element is one element list of coding system name.
7657 This variable is given to `completing-read' as TABLE argument.
7658
7659 Do not alter the value of this variable manually.  This variable should be
7660 updated by the functions `make-coding-system' and
7661 `define-coding-system-alias'.  */);
7662   Vcoding_system_alist = Qnil;
7663
7664   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
7665                doc: /* List of coding-categories (symbols) ordered by priority.
7666
7667 On detecting a coding system, Emacs tries code detection algorithms
7668 associated with each coding-category one by one in this order.  When
7669 one algorithm agrees with a byte sequence of source text, the coding
7670 system bound to the corresponding coding-category is selected.  */);
7671   {
7672     int i;
7673
7674     Vcoding_category_list = Qnil;
7675     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
7676       Vcoding_category_list
7677         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
7678                  Vcoding_category_list);
7679   }
7680
7681   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
7682                doc: /* Specify the coding system for read operations.
7683 It is useful to bind this variable with `let', but do not set it globally.
7684 If the value is a coding system, it is used for decoding on read operation.
7685 If not, an appropriate element is used from one of the coding system alists:
7686 There are three such tables, `file-coding-system-alist',
7687 `process-coding-system-alist', and `network-coding-system-alist'.  */);
7688   Vcoding_system_for_read = Qnil;
7689
7690   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
7691                doc: /* Specify the coding system for write operations.
7692 Programs bind this variable with `let', but you should not set it globally.
7693 If the value is a coding system, it is used for encoding of output,
7694 when writing it to a file and when sending it to a file or subprocess.
7695
7696 If this does not specify a coding system, an appropriate element
7697 is used from one of the coding system alists:
7698 There are three such tables, `file-coding-system-alist',
7699 `process-coding-system-alist', and `network-coding-system-alist'.
7700 For output to files, if the above procedure does not specify a coding system,
7701 the value of `buffer-file-coding-system' is used.  */);
7702   Vcoding_system_for_write = Qnil;
7703
7704   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
7705                doc: /* Coding system used in the latest file or process I/O.
7706 Also set by `encode-coding-region', `decode-coding-region',
7707 `encode-coding-string' and `decode-coding-string'.  */);
7708   Vlast_coding_system_used = Qnil;
7709
7710   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
7711                doc: /* *Non-nil means always inhibit code conversion of end-of-line format.
7712 See info node `Coding Systems' and info node `Text and Binary' concerning
7713 such conversion.  */);
7714   inhibit_eol_conversion = 0;
7715
7716   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
7717                doc: /* Non-nil means process buffer inherits coding system of process output.
7718 Bind it to t if the process output is to be treated as if it were a file
7719 read from some filesystem.  */);
7720   inherit_process_coding_system = 0;
7721
7722   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
7723                doc: /* Alist to decide a coding system to use for a file I/O operation.
7724 The format is ((PATTERN . VAL) ...),
7725 where PATTERN is a regular expression matching a file name,
7726 VAL is a coding system, a cons of coding systems, or a function symbol.
7727 If VAL is a coding system, it is used for both decoding and encoding
7728 the file contents.
7729 If VAL is a cons of coding systems, the car part is used for decoding,
7730 and the cdr part is used for encoding.
7731 If VAL is a function symbol, the function must return a coding system
7732 or a cons of coding systems which are used as above.  The function gets
7733 the arguments with which `find-operation-coding-system' was called.
7734
7735 See also the function `find-operation-coding-system'
7736 and the variable `auto-coding-alist'.  */);
7737   Vfile_coding_system_alist = Qnil;
7738
7739   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
7740     doc: /* Alist to decide a coding system to use for a process I/O operation.
7741 The format is ((PATTERN . VAL) ...),
7742 where PATTERN is a regular expression matching a program name,
7743 VAL is a coding system, a cons of coding systems, or a function symbol.
7744 If VAL is a coding system, it is used for both decoding what received
7745 from the program and encoding what sent to the program.
7746 If VAL is a cons of coding systems, the car part is used for decoding,
7747 and the cdr part is used for encoding.
7748 If VAL is a function symbol, the function must return a coding system
7749 or a cons of coding systems which are used as above.
7750
7751 See also the function `find-operation-coding-system'.  */);
7752   Vprocess_coding_system_alist = Qnil;
7753
7754   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
7755     doc: /* Alist to decide a coding system to use for a network I/O operation.
7756 The format is ((PATTERN . VAL) ...),
7757 where PATTERN is a regular expression matching a network service name
7758 or is a port number to connect to,
7759 VAL is a coding system, a cons of coding systems, or a function symbol.
7760 If VAL is a coding system, it is used for both decoding what received
7761 from the network stream and encoding what sent to the network stream.
7762 If VAL is a cons of coding systems, the car part is used for decoding,
7763 and the cdr part is used for encoding.
7764 If VAL is a function symbol, the function must return a coding system
7765 or a cons of coding systems which are used as above.
7766
7767 See also the function `find-operation-coding-system'.  */);
7768   Vnetwork_coding_system_alist = Qnil;
7769
7770   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
7771                doc: /* Coding system to use with system messages.
7772 Also used for decoding keyboard input on X Window system.  */);
7773   Vlocale_coding_system = Qnil;
7774
7775   /* The eol mnemonics are reset in startup.el system-dependently.  */
7776   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
7777                doc: /* *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
7778   eol_mnemonic_unix = build_string (":");
7779
7780   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
7781                doc: /* *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
7782   eol_mnemonic_dos = build_string ("\\");
7783
7784   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
7785                doc: /* *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
7786   eol_mnemonic_mac = build_string ("/");
7787
7788   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
7789                doc: /* *String displayed in mode line when end-of-line format is not yet determined.  */);
7790   eol_mnemonic_undecided = build_string (":");
7791
7792   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
7793                doc: /* *Non-nil enables character translation while encoding and decoding.  */);
7794   Venable_character_translation = Qt;
7795
7796   DEFVAR_LISP ("standard-translation-table-for-decode",
7797                &Vstandard_translation_table_for_decode,
7798                doc: /* Table for translating characters while decoding.  */);
7799   Vstandard_translation_table_for_decode = Qnil;
7800
7801   DEFVAR_LISP ("standard-translation-table-for-encode",
7802                &Vstandard_translation_table_for_encode,
7803                doc: /* Table for translating characters while encoding.  */);
7804   Vstandard_translation_table_for_encode = Qnil;
7805
7806   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
7807                doc: /* Alist of charsets vs revision numbers.
7808 While encoding, if a charset (car part of an element) is found,
7809 designate it with the escape sequence identifying revision (cdr part of the element).  */);
7810   Vcharset_revision_alist = Qnil;
7811
7812   DEFVAR_LISP ("default-process-coding-system",
7813                &Vdefault_process_coding_system,
7814                doc: /* Cons of coding systems used for process I/O by default.
7815 The car part is used for decoding a process output,
7816 the cdr part is used for encoding a text to be sent to a process.  */);
7817   Vdefault_process_coding_system = Qnil;
7818
7819   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
7820                doc: /* Table of extra Latin codes in the range 128..159 (inclusive).
7821 This is a vector of length 256.
7822 If Nth element is non-nil, the existence of code N in a file
7823 \(or output of subprocess) doesn't prevent it to be detected as
7824 a coding system of ISO 2022 variant which has a flag
7825 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
7826 or reading output of a subprocess.
7827 Only 128th through 159th elements has a meaning.  */);
7828   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
7829
7830   DEFVAR_LISP ("select-safe-coding-system-function",
7831                &Vselect_safe_coding_system_function,
7832                doc: /* Function to call to select safe coding system for encoding a text.
7833
7834 If set, this function is called to force a user to select a proper
7835 coding system which can encode the text in the case that a default
7836 coding system used in each operation can't encode the text.
7837
7838 The default value is `select-safe-coding-system' (which see).  */);
7839   Vselect_safe_coding_system_function = Qnil;
7840
7841   DEFVAR_BOOL ("coding-system-require-warning",
7842                &coding_system_require_warning,
7843                doc: /* Internal use only.
7844 If non-nil, on writing a file, `select-safe-coding-system-function' is
7845 called even if `coding-system-for-write' is non-nil.  The command
7846 `universal-coding-system-argument' binds this variable to t temporarily.  */);
7847   coding_system_require_warning = 0;
7848
7849
7850   DEFVAR_BOOL ("inhibit-iso-escape-detection",
7851                &inhibit_iso_escape_detection,
7852                doc: /* If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
7853
7854 By default, on reading a file, Emacs tries to detect how the text is
7855 encoded.  This code detection is sensitive to escape sequences.  If
7856 the sequence is valid as ISO2022, the code is determined as one of
7857 the ISO2022 encodings, and the file is decoded by the corresponding
7858 coding system (e.g. `iso-2022-7bit').
7859
7860 However, there may be a case that you want to read escape sequences in
7861 a file as is.  In such a case, you can set this variable to non-nil.
7862 Then, as the code detection ignores any escape sequences, no file is
7863 detected as encoded in some ISO2022 encoding.  The result is that all
7864 escape sequences become visible in a buffer.
7865
7866 The default value is nil, and it is strongly recommended not to change
7867 it.  That is because many Emacs Lisp source files that contain
7868 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
7869 in Emacs's distribution, and they won't be decoded correctly on
7870 reading if you suppress escape sequence detection.
7871
7872 The other way to read escape sequences in a file without decoding is
7873 to explicitly specify some coding system that doesn't use ISO2022's
7874 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
7875   inhibit_iso_escape_detection = 0;
7876
7877   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
7878                doc: /* Char table for translating self-inserting characters.
7879 This is applied to the result of input methods, not their input.  See also
7880 `keyboard-translate-table'.  */);
7881     Vtranslation_table_for_input = Qnil;
7882 }
7883
7884 char *
7885 emacs_strerror (error_number)
7886      int error_number;
7887 {
7888   char *str;
7889
7890   synchronize_system_messages_locale ();
7891   str = strerror (error_number);
7892
7893   if (! NILP (Vlocale_coding_system))
7894     {
7895       Lisp_Object dec = code_convert_string_norecord (build_string (str),
7896                                                       Vlocale_coding_system,
7897                                                       0);
7898       str = (char *) SDATA (dec);
7899     }
7900
7901   return str;
7902 }
7903
7904 #endif /* emacs */
7905
7906 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
7907    (do not change this comment) */