src/coding.c

   1 /* Coding system handler (conversion, detection, etc.).
   2    Copyright (C) 1995,97,1998,2002,2003  Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4    Copyright (C) 2001,2002,2003  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2, or (at your option)
  11 any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs; see the file COPYING.  If not, write to
  20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  21 Boston, MA 02111-1307, USA.  */
  22
  23 /*** TABLE OF CONTENTS ***
  24
  25   0. General comments
  26   1. Preamble
  27   2. Emacs' internal format (emacs-mule) handlers
  28   3. ISO2022 handlers
  29   4. Shift-JIS and BIG5 handlers
  30   5. CCL handlers
  31   6. End-of-line handlers
  32   7. C library functions
  33   8. Emacs Lisp library functions
  34   9. Post-amble
  35
  36 */
  37
  38 /*** 0. General comments ***/
  39
  40
  41 /*** GENERAL NOTE on CODING SYSTEMS ***
  42
  43   A coding system is an encoding mechanism for one or more character
  44   sets.  Here's a list of coding systems which Emacs can handle.  When
  45   we say "decode", it means converting some other coding system to
  46   Emacs' internal format (emacs-mule), and when we say "encode",
  47   it means converting the coding system emacs-mule to some other
  48   coding system.
  49
  50   0. Emacs' internal format (emacs-mule)
  51
  52   Emacs itself holds a multi-lingual character in buffers and strings
  53   in a special format.  Details are described in section 2.
  54
  55   1. ISO2022
  56
  57   The most famous coding system for multiple character sets.  X's
  58   Compound Text, various EUCs (Extended Unix Code), and coding
  59   systems used in Internet communication such as ISO-2022-JP are
  60   all variants of ISO2022.  Details are described in section 3.
  61
  62   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  63
  64   A coding system to encode character sets: ASCII, JISX0201, and
  65   JISX0208.  Widely used for PC's in Japan.  Details are described in
  66   section 4.
  67
  68   3. BIG5
  69
  70   A coding system to encode the character sets ASCII and Big5.  Widely
  71   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  72   described in section 4.  In this file, when we write "BIG5"
  73   (all uppercase), we mean the coding system, and when we write
  74   "Big5" (capitalized), we mean the character set.
  75
  76   4. Raw text
  77
  78   A coding system for text containing random 8-bit code.  Emacs does
  79   no code conversion on such text except for end-of-line format.
  80
  81   5. Other
  82
  83   If a user wants to read/write text encoded in a coding system not
  84   listed above, he can supply a decoder and an encoder for it as CCL
  85   (Code Conversion Language) programs.  Emacs executes the CCL program
  86   while reading/writing.
  87
  88   Emacs represents a coding system by a Lisp symbol that has a property
  89   `coding-system'.  But, before actually using the coding system, the
  90   information about it is set in a structure of type `struct
  91   coding_system' for rapid processing.  See section 6 for more details.
  92
  93 */
  94
  95 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  96
  97   How end-of-line of text is encoded depends on the operating system.
  98   For instance, Unix's format is just one byte of `line-feed' code,
  99   whereas DOS's format is two-byte sequence of `carriage-return' and
 100   `line-feed' codes.  MacOS's format is usually one byte of
 101   `carriage-return'.
 102
 103   Since text character encoding and end-of-line encoding are
 104   independent, any coding system described above can have any
 105   end-of-line format.  So Emacs has information about end-of-line
 106   format in each coding-system.  See section 6 for more details.
 107
 108 */
 109
 110 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 111
 112   These functions check if a text between SRC and SRC_END is encoded
 113   in the coding system category XXX.  Each returns an integer value in
 114   which appropriate flag bits for the category XXX are set.  The flag
 115   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 116   template for these functions.  If MULTIBYTEP is nonzero, 8-bit codes
 117   of the range 0x80..0x9F are in multibyte form.  */
 118 #if 0
 119 int
 120 detect_coding_emacs_mule (src, src_end, multibytep)
 121      unsigned char *src, *src_end;
 122      int multibytep;
 123 {
 124   ...
 125 }
 126 #endif
 127
 128 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 129
 130   These functions decode SRC_BYTES length of unibyte text at SOURCE
 131   encoded in CODING to Emacs' internal format.  The resulting
 132   multibyte text goes to a place pointed to by DESTINATION, the length
 133   of which should not exceed DST_BYTES.
 134
 135   These functions set the information about original and decoded texts
 136   in the members `produced', `produced_char', `consumed', and
 137   `consumed_char' of the structure *CODING.  They also set the member
 138   `result' to one of CODING_FINISH_XXX indicating how the decoding
 139   finished.
 140
 141   DST_BYTES zero means that the source area and destination area are
 142   overlapped, which means that we can produce a decoded text until it
 143   reaches the head of the not-yet-decoded source text.
 144
 145   Below is a template for these functions.  */
 146 #if 0
 147 static void
 148 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 149      struct coding_system *coding;
 150      unsigned char *source, *destination;
 151      int src_bytes, dst_bytes;
 152 {
 153   ...
 154 }
 155 #endif
 156
 157 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 158
 159   These functions encode SRC_BYTES length text at SOURCE from Emacs'
 160   internal multibyte format to CODING.  The resulting unibyte text
 161   goes to a place pointed to by DESTINATION, the length of which
 162   should not exceed DST_BYTES.
 163
 164   These functions set the information about original and encoded texts
 165   in the members `produced', `produced_char', `consumed', and
 166   `consumed_char' of the structure *CODING.  They also set the member
 167   `result' to one of CODING_FINISH_XXX indicating how the encoding
 168   finished.
 169
 170   DST_BYTES zero means that the source area and destination area are
 171   overlapped, which means that we can produce encoded text until it
  172   reaches the head of the not-yet-encoded source text.
 173
 174   Below is a template for these functions.  */
 175 #if 0
 176 static void
 177 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 178      struct coding_system *coding;
 179      unsigned char *source, *destination;
 180      int src_bytes, dst_bytes;
 181 {
 182   ...
 183 }
 184 #endif
 185
 186 /*** COMMONLY USED MACROS ***/
 187
 188 /* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
 189    get one, two, and three bytes from the source text respectively.
 190    If there are not enough bytes in the source, they jump to
 191    `label_end_of_loop'.  The caller should set variables `coding',
 192    `src' and `src_end' to appropriate pointer in advance.  These
 193    macros are called from decoding routines `decode_coding_XXX', thus
 194    it is assumed that the source text is unibyte.  */
 195
 196 #define ONE_MORE_BYTE(c1)                                       \
 197   do {                                                          \
 198     if (src >= src_end)                                         \
 199       {                                                         \
 200         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 201         goto label_end_of_loop;                                 \
 202       }                                                         \
 203     c1 = *src++;                                                \
 204   } while (0)
 205
 206 #define TWO_MORE_BYTES(c1, c2)                                  \
 207   do {                                                          \
 208     if (src + 1 >= src_end)                                     \
 209       {                                                         \
 210         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 211         goto label_end_of_loop;                                 \
 212       }                                                         \
 213     c1 = *src++;                                                \
 214     c2 = *src++;                                                \
 215   } while (0)
 216
 217
 218 /* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte
 219    form if MULTIBYTEP is nonzero.  */
 220
 221 #define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep)           \
 222   do {                                                          \
 223     if (src >= src_end)                                         \
 224       {                                                         \
 225         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 226         goto label_end_of_loop;                                 \
 227       }                                                         \
 228     c1 = *src++;                                                \
 229     if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL)         \
 230       c1 = *src++ - 0x20;                                       \
 231   } while (0)
 232
  233 /* Set C to the next character of the source text pointed to by `src'
  234    and advance `src' past it.  If no bytes remain in the source, set
  235    coding->result to CODING_FINISH_INSUFFICIENT_SRC and jump to
  236    `label_end_of_loop'.  The caller should set variables `coding',
  237    `src', `src_end', and `translation_table' in advance.  This macro
  238    is used in encoding routines `encode_coding_XXX', thus it assumes
  239    that the source text is in multibyte form except for 8-bit
  240    characters.  8-bit characters are in multibyte form if
  241    coding->src_multibyte is nonzero, else they are represented by a
         single byte.  If `translation_table' is non-nil, C is mapped
         through it with translate_char before `src' is advanced.

         The macro declares the block-local variables `len' and `bytes',
         so C must not be an expression that mentions either name.  */
  242
  243 #define ONE_MORE_CHAR(c)                                        \
  244   do {                                                          \
  245     int len = src_end - src;                                    \
  246     int bytes;                                                  \
  247     if (len <= 0)                                               \
  248       {                                                         \
  249         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
  250         goto label_end_of_loop;                                 \
  251       }                                                         \
  252     if (coding->src_multibyte                                   \
  253         || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes))        \
  254       c = STRING_CHAR_AND_LENGTH (src, len, bytes);             \
  255     else                                                        \
  256       c = *src, bytes = 1;                                      \
  257     if (!NILP (translation_table))                              \
  258       c = translate_char (translation_table, c, -1, 0, 0);      \
  259     src += bytes;                                               \
  260   } while (0)
 261
 262
 263 /* Produce a multibyte form of character C to `dst'.  Jump to
 264    `label_end_of_loop' if there's not enough space at `dst'.
 265
 266    If we are now in the middle of a composition sequence, the decoded
 267    character may be ALTCHAR (for the current composition).  In that
 268    case, the character goes to coding->cmp_data->data instead of
 269    `dst'.
 270
 271    This macro is used in decoding routines.  */
 272
 273 #define EMIT_CHAR(c)                                                    \
 274   do {                                                                  \
 275     if (! COMPOSING_P (coding)                                          \
 276         || coding->composing == COMPOSITION_RELATIVE                    \
 277         || coding->composing == COMPOSITION_WITH_RULE)                  \
 278       {                                                                 \
 279         int bytes = CHAR_BYTES (c);                                     \
 280         if ((dst + bytes) > (dst_bytes ? dst_end : src))                \
 281           {                                                             \
 282             coding->result = CODING_FINISH_INSUFFICIENT_DST;            \
 283             goto label_end_of_loop;                                     \
 284           }                                                             \
 285         dst += CHAR_STRING (c, dst);                                    \
 286         coding->produced_char++;                                        \
 287       }                                                                 \
 288                                                                         \
 289     if (COMPOSING_P (coding)                                            \
 290         && coding->composing != COMPOSITION_RELATIVE)                   \
 291       {                                                                 \
 292         CODING_ADD_COMPOSITION_COMPONENT (coding, c);                   \
 293         coding->composition_rule_follows                                \
 294           = coding->composing != COMPOSITION_WITH_ALTCHARS;             \
 295       }                                                                 \
 296   } while (0)
 297
 298
 299 #define EMIT_ONE_BYTE(c)                                        \
 300   do {                                                          \
 301     if (dst >= (dst_bytes ? dst_end : src))                     \
 302       {                                                         \
 303         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 304         goto label_end_of_loop;                                 \
 305       }                                                         \
 306     *dst++ = c;                                                 \
 307   } while (0)
 308
 309 #define EMIT_TWO_BYTES(c1, c2)                                  \
 310   do {                                                          \
 311     if (dst + 2 > (dst_bytes ? dst_end : src))                  \
 312       {                                                         \
 313         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 314         goto label_end_of_loop;                                 \
 315       }                                                         \
 316     *dst++ = c1, *dst++ = c2;                                   \
 317   } while (0)
 318
 319 #define EMIT_BYTES(from, to)                                    \
 320   do {                                                          \
 321     if (dst + (to - from) > (dst_bytes ? dst_end : src))        \
 322       {                                                         \
 323         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 324         goto label_end_of_loop;                                 \
 325       }                                                         \
 326     while (from < to)                                           \
 327       *dst++ = *from++;                                         \
 328   } while (0)
 329
 330 \f
 331 /*** 1. Preamble ***/
 332
 333 #ifdef emacs
 334 #include <config.h>
 335 #endif
 336
 337 #include <stdio.h>
 338
 339 #ifdef emacs
 340
 341 #include "lisp.h"
 342 #include "buffer.h"
 343 #include "charset.h"
 344 #include "composite.h"
 345 #include "ccl.h"
 346 #include "coding.h"
 347 #include "window.h"
 348 #include "intervals.h"
 349
 350 #else  /* not emacs */
 351
 352 #include "mulelib.h"
 353
 354 #endif /* not emacs */
 355
      /* Lisp objects used by the coding-system code.  NOTE(review): the
         `Q' prefix suggests that these hold Lisp symbols; their
         initialization is not in the visible part of this file --
         confirm there.  Qcoding_system and Qsafe_chars are used as
         property keys by coding_safe_chars.  */
  356 Lisp_Object Qcoding_system, Qeol_type;
  357 Lisp_Object Qbuffer_file_coding_system;
  358 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
  359 Lisp_Object Qno_conversion, Qundecided;
  360 Lisp_Object Qcoding_system_history;
  361 Lisp_Object Qsafe_chars;
  362 Lisp_Object Qvalid_codes;
  363
      /* These two are only declared here (`extern'), not defined.  */
  364 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
  365 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
  366 Lisp_Object Qstart_process, Qopen_network_stream;
  367 Lisp_Object Qtarget_idx;
  368
  369 /* If a symbol has this property, evaluate the value to define the
  370    symbol as a coding system.  */
  371 Lisp_Object Qcoding_system_define_form;
  372
      /* NOTE(review): undocumented here; presumably holds a function
         that can be set from Lisp -- inferred from the name only,
         confirm.  */
  373 Lisp_Object Vselect_safe_coding_system_function;
  374
      /* NOTE(review): undocumented here; presumably a flag -- its meaning
         cannot be told from this part of the file.  */
  375 int coding_system_require_warning;
  376
  377 /* Mnemonic string for each format of end-of-line.  */
  378 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
  379 /* Mnemonic string to indicate format of end-of-line is not yet
  380    decided.  */
  381 Lisp_Object eol_mnemonic_undecided;
  382
  383 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
  384    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
  385 int system_eol_type;
 386
  387 #ifdef emacs
      /* Everything up to the matching #endif is compiled only when
         `emacs' is defined.  */
  388
  389 /* Information about which coding system is safe for which chars.
  390    The value has the form (GENERIC-LIST . NON-GENERIC-ALIST).
  391
  392    GENERIC-LIST is a list of generic coding systems which can encode
  393    any characters.
  394
  395    NON-GENERIC-ALIST is an alist of non generic coding systems vs the
  396    corresponding char table that contains safe chars.  */
  397 Lisp_Object Vcoding_system_safe_chars;
  398
      /* NOTE(review): undocumented here; presumably a list and an alist
         of the defined coding systems -- inferred from the names only.  */
  399 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
  400
      /* NOTE(review): undocumented here; presumably the symbols for the
         coding-system predicate and error condition -- confirm.  */
  401 Lisp_Object Qcoding_system_p, Qcoding_system_error;
  402
  403 /* Coding system emacs-mule and raw-text are for converting only
  404    end-of-line format.  */
  405 Lisp_Object Qemacs_mule, Qraw_text;
  406
      /* NOTE(review): undocumented here; presumably the symbol `utf-8'.  */
  407 Lisp_Object Qutf_8;
  408
  409 /* Coding-systems are handed between Emacs Lisp programs and C internal
  410    routines by the following three variables.  */
  411 /* Coding-system for reading files and receiving data from process.  */
  412 Lisp_Object Vcoding_system_for_read;
  413 /* Coding-system for writing files and sending data to process.  */
  414 Lisp_Object Vcoding_system_for_write;
  415 /* Coding-system actually used in the latest I/O.  */
  416 Lisp_Object Vlast_coding_system_used;
  417
  418 /* A vector of length 256 which contains information about special
  419    Latin codes (especially for dealing with Microsoft codes).  */
  420 Lisp_Object Vlatin_extra_code_table;
  421
  422 /* Flag to inhibit code conversion of end-of-line format.  */
  423 int inhibit_eol_conversion;
  424
  425 /* Flag to inhibit ISO2022 escape sequence detection.  */
  426 int inhibit_iso_escape_detection;
  427
  428 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
  429 int inherit_process_coding_system;
  430
  431 /* Coding system to be used to encode text for terminal display.  */
  432 struct coding_system terminal_coding;
  433
  434 /* Coding system to be used to encode text for terminal display when
  435    terminal coding system is nil.  */
  436 struct coding_system safe_terminal_coding;
  437
  438 /* Coding system of what is sent from terminal keyboard.  */
  439 struct coding_system keyboard_coding;
  440
  441 /* Default coding system to be used to write a file.  */
  442 struct coding_system default_buffer_file_coding;
  443
      /* NOTE(review): undocumented here; presumably alists that select
         coding systems for file, process and network I/O respectively --
         inferred from the names only, confirm.  */
  444 Lisp_Object Vfile_coding_system_alist;
  445 Lisp_Object Vprocess_coding_system_alist;
  446 Lisp_Object Vnetwork_coding_system_alist;
  447
      /* NOTE(review): undocumented here; presumably the coding system
         associated with the current locale -- confirm.  */
  448 Lisp_Object Vlocale_coding_system;
  449
  450 #endif /* emacs */
 451
      /* NOTE(review): undocumented here; presumably symbols used as
         property names for coding categories -- confirm.  */
  452 Lisp_Object Qcoding_category, Qcoding_category_index;
  453
  454 /* List of symbols `coding-category-xxx' ordered by priority.  */
  455 Lisp_Object Vcoding_category_list;
  456
  457 /* Table of coding categories (Lisp symbols).  */
  458 Lisp_Object Vcoding_category_table;
  459
  460 /* Table of the symbol names of the coding categories.  The
         initializer supplies 15 names.  NOTE(review): their order
         presumably has to match the numbering of the category indices
         that CODING_CATEGORY_IDX_MAX bounds -- confirm against the
         header that defines them.  */
  461 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
  462   "coding-category-emacs-mule",
  463   "coding-category-sjis",
  464   "coding-category-iso-7",
  465   "coding-category-iso-7-tight",
  466   "coding-category-iso-8-1",
  467   "coding-category-iso-8-2",
  468   "coding-category-iso-7-else",
  469   "coding-category-iso-8-else",
  470   "coding-category-ccl",
  471   "coding-category-big5",
  472   "coding-category-utf-8",
  473   "coding-category-utf-16-be",
  474   "coding-category-utf-16-le",
  475   "coding-category-raw-text",
  476   "coding-category-binary"
  477 };
  478
  479 /* Table of pointers to the coding systems corresponding to each
  480    coding category.  */
  481 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
  482
  483 /* Table of coding category masks.  Nth element is a mask for a coding
  484    category of which priority is Nth.  Private to this file.  */
  485 static
  486 int coding_priorities[CODING_CATEGORY_IDX_MAX];
 487
  488 /* Flag to tell if we look up translation table on character code
  489    conversion.  */
  490 Lisp_Object Venable_character_translation;
  491 /* Standard translation table to look up on decoding (reading).  */
  492 Lisp_Object Vstandard_translation_table_for_decode;
  493 /* Standard translation table to look up on encoding (writing).  */
  494 Lisp_Object Vstandard_translation_table_for_encode;
  495
      /* NOTE(review): undocumented here; presumably symbols naming
         translation-table properties -- inferred from the names only.  */
  496 Lisp_Object Qtranslation_table;
  497 Lisp_Object Qtranslation_table_id;
  498 Lisp_Object Qtranslation_table_for_decode;
  499 Lisp_Object Qtranslation_table_for_encode;
  500
  501 /* Alist of charsets vs revision number.  */
  502 Lisp_Object Vcharset_revision_alist;
  503
  504 /* Default coding systems used for process I/O.  */
  505 Lisp_Object Vdefault_process_coding_system;
  506
  507 /* Char table for translating Quail and self-inserting input.  */
  508 Lisp_Object Vtranslation_table_for_input;
  509
  510 /* Global flag to tell that we can't call post-read-conversion and
  511    pre-write-conversion functions.  Usually the value is zero, but it
  512    is set to 1 temporarily while such functions are running.  This is
  513    to avoid infinite recursion.  */
  514 static int inhibit_pre_post_conversion;
  515
      /* NOTE(review): undocumented here; presumably a symbol -- its use
         is not visible in this part of the file.  */
  516 Lisp_Object Qchar_coding_system;
 517
 518 /* Return `safe-chars' property of CODING_SYSTEM (symbol).  Don't check
 519    its validity.  */
 520
 521 Lisp_Object
 522 coding_safe_chars (coding_system)
 523      Lisp_Object coding_system;
 524 {
 525   Lisp_Object coding_spec, plist, safe_chars;
 526
 527   coding_spec = Fget (coding_system, Qcoding_system);
 528   plist = XVECTOR (coding_spec)->contents[3];
 529   safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars);
 530   return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
 531 }
 532
 533 #define CODING_SAFE_CHAR_P(safe_chars, c) \
 534   (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c)))
 535
 536 \f
 537 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 538
 539 /* Emacs' internal format for representation of multiple character
 540    sets is a kind of multi-byte encoding, i.e. characters are
 541    represented by variable-length sequences of one-byte codes.
 542
 543    ASCII characters and control characters (e.g. `tab', `newline') are
 544    represented by one-byte sequences which are their ASCII codes, in
 545    the range 0x00 through 0x7F.
 546
 547    8-bit characters of the range 0x80..0x9F are represented by
 548    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
 549    code + 0x20).
 550
 551    8-bit characters of the range 0xA0..0xFF are represented by
 552    one-byte sequences which are their 8-bit code.
 553
 554    The other characters are represented by a sequence of `base
 555    leading-code', optional `extended leading-code', and one or two
 556    `position-code's.  The length of the sequence is determined by the
 557    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
 558    whereas extended leading-code and position-code take the range 0xA0
 559    through 0xFF.  See `charset.h' for more details about leading-code
 560    and position-code.
 561
 562    --- CODE RANGE of Emacs' internal format ---
 563    character set        range
 564    -------------        -----
 565    ascii                0x00..0x7F
 566    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
  567    eight-bit-graphic    0xA0..0xFF
 568    ELSE                 0x81..0x9D + [0xA0..0xFF]+
 569    ---------------------------------------------
 570
 571    As this is the internal character representation, the format is
  572    usually not used externally (i.e. in a file or in data sent to a
 573    process).  But, it is possible to have a text externally in this
 574    format (i.e. by encoding by the coding system `emacs-mule').
 575
 576    In that case, a sequence of one-byte codes has a slightly different
 577    form.
 578
 579    Firstly, all characters in eight-bit-control are represented by
 580    one-byte sequences which are their 8-bit code.
 581
 582    Next, character composition data are represented by the byte
 583    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
 584    where,
 585         METHOD is 0xF0 plus one of composition method (enum
 586         composition_method),
 587
 588         BYTES is 0xA0 plus the byte length of these composition data,
 589
 590         CHARS is 0xA0 plus the number of characters composed by these
 591         data,
 592
 593         COMPONENTs are characters of multibyte form or composition
 594         rules encoded by two-byte of ASCII codes.
 595
 596    In addition, for backward compatibility, the following formats are
 597    also recognized as composition data on decoding.
 598
 599    0x80 MSEQ ...
 600    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
 601
 602    Here,
  603         MSEQ is a multibyte form but in this special format:
 604           ASCII: 0xA0 ASCII_CODE+0x80,
 605           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
 606         RULE is a one byte code of the range 0xA0..0xF0 that
 607         represents a composition rule.
 608   */
 609
      /* Table of 256 entries of type `enum emacs_code_class_type'.
         NOTE(review): presumably indexed by byte value to give the class
         of each byte in the emacs-mule format; it is not initialized in
         the visible part of this file -- confirm where it is filled in.  */
  610 enum emacs_code_class_type emacs_code_class[256];
 611
 612 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 613    Check if a text is encoded in Emacs' internal format.  If it is,
 614    return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.  */
 615
 616 static int
 617 detect_coding_emacs_mule (src, src_end, multibytep)
 618       unsigned char *src, *src_end;
 619       int multibytep;
 620 {
 621   unsigned char c;
 622   int composing = 0;
 623   /* Dummy for ONE_MORE_BYTE.  */
 624   struct coding_system dummy_coding;
 625   struct coding_system *coding = &dummy_coding;
 626
 627   while (1)
 628     {
 629       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
 630
 631       if (composing)
 632         {
 633           if (c < 0xA0)
 634             composing = 0;
 635           else if (c == 0xA0)
 636             {
 637               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
 638               c &= 0x7F;
 639             }
 640           else
 641             c -= 0x20;
 642         }
 643
 644       if (c < 0x20)
 645         {
 646           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 647             return 0;
 648         }
 649       else if (c >= 0x80 && c < 0xA0)
 650         {
 651           if (c == 0x80)
 652             /* Old leading code for a composite character.  */
 653             composing = 1;
 654           else
 655             {
 656               unsigned char *src_base = src - 1;
 657               int bytes;
 658
 659               if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base,
 660                                                bytes))
 661                 return 0;
 662               src = src_base + bytes;
 663             }
 664         }
 665     }
 666  label_end_of_loop:
 667   return CODING_CATEGORY_MASK_EMACS_MULE;
 668 }
 669
 670
 671 /* Record the starting position START and METHOD of one composition.  */
 672
 673 #define CODING_ADD_COMPOSITION_START(coding, start, method)     \
 674   do {                                                          \
 675     struct composition_data *cmp_data = coding->cmp_data;       \
 676     int *data = cmp_data->data + cmp_data->used;                \
 677     coding->cmp_data_start = cmp_data->used;                    \
 678     data[0] = -1;                                               \
 679     data[1] = cmp_data->char_offset + start;                    \
 680     data[3] = (int) method;                                     \
 681     cmp_data->used += 4;                                        \
 682   } while (0)
 683
 684 /* Record the ending position END of the current composition.  */
 685
 686 #define CODING_ADD_COMPOSITION_END(coding, end)                 \
 687   do {                                                          \
 688     struct composition_data *cmp_data = coding->cmp_data;       \
 689     int *data = cmp_data->data + coding->cmp_data_start;        \
 690     data[0] = cmp_data->used - coding->cmp_data_start;          \
 691     data[2] = cmp_data->char_offset + end;                      \
 692   } while (0)
 693
 694 /* Record one COMPONENT (alternate character or composition rule).  */
 695
 696 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component)             \
 697   do {                                                                  \
 698     coding->cmp_data->data[coding->cmp_data->used++] = component;       \
 699     if (coding->cmp_data->used - coding->cmp_data_start                 \
 700         == COMPOSITION_DATA_MAX_BUNCH_LENGTH)                           \
 701       {                                                                 \
 702         CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
 703         coding->composing = COMPOSITION_NO;                             \
 704       }                                                                 \
 705   } while (0)
 706
 707
 708 /* Get one byte from a data pointed by SRC and increment SRC.  If SRC
 709    is not less than SRC_END, return -1 without incrementing Src.  */
 710
 711 #define SAFE_ONE_MORE_BYTE() (src >= src_end ? -1 : *src++)
 712
 713
 /* Decode one character that is a component of an Emacs 20 style
    composition sequence at SRC.  On success, set C to that character,
    store its multibyte form at P, and advance P past what was stored.
    If the source is exhausted or no valid character is found, set C
    to -1.

    Two source forms are accepted:
      0xA0 BYTE           -- BYTE is 0xA0 or greater and stands for the
                             character BYTE - 0x80 (0x20..0x7F)
      LEAD+0x20 BYTE ...  -- a multibyte form whose leading code has
                             had 0x20 added to it

    C must be able to hold -1.  The macro reads `src' and `src_end' of
    the expansion site through SAFE_ONE_MORE_BYTE, and looks at
    `coding->flags'.  */

 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p)                \
   do {                                                          \
     int bytes;                                                  \
                                                                 \
     c = SAFE_ONE_MORE_BYTE ();                                  \
     if (c < 0)                                                  \
       break;            /* source exhausted; C is already -1 */ \
     if (CHAR_HEAD_P (c))                                        \
       c = -1;                                                   \
     else if (c == 0xA0)                                         \
       {                                                         \
         c = SAFE_ONE_MORE_BYTE ();                              \
         if (c < 0xA0)                                           \
           c = -1;                                               \
         else                                                    \
           {                                                     \
             /* Remove the 0x80 offset, so that the accepted     \
                range 0xA0..0xFF yields 0x20..0x7F.  */          \
             c -= 0x80;                                          \
             *p++ = c;                                           \
           }                                                     \
       }                                                         \
     else if (BASE_LEADING_CODE_P (c - 0x20))                    \
       {                                                         \
         unsigned char *p0 = p;                                  \
                                                                 \
         /* Restore the real leading code, then copy the bytes   \
            that follow it.  */                                  \
         c -= 0x20;                                              \
         *p++ = c;                                               \
         bytes = BYTES_BY_CHAR_HEAD (c);                         \
         while (--bytes)                                         \
           {                                                     \
             c = SAFE_ONE_MORE_BYTE ();                          \
             if (c < 0)                                          \
               break;                                            \
             *p++ = c;                                           \
           }                                                     \
         /* Accept what was copied only if it is a valid         \
            multibyte sequence.  */                              \
         if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes)      \
             || (coding->flags /* We are recovering a file.  */  \
                 && p0[0] == LEADING_CODE_8_BIT_CONTROL          \
                 && ! CHAR_HEAD_P (p0[1])))                      \
           c = STRING_CHAR (p0, bytes);                          \
         else                                                    \
           c = -1;                                               \
       }                                                         \
     else                                                        \
       c = -1;                                                   \
   } while (0)
 764
 765
 /* Decode one composition rule that is a component of an Emacs 20
    style composition sequence at SRC.  The rule is a single byte whose
    value minus 0xA0 equals GREF * 9 + NREF and must lie in 0..80.
    Set C to the encoded rule, or to -1 if no valid rule is found.

    The variables `gref' and `nref' of the expansion site are
    overwritten when the rule is valid.  */

 #define DECODE_EMACS_MULE_COMPOSITION_RULE(c)           \
   do {                                                  \
     c = SAFE_ONE_MORE_BYTE () - 0xA0;                   \
     if (c >= 0 && c < 81)                               \
       {                                                 \
         gref = c / 9;                                   \
         nref = c % 9;                                   \
         c = COMPOSITION_ENCODE_RULE (gref, nref);       \
       }                                                 \
     else                                                \
       c = -1;                                           \
   } while (0)
 782
 783
 784 /* Decode composition sequence encoded by `emacs-mule' at the source
 785    pointed by SRC.  SRC_END is the end of source.  Store information
 786    of the composition in CODING->cmp_data.
 787
 788    For backward compatibility, decode also a composition sequence of
 789    Emacs 20 style.  In that case, the composition sequence contains
 790    characters that should be extracted into a buffer or string.  Store
 791    those characters at *DESTINATION in multibyte form.
 792
 793    If we encounter an invalid byte sequence, return 0.
 794    If we encounter an insufficient source or destination, or
 795    insufficient space in CODING->cmp_data, return 1.
 796    Otherwise, return consumed bytes in the source.
 797
 798 */
 799 static INLINE int
 800 decode_composition_emacs_mule (coding, src, src_end,
 801                                destination, dst_end, dst_bytes)
 802      struct coding_system *coding;
 803      unsigned char *src, *src_end, **destination, *dst_end;
 804      int dst_bytes;
 805 {
 806   unsigned char *dst = *destination;
 807   int method, data_len, nchars;
 808   unsigned char *src_base = src++;
 809   /* Store components of composition.  */
 810   int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH];
 811   int ncomponent;
 812   /* Store multibyte form of characters to be composed.  This is for
 813      Emacs 20 style composition sequence.  */
 814   unsigned char buf[MAX_COMPOSITION_COMPONENTS * MAX_MULTIBYTE_LENGTH];
 815   unsigned char *bufp = buf;
 816   int c, i, gref, nref;
 817
 818   if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
 819       >= COMPOSITION_DATA_SIZE)
 820     {
 821       coding->result = CODING_FINISH_INSUFFICIENT_CMP;
 822       return -1;
 823     }
 824
 825   ONE_MORE_BYTE (c);
 826   if (c - 0xF0 >= COMPOSITION_RELATIVE
 827            && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS)
 828     {
 829       int with_rule;
 830
 831       method = c - 0xF0;
 832       with_rule = (method == COMPOSITION_WITH_RULE
 833                    || method == COMPOSITION_WITH_RULE_ALTCHARS);
 834       ONE_MORE_BYTE (c);
 835       data_len = c - 0xA0;
 836       if (data_len < 4
 837           || src_base + data_len > src_end)
 838         return 0;
 839       ONE_MORE_BYTE (c);
 840       nchars = c - 0xA0;
 841       if (c < 1)
 842         return 0;
 843       for (ncomponent = 0; src < src_base + data_len; ncomponent++)
 844         {
 845           /* If it is longer than this, it can't be valid.  */
 846           if (ncomponent >= COMPOSITION_DATA_MAX_BUNCH_LENGTH)
 847             return 0;
 848
 849           if (ncomponent % 2 && with_rule)
 850             {
 851               ONE_MORE_BYTE (gref);
 852               gref -= 32;
 853               ONE_MORE_BYTE (nref);
 854               nref -= 32;
 855               c = COMPOSITION_ENCODE_RULE (gref, nref);
 856             }
 857           else
 858             {
 859               int bytes;
 860               if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
 861                   || (coding->flags /* We are recovering a file.  */
 862                       && src[0] == LEADING_CODE_8_BIT_CONTROL
 863                       && ! CHAR_HEAD_P (src[1])))
 864                 c = STRING_CHAR (src, bytes);
 865               else
 866                 c = *src, bytes = 1;
 867               src += bytes;
 868             }
 869           component[ncomponent] = c;
 870         }
 871     }
 872   else
 873     {
 874       /* This may be an old Emacs 20 style format.  See the comment at
 875          the section 2 of this file.  */
 876       while (src < src_end && !CHAR_HEAD_P (*src)) src++;
 877       if (src == src_end
 878           && !(coding->mode & CODING_MODE_LAST_BLOCK))
 879         goto label_end_of_loop;
 880
 881       src_end = src;
 882       src = src_base + 1;
 883       if (c < 0xC0)
 884         {
 885           method = COMPOSITION_RELATIVE;
 886           for (ncomponent = 0; ncomponent < MAX_COMPOSITION_COMPONENTS;)
 887             {
 888               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 889               if (c < 0)
 890                 break;
 891               component[ncomponent++] = c;
 892             }
 893           if (ncomponent < 2)
 894             return 0;
 895           nchars = ncomponent;
 896         }
 897       else if (c == 0xFF)
 898         {
 899           method = COMPOSITION_WITH_RULE;
 900           src++;
 901           DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 902           if (c < 0)
 903             return 0;
 904           component[0] = c;
 905           for (ncomponent = 1;
 906                ncomponent < MAX_COMPOSITION_COMPONENTS * 2 - 1;)
 907             {
 908               DECODE_EMACS_MULE_COMPOSITION_RULE (c);
 909               if (c < 0)
 910                 break;
 911               component[ncomponent++] = c;
 912               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 913               if (c < 0)
 914                 break;
 915               component[ncomponent++] = c;
 916             }
 917           if (ncomponent < 3)
 918             return 0;
 919           nchars = (ncomponent + 1) / 2;
 920         }
 921       else
 922         return 0;
 923     }
 924
 925   if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src))
 926     {
 927       CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method);
 928       for (i = 0; i < ncomponent; i++)
 929         CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]);
 930       CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars);
 931       if (buf < bufp)
 932         {
 933           unsigned char *p = buf;
 934           EMIT_BYTES (p, bufp);
 935           *destination += bufp - buf;
 936           coding->produced_char += nchars;
 937         }
 938       return (src - src_base);
 939     }
 940  label_end_of_loop:
 941   return -1;
 942 }
 943
 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".

    Decode SRC_BYTES bytes of `emacs-mule' text at SOURCE into
    DESTINATION, which has room for DST_BYTES bytes.  Each iteration
    handles one unit of source:
      - CR and LF are converted according to CODING->eol_type;
      - a 0x80 byte, when CODING->cmp_data is set, is handed to
        decode_composition_emacs_mule;
      - a valid multibyte sequence is copied as is;
      - any other byte is converted by CHAR_STRING.
    On return, CODING->consumed, CODING->consumed_char,
    CODING->produced and CODING->produced_char are set; CODING->result
    is set when the loop stops early.  */

 static void
 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
      struct coding_system *coding;
      unsigned char *source, *destination;
      int src_bytes, dst_bytes;
 {
   unsigned char *src = source;
   unsigned char *src_end = source + src_bytes;
   unsigned char *dst = destination;
   unsigned char *dst_end = destination + dst_bytes;
   /* SRC_BASE remembers the start position in source in each loop.
      The loop will be exited when there's not enough source text, or
      when there's not enough destination area to produce a
      character.  */
   unsigned char *src_base;

   coding->produced_char = 0;
   while ((src_base = src) < src_end)
     {
       /* P points at the BYTES bytes to emit for this unit: either
          the source itself or the form built in TMP.  */
       unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
       int bytes;

       if (*src == '\r')
         {
           int c = *src++;

           if (coding->eol_type == CODING_EOL_CR)
             c = '\n';
           else if (coding->eol_type == CODING_EOL_CRLF)
             {
               /* NOTE(review): ONE_MORE_BYTE is defined elsewhere;
                  presumably it leaves through label_end_of_loop when
                  the source runs out -- confirm.  */
               ONE_MORE_BYTE (c);
               if (c != '\n')
                 {
                   /* A CR not followed by LF stays a CR; put the byte
                      just read back.  */
                   src--;
                   c = '\r';
                 }
             }
           *dst++ = c;
           coding->produced_char++;
           continue;
         }
       else if (*src == '\n')
         {
           /* A bare LF does not match a CR or CRLF eol type; stop here
              if the caller asked not to accept that.  */
           if ((coding->eol_type == CODING_EOL_CR
                || coding->eol_type == CODING_EOL_CRLF)
               && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
             {
               coding->result = CODING_FINISH_INCONSISTENT_EOL;
               goto label_end_of_loop;
             }
           *dst++ = *src++;
           coding->produced_char++;
           continue;
         }
       else if (*src == 0x80 && coding->cmp_data)
         {
           /* Start of composition data.  */
           int consumed  = decode_composition_emacs_mule (coding, src, src_end,
                                                          &dst, dst_end,
                                                          dst_bytes);
           /* Negative: stop the loop.  Positive: that many source
              bytes were handled.  Zero: not a valid composition, so
              treat the 0x80 byte like any other single byte.  */
           if (consumed < 0)
             goto label_end_of_loop;
           else if (consumed > 0)
             {
               src += consumed;
               continue;
             }
           bytes = CHAR_STRING (*src, tmp);
           p = tmp;
           src++;
         }
       else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
                || (coding->flags /* We are recovering a file.  */
                    && src[0] == LEADING_CODE_8_BIT_CONTROL
                    && ! CHAR_HEAD_P (src[1])))
         {
           /* Already a valid multibyte sequence; copy it unchanged.  */
           p = src;
           src += bytes;
         }
       else
         {
           /* A byte that is not part of a valid multibyte sequence.  */
           bytes = CHAR_STRING (*src, tmp);
           p = tmp;
           src++;
         }
       /* When DST_BYTES is zero the limit is the source position.
          NOTE(review): presumably the conversion is then done in
          place -- confirm against the callers.  */
       if (dst + bytes >= (dst_bytes ? dst_end : src))
         {
           coding->result = CODING_FINISH_INSUFFICIENT_DST;
           break;
         }
       while (bytes--) *dst++ = *p++;
       coding->produced_char++;
     }
  label_end_of_loop:
   /* Whatever starts at SRC_BASE was not completed, so only the bytes
      before it count as consumed.  */
   coding->consumed = coding->consumed_char = src_base - source;
   coding->produced = dst - destination;
 }
1043
1044
/* Encode composition data stored at DATA into a special byte sequence
   starting with 0x80.  Update CODING->cmp_data_start and maybe
   CODING->cmp_data for the next call.

   DATA is read as follows: DATA[0] is the number of elements of this
   entry, DATA[3] the composition method, DATA[2] - DATA[1] the number
   of composed characters, and DATA[4] onward the components (for the
   two WITH_RULE methods, characters alternating with rules).

   The bytes produced are:
        0x80  0xF0+METHOD  0xA0+BYTES  0xA0+CHARS  COMPONENT ...
   where BYTES is the length of the whole sequence, a character
   component is produced by CHAR_STRING, and a rule is produced as the
   two bytes 0x20+GREF 0x20+NREF.

   This macro uses the variables `dst', `dst_end', `dst_bytes' and
   `src' of the expansion site.  If the destination is too small, it
   sets CODING->result to CODING_FINISH_INSUFFICIENT_DST and jumps to
   `label_end_of_loop' without emitting anything.  */

#define ENCODE_COMPOSITION_EMACS_MULE(coding, data)                     \
  do {                                                                  \
    unsigned char buf[1024], *p0 = buf, *p;                             \
    int len = data[0];                                                  \
    int i;                                                              \
                                                                        \
    buf[0] = 0x80;                                                      \
    buf[1] = 0xF0 + data[3];    /* METHOD */                            \
    buf[3] = 0xA0 + (data[2] - data[1]); /* COMPOSED-CHARS */           \
    p = buf + 4;                                                        \
    if (data[3] == COMPOSITION_WITH_RULE                                \
        || data[3] == COMPOSITION_WITH_RULE_ALTCHARS)                   \
      {                                                                 \
        p += CHAR_STRING (data[4], p);                                  \
        for (i = 5; i < len; i += 2)                                    \
          {                                                             \
            int gref, nref;                                             \
             COMPOSITION_DECODE_RULE (data[i], gref, nref);             \
            *p++ = 0x20 + gref;                                         \
            *p++ = 0x20 + nref;                                         \
            p += CHAR_STRING (data[i + 1], p);                          \
          }                                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        for (i = 4; i < len; i++)                                       \
          p += CHAR_STRING (data[i], p);                                \
      }                                                                 \
    buf[2] = 0xA0 + (p - buf);  /* COMPONENTS-BYTES */                  \
                                                                        \
    if (dst + (p - buf) + 4 > (dst_bytes ? dst_end : src))              \
      {                                                                 \
        coding->result = CODING_FINISH_INSUFFICIENT_DST;                \
        goto label_end_of_loop;                                         \
      }                                                                 \
    while (p0 < p)                                                      \
      *dst++ = *p0++;                                                   \
    coding->cmp_data_start += data[0];                                  \
    if (coding->cmp_data_start == coding->cmp_data->used                \
        && coding->cmp_data->next)                                      \
      {                                                                 \
        coding->cmp_data = coding->cmp_data->next;                      \
        coding->cmp_data_start = 0;                                     \
      }                                                                 \
  } while (0)
1094
1095
/* Forward declaration; encode_eol is called below for text that
   carries no composition data.  */
static void encode_eol P_ ((struct coding_system *, const unsigned char *,
                            unsigned char *, int, int));

/* Encode SRC_BYTES bytes of text at SOURCE into `emacs-mule' at
   DESTINATION, which has room for DST_BYTES bytes.

   If CODING has no composition data, the whole job is handed to
   encode_eol.  Otherwise each character is processed in turn: the
   information of a composition is emitted just before the character
   at which the composition starts, a newline is converted according
   to CODING->eol_type, and everything else is emitted as is (except
   that, when CODING->flags is non-zero, a non-ASCII single-byte
   character is emitted in the form made by CHAR_STRING).

   On return, CODING->consumed, CODING->produced and
   CODING->produced_char are set.  */
static void
encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
     struct coding_system *coding;
     unsigned char *source, *destination;
     int src_bytes, dst_bytes;
{
  unsigned char *src = source;
  unsigned char *src_end = source + src_bytes;
  unsigned char *dst = destination;
  unsigned char *dst_end = destination + dst_bytes;
  /* Start in the source of the character being processed.  */
  unsigned char *src_base;
  int c;
  int char_offset;
  /* The entry of composition data to be encoded next.  */
  int *data;

  /* Not referenced directly in this function.
     NOTE(review): presumably ONE_MORE_CHAR expects a variable of this
     name -- confirm against its definition.  */
  Lisp_Object translation_table;

  translation_table = Qnil;

  /* Optimization for the case that there's no composition.  */
  if (!coding->cmp_data || coding->cmp_data->used == 0)
    {
      encode_eol (coding, source, destination, src_bytes, dst_bytes);
      return;
    }

  char_offset = coding->cmp_data->char_offset;
  data = coding->cmp_data->data + coding->cmp_data_start;
  /* No condition here: the loop is left only by a jump to
     label_end_of_loop.  ENCODE_COMPOSITION_EMACS_MULE does that when
     the destination is too small.  NOTE(review): ONE_MORE_CHAR and the
     EMIT_* macros are defined elsewhere and presumably jump there too
     when the source or the destination runs out -- confirm.  */
  while (1)
    {
      src_base = src;

      /* If SRC starts a composition, encode the information about the
         composition in advance.  */
      if (coding->cmp_data_start < coding->cmp_data->used
          && char_offset + coding->consumed_char == data[1])
        {
          ENCODE_COMPOSITION_EMACS_MULE (coding, data);
          /* The macro may have advanced CODING->cmp_data_start or
             switched CODING->cmp_data; reload what depends on them.  */
          char_offset = coding->cmp_data->char_offset;
          data = coding->cmp_data->data + coding->cmp_data_start;
        }

      ONE_MORE_CHAR (c);
      if (c == '\n' && (coding->eol_type == CODING_EOL_CRLF
                        || coding->eol_type == CODING_EOL_CR))
        {
          /* Newline becomes CR LF or a lone CR.  */
          if (coding->eol_type == CODING_EOL_CRLF)
            EMIT_TWO_BYTES ('\r', c);
          else
            EMIT_ONE_BYTE ('\r');
        }
      else if (SINGLE_BYTE_CHAR_P (c))
        {
          if (coding->flags && ! ASCII_BYTE_P (c))
            {
              /* As we are auto saving, retain the multibyte form for
                 8-bit chars.  */
              unsigned char buf[MAX_MULTIBYTE_LENGTH];
              int bytes = CHAR_STRING (c, buf);

              if (bytes == 1)
                EMIT_ONE_BYTE (buf[0]);
              else
                EMIT_TWO_BYTES (buf[0], buf[1]);
            }
          else
            EMIT_ONE_BYTE (c);
        }
      else
        /* Copy the source bytes of this character unchanged.  */
        EMIT_BYTES (src_base, src);
      coding->consumed_char++;
    }
 label_end_of_loop:
  /* The character starting at SRC_BASE was not completed, so only the
     bytes before it count as consumed.  */
  coding->consumed = src_base - source;
  coding->produced = coding->produced_char = dst - destination;
  return;
}
1176
1177 \f
1178 /*** 3. ISO2022 handlers ***/
1179
1180 /* The following note describes the coding system ISO2022 briefly.
1181    Since the intention of this note is to help understand the
1182    functions in this file, some parts are NOT ACCURATE or are OVERLY
1183    SIMPLIFIED.  For thorough understanding, please refer to the
1184    original document of ISO2022.  This is equivalent to the standard
1185    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
1186
1187    ISO2022 provides many mechanisms to encode several character sets
1188    in 7-bit and 8-bit environments.  For 7-bit environments, all text
1189    is encoded using bytes less than 128.  This may make the encoded
1190    text a little bit longer, but the text passes more easily through
1191    several types of gateway, some of which strip off the MSB (Most
1192    Significant Bit).
1193
1194    There are two kinds of character sets: control character sets and
1195    graphic character sets.  The former contain control characters such
1196    as `newline' and `escape' to provide control functions (control
1197    functions are also provided by escape sequences).  The latter
1198    contain graphic characters such as 'A' and '-'.  Emacs recognizes
1199    two control character sets and many graphic character sets.
1200
1201    Graphic character sets are classified into one of the following
1202    four classes, according to the number of bytes (DIMENSION) and
1203    number of characters in one dimension (CHARS) of the set:
1204    - DIMENSION1_CHARS94
1205    - DIMENSION1_CHARS96
1206    - DIMENSION2_CHARS94
1207    - DIMENSION2_CHARS96
1208
1209    In addition, each character set is assigned an identification tag,
1210    unique for each set, called the "final character" (denoted as <F>
1211    hereafter).  The <F> of each character set is decided by ECMA(*)
1212    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
1213    (0x30..0x3F are for private use only).
1214
1215    Note (*): ECMA = European Computer Manufacturers Association
1216
1217    Here are examples of graphic character sets [NAME(<F>)]:
1218         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
1219         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
1220         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
1221         o DIMENSION2_CHARS96 -- none for the moment
1222
1223    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
1224         C0 [0x00..0x1F] -- control character plane 0
1225         GL [0x20..0x7F] -- graphic character plane 0
1226         C1 [0x80..0x9F] -- control character plane 1
1227         GR [0xA0..0xFF] -- graphic character plane 1
1228
1229    A control character set is directly designated and invoked to C0 or
1230    C1 by an escape sequence.  The most common case is that:
1231    - ISO646's  control character set is designated/invoked to C0, and
1232    - ISO6429's control character set is designated/invoked to C1,
1233    and usually these designations/invocations are omitted in encoded
1234    text.  In a 7-bit environment, only C0 can be used, and a control
1235    character for C1 is encoded by an appropriate escape sequence to
1236    fit into the environment.  All control characters for C1 are
1237    defined to have corresponding escape sequences.
1238
1239    A graphic character set is at first designated to one of four
1240    graphic registers (G0 through G3), then these graphic registers are
1241    invoked to GL or GR.  These designations and invocations can be
1242    done independently.  The most common case is that G0 is invoked to
1243    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
1244    these invocations and designations are omitted in encoded text.
1245    In a 7-bit environment, only GL can be used.
1246
1247    When a graphic character set of CHARS94 is invoked to GL, codes
1248    0x20 and 0x7F of the GL area work as control characters SPACE and
1249    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
1250    be used.
1251
1252    There are two ways of invocation: locking-shift and single-shift.
1253    With locking-shift, the invocation lasts until the next different
1254    invocation, whereas with single-shift, the invocation affects the
1255    following character only and doesn't affect the locking-shift
1256    state.  Invocations are done by the following control characters or
1257    escape sequences:
1258
1259    ----------------------------------------------------------------------
1260    abbrev  function                  cntrl escape seq   description
1261    ----------------------------------------------------------------------
1262    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
1263    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
1264    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
1265    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
1266    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
1267    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
   LS3R    (locking-shift-3 right)   none  ESC '|'      invoke G3 into GR (*)
1269    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
1270    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
1271    ----------------------------------------------------------------------
1272    (*) These are not used by any known coding system.
1273
1274    Control characters for these functions are defined by macros
1275    ISO_CODE_XXX in `coding.h'.
1276
1277    Designations are done by the following escape sequences:
1278    ----------------------------------------------------------------------
1279    escape sequence      description
1280    ----------------------------------------------------------------------
1281    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
1282    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
1283    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
1284    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
1285    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
1286    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
1287    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
1288    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
1289    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
1290    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
1291    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
1292    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
1293    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
1294    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
1295    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
1296    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
1297    ----------------------------------------------------------------------
1298
1299    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
1300    of dimension 1, chars 94, and final character <F>, etc...
1301
1302    Note (*): Although these designations are not allowed in ISO2022,
1303    Emacs accepts them on decoding, and produces them on encoding
1304    CHARS96 character sets in a coding system which is characterized as
1305    7-bit environment, non-locking-shift, and non-single-shift.
1306
1307    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
1308    '(' can be omitted.  We refer to this as "short-form" hereafter.
1309
   Now you may notice that there are a lot of ways of encoding the
   same multilingual text in ISO2022.  Actually, there exist many
   coding systems such as Compound Text (used in X11's inter-client
   communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
   (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
   localized platforms), and all of these are variants of ISO2022.
1316
1317    In addition to the above, Emacs handles two more kinds of escape
1318    sequences: ISO6429's direction specification and Emacs' private
1319    sequence for specifying character composition.
1320
1321    ISO6429's direction specification takes the following form:
1322         o CSI ']'      -- end of the current direction
1323         o CSI '0' ']'  -- end of the current direction
1324         o CSI '1' ']'  -- start of left-to-right text
1325         o CSI '2' ']'  -- start of right-to-left text
1326    The control character CSI (0x9B: control sequence introducer) is
1327    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
1328
1329    Character composition specification takes the following form:
1330         o ESC '0' -- start relative composition
1331         o ESC '1' -- end composition
1332         o ESC '2' -- start rule-base composition (*)
1333         o ESC '3' -- start relative composition with alternate chars  (**)
1334         o ESC '4' -- start rule-base composition with alternate chars  (**)
1335   Since these are not standard escape sequences of any ISO standard,
1336   the use of them with these meanings is restricted to Emacs only.
1337
1338   (*) This form is used only in Emacs 20.5 and older versions,
1339   but the newer versions can safely decode it.
1340   (**) This form is used only in Emacs 21.1 and newer versions,
1341   and the older versions can't decode it.
1342
1343   Here's a list of example usages of these composition escape
1344   sequences (categorized by `enum composition_method').
1345
1346   COMPOSITION_RELATIVE:
1347         ESC 0 CHAR [ CHAR ] ESC 1
1348   COMPOSITION_WITH_RULE:
1349         ESC 2 CHAR [ RULE CHAR ] ESC 1
1350   COMPOSITION_WITH_ALTCHARS:
1351         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
1352   COMPOSITION_WITH_RULE_ALTCHARS:
1353         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
1354
/* Table of 256 `enum iso_code_class_type' values.
   NOTE(review): presumably indexed by a byte value and giving the
   ISO2022 class of that byte -- confirm where the table is filled.  */
enum iso_code_class_type iso_code_class[256];
1356
/* Non-zero if the entry IDX of coding_system_table exists and that
   coding system accepts character C of CHARSET, i.e.:
     - CHARSET is CHARSET_ASCII, or C is in the safe chars of that
       coding system, and
     - the requested designation of CHARSET in that coding system is
       not CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION.

   As a side effect, the variable `safe_chars' of the expansion site
   is set whenever the safe chars have to be consulted (the entry
   exists and CHARSET is not ASCII).  IDX is evaluated more than
   once.  */

#define CHARSET_OK(idx, charset, c)                                     \
  (coding_system_table[idx]                                             \
   && ((charset) == CHARSET_ASCII                                       \
       || (safe_chars = coding_safe_chars (coding_system_table[idx]->symbol), \
           CODING_SAFE_CHAR_P (safe_chars, (c))))                       \
   && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
                                              (charset))                \
       != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
1365
/* Non-zero if the initial designation of graphic register 1 in the
   coding system at index IDX of coding_system_table is not negative.
   Unlike CHARSET_OK, this does not check that the table entry is
   non-null before using it.  */
#define SHIFT_OUT_OK(idx) \
  (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
1368
/* Non-zero unless the `composing' member of the coding system at
   index IDX of coding_system_table is COMPOSITION_DISABLED.  The
   table entry is dereferenced without a null check.  */
#define COMPOSITION_OK(idx)     \
  (coding_system_table[idx]->composing != COMPOSITION_DISABLED)
1371
1372 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1373    Check if the text in SRC..SRC_END is encoded in ISO2022.  If it is,
1374    return an integer in which any appropriate flag bits of:
1375         CODING_CATEGORY_MASK_ISO_7
1376         CODING_CATEGORY_MASK_ISO_7_TIGHT
1377         CODING_CATEGORY_MASK_ISO_8_1
1378         CODING_CATEGORY_MASK_ISO_8_2
1379         CODING_CATEGORY_MASK_ISO_7_ELSE
1380         CODING_CATEGORY_MASK_ISO_8_ELSE
1381    are set.  If a code which should never appear in ISO2022 is found,
1382    return 0.
        MULTIBYTEP is handed to ONE_MORE_BYTE_CHECK_MULTIBYTE for every
        byte that is fetched.
        MASK starts as CODING_CATEGORY_MASK_ISO and loses the bits of the
        categories ruled out by the bytes seen so far; MASK_FOUND collects
        the bits of the categories for which some evidence was seen.  The
        value returned at the end is the intersection of the two.  */
1383
1384 static int
1385 detect_coding_iso2022 (src, src_end, multibytep)
1386      unsigned char *src, *src_end;
1387      int multibytep;
1388 {
1389   int mask = CODING_CATEGORY_MASK_ISO;
1390   int mask_found = 0;
1391   int reg[4], shift_out = 0, single_shifting = 0;
       /* NOTE(review): SHIFT_OUT is never assigned after the
          initialization above in the code visible here, so the test
          `shift_out == 1' in the ISO_CODE_SI case looks always false.
          Confirm that no macro used in this function updates it.  */
1392   int c, c1, charset;
1393   /* Dummy for ONE_MORE_BYTE.  */
1394   struct coding_system dummy_coding;
1395   struct coding_system *coding = &dummy_coding;
1396   Lisp_Object safe_chars;
1397
       /* REG[N] is the charset most recently designated to register N by
          the escape sequences seen so far; -1 means none yet.  */
1398   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
1399   while (mask && src < src_end)
1400     {
1401       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
         /* Re-entered with C already set when the scan of bytes >= 0xA0
            in the default case has read one byte too many.  */
1402     retry:
1403       switch (c)
1404         {
1405         case ISO_CODE_ESC:
1406           if (inhibit_iso_escape_detection)
1407             break;
1408           single_shifting = 0;
1409           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1410           if (c >= '(' && c <= '/')
1411             {
1412               /* Designation sequence for a charset of dimension 1.  */
1413               ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1414               if (c1 < ' ' || c1 >= 0x80
1415                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
1416                 /* Invalid designation sequence.  Just ignore.  */
1417                 break;
                   /* Both '('..'+' and ','..'/' map onto registers 0..3.  */
1418               reg[(c - '(') % 4] = charset;
1419             }
1420           else if (c == '$')
1421             {
1422               /* Designation sequence for a charset of dimension 2.  */
1423               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1424               if (c >= '@' && c <= 'B')
1425                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
1426                 reg[0] = charset = iso_charset_table[1][0][c];
1427               else if (c >= '(' && c <= '/')
1428                 {
1429                   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1430                   if (c1 < ' ' || c1 >= 0x80
1431                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
1432                     /* Invalid designation sequence.  Just ignore.  */
1433                     break;
1434                   reg[(c - '(') % 4] = charset;
1435                 }
1436               else
1437                 /* Invalid designation sequence.  Just ignore.  */
1438                 break;
1439             }
1440           else if (c == 'N' || c == 'O')
1441             {
1442               /* ESC <Fe> for SS2 or SS3.  */
1443               mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
1444               break;
1445             }
1446           else if (c >= '0' && c <= '4')
1447             {
1448               /* ESC <Fp> for start/end composition.  */
1449               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7))
1450                 mask_found |= CODING_CATEGORY_MASK_ISO_7;
1451               else
1452                 mask &= ~CODING_CATEGORY_MASK_ISO_7;
1453               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT))
1454                 mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
1455               else
1456                 mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
1457               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_1))
1458                 mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1459               else
1460                 mask &= ~CODING_CATEGORY_MASK_ISO_8_1;
1461               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_2))
1462                 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1463               else
1464                 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1465               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_ELSE))
1466                 mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
1467               else
1468                 mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
1469               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_ELSE))
1470                 mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
1471               else
1472                 mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
1473               break;
1474             }
1475           else
1476             /* Invalid escape sequence.  Just ignore.  */
1477             break;
1478
1479           /* We found a valid designation sequence for CHARSET.  */
               /* Only the two designation branches above get here; every
                  other branch has already left the switch.  */
1480           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
1481           c = MAKE_CHAR (charset, 0, 0);
1482           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
1483             mask_found |= CODING_CATEGORY_MASK_ISO_7;
1484           else
1485             mask &= ~CODING_CATEGORY_MASK_ISO_7;
1486           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
1487             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
1488           else
1489             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
1490           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
1491             mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
1492           else
1493             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
1494           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
1495             mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
1496           else
1497             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
1498           break;
1499
1500         case ISO_CODE_SO:
1501           if (inhibit_iso_escape_detection)
1502             break;
1503           single_shifting = 0;
1504           if (shift_out == 0
1505               && (reg[1] >= 0
1506                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
1507                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
1508             {
1509               /* Locking shift out.  */
1510               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1511               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1512             }
1513           break;
1514
1515         case ISO_CODE_SI:
1516           if (inhibit_iso_escape_detection)
1517             break;
1518           single_shifting = 0;
1519           if (shift_out == 1)
1520             {
1521               /* Locking shift in.  */
1522               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1523               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1524             }
1525           break;
1526
1527         case ISO_CODE_CSI:
1528           single_shifting = 0;
               /* Fall through: CSI shares the handling of SS2 and SS3,
                  except for the parts guarded by `c != ISO_CODE_CSI'.  */
1529         case ISO_CODE_SS2:
1530         case ISO_CODE_SS3:
1531           {
1532             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
1533
1534             if (inhibit_iso_escape_detection)
1535               break;
1536             if (c != ISO_CODE_CSI)
1537               {
1538                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1539                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1540                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1541                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1542                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1543                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1544                 single_shifting = 1;
1545               }
1546             if (VECTORP (Vlatin_extra_code_table)
1547                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1548               {
1549                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1550                     & CODING_FLAG_ISO_LATIN_EXTRA)
1551                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1552                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1553                     & CODING_FLAG_ISO_LATIN_EXTRA)
1554                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1555               }
1556             mask &= newmask;
1557             mask_found |= newmask;
1558           }
1559           break;
1560
1561         default:
1562           if (c < 0x80)
1563             {
1564               single_shifting = 0;
1565               break;
1566             }
1567           else if (c < 0xA0)
1568             {
1569               single_shifting = 0;
1570               if (VECTORP (Vlatin_extra_code_table)
1571                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1572                 {
1573                   int newmask = 0;
1574
1575                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1576                       & CODING_FLAG_ISO_LATIN_EXTRA)
1577                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1578                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1579                       & CODING_FLAG_ISO_LATIN_EXTRA)
1580                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1581                   mask &= newmask;
1582                   mask_found |= newmask;
1583                 }
1584               else
                     /* A byte in 0x80..0x9F that is not listed in
                        Vlatin_extra_code_table: not ISO2022 at all.  */
1585                 return 0;
1586             }
1587           else
1588             {
1589               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
1590                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
1591               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1592               /* Check the length of succeeding codes of the range
1593                  0xA0..0xFF.  If the byte length is odd, we exclude
1594                  CODING_CATEGORY_MASK_ISO_8_2.  We can check this only
1595                  when we are not single shifting.  */
1596               if (!single_shifting
1597                   && mask & CODING_CATEGORY_MASK_ISO_8_2)
1598                 {
                       /* I counts the bytes of the run, starting with the
                          one already held in C.  */
1599                   int i = 1;
1600
1601                   c = -1;
1602                   while (src < src_end)
1603                     {
1604                       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1605                       if (c < 0xA0)
1606                         break;
1607                       i++;
1608                     }
1609
                       /* An odd run that stops at the end of the source is
                          not held against ISO_8_2.  */
1610                   if (i & 1 && src < src_end)
1611                     mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1612                   else
1613                     mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1614                   if (c >= 0)
1615                     /* This means that we have read one extra byte.  */
1616                     goto retry;
1617                 }
1618             }
1619           break;
1620         }
1621     }
       /* No `goto' in the visible text targets this label; presumably
          ONE_MORE_BYTE_CHECK_MULTIBYTE jumps here when the source is
          exhausted -- the macro is defined elsewhere, confirm.  */
1622  label_end_of_loop:
1623   return (mask & mask_found);
1624 }
1625
1626 /* Build the character whose charset is CHARSET and whose 1st and 2nd
1627    position codes are C1 and C2.  When the variable `translation_table'
1628    is non-nil, the character is obtained from translate_char with that
1629    table; otherwise it is composed directly by MAKE_CHAR.  */
1630
1631 #define DECODE_ISO_CHARACTER(charset, c1, c2)                 \
1632   (!NILP (translation_table)                                  \
1633    ? translate_char (translation_table, -1, charset, c1, c2)  \
1634    : MAKE_CHAR (charset, c1, c2))
1635
1636 /* Set designation state into CODING.
        The charset is looked up with ISO_CHARSET_TABLE from DIMENSION,
        CHARS and FINAL_CHAR.  If one is found and either CODING requests
        it for register REG or it is allowed by `safe_chars', it becomes
        the designation of REG; when CODING_MODE_DIRECTION is set in
        coding->mode and the charset has a reverse charset, the reverse
        one is stored instead.  Otherwise REG is remembered in
        last_invalid_designation_register and control jumps to
        label_invalid_code.
        Control also jumps to label_invalid_code when FINAL_CHAR is below
        '0' or not below 128, and when ASCII is designated to register 0
        while last_invalid_designation_register is 0 (that sequence is to
        be kept as is).
        The expansion needs `coding', `safe_chars' and the label
        `label_invalid_code' at the point of use.  It declares its own
        `charset' and `c', so variables of those names in the enclosing
        scope are not modified.
        NOTE(review): MAKE_CHAR is evaluated before CHARSET is known to
        be non-negative -- confirm that it tolerates a negative charset.  */
1637 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
1638   do {                                                                     \
1639     int charset, c;                                                        \
1640                                                                            \
1641     if (final_char < '0' || final_char >= 128)                             \
1642       goto label_invalid_code;                                             \
1643     charset = ISO_CHARSET_TABLE (make_number (dimension),                  \
1644                                  make_number (chars),                      \
1645                                  make_number (final_char));                \
1646     c = MAKE_CHAR (charset, 0, 0);                                         \
1647     if (charset >= 0                                                       \
1648         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
1649             || CODING_SAFE_CHAR_P (safe_chars, c)))                        \
1650       {                                                                    \
1651         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
1652             && reg == 0                                                    \
1653             && charset == CHARSET_ASCII)                                   \
1654           {                                                                \
1655             /* We should insert this designation sequence as is so         \
1656                that it is surely written back to a file.  */               \
1657             coding->spec.iso2022.last_invalid_designation_register = -1;   \
1658             goto label_invalid_code;                                       \
1659           }                                                                \
1660         coding->spec.iso2022.last_invalid_designation_register = -1;       \
1661         if ((coding->mode & CODING_MODE_DIRECTION)                         \
1662             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
1663           charset = CHARSET_REVERSE_CHARSET (charset);                     \
1664         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
1665       }                                                                    \
1666     else                                                                   \
1667       {                                                                    \
1668         coding->spec.iso2022.last_invalid_designation_register = reg;      \
1669         goto label_invalid_code;                                           \
1670       }                                                                    \
1671   } while (0)
1672
1673 /* Allocate one more memory block for composition information, link it
1674    behind the block CODING currently points to, and make it current.  */
1675
1676 void
1677 coding_allocate_composition_data (coding, char_offset)
1678      struct coding_system *coding;
1679      int char_offset;
1680 {
1681   struct composition_data *fresh
1682     = (struct composition_data *) xmalloc (sizeof *fresh);
1683
1684   fresh->prev = coding->cmp_data;
1685   fresh->next = NULL;
1686   if (fresh->prev != NULL)
1687     fresh->prev->next = fresh;
1688   fresh->used = 0;
1689   fresh->char_offset = char_offset;
1690   coding->composing = COMPOSITION_NO;
1691   coding->cmp_data_start = 0;
1692   coding->cmp_data = fresh;
1693 }
1694
1695 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
1696    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
1697    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
1698    ESC 3 : altchar composition :  ESC 3 ALT ... ESC 0 CHAR ... ESC 1
1699    ESC 4 : alt&rule composition : ESC 4 ALT RULE .. ALT ESC 0 CHAR ... ESC 1
1700
        C1 is the byte that follows ESC.  Three cases are distinguished:
        - coding->composing is COMPOSITION_DISABLED: ESC and C1 (masked
          with 0x7f) are stored at DST and coding->produced_char grows
          by 2;
        - no composition is in progress: a new one is started with the
          method selected by C1, unless coding->cmp_data is missing or
          its `used' count plus COMPOSITION_DATA_MAX_BUNCH_LENGTH reaches
          COMPOSITION_DATA_SIZE, in which case coding->result is set to
          CODING_FINISH_INSUFFICIENT_CMP and control jumps to
          label_end_of_loop;
        - a composition is in progress: the two ALTCHARS methods are
          switched to COMPOSITION_RELATIVE, any other method is left
          unchanged.
        The expansion needs `coding', `dst' and the label
        `label_end_of_loop' at the point of use.  */
1701
1702 #define DECODE_COMPOSITION_START(c1)                                       \
1703   do {                                                                     \
1704     if (coding->composing == COMPOSITION_DISABLED)                         \
1705       {                                                                    \
1706         *dst++ = ISO_CODE_ESC;                                             \
1707         *dst++ = c1 & 0x7f;                                                \
1708         coding->produced_char += 2;                                        \
1709       }                                                                    \
1710     else if (!COMPOSING_P (coding))                                        \
1711       {                                                                    \
1712         /* This is surely the start of a composition.  We must be sure     \
1713            that coding->cmp_data has enough space to store the             \
1714            information about the composition.  If not, terminate the       \
1715            current decoding loop, allocate one more memory block for       \
1716            coding->cmp_data in the caller, then start the decoding         \
1717            loop again.  We can't allocate memory here directly because     \
1718            it may cause buffer/string relocation.  */                      \
1719         if (!coding->cmp_data                                              \
1720             || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
1721                 >= COMPOSITION_DATA_SIZE))                                 \
1722           {                                                                \
1723             coding->result = CODING_FINISH_INSUFFICIENT_CMP;               \
1724             goto label_end_of_loop;                                        \
1725           }                                                                \
1726         coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE              \
1727                              : c1 == '2' ? COMPOSITION_WITH_RULE           \
1728                              : c1 == '3' ? COMPOSITION_WITH_ALTCHARS       \
1729                              : COMPOSITION_WITH_RULE_ALTCHARS);            \
1730         CODING_ADD_COMPOSITION_START (coding, coding->produced_char,       \
1731                                       coding->composing);                  \
1732         coding->composition_rule_follows = 0;                              \
1733       }                                                                    \
1734     else                                                                   \
1735       {                                                                    \
1736         /* We are already handling a composition.  If the method is        \
1737            the following two, the codes following the current escape       \
1738            sequence are actual characters stored in a buffer.  */          \
1739         if (coding->composing == COMPOSITION_WITH_ALTCHARS                 \
1740             || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)        \
1741           {                                                                \
1742             coding->composing = COMPOSITION_RELATIVE;                      \
1743             coding->composition_rule_follows = 0;                          \
1744           }                                                                \
1745       }                                                                    \
1746   } while (0)
1747
1748 /* Handle the composition end sequence ESC 1.  When no composition is
1749    in progress, ESC and C1 are stored at DST as they are instead.  */
1750 #define DECODE_COMPOSITION_END(c1)                                      \
1751   do {                                                                  \
1752     if (COMPOSING_P (coding))                                           \
1753       {                                                                 \
1754         CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
1755         coding->composing = COMPOSITION_NO;                             \
1756       }                                                                 \
1757     else                                                                \
1758       {                                                                 \
1759         *dst++ = ISO_CODE_ESC;                                          \
1760         *dst++ = c1;                                                    \
1761         coding->produced_char += 2;                                     \
1762       }                                                                 \
1763   } while (0)
1764
1765 /* Decode a composition rule from the byte C1 (and maybe one more byte
1766    from SRC) and store one encoded composition rule in
1767    coding->cmp_data.
        C1 must be a modifiable variable: it is reduced by 32 in place.
        A result below 81 is the old one-byte format, where the two rule
        components are C1 / 9 and C1 % 9, each remapped from 4 to 10.
        A result from 81 to 92 is the new two-byte format; its second
        byte is read with ONE_MORE_BYTE into the variable `c2' of the
        enclosing scope.  For any larger result the rule stored is 0.
        Afterwards coding->composition_rule_follows is reset to 0.  */
1768
1769 #define DECODE_COMPOSITION_RULE(c1)                                     \
1770   do {                                                                  \
1771     int rule = 0;                                                       \
1772     (c1) -= 32;                                                         \
1773     if (c1 < 81)                /* old format (before ver.21) */        \
1774       {                                                                 \
1775         int gref = (c1) / 9;                                            \
1776         int nref = (c1) % 9;                                            \
1777         if (gref == 4) gref = 10;                                       \
1778         if (nref == 4) nref = 10;                                       \
1779         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
1780       }                                                                 \
1781     else if (c1 < 93)           /* new format (after ver.21) */         \
1782       {                                                                 \
1783         ONE_MORE_BYTE (c2);                                             \
1784         rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);              \
1785       }                                                                 \
1786     CODING_ADD_COMPOSITION_COMPONENT (coding, rule);                    \
1787     coding->composition_rule_follows = 0;                               \
1788   } while (0)
1789
1790
1791 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1792
1793 static void
1794 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1795      struct coding_system *coding;
1796      unsigned char *source, *destination;
1797      int src_bytes, dst_bytes;
1798 {
1799   unsigned char *src = source;
1800   unsigned char *src_end = source + src_bytes;
1801   unsigned char *dst = destination;
1802   unsigned char *dst_end = destination + dst_bytes;
1803   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1804   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1805   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1806   /* SRC_BASE remembers the start position in source in each loop.
1807      The loop will be exited when there's not enough source code
1808      (within macro ONE_MORE_BYTE), or when there's not enough
1809      destination area to produce a character (within macro
1810      EMIT_CHAR).  */
1811   unsigned char *src_base;
1812   int c, charset;
1813   Lisp_Object translation_table;
1814   Lisp_Object safe_chars;
1815
1816   safe_chars = coding_safe_chars (coding->symbol);
1817
1818   if (NILP (Venable_character_translation))
1819     translation_table = Qnil;
1820   else
1821     {
1822       translation_table = coding->translation_table_for_decode;
1823       if (NILP (translation_table))
1824         translation_table = Vstandard_translation_table_for_decode;
1825     }
1826
1827   coding->result = CODING_FINISH_NORMAL;
1828
1829   while (1)
1830     {
1831       int c1, c2 = 0;
1832
1833       src_base = src;
1834       ONE_MORE_BYTE (c1);
1835
1836       /* We produce no character or one character.  */
1837       switch (iso_code_class [c1])
1838         {
1839         case ISO_0x20_or_0x7F:
1840           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1841             {
1842               DECODE_COMPOSITION_RULE (c1);
1843               continue;
1844             }
1845           if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
1846             {
1847               /* This is SPACE or DEL.  */
1848               charset = CHARSET_ASCII;
1849               break;
1850             }
1851           /* This is a graphic character, we fall down ...  */
1852
1853         case ISO_graphic_plane_0:
1854           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1855             {
1856               DECODE_COMPOSITION_RULE (c1);
1857               continue;
1858             }
1859           charset = charset0;
1860           break;
1861
1862         case ISO_0xA0_or_0xFF:
1863           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1864               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1865             goto label_invalid_code;
1866           /* This is a graphic character, we fall down ... */
1867
1868         case ISO_graphic_plane_1:
1869           if (charset1 < 0)
1870             goto label_invalid_code;
1871           charset = charset1;
1872           break;
1873
1874         case ISO_control_0:
1875           if (COMPOSING_P (coding))
1876             DECODE_COMPOSITION_END ('1');
1877
1878           /* All ISO2022 control characters in this class have the
1879              same representation in Emacs internal format.  */
1880           if (c1 == '\n'
1881               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1882               && (coding->eol_type == CODING_EOL_CR
1883                   || coding->eol_type == CODING_EOL_CRLF))
1884             {
1885               coding->result = CODING_FINISH_INCONSISTENT_EOL;
1886               goto label_end_of_loop;
1887             }
1888           charset = CHARSET_ASCII;
1889           break;
1890
1891         case ISO_control_1:
1892           if (COMPOSING_P (coding))
1893             DECODE_COMPOSITION_END ('1');
1894           goto label_invalid_code;
1895
1896         case ISO_carriage_return:
1897           if (COMPOSING_P (coding))
1898             DECODE_COMPOSITION_END ('1');
1899
1900           if (coding->eol_type == CODING_EOL_CR)
1901             c1 = '\n';
1902           else if (coding->eol_type == CODING_EOL_CRLF)
1903             {
1904               ONE_MORE_BYTE (c1);
1905               if (c1 != ISO_CODE_LF)
1906                 {
1907                   src--;
1908                   c1 = '\r';
1909                 }
1910             }
1911           charset = CHARSET_ASCII;
1912           break;
1913
1914         case ISO_shift_out:
1915           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1916               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1917             goto label_invalid_code;
1918           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1919           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1920           continue;
1921
1922         case ISO_shift_in:
1923           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1924             goto label_invalid_code;
1925           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1926           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1927           continue;
1928
1929         case ISO_single_shift_2_7:
1930         case ISO_single_shift_2:
1931           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1932             goto label_invalid_code;
1933           /* SS2 is handled as an escape sequence of ESC 'N' */
1934           c1 = 'N';
1935           goto label_escape_sequence;
1936
1937         case ISO_single_shift_3:
1938           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1939             goto label_invalid_code;
1940           /* SS3 is handled as an escape sequence of ESC 'O' */
1941           c1 = 'O';
1942           goto label_escape_sequence;
1943
1944         case ISO_control_sequence_introducer:
1945           /* CSI is handled as an escape sequence of ESC '[' ...  */
1946           c1 = '[';
1947           goto label_escape_sequence;
1948
1949         case ISO_escape:
1950           ONE_MORE_BYTE (c1);
1951         label_escape_sequence:
1952           /* Escape sequences handled by Emacs are invocation,
1953              designation, direction specification, and character
1954              composition specification.  */
1955           switch (c1)
1956             {
1957             case '&':           /* revision of following character set */
1958               ONE_MORE_BYTE (c1);
1959               if (!(c1 >= '@' && c1 <= '~'))
1960                 goto label_invalid_code;
1961               ONE_MORE_BYTE (c1);
1962               if (c1 != ISO_CODE_ESC)
1963                 goto label_invalid_code;
1964               ONE_MORE_BYTE (c1);
1965               goto label_escape_sequence;
1966
1967             case '$':           /* designation of 2-byte character set */
1968               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1969                 goto label_invalid_code;
1970               ONE_MORE_BYTE (c1);
1971               if (c1 >= '@' && c1 <= 'B')
1972                 {       /* designation of JISX0208.1978, GB2312.1980,
1973                            or JISX0208.1980 */
1974                   DECODE_DESIGNATION (0, 2, 94, c1);
1975                 }
1976               else if (c1 >= 0x28 && c1 <= 0x2B)
1977                 {       /* designation of DIMENSION2_CHARS94 character set */
1978                   ONE_MORE_BYTE (c2);
1979                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1980                 }
1981               else if (c1 >= 0x2C && c1 <= 0x2F)
1982                 {       /* designation of DIMENSION2_CHARS96 character set */
1983                   ONE_MORE_BYTE (c2);
1984                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1985                 }
1986               else
1987                 goto label_invalid_code;
1988               /* We must update these variables now.  */
1989               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1990               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1991               continue;
1992
1993             case 'n':           /* invocation of locking-shift-2 */
1994               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1995                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1996                 goto label_invalid_code;
1997               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1998               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1999               continue;
2000
2001             case 'o':           /* invocation of locking-shift-3 */
2002               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
2003                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
2004                 goto label_invalid_code;
2005               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
2006               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
2007               continue;
2008
2009             case 'N':           /* invocation of single-shift-2 */
2010               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2011                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
2012                 goto label_invalid_code;
2013               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
2014               ONE_MORE_BYTE (c1);
2015               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2016                 goto label_invalid_code;
2017               break;
2018
2019             case 'O':           /* invocation of single-shift-3 */
2020               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2021                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
2022                 goto label_invalid_code;
2023               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
2024               ONE_MORE_BYTE (c1);
2025               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2026                 goto label_invalid_code;
2027               break;
2028
2029             case '0': case '2': case '3': case '4': /* start composition */
2030               DECODE_COMPOSITION_START (c1);
2031               continue;
2032
2033             case '1':           /* end composition */
2034               DECODE_COMPOSITION_END (c1);
2035               continue;
2036
2037             case '[':           /* specification of direction */
2038               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
2039                 goto label_invalid_code;
2040               /* For the moment, nested direction is not supported.
2041                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
2042                  left-to-right, and nonzero means right-to-left.  */
2043               ONE_MORE_BYTE (c1);
2044               switch (c1)
2045                 {
2046                 case ']':       /* end of the current direction */
2047                   coding->mode &= ~CODING_MODE_DIRECTION;
2048
2049                 case '0':       /* end of the current direction */
2050                 case '1':       /* start of left-to-right direction */
2051                   ONE_MORE_BYTE (c1);
2052                   if (c1 == ']')
2053                     coding->mode &= ~CODING_MODE_DIRECTION;
2054                   else
2055                     goto label_invalid_code;
2056                   break;
2057
2058                 case '2':       /* start of right-to-left direction */
2059                   ONE_MORE_BYTE (c1);
2060                   if (c1 == ']')
2061                     coding->mode |= CODING_MODE_DIRECTION;
2062                   else
2063                     goto label_invalid_code;
2064                   break;
2065
2066                 default:
2067                   goto label_invalid_code;
2068                 }
2069               continue;
2070
2071             case '%':
2072               if (COMPOSING_P (coding))
2073                 DECODE_COMPOSITION_END ('1');
2074               ONE_MORE_BYTE (c1);
2075               if (c1 == '/')
2076                 {
2077                   /* CTEXT extended segment:
2078                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
2079                      We keep these bytes as is for the moment.
2080                      They may be decoded by post-read-conversion.  */
2081                   int dim, M, L;
2082                   int size, required;
2083                   int produced_chars;
2084
2085                   ONE_MORE_BYTE (dim);
2086                   ONE_MORE_BYTE (M);
2087                   ONE_MORE_BYTE (L);
2088                   size = ((M - 128) * 128) + (L - 128);
2089                   required = 8 + size * 2;
2090                   if (dst + required > (dst_bytes ? dst_end : src))
2091                     goto label_end_of_loop;
2092                   *dst++ = ISO_CODE_ESC;
2093                   *dst++ = '%';
2094                   *dst++ = '/';
2095                   *dst++ = dim;
2096                   produced_chars = 4;
2097                   dst += CHAR_STRING (M, dst), produced_chars++;
2098                   dst += CHAR_STRING (L, dst), produced_chars++;
2099                   while (size-- > 0)
2100                     {
2101                       ONE_MORE_BYTE (c1);
2102                       dst += CHAR_STRING (c1, dst), produced_chars++;
2103                     }
2104                   coding->produced_char += produced_chars;
2105                 }
2106               else if (c1 == 'G')
2107                 {
2108                   unsigned char *d = dst;
2109                   int produced_chars;
2110
2111                   /* XFree86 extension for embedding UTF-8 in CTEXT:
2112                      ESC % G --UTF-8-BYTES-- ESC % @
2113                      We keep these bytes as is for the moment.
2114                      They may be decoded by post-read-conversion.  */
2115                   if (d + 6 > (dst_bytes ? dst_end : src))
2116                     goto label_end_of_loop;
2117                   *d++ = ISO_CODE_ESC;
2118                   *d++ = '%';
2119                   *d++ = 'G';
2120                   produced_chars = 3;
2121                   while (d + 1 < (dst_bytes ? dst_end : src))
2122                     {
2123                       ONE_MORE_BYTE (c1);
2124                       if (c1 == ISO_CODE_ESC
2125                           && src + 1 < src_end
2126                           && src[0] == '%'
2127                           && src[1] == '@')
2128                         {
2129                           src += 2;
2130                           break;
2131                         }
2132                       d += CHAR_STRING (c1, d), produced_chars++;
2133                     }
2134                   if (d + 3 > (dst_bytes ? dst_end : src))
2135                     goto label_end_of_loop;
2136                   *d++ = ISO_CODE_ESC;
2137                   *d++ = '%';
2138                   *d++ = '@';
2139                   dst = d;
2140                   coding->produced_char += produced_chars + 3;
2141                 }
2142               else
2143                 goto label_invalid_code;
2144               continue;
2145
2146             default:
2147               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
2148                 goto label_invalid_code;
2149               if (c1 >= 0x28 && c1 <= 0x2B)
2150                 {       /* designation of DIMENSION1_CHARS94 character set */
2151                   ONE_MORE_BYTE (c2);
2152                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
2153                 }
2154               else if (c1 >= 0x2C && c1 <= 0x2F)
2155                 {       /* designation of DIMENSION1_CHARS96 character set */
2156                   ONE_MORE_BYTE (c2);
2157                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
2158                 }
2159               else
2160                 goto label_invalid_code;
2161               /* We must update these variables now.  */
2162               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
2163               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
2164               continue;
2165             }
2166         }
2167
2168       /* Now we know CHARSET and 1st position code C1 of a character.
2169          Produce a multibyte sequence for that character while getting
2170          2nd position code C2 if necessary.  */
2171       if (CHARSET_DIMENSION (charset) == 2)
2172         {
2173           ONE_MORE_BYTE (c2);
2174           if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0)
2175             /* C2 is not in a valid range.  */
2176             goto label_invalid_code;
2177         }
2178       c = DECODE_ISO_CHARACTER (charset, c1, c2);
2179       EMIT_CHAR (c);
2180       continue;
2181
2182     label_invalid_code:
2183       coding->errors++;
2184       if (COMPOSING_P (coding))
2185         DECODE_COMPOSITION_END ('1');
2186       src = src_base;
2187       c = *src++;
2188       EMIT_CHAR (c);
2189     }
2190
2191  label_end_of_loop:
2192   coding->consumed = coding->consumed_char = src_base - source;
2193   coding->produced = dst - destination;
2194   return;
2195 }
2196
2197
2198 /* ISO2022 encoding stuff.  */
2199
2200 /*
2201    It is not enough to say just "ISO2022" on encoding, we have to
2202    specify more details.  In Emacs, each ISO2022 coding system
2203    variant has the following specifications:
2204         1. Initial designation to G0 through G3.
2205         2. Allows short-form designation?
2206         3. ASCII should be designated to G0 before control characters?
2207         4. ASCII should be designated to G0 at end of line?
2208         5. 7-bit environment or 8-bit environment?
2209         6. Use locking-shift?
2210         7. Use Single-shift?
2211    And the following two are only for Japanese:
2212         8. Use ASCII in place of JIS0201-1976-Roman?
2213         9. Use JISX0208-1983 in place of JISX0208-1978?
2214    These specifications are encoded in `coding->flags' as flag bits
2215    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
2216    details.
2217 */
2218
/* Write at DST the escape sequence that designates CHARSET to graphic
   register REG, advance DST past the bytes written, and record the
   new designation in CODING.

   If CODING holds a revision number below 255 for CHARSET, the
   sequence is preceded by `ESC & <'@' + revision>'.  For a
   multi-byte 94-character set designated to register 0 whose
   <final-char> is '@', 'A', or 'B', the intermediate character is
   omitted (short form) when CODING has CODING_FLAG_ISO_SHORT_FORM.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    const char *iso_inter_94_ = "()*+";                                 \
    const char *iso_inter_96_ = ",-./";                                 \
    unsigned char iso_final_ = CHARSET_ISO_FINAL_CHAR (charset);        \
    int iso_revision_ = CODING_SPEC_ISO_REVISION_NUMBER (coding, charset); \
    int iso_multibyte_ = CHARSET_DIMENSION (charset) != 1;              \
    int iso_chars94_ = CHARSET_CHARS (charset) == 94;                   \
                                                                        \
    if (iso_revision_ < 255)                                            \
      {                                                                 \
        *dst++ = ISO_CODE_ESC;                                          \
        *dst++ = '&';                                                   \
        *dst++ = '@' + iso_revision_;                                   \
      }                                                                 \
    *dst++ = ISO_CODE_ESC;                                              \
    if (iso_multibyte_)                                                 \
      *dst++ = '$';                                                     \
    if (! iso_chars94_)                                                 \
      *dst++ = (unsigned char) (iso_inter_96_[reg]);                    \
    /* The intermediate character is dropped only in short form.  */    \
    else if (! (iso_multibyte_                                          \
                && (coding->flags & CODING_FLAG_ISO_SHORT_FORM)         \
                && reg == 0                                             \
                && iso_final_ >= '@' && iso_final_ <= 'B'))             \
      *dst++ = (unsigned char) (iso_inter_94_[reg]);                    \
    *dst++ = iso_final_;                                                \
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
  } while (0)
2261
/* Produce the code for the ISO2022 single-shift-2 function at DST:
   the escape sequence `ESC N' when CODING has
   CODING_FLAG_ISO_SEVEN_BITS, otherwise the control character
   ISO_CODE_SS2.  CODING is then marked as single-shifting.  */

#define ENCODE_SINGLE_SHIFT_2                           \
  do {                                                  \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)) \
      *dst++ = ISO_CODE_SS2;                            \
    else                                                \
      {                                                 \
        *dst++ = ISO_CODE_ESC;                          \
        *dst++ = 'N';                                   \
      }                                                 \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
  } while (0)
2274
/* Produce the code for the ISO2022 single-shift-3 function at DST:
   the escape sequence `ESC O' when CODING has
   CODING_FLAG_ISO_SEVEN_BITS, otherwise the control character
   ISO_CODE_SS3.  CODING is then marked as single-shifting.  */
#define ENCODE_SINGLE_SHIFT_3                           \
  do {                                                  \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)) \
      *dst++ = ISO_CODE_SS3;                            \
    else                                                \
      {                                                 \
        *dst++ = ISO_CODE_ESC;                          \
        *dst++ = 'O';                                   \
      }                                                 \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
  } while (0)
2283
/* Produce the ISO2022 shift-in control character (ISO_CODE_SI) at DST
   and record in CODING that graphic register 0 is now invoked to
   graphic plane 0.  */

#define ENCODE_SHIFT_IN                         \
  do {                                          \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
    *dst = ISO_CODE_SI;                         \
    dst++;                                      \
  } while (0)
2293
/* Produce the ISO2022 shift-out control character (ISO_CODE_SO) at DST
   and record in CODING that graphic register 1 is now invoked to
   graphic plane 0.  */
#define ENCODE_SHIFT_OUT                        \
  do {                                          \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
    *dst = ISO_CODE_SO;                         \
    dst++;                                      \
  } while (0)
2299
/* Produce the ISO2022 locking-shift-2 sequence `ESC n' at DST and
   record in CODING that graphic register 2 is now invoked to graphic
   plane 0.  */
#define ENCODE_LOCKING_SHIFT_2                  \
  do {                                          \
    *dst++ = ISO_CODE_ESC;                      \
    *dst++ = 'n';                               \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \
  } while (0)
2305
/* Produce the ISO2022 locking-shift-3 sequence `ESC o' at DST and
   record in CODING that graphic register 3 is now invoked to graphic
   plane 0.  */
#define ENCODE_LOCKING_SHIFT_3                  \
  do {                                          \
    *dst++ = ISO_CODE_ESC;                      \
    *dst++ = 'o';                               \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \
  } while (0)
2311
/* Produce at DST the one-byte code of a DIMENSION1 character whose
   character set is CHARSET and whose position-code is C1.

   The macro loops until the byte has been written: if CHARSET is
   invoked to neither graphic plane (and no single-shift is pending),
   encode_invocation_designation is called to emit the necessary
   designation and invocation codes, and the checks are repeated.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        /* A single-shift was just produced; it covers this byte.  */   \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        *dst++ = ((coding->flags & CODING_FLAG_ISO_SEVEN_BITS)          \
                  ? c1 & 0x7F : c1 | 0x80);                             \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))           \
      {                                                                 \
        *dst++ = c1 & 0x7F;                                             \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))           \
      {                                                                 \
        *dst++ = c1 | 0x80;                                             \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not invoked to any graphic plane yet: invoke it       \
       (designating it to a graphic register first if needed) and       \
       go round again to produce the character itself.  */              \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
2344
/* Produce at DST the two-byte code of a DIMENSION2 character whose
   character set is CHARSET and whose position-codes are C1 and C2.

   The macro loops until the bytes have been written: if CHARSET is
   invoked to neither graphic plane (and no single-shift is pending),
   encode_invocation_designation is called to emit the necessary
   designation and invocation codes, and the checks are repeated.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        /* A single-shift was just produced; it covers this char.  */   \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          {                                                             \
            *dst++ = c1 & 0x7F;                                         \
            *dst++ = c2 & 0x7F;                                         \
          }                                                             \
        else                                                            \
          {                                                             \
            *dst++ = c1 | 0x80;                                         \
            *dst++ = c2 | 0x80;                                         \
          }                                                             \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))           \
      {                                                                 \
        *dst++ = c1 & 0x7F;                                             \
        *dst++ = c2 & 0x7F;                                             \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))           \
      {                                                                 \
        *dst++ = c1 | 0x80;                                             \
        *dst++ = c2 | 0x80;                                             \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not invoked to any graphic plane yet: invoke it       \
       (designating it to a graphic register first if needed) and       \
       go round again to produce the character itself.  */              \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
2377
/* Encode character C at DST.  C is split into a charset and one or
   two position-codes with SPLIT_CHAR.  If the charset is not defined,
   the position-codes are written as they are (the second one only
   when it is not negative).  Otherwise the character is encoded by
   ENCODE_ISO_CHARACTER_DIMENSION1 or ENCODE_ISO_CHARACTER_DIMENSION2,
   after these substitutions requested by CODING's flags:
     CODING_FLAG_ISO_USE_ROMAN:  ASCII -> charset_latin_jisx0201
     CODING_FLAG_ISO_USE_OLDJIS: charset_jisx0208 -> charset_jisx0208_1978  */
#define ENCODE_ISO_CHARACTER(c)                                         \
  do {                                                                  \
    int enc_charset_, enc_c1_, enc_c2_;                                 \
                                                                        \
    SPLIT_CHAR (c, enc_charset_, enc_c1_, enc_c2_);                     \
    if (! CHARSET_DEFINED_P (enc_charset_))                             \
      {                                                                 \
        *dst++ = enc_c1_;                                               \
        if (enc_c2_ >= 0)                                               \
          *dst++ = enc_c2_;                                             \
      }                                                                 \
    else if (CHARSET_DIMENSION (enc_charset_) != 1)                     \
      {                                                                 \
        if ((coding->flags & CODING_FLAG_ISO_USE_OLDJIS)                \
            && enc_charset_ == charset_jisx0208)                        \
          enc_charset_ = charset_jisx0208_1978;                         \
        ENCODE_ISO_CHARACTER_DIMENSION2 (enc_charset_, enc_c1_, enc_c2_); \
      }                                                                 \
    else                                                                \
      {                                                                 \
        if ((coding->flags & CODING_FLAG_ISO_USE_ROMAN)                 \
            && enc_charset_ == CHARSET_ASCII)                           \
          enc_charset_ = charset_latin_jisx0201;                        \
        ENCODE_ISO_CHARACTER_DIMENSION1 (enc_charset_, enc_c1_);        \
      }                                                                 \
  } while (0)
2407
2408
/* Encode CODING_REPLACEMENT_CHARACTER in place of character C: once
   in any case, and a second time when the width of C's charset is
   greater than 1.  */

#define ENCODE_UNSAFE_CHARACTER(c)                              \
  do {                                                          \
    ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER);        \
    if (CHARSET_WIDTH (CHAR_CHARSET (c)) <= 1)                  \
      break;                                                    \
    ENCODE_ISO_CHARACTER (CODING_REPLACEMENT_CHARACTER);        \
  } while (0)
2417
2418
2419 /* Produce designation and invocation codes at a place pointed by DST
2420    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
2421    Return new DST.  */
2422
2423 unsigned char *
2424 encode_invocation_designation (charset, coding, dst)
2425      int charset;
2426      struct coding_system *coding;
2427      unsigned char *dst;
2428 {
2429   int reg;                      /* graphic register number */
2430
2431   /* At first, check designations.  */
2432   for (reg = 0; reg < 4; reg++)
2433     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
2434       break;
2435
2436   if (reg >= 4)
2437     {
2438       /* CHARSET is not yet designated to any graphic registers.  */
2439       /* At first check the requested designation.  */
2440       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2441       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
2442         /* Since CHARSET requests no special designation, designate it
2443            to graphic register 0.  */
2444         reg = 0;
2445
2446       ENCODE_DESIGNATION (charset, reg, coding);
2447     }
2448
2449   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
2450       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
2451     {
2452       /* Since the graphic register REG is not invoked to any graphic
2453          planes, invoke it to graphic plane 0.  */
2454       switch (reg)
2455         {
2456         case 0:                 /* graphic register 0 */
2457           ENCODE_SHIFT_IN;
2458           break;
2459
2460         case 1:                 /* graphic register 1 */
2461           ENCODE_SHIFT_OUT;
2462           break;
2463
2464         case 2:                 /* graphic register 2 */
2465           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2466             ENCODE_SINGLE_SHIFT_2;
2467           else
2468             ENCODE_LOCKING_SHIFT_2;
2469           break;
2470
2471         case 3:                 /* graphic register 3 */
2472           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2473             ENCODE_SINGLE_SHIFT_3;
2474           else
2475             ENCODE_LOCKING_SHIFT_3;
2476           break;
2477         }
2478     }
2479
2480   return dst;
2481 }
2482
/* Produce at DST the 2-byte code for the encoded composition rule
   RULE.  RULE is split into GREF and NREF by COMPOSITION_DECODE_RULE;
   the bytes written are 32 + 81 + GREF and 32 + NREF.  */

#define ENCODE_COMPOSITION_RULE(rule)                                   \
  do {                                                                  \
    int rule_gref_, rule_nref_;                                         \
                                                                        \
    COMPOSITION_DECODE_RULE (rule, rule_gref_, rule_nref_);             \
    dst[0] = 32 + 81 + rule_gref_;                                      \
    dst[1] = 32 + rule_nref_;                                           \
    dst += 2;                                                           \
  } while (0)
2492
/* Produce at DST the code announcing the start of a composition
   sequence (ESC 0, ESC 3, or ESC 4).  DATA points to an array of
   integers describing the composition (see the comment in coding.h
   for its format); DATA[3] is stored into CODING as the composing
   method.  ESC 0 is produced for COMPOSITION_RELATIVE, ESC 3 for
   COMPOSITION_WITH_ALTCHARS, and ESC 4 for any other method; in the
   latter two cases CODING is also set up to walk the composition
   components that follow the four leading elements of DATA.  */

#define ENCODE_COMPOSITION_START(coding, data)                          \
  do {                                                                  \
    *dst++ = ISO_CODE_ESC;                                              \
    coding->composing = data[3];                                        \
    if (coding->composing != COMPOSITION_RELATIVE)                      \
      {                                                                 \
        coding->composition_rule_follows = 0;                           \
        coding->cmp_data_index = coding->cmp_data_start + 4;            \
        *dst++ = (coding->composing != COMPOSITION_WITH_ALTCHARS        \
                  ? '4' : '3');                                         \
      }                                                                 \
    else                                                                \
      *dst++ = '0';                                                     \
  } while (0)
2512
/* Produce at DST the code `ESC 1' announcing the end of the current
   composition.  CODING leaves the composing state, and its
   composition-data cursor is advanced by DATA[0]; when that reaches
   the end of the used part of the current composition data and
   another one is chained after it, CODING moves on to the start of
   that next one.  */

#define ENCODE_COMPOSITION_END(coding, data)                    \
  do {                                                          \
    dst[0] = ISO_CODE_ESC;                                      \
    dst[1] = '1';                                               \
    dst += 2;                                                   \
    coding->composing = COMPOSITION_NO;                         \
    coding->cmp_data_start += data[0];                          \
    if (coding->cmp_data->next                                  \
        && coding->cmp_data->used == coding->cmp_data_start)    \
      {                                                         \
        coding->cmp_data_start = 0;                             \
        coding->cmp_data = coding->cmp_data->next;              \
      }                                                         \
  } while (0)
2528
/* Produce the composition start sequence `ESC 0' at DST and put
   CODING into the COMPOSITION_RELATIVE state.  This does not start a
   new composition: it marks the point where the components
   (alternate chars and composition rules) of the current composition
   have all been produced and the actual text follows in SRC.  */

#define ENCODE_COMPOSITION_FAKE_START(coding)   \
  do {                                          \
    coding->composing = COMPOSITION_RELATIVE;   \
    dst[0] = ISO_CODE_ESC;                      \
    dst[1] = '0';                               \
    dst += 2;                                   \
  } while (0)
2540
/* The following three macros produce codes for indicating direction
   of text.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER produces the control sequence
   introducer at DST: the escape sequence `ESC [' when CODING has
   CODING_FLAG_ISO_SEVEN_BITS, otherwise the control character
   ISO_CODE_CSI.  The flag is tested as a bit of `coding->flags', as
   the single-shift macros do; comparing the whole flag word for
   equality would select the 8-bit form whenever any other flag is
   set as well.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
  do {                                                  \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
      *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
    else                                                \
      *dst++ = ISO_CODE_CSI;                            \
  } while (0)
2550
/* Produce at DST the control sequence that starts right-to-left
   text: the control sequence introducer followed by `2 ]'.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro that
   expands to a `do ... while (0)' statement, so it can neither take
   an argument list nor be chained with the comma operator; the whole
   sequence is therefore produced by one statement-like macro.  */
#define ENCODE_DIRECTION_R2L                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '2';                               \
    *dst++ = ']';                               \
  } while (0)
2553
/* Produce at DST the control sequence that ends the current direction
   (i.e. returns to left-to-right text): the control sequence
   introducer followed by `0 ]'.

   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro that
   expands to a `do ... while (0)' statement, so it can neither take
   an argument list nor be chained with the comma operator; the whole
   sequence is therefore produced by one statement-like macro.  */
#define ENCODE_DIRECTION_L2R                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '0';                               \
    *dst++ = ']';                               \
  } while (0)
2556
/* Produce at DST the invocation and designation codes that bring the
   graphic planes and registers back to their initial state: a
   shift-in if graphic plane 0 does not have register 0 invoked, then
   a designation for each graphic register that has an initial
   designation (a non-negative value) different from its current
   one.  */
#define ENCODE_RESET_PLANE_AND_REGISTER                                 \
  do {                                                                  \
    int reset_reg_;                                                     \
                                                                        \
    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                    \
      ENCODE_SHIFT_IN;                                                  \
    for (reset_reg_ = 0; reset_reg_ < 4; reset_reg_++)                  \
      {                                                                 \
        int reset_initial_                                              \
          = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reset_reg_);   \
                                                                        \
        if (reset_initial_ < 0                                          \
            || (reset_initial_                                          \
                == CODING_SPEC_ISO_DESIGNATION (coding, reset_reg_)))   \
          continue;                                                     \
        ENCODE_DESIGNATION (reset_initial_, reset_reg_, coding);        \
      }                                                                 \
  } while (0)
2571
2572 /* Produce designation sequences of charsets in the line started from
2573    SRC to a place pointed by DST, and return updated DST.
2574
2575    If the current block ends before any end-of-line, we may fail to
2576    find all the necessary designations.  */
2577
2578 static unsigned char *
2579 encode_designation_at_bol (coding, translation_table, src, src_end, dst)
2580      struct coding_system *coding;
2581      Lisp_Object translation_table;
2582      unsigned char *src, *src_end, *dst;
2583 {
2584   int charset, c, found = 0, reg;
2585   /* Table of charsets to be designated to each graphic register.  */
2586   int r[4];
2587
2588   for (reg = 0; reg < 4; reg++)
2589     r[reg] = -1;
2590
2591   while (found < 4)
2592     {
2593       ONE_MORE_CHAR (c);
2594       if (c == '\n')
2595         break;
2596
2597       charset = CHAR_CHARSET (c);
2598       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2599       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
2600         {
2601           found++;
2602           r[reg] = charset;
2603         }
2604     }
2605
2606  label_end_of_loop:
2607   if (found)
2608     {
2609       for (reg = 0; reg < 4; reg++)
2610         if (r[reg] >= 0
2611             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
2612           ENCODE_DESIGNATION (r[reg], reg, coding);
2613     }
2614
2615   return dst;
2616 }
2617
2618 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
2619
2620 static void
2621 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
2622      struct coding_system *coding;
2623      unsigned char *source, *destination;
2624      int src_bytes, dst_bytes;
2625 {
2626   unsigned char *src = source;
2627   unsigned char *src_end = source + src_bytes;
2628   unsigned char *dst = destination;
2629   unsigned char *dst_end = destination + dst_bytes;
2630   /* Since the maximum bytes produced by each loop is 20, we subtract 19
2631      from DST_END to assure overflow checking is necessary only at the
2632      head of loop.  */
2633   unsigned char *adjusted_dst_end = dst_end - 19;
2634   /* SRC_BASE remembers the start position in source in each loop.
2635      The loop will be exited when there's not enough source text to
2636      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2637      there's not enough destination area to produce encoded codes
2638      (within macro EMIT_BYTES).  */
2639   unsigned char *src_base;
2640   int c;
2641   Lisp_Object translation_table;
2642   Lisp_Object safe_chars;
2643
2644   if (coding->flags & CODING_FLAG_ISO_SAFE)
2645     coding->mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
2646
2647   safe_chars = coding_safe_chars (coding->symbol);
2648
2649   if (NILP (Venable_character_translation))
2650     translation_table = Qnil;
2651   else
2652     {
2653       translation_table = coding->translation_table_for_encode;
2654       if (NILP (translation_table))
2655         translation_table = Vstandard_translation_table_for_encode;
2656     }
2657
2658   coding->consumed_char = 0;
2659   coding->errors = 0;
2660   while (1)
2661     {
2662       src_base = src;
2663
2664       if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
2665         {
2666           coding->result = CODING_FINISH_INSUFFICIENT_DST;
2667           break;
2668         }
2669
2670       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
2671           && CODING_SPEC_ISO_BOL (coding))
2672         {
2673           /* We have to produce designation sequences if any now.  */
2674           dst = encode_designation_at_bol (coding, translation_table,
2675                                            src, src_end, dst);
2676           CODING_SPEC_ISO_BOL (coding) = 0;
2677         }
2678
2679       /* Check composition start and end.  */
2680       if (coding->composing != COMPOSITION_DISABLED
2681           && coding->cmp_data_start < coding->cmp_data->used)
2682         {
2683           struct composition_data *cmp_data = coding->cmp_data;
2684           int *data = cmp_data->data + coding->cmp_data_start;
2685           int this_pos = cmp_data->char_offset + coding->consumed_char;
2686
2687           if (coding->composing == COMPOSITION_RELATIVE)
2688             {
2689               if (this_pos == data[2])
2690                 {
2691                   ENCODE_COMPOSITION_END (coding, data);
2692                   cmp_data = coding->cmp_data;
2693                   data = cmp_data->data + coding->cmp_data_start;
2694                 }
2695             }
2696           else if (COMPOSING_P (coding))
2697             {
2698               /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHAR  */
2699               if (coding->cmp_data_index == coding->cmp_data_start + data[0])
2700                 /* We have consumed components of the composition.
2701                    What follows in SRC is the composition's base
2702                    text.  */
2703                 ENCODE_COMPOSITION_FAKE_START (coding);
2704               else
2705                 {
2706                   int c = cmp_data->data[coding->cmp_data_index++];
2707                   if (coding->composition_rule_follows)
2708                     {
2709                       ENCODE_COMPOSITION_RULE (c);
2710                       coding->composition_rule_follows = 0;
2711                     }
2712                   else
2713                     {
2714                       if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
2715                           && ! CODING_SAFE_CHAR_P (safe_chars, c))
2716                         ENCODE_UNSAFE_CHARACTER (c);
2717                       else
2718                         ENCODE_ISO_CHARACTER (c);
2719                       if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
2720                         coding->composition_rule_follows = 1;
2721                     }
2722                   continue;
2723                 }
2724             }
2725           if (!COMPOSING_P (coding))
2726             {
2727               if (this_pos == data[1])
2728                 {
2729                   ENCODE_COMPOSITION_START (coding, data);
2730                   continue;
2731                 }
2732             }
2733         }
2734
2735       ONE_MORE_CHAR (c);
2736
2737       /* Now encode the character C.  */
2738       if (c < 0x20 || c == 0x7F)
2739         {
2740           if (c == '\r')
2741             {
2742               if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2743                 {
2744                   if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2745                     ENCODE_RESET_PLANE_AND_REGISTER;
2746                   *dst++ = c;
2747                   continue;
2748                 }
2749               /* fall down to treat '\r' as '\n' ...  */
2750               c = '\n';
2751             }
2752           if (c == '\n')
2753             {
2754               if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
2755                 ENCODE_RESET_PLANE_AND_REGISTER;
2756               if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
2757                 bcopy (coding->spec.iso2022.initial_designation,
2758                        coding->spec.iso2022.current_designation,
2759                        sizeof coding->spec.iso2022.initial_designation);
2760               if (coding->eol_type == CODING_EOL_LF
2761                   || coding->eol_type == CODING_EOL_UNDECIDED)
2762                 *dst++ = ISO_CODE_LF;
2763               else if (coding->eol_type == CODING_EOL_CRLF)
2764                 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
2765               else
2766                 *dst++ = ISO_CODE_CR;
2767               CODING_SPEC_ISO_BOL (coding) = 1;
2768             }
2769           else
2770             {
2771               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2772                 ENCODE_RESET_PLANE_AND_REGISTER;
2773               *dst++ = c;
2774             }
2775         }
2776       else if (ASCII_BYTE_P (c))
2777         ENCODE_ISO_CHARACTER (c);
2778       else if (SINGLE_BYTE_CHAR_P (c))
2779         {
2780           *dst++ = c;
2781           coding->errors++;
2782         }
2783       else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR
2784                && ! CODING_SAFE_CHAR_P (safe_chars, c))
2785         ENCODE_UNSAFE_CHARACTER (c);
2786       else
2787         ENCODE_ISO_CHARACTER (c);
2788
2789       coding->consumed_char++;
2790     }
2791
2792  label_end_of_loop:
2793   coding->consumed = src_base - source;
2794   coding->produced = coding->produced_char = dst - destination;
2795 }
2796
2797 \f
2798 /*** 4. SJIS and BIG5 handlers ***/
2799
2800 /* Although SJIS and BIG5 are not ISO coding systems, they are used
2801    quite widely.  So, for the moment, Emacs supports them in the bare
2802    C code.  But, in the future, they may be supported only by CCL.  */
2803
2804 /* SJIS is a coding system encoding three character sets: ASCII, right
2805    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
2806    as is.  A character of charset katakana-jisx0201 is encoded by
2807    "position-code + 0x80".  A character of charset japanese-jisx0208
2808    is encoded in 2-byte but two position-codes are divided and shifted
2809    so that it fits in the range below.
2810
2811    --- CODE RANGE of SJIS ---
2812    (character set)      (range)
2813    ASCII                0x00 .. 0x7F
2814    KATAKANA-JISX0201    0xA1 .. 0xDF
2815    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
2816             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
2817    -------------------------------
2818
2819 */
2820
2821 /* BIG5 is a coding system encoding two character sets: ASCII and
2822    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
2823    character set and is encoded in two bytes.
2824
2825    --- CODE RANGE of BIG5 ---
2826    (character set)      (range)
2827    ASCII                0x00 .. 0x7F
2828    Big5 (1st byte)      0xA1 .. 0xFE
2829         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2830    --------------------------
2831
2832    Since the number of characters in Big5 is larger than maximum
2833    characters in Emacs' charset (96x96), it can't be handled as one
2834    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2835    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2836    contains frequently used characters and the latter contains less
2837    frequently used characters.  */
2838
/* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
   are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
   C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
   format.  CHARSET is `charset_big5_1' or `charset_big5_2'.

   Every argument is parenthesized in the expansions, so callers may
   pass arbitrary expressions for the inputs.  Inputs are still
   evaluated more than once, so they must be free of side effects.
   All inputs are read before any output is stored, so an output may
   name the same variable as an input (e.g. C1 for both B1 and C1).  */

/* Number of Big5 characters which have the same code in 1st byte:
   94 second bytes in 0xA1..0xFE plus 63 second bytes in 0x40..0x7E.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* Convert the BIG5 byte pair B1, B2 into CHARSET and the
   position-codes C1, C2.  Lead bytes below 0xC9 map to
   `charset_big5_1', the others to `charset_big5_2'.  B1 must not be
   less than 0xA1, otherwise the unsigned intermediate wraps around.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = ((b1) - 0xA1) * BIG5_SAME_ROW                                   \
        + (b2) - ((b2) < 0x7F ? 0x40 : 0x62);                           \
    if ((b1) < 0xC9)                                                    \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
      }                                                                 \
    (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
    (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
  } while (0)

/* Inverse of DECODE_BIG5: convert CHARSET and the position-codes C1,
   C2 into the BIG5 byte pair B1, B2.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int temp = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);  \
    if ((charset) == charset_big5_2)                                    \
      temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
    (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
    (b2) = temp % BIG5_SAME_ROW;                                        \
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
  } while (0)
2871
2872 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2873    Check if a text is encoded in SJIS.  If it is, return
2874    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2875
2876 static int
2877 detect_coding_sjis (src, src_end, multibytep)
2878      unsigned char *src, *src_end;
2879      int multibytep;
2880 {
2881   int c;
2882   /* Dummy for ONE_MORE_BYTE.  */
2883   struct coding_system dummy_coding;
2884   struct coding_system *coding = &dummy_coding;
2885
2886   while (1)
2887     {
2888       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2889       if (c < 0x80)
2890         continue;
2891       if (c == 0x80 || c == 0xA0 || c > 0xEF)
2892         return 0;
2893       if (c <= 0x9F || c >= 0xE0)
2894         {
2895           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2896           if (c < 0x40 || c == 0x7F || c > 0xFC)
2897             return 0;
2898         }
2899     }
2900  label_end_of_loop:
2901   return CODING_CATEGORY_MASK_SJIS;
2902 }
2903
2904 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2905    Check if a text is encoded in BIG5.  If it is, return
2906    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2907
2908 static int
2909 detect_coding_big5 (src, src_end, multibytep)
2910      unsigned char *src, *src_end;
2911      int multibytep;
2912 {
2913   int c;
2914   /* Dummy for ONE_MORE_BYTE.  */
2915   struct coding_system dummy_coding;
2916   struct coding_system *coding = &dummy_coding;
2917
2918   while (1)
2919     {
2920       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2921       if (c < 0x80)
2922         continue;
2923       if (c < 0xA1 || c > 0xFE)
2924         return 0;
2925       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2926       if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE)
2927         return 0;
2928     }
2929  label_end_of_loop:
2930   return CODING_CATEGORY_MASK_BIG5;
2931 }
2932
2933 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2934    Check if a text is encoded in UTF-8.  If it is, return
2935    CODING_CATEGORY_MASK_UTF_8, else return 0.  */
2936
/* Predicates classifying one octet C of a UTF-8 sequence by its
   high-order bits.  */

/* 0xxxxxxx: a complete one-octet sequence.  */
#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
/* 10xxxxxx: a trailing (non-leading) octet.  */
#define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
/* 110xxxxx: leading octet of a two-octet sequence.  */
#define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
/* 1110xxxx: leading octet of a three-octet sequence.  */
#define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
/* 11110xxx: leading octet of a four-octet sequence.  */
#define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
/* 111110xx: leading octet of a five-octet sequence.  */
#define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
/* 1111110x: leading octet of a six-octet sequence.  */
#define UTF_8_6_OCTET_LEADING_P(c) (((c) & 0xFE) == 0xFC)
2944
2945 static int
2946 detect_coding_utf_8 (src, src_end, multibytep)
2947      unsigned char *src, *src_end;
2948      int multibytep;
2949 {
2950   unsigned char c;
2951   int seq_maybe_bytes;
2952   /* Dummy for ONE_MORE_BYTE.  */
2953   struct coding_system dummy_coding;
2954   struct coding_system *coding = &dummy_coding;
2955
2956   while (1)
2957     {
2958       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2959       if (UTF_8_1_OCTET_P (c))
2960         continue;
2961       else if (UTF_8_2_OCTET_LEADING_P (c))
2962         seq_maybe_bytes = 1;
2963       else if (UTF_8_3_OCTET_LEADING_P (c))
2964         seq_maybe_bytes = 2;
2965       else if (UTF_8_4_OCTET_LEADING_P (c))
2966         seq_maybe_bytes = 3;
2967       else if (UTF_8_5_OCTET_LEADING_P (c))
2968         seq_maybe_bytes = 4;
2969       else if (UTF_8_6_OCTET_LEADING_P (c))
2970         seq_maybe_bytes = 5;
2971       else
2972         return 0;
2973
2974       do
2975         {
2976           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2977           if (!UTF_8_EXTRA_OCTET_P (c))
2978             return 0;
2979           seq_maybe_bytes--;
2980         }
2981       while (seq_maybe_bytes > 0);
2982     }
2983
2984  label_end_of_loop:
2985   return CODING_CATEGORY_MASK_UTF_8;
2986 }
2987
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
   Check if a text is encoded in UTF-16 by looking at its first two
   bytes for a byte order mark.  If they are 0xFE 0xFF, return
   CODING_CATEGORY_MASK_UTF_16_BE; if they are 0xFF 0xFE, return
   CODING_CATEGORY_MASK_UTF_16_LE; else return 0.  */
2993
/* Nonzero if the 16-bit value VAL is 0xFFFE or 0xFFFF.  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF))

/* Nonzero if VAL is a high (leading) surrogate, 0xD800..0xDBFF.  The
   top six bits must be exactly 110110, so mask with 0xFC00; masking
   with 0xD800 would also match low surrogates and values such as
   0xFFFF.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if VAL is a low (trailing) surrogate, 0xDC00..0xDFFF.  The
   top six bits must be exactly 110111.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
3003
3004 static int
3005 detect_coding_utf_16 (src, src_end, multibytep)
3006      unsigned char *src, *src_end;
3007      int multibytep;
3008 {
3009   unsigned char c1, c2;
3010   /* Dummy for ONE_MORE_BYTE_CHECK_MULTIBYTE.  */
3011   struct coding_system dummy_coding;
3012   struct coding_system *coding = &dummy_coding;
3013
3014   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
3015   ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep);
3016
3017   if ((c1 == 0xFF) && (c2 == 0xFE))
3018     return CODING_CATEGORY_MASK_UTF_16_LE;
3019   else if ((c1 == 0xFE) && (c2 == 0xFF))
3020     return CODING_CATEGORY_MASK_UTF_16_BE;
3021
3022  label_end_of_loop:
3023   return 0;
3024 }
3025
3026 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3027    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
3028
3029 static void
3030 decode_coding_sjis_big5 (coding, source, destination,
3031                          src_bytes, dst_bytes, sjis_p)
3032      struct coding_system *coding;
3033      unsigned char *source, *destination;
3034      int src_bytes, dst_bytes;
3035      int sjis_p;
3036 {
3037   unsigned char *src = source;
3038   unsigned char *src_end = source + src_bytes;
3039   unsigned char *dst = destination;
3040   unsigned char *dst_end = destination + dst_bytes;
3041   /* SRC_BASE remembers the start position in source in each loop.
3042      The loop will be exited when there's not enough source code
3043      (within macro ONE_MORE_BYTE), or when there's not enough
3044      destination area to produce a character (within macro
3045      EMIT_CHAR).  */
3046   unsigned char *src_base;
3047   Lisp_Object translation_table;
3048
3049   if (NILP (Venable_character_translation))
3050     translation_table = Qnil;
3051   else
3052     {
3053       translation_table = coding->translation_table_for_decode;
3054       if (NILP (translation_table))
3055         translation_table = Vstandard_translation_table_for_decode;
3056     }
3057
3058   coding->produced_char = 0;
3059   while (1)
3060     {
3061       int c, charset, c1, c2 = 0;
3062
3063       src_base = src;
3064       ONE_MORE_BYTE (c1);
3065
3066       if (c1 < 0x80)
3067         {
3068           charset = CHARSET_ASCII;
3069           if (c1 < 0x20)
3070             {
3071               if (c1 == '\r')
3072                 {
3073                   if (coding->eol_type == CODING_EOL_CRLF)
3074                     {
3075                       ONE_MORE_BYTE (c2);
3076                       if (c2 == '\n')
3077                         c1 = c2;
3078                       else
3079                         /* To process C2 again, SRC is subtracted by 1.  */
3080                         src--;
3081                     }
3082                   else if (coding->eol_type == CODING_EOL_CR)
3083                     c1 = '\n';
3084                 }
3085               else if (c1 == '\n'
3086                        && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3087                        && (coding->eol_type == CODING_EOL_CR
3088                            || coding->eol_type == CODING_EOL_CRLF))
3089                 {
3090                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
3091                   goto label_end_of_loop;
3092                 }
3093             }
3094         }
3095       else
3096         {
3097           if (sjis_p)
3098             {
3099               if (c1 == 0x80 || c1 == 0xA0 || c1 > 0xEF)
3100                 goto label_invalid_code;
3101               if (c1 <= 0x9F || c1 >= 0xE0)
3102                 {
3103                   /* SJIS -> JISX0208 */
3104                   ONE_MORE_BYTE (c2);
3105                   if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
3106                     goto label_invalid_code;
3107                   DECODE_SJIS (c1, c2, c1, c2);
3108                   charset = charset_jisx0208;
3109                 }
3110               else
3111                 /* SJIS -> JISX0201-Kana */
3112                 charset = charset_katakana_jisx0201;
3113             }
3114           else
3115             {
3116               /* BIG5 -> Big5 */
3117               if (c1 < 0xA0 || c1 > 0xFE)
3118                 goto label_invalid_code;
3119               ONE_MORE_BYTE (c2);
3120               if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
3121                 goto label_invalid_code;
3122               DECODE_BIG5 (c1, c2, charset, c1, c2);
3123             }
3124         }
3125
3126       c = DECODE_ISO_CHARACTER (charset, c1, c2);
3127       EMIT_CHAR (c);
3128       continue;
3129
3130     label_invalid_code:
3131       coding->errors++;
3132       src = src_base;
3133       c = *src++;
3134       EMIT_CHAR (c);
3135     }
3136
3137  label_end_of_loop:
3138   coding->consumed = coding->consumed_char = src_base - source;
3139   coding->produced = dst - destination;
3140   return;
3141 }
3142
3143 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
3144    This function can encode charsets `ascii', `katakana-jisx0201',
3145    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
3146    are sure that all these charsets are registered as official charset
3147    (i.e. do not have extended leading-codes).  Characters of other
3148    charsets are produced without any encoding.  If SJIS_P is 1, encode
3149    SJIS text, else encode BIG5 text.  */
3150
3151 static void
3152 encode_coding_sjis_big5 (coding, source, destination,
3153                          src_bytes, dst_bytes, sjis_p)
3154      struct coding_system *coding;
3155      unsigned char *source, *destination;
3156      int src_bytes, dst_bytes;
3157      int sjis_p;
3158 {
3159   unsigned char *src = source;
3160   unsigned char *src_end = source + src_bytes;
3161   unsigned char *dst = destination;
3162   unsigned char *dst_end = destination + dst_bytes;
3163   /* SRC_BASE remembers the start position in source in each loop.
3164      The loop will be exited when there's not enough source text to
3165      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3166      there's not enough destination area to produce encoded codes
3167      (within macro EMIT_BYTES).  */
3168   unsigned char *src_base;
3169   Lisp_Object translation_table;
3170
3171   if (NILP (Venable_character_translation))
3172     translation_table = Qnil;
3173   else
3174     {
3175       translation_table = coding->translation_table_for_encode;
3176       if (NILP (translation_table))
3177         translation_table = Vstandard_translation_table_for_encode;
3178     }
3179
3180   while (1)
3181     {
3182       int c, charset, c1, c2;
3183
3184       src_base = src;
3185       ONE_MORE_CHAR (c);
3186
3187       /* Now encode the character C.  */
3188       if (SINGLE_BYTE_CHAR_P (c))
3189         {
3190           switch (c)
3191             {
3192             case '\r':
3193               if (!(coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
3194                 {
3195                   EMIT_ONE_BYTE (c);
3196                   break;
3197                 }
3198               c = '\n';
3199             case '\n':
3200               if (coding->eol_type == CODING_EOL_CRLF)
3201                 {
3202                   EMIT_TWO_BYTES ('\r', c);
3203                   break;
3204                 }
3205               else if (coding->eol_type == CODING_EOL_CR)
3206                 c = '\r';
3207             default:
3208               EMIT_ONE_BYTE (c);
3209             }
3210         }
3211       else
3212         {
3213           SPLIT_CHAR (c, charset, c1, c2);
3214           if (sjis_p)
3215             {
3216               if (charset == charset_jisx0208
3217                   || charset == charset_jisx0208_1978)
3218                 {
3219                   ENCODE_SJIS (c1, c2, c1, c2);
3220                   EMIT_TWO_BYTES (c1, c2);
3221                 }
3222               else if (charset == charset_katakana_jisx0201)
3223                 EMIT_ONE_BYTE (c1 | 0x80);
3224               else if (charset == charset_latin_jisx0201)
3225                 EMIT_ONE_BYTE (c1);
3226               else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
3227                 {
3228                   EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3229                   if (CHARSET_WIDTH (charset) > 1)
3230                     EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3231                 }
3232               else
3233                 /* There's no way other than producing the internal
3234                    codes as is.  */
3235                 EMIT_BYTES (src_base, src);
3236             }
3237           else
3238             {
3239               if (charset == charset_big5_1 || charset == charset_big5_2)
3240                 {
3241                   ENCODE_BIG5 (charset, c1, c2, c1, c2);
3242                   EMIT_TWO_BYTES (c1, c2);
3243                 }
3244               else if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR)
3245                 {
3246                   EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3247                   if (CHARSET_WIDTH (charset) > 1)
3248                     EMIT_ONE_BYTE (CODING_REPLACEMENT_CHARACTER);
3249                 }
3250               else
3251                 /* There's no way other than producing the internal
3252                    codes as is.  */
3253                 EMIT_BYTES (src_base, src);
3254             }
3255         }
3256       coding->consumed_char++;
3257     }
3258
3259  label_end_of_loop:
3260   coding->consumed = src_base - source;
3261   coding->produced = coding->produced_char = dst - destination;
3262 }
3263
3264 \f
3265 /*** 5. CCL handlers ***/
3266
3267 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3268    Check if a text is encoded in a coding system of which
3269    encoder/decoder are written in CCL program.  If it is, return
3270    CODING_CATEGORY_MASK_CCL, else return 0.  */
3271
3272 static int
3273 detect_coding_ccl (src, src_end, multibytep)
3274      unsigned char *src, *src_end;
3275      int multibytep;
3276 {
3277   unsigned char *valid;
3278   int c;
3279   /* Dummy for ONE_MORE_BYTE.  */
3280   struct coding_system dummy_coding;
3281   struct coding_system *coding = &dummy_coding;
3282
3283   /* No coding system is assigned to coding-category-ccl.  */
3284   if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
3285     return 0;
3286
3287   valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
3288   while (1)
3289     {
3290       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
3291       if (! valid[c])
3292         return 0;
3293     }
3294  label_end_of_loop:
3295   return CODING_CATEGORY_MASK_CCL;
3296 }
3297
3298 \f
3299 /*** 6. End-of-line handlers ***/
3300
3301 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3302
3303 static void
3304 decode_eol (coding, source, destination, src_bytes, dst_bytes)
3305      struct coding_system *coding;
3306      unsigned char *source, *destination;
3307      int src_bytes, dst_bytes;
3308 {
3309   unsigned char *src = source;
3310   unsigned char *dst = destination;
3311   unsigned char *src_end = src + src_bytes;
3312   unsigned char *dst_end = dst + dst_bytes;
3313   Lisp_Object translation_table;
3314   /* SRC_BASE remembers the start position in source in each loop.
3315      The loop will be exited when there's not enough source code
3316      (within macro ONE_MORE_BYTE), or when there's not enough
3317      destination area to produce a character (within macro
3318      EMIT_CHAR).  */
3319   unsigned char *src_base;
3320   int c;
3321
3322   translation_table = Qnil;
3323   switch (coding->eol_type)
3324     {
3325     case CODING_EOL_CRLF:
3326       while (1)
3327         {
3328           src_base = src;
3329           ONE_MORE_BYTE (c);
3330           if (c == '\r')
3331             {
3332               ONE_MORE_BYTE (c);
3333               if (c != '\n')
3334                 {
3335                   src--;
3336                   c = '\r';
3337                 }
3338             }
3339           else if (c == '\n'
3340                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
3341             {
3342               coding->result = CODING_FINISH_INCONSISTENT_EOL;
3343               goto label_end_of_loop;
3344             }
3345           EMIT_CHAR (c);
3346         }
3347       break;
3348
3349     case CODING_EOL_CR:
3350       while (1)
3351         {
3352           src_base = src;
3353           ONE_MORE_BYTE (c);
3354           if (c == '\n')
3355             {
3356               if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3357                 {
3358                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
3359                   goto label_end_of_loop;
3360                 }
3361             }
3362           else if (c == '\r')
3363             c = '\n';
3364           EMIT_CHAR (c);
3365         }
3366       break;
3367
3368     default:                    /* no need for EOL handling */
3369       while (1)
3370         {
3371           src_base = src;
3372           ONE_MORE_BYTE (c);
3373           EMIT_CHAR (c);
3374         }
3375     }
3376
3377  label_end_of_loop:
3378   coding->consumed = coding->consumed_char = src_base - source;
3379   coding->produced = dst - destination;
3380   return;
3381 }
3382
3383 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
3384    format of end-of-line according to `coding->eol_type'.  It also
3385    convert multibyte form 8-bit characters to unibyte if
3386    CODING->src_multibyte is nonzero.  If `coding->mode &
3387    CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
3388    also means end-of-line.  */
3389
3390 static void
3391 encode_eol (coding, source, destination, src_bytes, dst_bytes)
3392      struct coding_system *coding;
3393      const unsigned char *source;
3394      unsigned char *destination;
3395      int src_bytes, dst_bytes;
3396 {
3397   const unsigned char *src = source;
3398   unsigned char *dst = destination;
3399   const unsigned char *src_end = src + src_bytes;
3400   unsigned char *dst_end = dst + dst_bytes;
3401   Lisp_Object translation_table;
3402   /* SRC_BASE remembers the start position in source in each loop.
3403      The loop will be exited when there's not enough source text to
3404      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3405      there's not enough destination area to produce encoded codes
3406      (within macro EMIT_BYTES).  */
3407   const unsigned char *src_base;
3408   unsigned char *tmp;
3409   int c;
3410   int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
3411
3412   translation_table = Qnil;
3413   if (coding->src_multibyte
3414       && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL)
3415     {
3416       src_end--;
3417       src_bytes--;
3418       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
3419     }
3420
3421   if (coding->eol_type == CODING_EOL_CRLF)
3422     {
3423       while (src < src_end)
3424         {
3425           src_base = src;
3426           c = *src++;
3427           if (c >= 0x20)
3428             EMIT_ONE_BYTE (c);
3429           else if (c == '\n' || (c == '\r' && selective_display))
3430             EMIT_TWO_BYTES ('\r', '\n');
3431           else
3432             EMIT_ONE_BYTE (c);
3433         }
3434       src_base = src;
3435     label_end_of_loop:
3436       ;
3437     }
3438   else
3439     {
3440       if (!dst_bytes || src_bytes <= dst_bytes)
3441         {
3442           safe_bcopy (src, dst, src_bytes);
3443           src_base = src_end;
3444           dst += src_bytes;
3445         }
3446       else
3447         {
3448           if (coding->src_multibyte
3449               && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL)
3450             dst_bytes--;
3451           safe_bcopy (src, dst, dst_bytes);
3452           src_base = src + dst_bytes;
3453           dst = destination + dst_bytes;
3454           coding->result = CODING_FINISH_INSUFFICIENT_DST;
3455         }
3456       if (coding->eol_type == CODING_EOL_CR)
3457         {
3458           for (tmp = destination; tmp < dst; tmp++)
3459             if (*tmp == '\n') *tmp = '\r';
3460         }
3461       else if (selective_display)
3462         {
3463           for (tmp = destination; tmp < dst; tmp++)
3464             if (*tmp == '\r') *tmp = '\n';
3465         }
3466     }
3467   if (coding->src_multibyte)
3468     dst = destination + str_as_unibyte (destination, dst - destination);
3469
3470   coding->consumed = src_base - source;
3471   coding->produced = dst - destination;
3472   coding->produced_char = coding->produced;
3473 }
3474
3475 \f
3476 /*** 7. C library functions ***/
3477
3478 /* In Emacs Lisp, a coding system is represented by a Lisp symbol which
3479    has a property `coding-system'.  The value of this property is a
3480    vector of length 5 (called the coding-vector).  Among elements of
3481    this vector, the first (element[0]) and the fifth (element[4])
3482    carry important information for decoding/encoding.  Before
3483    decoding/encoding, this information should be set in fields of a
3484    structure of type `coding_system'.
3485
3486    The value of the property `coding-system' can be a symbol of another
3487    subsidiary coding-system.  In that case, Emacs gets coding-vector
3488    from that symbol.
3489
3490    `element[0]' contains information to be set in `coding->type'.  The
3491    value and its meaning is as follows:
3492
3493    0 -- coding_type_emacs_mule
3494    1 -- coding_type_sjis
3495    2 -- coding_type_iso2022
3496    3 -- coding_type_big5
3497    4 -- coding_type_ccl encoder/decoder written in CCL
3498    nil -- coding_type_no_conversion
3499    t -- coding_type_undecided (automatic conversion on decoding,
3500                                no-conversion on encoding)
3501
3502    `element[4]' contains information to be set in `coding->flags' and
3503    `coding->spec'.  The meaning varies by `coding->type'.
3504
3505    If `coding->type' is `coding_type_iso2022', element[4] is a vector
3506    of length 32 (of which the first 13 sub-elements are used now).
3507    Meanings of these sub-elements are:
3508
3509    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
3510         If the value is an integer of valid charset, the charset is
3511         assumed to be designated to graphic register N initially.
3512
        If the value is negative, it is the negated ID of a charset
        that reserves graphic register N; that is, the charset is
        not designated initially but should be designated to graphic
        register N just before encoding a character in that charset.
3517
3518         If the value is nil, graphic register N is never used on
3519         encoding.
3520
3521    sub-element[N] where N is 4 through 11: to be set in `coding->flags'
3522         Each value takes t or nil.  See the section ISO2022 of
3523         `coding.h' for more information.
3524
3525    If `coding->type' is `coding_type_big5', element[4] is t to denote
3526    BIG5-ETen or nil to denote BIG5-HKU.
3527
   If `coding->type' has any other value, element[4] is ignored.
3529
3530    Emacs Lisp's coding systems also carry information about format of
3531    end-of-line in a value of property `eol-type'.  If the value is
3532    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
3533    means CODING_EOL_CR.  If it is not integer, it should be a vector
3534    of subsidiary coding systems of which property `eol-type' has one
3535    of the above values.
3536
3537 */
3538
3539 /* Extract information for decoding/encoding from CODING_SYSTEM (a
3540    coding system symbol) and set it in CODING.  If CODING_SYSTEM is
3541    invalid, CODING is set up so that no conversion is necessary and
3542    -1 is returned; otherwise 0 is returned.  */
3543
3544 int
3545 setup_coding_system (coding_system, coding)
3546      Lisp_Object coding_system;
3547      struct coding_system *coding;
3548 {
3549   Lisp_Object coding_spec, coding_type, eol_type, plist;
3550   Lisp_Object val;
3551
3552   /* At first, zero clear all members.  */
3553   bzero (coding, sizeof (struct coding_system));
3554
3555   /* Initialize some fields required for all kinds of coding systems.  */
3556   coding->symbol = coding_system;
3557   coding->heading_ascii = -1;
3558   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
3559   coding->composing = COMPOSITION_DISABLED;
3560   coding->cmp_data = NULL;
3561
3562   if (NILP (coding_system))
3563     goto label_invalid_coding_system;
3564
3565   coding_spec = Fget (coding_system, Qcoding_system);
3566   /* The spec must be a vector of 5 elements whose element[3] is a cons.  */
3567   if (!VECTORP (coding_spec)
3568       || XVECTOR (coding_spec)->size != 5
3569       || !CONSP (XVECTOR (coding_spec)->contents[3]))
3570     goto label_invalid_coding_system;
3571   /* A vector `eol-type' means the end-of-line format is still undecided.  */
3572   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
3573   if (VECTORP (eol_type))
3574     {
3575       coding->eol_type = CODING_EOL_UNDECIDED;
3576       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
3577     }
3578   else if (XFASTINT (eol_type) == 1)
3579     {
3580       coding->eol_type = CODING_EOL_CRLF;
3581       coding->common_flags
3582         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3583     }
3584   else if (XFASTINT (eol_type) == 2)
3585     {
3586       coding->eol_type = CODING_EOL_CR;
3587       coding->common_flags
3588         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3589     }
3590   else                          /* Any other value.  */
3591     coding->eol_type = CODING_EOL_LF;
3592
3593   coding_type = XVECTOR (coding_spec)->contents[0];
3594   /* Try short cut: a symbol type is either t (undecided) or means no conversion.  */
3595   if (SYMBOLP (coding_type))
3596     {
3597       if (EQ (coding_type, Qt))
3598         {
3599           coding->type = coding_type_undecided;
3600           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
3601         }
3602       else
3603         coding->type = coding_type_no_conversion;
3604       /* Initialize this member.  Anything other than
3605          CODING_CATEGORY_IDX_UTF_16_BE and
3606          CODING_CATEGORY_IDX_UTF_16_LE is OK because they have
3607          special treatment in detect_eol.  */
3608       coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
3609
3610       return 0;
3611     }
3612
3613   /* Get values of coding system properties:
3614      `post-read-conversion', `pre-write-conversion',
3615      `translation-table-for-decode', `translation-table-for-encode'.  */
3616   plist = XVECTOR (coding_spec)->contents[3];
3617   /* Pre & post conversion functions should be disabled if
3618      inhibit_pre_post_conversion is nonzero.  This is the case that a
3619      code conversion function is called while those functions are running.  */
3620   if (! inhibit_pre_post_conversion)
3621     {
3622       coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
3623       coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
3624     }
3625   val = Fplist_get (plist, Qtranslation_table_for_decode);
3626   if (SYMBOLP (val))
3627     val = Fget (val, Qtranslation_table_for_decode);
3628   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
3629   val = Fplist_get (plist, Qtranslation_table_for_encode);
3630   if (SYMBOLP (val))
3631     val = Fget (val, Qtranslation_table_for_encode);
3632   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
3633   val = Fplist_get (plist, Qcoding_category);   /* The category is mandatory.  */
3634   if (!NILP (val))
3635     {
3636       val = Fget (val, Qcoding_category_index);
3637       if (INTEGERP (val))
3638         coding->category_idx = XINT (val);
3639       else
3640         goto label_invalid_coding_system;
3641     }
3642   else
3643     goto label_invalid_coding_system;
3644
3645   /* If the coding system has non-nil `composition' property, enable
3646      composition handling.  */
3647   val = Fplist_get (plist, Qcomposition);
3648   if (!NILP (val))
3649     coding->composing = COMPOSITION_NO;
3650   /* Dispatch on the integer coding type stored in element[0] of the spec.  */
3651   switch (XFASTINT (coding_type))
3652     {
3653     case 0:
3654       coding->type = coding_type_emacs_mule;
3655       coding->common_flags
3656         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3657       if (!NILP (coding->post_read_conversion))
3658         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;   /* Already set above.  */
3659       if (!NILP (coding->pre_write_conversion))
3660         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;   /* Already set above.  */
3661       break;
3662
3663     case 1:
3664       coding->type = coding_type_sjis;
3665       coding->common_flags
3666         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3667       break;
3668
3669     case 2:
3670       coding->type = coding_type_iso2022;
3671       coding->common_flags
3672         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3673       {
3674         Lisp_Object val, temp;
3675         Lisp_Object *flags;
3676         int i, charset, reg_bits = 0;
3677
3678         val = XVECTOR (coding_spec)->contents[4];
3679         /* For ISO2022, element[4] must be a vector of 32 elements.  */
3680         if (!VECTORP (val) || XVECTOR (val)->size != 32)
3681           goto label_invalid_coding_system;
3682
3683         flags = XVECTOR (val)->contents;
3684         coding->flags
3685           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
3686              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
3687              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
3688              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
3689              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
3690              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
3691              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
3692              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
3693              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
3694              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
3695              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3696              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
3697              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
3698              );
3699
3700         /* Invoke graphic register 0 to plane 0.  */
3701         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
3702         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
3703         CODING_SPEC_ISO_INVOCATION (coding, 1)
3704           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
3705         /* Not single shifting at first.  */
3706         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
3707         /* Beginning of buffer should also be regarded as bol. */
3708         CODING_SPEC_ISO_BOL (coding) = 1;
3709         /* Default every revision number to 255, then apply Vcharset_revision_alist.  */
3710         for (charset = 0; charset <= MAX_CHARSET; charset++)
3711           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
3712         val = Vcharset_revision_alist;
3713         while (CONSP (val))
3714           {
3715             charset = get_charset_id (Fcar_safe (XCAR (val)));
3716             if (charset >= 0
3717                 && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
3718                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
3719               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
3720             val = XCDR (val);
3721           }
3722
3723         /* Check FLAGS[REG] (REG = 0, 1, 2, 3) and decide designations.
3724            FLAGS[REG] can be one of below:
3725                 integer CHARSET: CHARSET occupies register REG,
3726                 t: designate nothing to REG initially, but can be used
3727                   by any charsets,
3728                 list of integer, nil, or t: designate the first
3729                   element (if integer) to REG initially, the remaining
3730                   elements (if integer) are designated to REG on request,
3731                   if an element is t, REG can be used by any charsets,
3732                 nil: REG is never used.  */
3733         for (charset = 0; charset <= MAX_CHARSET; charset++)
3734           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3735             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
3736         for (i = 0; i < 4; i++)
3737           {
3738             if ((INTEGERP (flags[i])
3739                  && (charset = XINT (flags[i]), CHARSET_VALID_P (charset)))
3740                 || (charset = get_charset_id (flags[i])) >= 0)
3741               {
3742                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3743                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
3744               }
3745             else if (EQ (flags[i], Qt))
3746               {
3747                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3748                 reg_bits |= 1 << i;
3749                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3750               }
3751             else if (CONSP (flags[i]))
3752               {
3753                 Lisp_Object tail;
3754                 tail = flags[i];
3755
3756                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3757                 if ((INTEGERP (XCAR (tail))
3758                      && (charset = XINT (XCAR (tail)),
3759                          CHARSET_VALID_P (charset)))
3760                     || (charset = get_charset_id (XCAR (tail))) >= 0)
3761                   {
3762                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3763                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
3764                   }
3765                 else
3766                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3767                 tail = XCDR (tail);
3768                 while (CONSP (tail))
3769                   {
3770                     if ((INTEGERP (XCAR (tail))
3771                          && (charset = XINT (XCAR (tail)),
3772                              CHARSET_VALID_P (charset)))
3773                         || (charset = get_charset_id (XCAR (tail))) >= 0)
3774                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3775                         = i;
3776                     else if (EQ (XCAR (tail), Qt))
3777                       reg_bits |= 1 << i;
3778                     tail = XCDR (tail);
3779                   }
3780               }
3781             else
3782               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3783
3784             CODING_SPEC_ISO_DESIGNATION (coding, i)
3785               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3786           }
3787         /* REG_BITS has bit I set if register I can be used by any charset.  */
3788         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3789           {
3790             /* REG 1 can be used only by locking shift in 7-bit env.  */
3791             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3792               reg_bits &= ~2;
3793             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3794               /* Without any shifting, only REG 0 and 1 can be used.  */
3795               reg_bits &= 3;
3796           }
3797         /* Give each defined charset with no requested designation a default register.  */
3798         if (reg_bits)
3799           for (charset = 0; charset <= MAX_CHARSET; charset++)
3800             {
3801               if (CHARSET_DEFINED_P (charset)
3802                   && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3803                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
3804                 {
3805                   /* There exist some default graphic registers to be
3806                      used by CHARSET.  */
3807
3808                   /* We had better avoid designating a charset of
3809                      CHARS96 to REG 0 as far as possible.  */
3810                   if (CHARSET_CHARS (charset) == 96)
3811                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3812                       = (reg_bits & 2
3813                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3814                   else
3815                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3816                       = (reg_bits & 1
3817                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3818                 }
3819             }
3820       }
3821       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3822       coding->spec.iso2022.last_invalid_designation_register = -1;
3823       break;
3824
3825     case 3:
3826       coding->type = coding_type_big5;
3827       coding->common_flags
3828         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3829       coding->flags
3830         = (NILP (XVECTOR (coding_spec)->contents[4])
3831            ? CODING_FLAG_BIG5_HKU
3832            : CODING_FLAG_BIG5_ETEN);
3833       break;
3834
3835     case 4:
3836       coding->type = coding_type_ccl;
3837       coding->common_flags
3838         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3839       {
3840         val = XVECTOR (coding_spec)->contents[4];   /* A cons (DECODER . ENCODER).  */
3841         if (! CONSP (val)
3842             || setup_ccl_program (&(coding->spec.ccl.decoder),
3843                                   XCAR (val)) < 0
3844             || setup_ccl_program (&(coding->spec.ccl.encoder),
3845                                   XCDR (val)) < 0)
3846           goto label_invalid_coding_system;
3847         /* Mark the byte values listed under Qvalid_codes: integers or (START . END) ranges.  */
3848         bzero (coding->spec.ccl.valid_codes, 256);
3849         val = Fplist_get (plist, Qvalid_codes);
3850         if (CONSP (val))
3851           {
3852             Lisp_Object this;
3853
3854             for (; CONSP (val); val = XCDR (val))
3855               {
3856                 this = XCAR (val);
3857                 if (INTEGERP (this)
3858                     && XINT (this) >= 0 && XINT (this) < 256)
3859                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
3860                 else if (CONSP (this)
3861                          && INTEGERP (XCAR (this))
3862                          && INTEGERP (XCDR (this)))
3863                   {
3864                     int start = XINT (XCAR (this));
3865                     int end = XINT (XCDR (this));
3866
3867                     if (start >= 0 && start <= end && end < 256)
3868                       while (start <= end)
3869                         coding->spec.ccl.valid_codes[start++] = 1;
3870                   }
3871               }
3872           }
3873       }
3874       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3875       coding->spec.ccl.cr_carryover = 0;
3876       coding->spec.ccl.eight_bit_carryover[0] = 0;
3877       break;
3878
3879     case 5:
3880       coding->type = coding_type_raw_text;
3881       break;
3882
3883     default:
3884       goto label_invalid_coding_system;
3885     }
3886   return 0;
3887   /* Invalid coding system: fall back to a no-conversion setup and report failure.  */
3888  label_invalid_coding_system:
3889   coding->type = coding_type_no_conversion;
3890   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3891   coding->common_flags = 0;
3892   coding->eol_type = CODING_EOL_LF;
3893   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3894   return -1;
3895 }
3896
3897 /* Free memory blocks allocated for storing composition information.  */
3898 /* Afterwards coding->cmp_data is NULL.  Nothing is done if it was already NULL.  */
3899 void
3900 coding_free_composition_data (coding)
3901      struct coding_system *coding;
3902 {
3903   struct composition_data *cmp_data = coding->cmp_data, *next;
3904
3905   if (!cmp_data)
3906     return;
3907   /* Memory blocks are chained.  At first, rewind to the first, then,
3908      free blocks one by one.  */
3909   while (cmp_data->prev)
3910     cmp_data = cmp_data->prev;
3911   while (cmp_data)
3912     {
3913       next = cmp_data->next;    /* Save the link before the block is freed.  */
3914       xfree (cmp_data);
3915       cmp_data = next;
3916     }
3917   coding->cmp_data = NULL;
3918 }
3919
3920 /* Set `char_offset' member of the memory block pointed by
3921    coding->cmp_data, and of all blocks chained after it, to POS.  */
3922
3923 void
3924 coding_adjust_composition_offset (coding, pos)
3925      struct coding_system *coding;
3926      int pos;
3927 {
3928   struct composition_data *cmp_data;
3929   /* Only the `next' links are followed; blocks before coding->cmp_data are untouched.  */
3930   for (cmp_data = coding->cmp_data; cmp_data; cmp_data = cmp_data->next)
3931     cmp_data->char_offset = pos;
3932 }
3933
3934 /* Set up raw-text or one of its subsidiaries in the structure
3935    coding_system CODING according to the value of eol_type already
3936    set up in CODING.  CODING should be set up for some coding system
3937    in advance.  Nothing is done if CODING is already of raw-text type.  */
3938
3939 void
3940 setup_raw_text_coding_system (coding)
3941      struct coding_system *coding;
3942 {
3943   if (coding->type != coding_type_raw_text)
3944     {
3945       coding->symbol = Qraw_text;
3946       coding->type = coding_type_raw_text;
3947       if (coding->eol_type != CODING_EOL_UNDECIDED)
3948         {
3949           Lisp_Object subsidiaries;
3950           subsidiaries = Fget (Qraw_text, Qeol_type);
3951           /* Pick the subsidiary at index eol_type of raw-text's 3-element `eol-type' vector.  */
3952           if (VECTORP (subsidiaries)
3953               && XVECTOR (subsidiaries)->size == 3)
3954             coding->symbol
3955               = XVECTOR (subsidiaries)->contents[coding->eol_type];
3956         }
3957       setup_coding_system (coding->symbol, coding);   /* This zero-clears CODING first.  */
3958     }
3959   return;
3960 }
3961
3962 /* Emacs has a mechanism to automatically detect a coding system if it
3963    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3964    it's impossible to distinguish some coding systems accurately
3965    because they use the same range of codes.  So, at first, coding
3966    systems are classified into the following categories:
3967
3968    o coding-category-emacs-mule
3969
3970         The category for a coding system which has the same code range
3971         as Emacs' internal format.  Assigned the coding-system (Lisp
3972         symbol) `emacs-mule' by default.
3973
3974    o coding-category-sjis
3975
3976         The category for a coding system which has the same code range
3977         as SJIS.  Assigned the coding-system (Lisp
3978         symbol) `japanese-shift-jis' by default.
3979
3980    o coding-category-iso-7
3981
3982         The category for a coding system which has the same code range
3983         as ISO2022 of 7-bit environment.  This doesn't use any locking
3984         shift and single shift functions.  This can encode/decode all
3985         charsets.  Assigned the coding-system (Lisp symbol)
3986         `iso-2022-7bit' by default.
3987
3988    o coding-category-iso-7-tight
3989
3990         Same as coding-category-iso-7 except that this can
3991         encode/decode only the specified charsets.
3992
3993    o coding-category-iso-8-1
3994
3995         The category for a coding system which has the same code range
3996         as ISO2022 of 8-bit environment and graphic plane 1 used only
3997         for DIMENSION1 charset.  This doesn't use any locking shift
3998         and single shift functions.  Assigned the coding-system (Lisp
3999         symbol) `iso-latin-1' by default.
4000
4001    o coding-category-iso-8-2
4002
4003         The category for a coding system which has the same code range
4004         as ISO2022 of 8-bit environment and graphic plane 1 used only
4005         for DIMENSION2 charset.  This doesn't use any locking shift
4006         and single shift functions.  Assigned the coding-system (Lisp
4007         symbol) `japanese-iso-8bit' by default.
4008
4009    o coding-category-iso-7-else
4010
4011         The category for a coding system which has the same code range
4012         as ISO2022 of 7-bit environment but uses locking shift or
4013         single shift functions.  Assigned the coding-system (Lisp
4014         symbol) `iso-2022-7bit-lock' by default.
4015
4016    o coding-category-iso-8-else
4017
4018         The category for a coding system which has the same code range
4019         as ISO2022 of 8-bit environment but uses locking shift or
4020         single shift functions.  Assigned the coding-system (Lisp
4021         symbol) `iso-2022-8bit-ss2' by default.
4022
4023    o coding-category-big5
4024
4025         The category for a coding system which has the same code range
4026         as BIG5.  Assigned the coding-system (Lisp symbol)
4027         `cn-big5' by default.
4028
4029    o coding-category-utf-8
4030
4031         The category for a coding system which has the same code range
4032         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
4033         symbol) `utf-8' by default.
4034
4035    o coding-category-utf-16-be
4036
4037         The category for a coding system in which a text has an
4038         Unicode signature (cf. Unicode Standard) in the order of BIG
4039         endian at the head.  Assigned the coding-system (Lisp symbol)
4040         `utf-16-be' by default.
4041
4042    o coding-category-utf-16-le
4043
4044         The category for a coding system in which a text has an
4045         Unicode signature (cf. Unicode Standard) in the order of
4046         LITTLE endian at the head.  Assigned the coding-system (Lisp
4047         symbol) `utf-16-le' by default.
4048
4049    o coding-category-ccl
4050
4051         The category for a coding system of which encoder/decoder is
4052         written in CCL programs.  The default value is nil, i.e., no
4053         coding system is assigned.
4054
4055    o coding-category-binary
4056
4057         The category for a coding system not categorized in any of the
4058         above.  Assigned the coding-system (Lisp symbol)
4059         `no-conversion' by default.
4060
4061    Each of them is a Lisp symbol and the value is an actual
4062    `coding-system' (this is also a Lisp symbol) assigned by a user.
4063    What Emacs does actually is to detect a category of coding system.
4064    Then, it uses a `coding-system' assigned to it.  If Emacs can't
4065    decide a single possible category, it selects a category of the
4066    highest priority.  Priorities of categories are also specified by a
4067    user in a Lisp variable `coding-category-list'.
4068
4069 */
4070
4071 static
4072 int ascii_skip_code[256];       /* Nonzero for a byte that detect_coding_mask skips at the head.  */
4073
4074 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
4075    If it detects possible coding systems, return an integer in which
4076    appropriate flag bits are set.  Flag bits are defined by macros
4077    CODING_CATEGORY_MASK_XXX in `coding.h'.  If PRIORITIES is non-NULL,
4078    it should point the table `coding_priorities'.  In that case, only
4079    the flag bit for a coding system of the highest priority is set in
4080    the returned value.  If MULTIBYTEP is nonzero, 8-bit codes of the
4081    range 0x80..0x9F are in multibyte form.
4082    Return 0 if every byte of the text is skipped as ASCII.
4083    How many ASCII characters are at the head is returned as *SKIP.  */
4084
4085 static int
4086 detect_coding_mask (source, src_bytes, priorities, skip, multibytep)
4087      unsigned char *source;
4088      int src_bytes, *priorities, *skip;
4089      int multibytep;
4090 {
4091   register unsigned char c;
4092   unsigned char *src = source, *src_end = source + src_bytes;
4093   unsigned int mask, utf16_examined_p, iso2022_examined_p;
4094   int i;
4095
4096   /* At first, skip all ASCII characters and control characters except
4097      for three ISO2022 specific control characters.  */
4098   ascii_skip_code[ISO_CODE_SO] = 0;
4099   ascii_skip_code[ISO_CODE_SI] = 0;
4100   ascii_skip_code[ISO_CODE_ESC] = 0;
4101
4102  label_loop_detect_coding:
4103   while (src < src_end && ascii_skip_code[*src]) src++;
4104   *skip = src - source;
4105
4106   if (src >= src_end)
4107     /* We found nothing other than ASCII.  There's nothing to do.  */
4108     return 0;
4109
4110   c = *src;
4111   /* The text seems to be encoded in some multilingual coding system.
4112      Now, try to find in which coding system the text is encoded.  */
4113   if (c < 0x80)
4114     {
4115       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
4116       /* C is an ISO2022 specific control code of C0.  */
4117       mask = detect_coding_iso2022 (src, src_end, multibytep);
4118       if (mask == 0)
4119         {
4120           /* No valid ISO2022 code follows C.  Try again, this time
4121              skipping over the control code(s) that just failed.  */
4122           src++;
4122           if (c == ISO_CODE_ESC)
4123             ascii_skip_code[ISO_CODE_ESC] = 1;
4124           else
4125             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
4126           goto label_loop_detect_coding;
4127         }
4128       if (priorities)
4129         {
4130           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
4131             {
4132               if (mask & priorities[i])
4133                 return priorities[i];
4134             }
4135           return CODING_CATEGORY_MASK_RAW_TEXT;
4136         }
4137     }
4138   else
4139     {
4140       int try;                  /* Mask of the categories worth examining for C.  */
4141
4142       if (multibytep && c == LEADING_CODE_8_BIT_CONTROL)
4143         c = src[1] - 0x20;      /* NOTE(review): reads src[1] without checking it is below src_end.  */
4144
4145       if (c < 0xA0)
4146         {
4147           /* C is the first byte of SJIS character code,
4148              or a leading-code of Emacs' internal format (emacs-mule),
4149              or the first byte of UTF-16.  */
4150           try = (CODING_CATEGORY_MASK_SJIS
4151                   | CODING_CATEGORY_MASK_EMACS_MULE
4152                   | CODING_CATEGORY_MASK_UTF_16_BE
4153                   | CODING_CATEGORY_MASK_UTF_16_LE);
4154
4155           /* Or, if C is a special latin extra code,
4156              or is an ISO2022 specific control code of C1 (SS2 or SS3),
4157              or is an ISO2022 control-sequence-introducer (CSI),
4158              we should also consider the possibility of ISO2022 codings.  */
4159           if ((VECTORP (Vlatin_extra_code_table)
4160                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
4161               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
4162               || (c == ISO_CODE_CSI     /* NOTE(review): *src is >= 0x80 in this branch, so the tests on *src below look unsatisfiable.  */
4163                   && (src < src_end
4164                       && (*src == ']'
4165                           || ((*src == '0' || *src == '1' || *src == '2')
4166                               && src + 1 < src_end
4167                               && src[1] == ']')))))
4168             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
4169                      | CODING_CATEGORY_MASK_ISO_8BIT);
4170         }
4171       else
4172         /* C is a character of ISO2022 in graphic plane right,
4173            or a SJIS's 1-byte character code (i.e. JISX0201),
4174            or the first byte of BIG5's 2-byte code,
4175            or the first byte of UTF-8/16.  */
4176         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
4177                 | CODING_CATEGORY_MASK_ISO_8BIT
4178                 | CODING_CATEGORY_MASK_SJIS
4179                 | CODING_CATEGORY_MASK_BIG5
4180                 | CODING_CATEGORY_MASK_UTF_8
4181                 | CODING_CATEGORY_MASK_UTF_16_BE
4182                 | CODING_CATEGORY_MASK_UTF_16_LE);
4183
4184       /* Or, we may have to consider the possibility of CCL.  */
4185       if (coding_system_table[CODING_CATEGORY_IDX_CCL]
4186           && (coding_system_table[CODING_CATEGORY_IDX_CCL]
4187               ->spec.ccl.valid_codes)[c])
4188         try |= CODING_CATEGORY_MASK_CCL;
4189
4190       mask = 0;
4191       utf16_examined_p = iso2022_examined_p = 0;
4192       if (priorities)
4193         {
4194           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)   /* Run detectors in priority order; stop at the first hit.  */
4195             {
4196               if (!iso2022_examined_p
4197                   && (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
4198                 {
4199                   mask |= detect_coding_iso2022 (src, src_end, multibytep);
4200                   iso2022_examined_p = 1;
4201                 }
4202               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
4203                 mask |= detect_coding_sjis (src, src_end, multibytep);
4204               else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8)
4205                 mask |= detect_coding_utf_8 (src, src_end, multibytep);
4206               else if (!utf16_examined_p
4207                        && (priorities[i] & try &
4208                            CODING_CATEGORY_MASK_UTF_16_BE_LE))
4209                 {
4210                   mask |= detect_coding_utf_16 (src, src_end, multibytep);
4211                   utf16_examined_p = 1;
4212                 }
4213               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
4214                 mask |= detect_coding_big5 (src, src_end, multibytep);
4215               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
4216                 mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4217               else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
4218                 mask |= detect_coding_ccl (src, src_end, multibytep);
4219               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
4220                 mask |= CODING_CATEGORY_MASK_RAW_TEXT;
4221               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
4222                 mask |= CODING_CATEGORY_MASK_BINARY;
4223               if (mask & priorities[i])
4224                 return priorities[i];
4225             }
4226           return CODING_CATEGORY_MASK_RAW_TEXT;
4227         }
4228       if (try & CODING_CATEGORY_MASK_ISO)       /* No priorities: accumulate every category that matches.  */
4229         mask |= detect_coding_iso2022 (src, src_end, multibytep);
4230       if (try & CODING_CATEGORY_MASK_SJIS)
4231         mask |= detect_coding_sjis (src, src_end, multibytep);
4232       if (try & CODING_CATEGORY_MASK_BIG5)
4233         mask |= detect_coding_big5 (src, src_end, multibytep);
4234       if (try & CODING_CATEGORY_MASK_UTF_8)
4235         mask |= detect_coding_utf_8 (src, src_end, multibytep);
4236       if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE)
4237         mask |= detect_coding_utf_16 (src, src_end, multibytep);
4238       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
4239         mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4240       if (try & CODING_CATEGORY_MASK_CCL)
4241         mask |= detect_coding_ccl (src, src_end, multibytep);
4242     }
4243   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
4244 }
4245
4246 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
4247    The information of the detected coding system is set in CODING.  */
4248 /* If detect_coding_mask returns 0, only coding->heading_ascii is changed.  */
4249 void
4250 detect_coding (coding, src, src_bytes)
4251      struct coding_system *coding;
4252      const unsigned char *src;
4253      int src_bytes;
4254 {
4255   unsigned int idx;
4256   int skip, mask;
4257   Lisp_Object val;
4258
4259   val = Vcoding_category_list;  /* Overwritten below before it is read.  */
4260   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip,
4261                              coding->src_multibyte);
4262   coding->heading_ascii = skip;
4263
4264   if (!mask) return;
4265
4266   /* We found a single coding system of the highest priority in MASK.
4267      Convert its flag bit into a category index.  */
4267   idx = 0;
4268   while (mask && ! (mask & 1)) mask >>= 1, idx++;
4269   if (! mask)                   /* NOTE(review): MASK is nonzero here, so this fallback looks unreachable.  */
4270     idx = CODING_CATEGORY_IDX_RAW_TEXT;
4271
4272   val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[idx]);
4273
4274   if (coding->eol_type != CODING_EOL_UNDECIDED)
4275     {
4276       Lisp_Object tmp;
4277       /* The end-of-line format is already known: use the matching subsidiary.  */
4278       tmp = Fget (val, Qeol_type);
4279       if (VECTORP (tmp))
4280         val = XVECTOR (tmp)->contents[coding->eol_type];
4281     }
4282
4283   /* Setup this new coding system while preserving some slots.  */
4284   {
4285     int src_multibyte = coding->src_multibyte;
4286     int dst_multibyte = coding->dst_multibyte;
4287
4288     setup_coding_system (val, coding);
4289     coding->src_multibyte = src_multibyte;
4290     coding->dst_multibyte = dst_multibyte;
4291     coding->heading_ascii = skip;
4292   }
4293 }
4294
4295 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
4296    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
4297    CODING_EOL_CR, CODING_EOL_UNDECIDED (no end-of-line was found), and
4298    CODING_EOL_INCONSISTENT (end-of-lines of different formats were found).
4299    How many non-eol characters are at the head is returned as *SKIP.  */
4300
4301 #define MAX_EOL_CHECK_COUNT 3
4302
4303 static int
4304 detect_eol_type (source, src_bytes, skip)
4305      unsigned char *source;
4306      int src_bytes, *skip;
4307 {
4308   unsigned char *src = source, *src_end = src + src_bytes;
4309   unsigned char c;
4310   int total = 0;                /* How many end-of-lines are found so far.  */
4311   int eol_type = CODING_EOL_UNDECIDED;
4312   int this_eol_type;
4313
4314   *skip = -1;                   /* Negative until the first end-of-line is found; 0 is a valid offset.  */
4315
4316   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
4317     {
4318       c = *src++;
4319       if (c == '\n' || c == '\r')
4320         {
4321           if (*skip < 0)        /* Record the offset of the first end-of-line only.  */
4322             *skip = src - 1 - source;
4323           total++;
4324           if (c == '\n')
4325             this_eol_type = CODING_EOL_LF;
4326           else if (src >= src_end || *src != '\n')
4327             this_eol_type = CODING_EOL_CR;
4328           else
4329             this_eol_type = CODING_EOL_CRLF, src++;
4330
4331           if (eol_type == CODING_EOL_UNDECIDED)
4332             /* This is the first end-of-line.  */
4333             eol_type = this_eol_type;
4334           else if (eol_type != this_eol_type)
4335             {
4336               /* The found type is different from what found before.  */
4337               eol_type = CODING_EOL_INCONSISTENT;
4338               break;
4339             }
4340         }
4341     }
4342   /* No end-of-line was found at all: the whole text is non-eol.  */
4343   if (*skip < 0)
4344     *skip = src_end - source;
4345   return eol_type;
4346 }
4347
4348 /* Like detect_eol_type, but detect EOL type in 2-octet
4349    big-endian/little-endian format for coding systems utf-16-be and
4350    utf-16-le.  */
4351
4352 static int
4353 detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
4354      unsigned char *source;
4355      int src_bytes, *skip, big_endian_p;
4356 {
4357   unsigned char *src = source, *src_end = src + src_bytes;
4358   unsigned int c1, c2;
4359   int total = 0;                /* How many end-of-lines are found so far.  */
4360   int eol_type = CODING_EOL_UNDECIDED;
4361   int this_eol_type;
4362   int msb, lsb;
4363
4364   if (big_endian_p)
4365     msb = 0, lsb = 1;
4366   else
4367     msb = 1, lsb = 0;
4368
4369   *skip = 0;
4370
4371   while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT)
4372     {
4373       c1 = (src[msb] << 8) | (src[lsb]);
4374       src += 2;
4375
4376       if (c1 == '\n' || c1 == '\r')
4377         {
4378           if (*skip == 0)
4379             *skip = src - 2 - source;
4380           total++;
4381           if (c1 == '\n')
4382             {
4383               this_eol_type = CODING_EOL_LF;
4384             }
4385           else
4386             {
4387               if ((src + 1) >= src_end)
4388                 {
4389                   this_eol_type = CODING_EOL_CR;
4390                 }
4391               else
4392                 {
4393                   c2 = (src[msb] << 8) | (src[lsb]);
4394                   if (c2 == '\n')
4395                     this_eol_type = CODING_EOL_CRLF, src += 2;
4396                   else
4397                     this_eol_type = CODING_EOL_CR;
4398                 }
4399             }
4400
4401           if (eol_type == CODING_EOL_UNDECIDED)
4402             /* This is the first end-of-line.  */
4403             eol_type = this_eol_type;
4404           else if (eol_type != this_eol_type)
4405             {
4406               /* The found type is different from what found before.  */
4407               eol_type = CODING_EOL_INCONSISTENT;
4408               break;
4409             }
4410         }
4411     }
4412
4413   if (*skip == 0)
4414     *skip = src_end - source;
4415   return eol_type;
4416 }
4417
4418 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
4419    is encoded.  If it detects an appropriate format of end-of-line, it
4420    sets the information in *CODING.  */
4421
4422 void
4423 detect_eol (coding, src, src_bytes)
4424      struct coding_system *coding;
4425      const unsigned char *src;
4426      int src_bytes;
4427 {
4428   Lisp_Object val;
4429   int skip;
4430   int eol_type;
4431
4432   switch (coding->category_idx)
4433     {
4434     case CODING_CATEGORY_IDX_UTF_16_BE:
4435       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 1);
4436       break;
4437     case CODING_CATEGORY_IDX_UTF_16_LE:
4438       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 0);
4439       break;
4440     default:
4441       eol_type = detect_eol_type (src, src_bytes, &skip);
4442       break;
4443     }
4444
4445   if (coding->heading_ascii > skip)
4446     coding->heading_ascii = skip;
4447   else
4448     skip = coding->heading_ascii;
4449
4450   if (eol_type == CODING_EOL_UNDECIDED)
4451     return;
4452   if (eol_type == CODING_EOL_INCONSISTENT)
4453     {
4454 #if 0
4455       /* This code is suppressed until we find a better way to
4456          distinguish raw text file and binary file.  */
4457
4458       /* If we have already detected that the coding is raw-text, the
4459          coding should actually be no-conversion.  */
4460       if (coding->type == coding_type_raw_text)
4461         {
4462           setup_coding_system (Qno_conversion, coding);
4463           return;
4464         }
4465       /* Else, let's decode only text code anyway.  */
4466 #endif /* 0 */
4467       eol_type = CODING_EOL_LF;
4468     }
4469
4470   val = Fget (coding->symbol, Qeol_type);
4471   if (VECTORP (val) && XVECTOR (val)->size == 3)
4472     {
4473       int src_multibyte = coding->src_multibyte;
4474       int dst_multibyte = coding->dst_multibyte;
4475       struct composition_data *cmp_data = coding->cmp_data;
4476
4477       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
4478       coding->src_multibyte = src_multibyte;
4479       coding->dst_multibyte = dst_multibyte;
4480       coding->heading_ascii = skip;
4481       coding->cmp_data = cmp_data;
4482     }
4483 }
4484
4485 #define CONVERSION_BUFFER_EXTRA_ROOM 256
4486
4487 #define DECODING_BUFFER_MAG(coding)                     \
4488   (coding->type == coding_type_iso2022                  \
4489    ? 3                                                  \
4490    : (coding->type == coding_type_ccl                   \
4491       ? coding->spec.ccl.decoder.buf_magnification      \
4492       : 2))
4493
4494 /* Return maximum size (bytes) of a buffer enough for decoding
4495    SRC_BYTES of text encoded in CODING.  */
4496
4497 int
4498 decoding_buffer_size (coding, src_bytes)
4499      struct coding_system *coding;
4500      int src_bytes;
4501 {
4502   return (src_bytes * DECODING_BUFFER_MAG (coding)
4503           + CONVERSION_BUFFER_EXTRA_ROOM);
4504 }
4505
4506 /* Return maximum size (bytes) of a buffer enough for encoding
4507    SRC_BYTES of text to CODING.  */
4508
4509 int
4510 encoding_buffer_size (coding, src_bytes)
4511      struct coding_system *coding;
4512      int src_bytes;
4513 {
4514   int magnification;
4515
4516   if (coding->type == coding_type_ccl)
4517     {
4518       magnification = coding->spec.ccl.encoder.buf_magnification;
4519       if (coding->eol_type == CODING_EOL_CRLF)
4520         magnification *= 2;
4521     }
4522   else if (CODING_REQUIRE_ENCODING (coding))
4523     magnification = 3;
4524   else
4525     magnification = 1;
4526
4527   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
4528 }
4529
/* Working buffer for code conversion.  */
struct conversion_buffer
{
  int size;                     /* Number of bytes allocated at DATA.  */
  int on_stack;                 /* 1 if allocated by alloca.  */
  unsigned char *data;          /* The allocated memory.  */
};

/* Don't use alloca for allocating memory space larger than this, lest
   we overflow the stack.  The value is parenthesized so that the
   macro can be used safely inside any expression.  */
#define MAX_ALLOCA (16*1024)

/* Allocate LEN bytes of memory for BUF (struct conversion_buffer),
   by alloca if LEN is less than MAX_ALLOCA, else by xmalloc.  BUF
   must be an lvalue without side effects.  LEN is evaluated exactly
   once.  This has to be a macro: it calls alloca directly in the
   function that uses it.  */
#define allocate_conversion_buffer(buf, len)                    \
  do {                                                          \
    (buf).size = (len);                                         \
    if ((buf).size < MAX_ALLOCA)                                \
      {                                                         \
        (buf).data = (unsigned char *) alloca ((buf).size);     \
        (buf).on_stack = 1;                                     \
      }                                                         \
    else                                                        \
      {                                                         \
        (buf).data = (unsigned char *) xmalloc ((buf).size);    \
        (buf).on_stack = 0;                                     \
      }                                                         \
  } while (0)
4557
4558 /* Double the allocated memory for *BUF.  */
4559 static void
4560 extend_conversion_buffer (buf)
4561      struct conversion_buffer *buf;
4562 {
4563   if (buf->on_stack)
4564     {
4565       unsigned char *save = buf->data;
4566       buf->data = (unsigned char *) xmalloc (buf->size * 2);
4567       bcopy (save, buf->data, buf->size);
4568       buf->on_stack = 0;
4569     }
4570   else
4571     {
4572       buf->data = (unsigned char *) xrealloc (buf->data, buf->size * 2);
4573     }
4574   buf->size *= 2;
4575 }
4576
4577 /* Free the allocated memory for BUF if it is not on stack.  */
4578 static void
4579 free_conversion_buffer (buf)
4580      struct conversion_buffer *buf;
4581 {
4582   if (!buf->on_stack)
4583     xfree (buf->data);
4584 }
4585
4586 int
4587 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
4588      struct coding_system *coding;
4589      unsigned char *source, *destination;
4590      int src_bytes, dst_bytes, encodep;
4591 {
4592   struct ccl_program *ccl
4593     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
4594   unsigned char *dst = destination;
4595
4596   ccl->suppress_error = coding->suppress_error;
4597   ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4598   if (encodep)
4599     {
4600       /* On encoding, EOL format is converted within ccl_driver.  For
4601          that, setup proper information in the structure CCL.  */
4602       ccl->eol_type = coding->eol_type;
4603       if (ccl->eol_type ==CODING_EOL_UNDECIDED)
4604         ccl->eol_type = CODING_EOL_LF;
4605       ccl->cr_consumed = coding->spec.ccl.cr_carryover;
4606       ccl->eight_bit_control = coding->dst_multibyte;
4607     }
4608   else
4609     ccl->eight_bit_control = 1;
4610   ccl->multibyte = coding->src_multibyte;
4611   if (coding->spec.ccl.eight_bit_carryover[0] != 0)
4612     {
4613       /* Move carryover bytes to DESTINATION.  */
4614       unsigned char *p = coding->spec.ccl.eight_bit_carryover;
4615       while (*p)
4616         *dst++ = *p++;
4617       coding->spec.ccl.eight_bit_carryover[0] = 0;
4618       if (dst_bytes)
4619         dst_bytes -= dst - destination;
4620     }
4621
4622   coding->produced = (ccl_driver (ccl, source, dst, src_bytes, dst_bytes,
4623                                   &(coding->consumed))
4624                       + dst - destination);
4625
4626   if (encodep)
4627     {
4628       coding->produced_char = coding->produced;
4629       coding->spec.ccl.cr_carryover = ccl->cr_consumed;
4630     }
4631   else if (!ccl->eight_bit_control)
4632     {
4633       /* The produced bytes forms a valid multibyte sequence. */
4634       coding->produced_char
4635         = multibyte_chars_in_text (destination, coding->produced);
4636       coding->spec.ccl.eight_bit_carryover[0] = 0;
4637     }
4638   else
4639     {
4640       /* On decoding, the destination should always multibyte.  But,
4641          CCL program might have been generated an invalid multibyte
4642          sequence.  Here we make such a sequence valid as
4643          multibyte.  */
4644       int bytes
4645         = dst_bytes ? dst_bytes : source + coding->consumed - destination;
4646
4647       if ((coding->consumed < src_bytes
4648            || !ccl->last_block)
4649           && coding->produced >= 1
4650           && destination[coding->produced - 1] >= 0x80)
4651         {
4652           /* We should not convert the tailing 8-bit codes to
4653              multibyte form even if they doesn't form a valid
4654              multibyte sequence.  They may form a valid sequence in
4655              the next call.  */
4656           int carryover = 0;
4657
4658           if (destination[coding->produced - 1] < 0xA0)
4659             carryover = 1;
4660           else if (coding->produced >= 2)
4661             {
4662               if (destination[coding->produced - 2] >= 0x80)
4663                 {
4664                   if (destination[coding->produced - 2] < 0xA0)
4665                     carryover = 2;
4666                   else if (coding->produced >= 3
4667                            && destination[coding->produced - 3] >= 0x80
4668                            && destination[coding->produced - 3] < 0xA0)
4669                     carryover = 3;
4670                 }
4671             }
4672           if (carryover > 0)
4673             {
4674               BCOPY_SHORT (destination + coding->produced - carryover,
4675                            coding->spec.ccl.eight_bit_carryover,
4676                            carryover);
4677               coding->spec.ccl.eight_bit_carryover[carryover] = 0;
4678               coding->produced -= carryover;
4679             }
4680         }
4681       coding->produced = str_as_multibyte (destination, bytes,
4682                                            coding->produced,
4683                                            &(coding->produced_char));
4684     }
4685
4686   switch (ccl->status)
4687     {
4688     case CCL_STAT_SUSPEND_BY_SRC:
4689       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
4690       break;
4691     case CCL_STAT_SUSPEND_BY_DST:
4692       coding->result = CODING_FINISH_INSUFFICIENT_DST;
4693       break;
4694     case CCL_STAT_QUIT:
4695     case CCL_STAT_INVALID_CMD:
4696       coding->result = CODING_FINISH_INTERRUPT;
4697       break;
4698     default:
4699       coding->result = CODING_FINISH_NORMAL;
4700       break;
4701     }
4702   return coding->result;
4703 }
4704
4705 /* Decode EOL format of the text at PTR of BYTES length destructively
4706    according to CODING->eol_type.  This is called after the CCL
4707    program produced a decoded text at PTR.  If we do CRLF->LF
4708    conversion, update CODING->produced and CODING->produced_char.  */
4709
4710 static void
4711 decode_eol_post_ccl (coding, ptr, bytes)
4712      struct coding_system *coding;
4713      unsigned char *ptr;
4714      int bytes;
4715 {
4716   Lisp_Object val, saved_coding_symbol;
4717   unsigned char *pend = ptr + bytes;
4718   int dummy;
4719
4720   /* Remember the current coding system symbol.  We set it back when
4721      an inconsistent EOL is found so that `last-coding-system-used' is
4722      set to the coding system that doesn't specify EOL conversion.  */
4723   saved_coding_symbol = coding->symbol;
4724
4725   coding->spec.ccl.cr_carryover = 0;
4726   if (coding->eol_type == CODING_EOL_UNDECIDED)
4727     {
4728       /* Here, to avoid the call of setup_coding_system, we directly
4729          call detect_eol_type.  */
4730       coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
4731       if (coding->eol_type == CODING_EOL_INCONSISTENT)
4732         coding->eol_type = CODING_EOL_LF;
4733       if (coding->eol_type != CODING_EOL_UNDECIDED)
4734         {
4735           val = Fget (coding->symbol, Qeol_type);
4736           if (VECTORP (val) && XVECTOR (val)->size == 3)
4737             coding->symbol = XVECTOR (val)->contents[coding->eol_type];
4738         }
4739       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4740     }
4741
4742   if (coding->eol_type == CODING_EOL_LF
4743       || coding->eol_type == CODING_EOL_UNDECIDED)
4744     {
4745       /* We have nothing to do.  */
4746       ptr = pend;
4747     }
4748   else if (coding->eol_type == CODING_EOL_CRLF)
4749     {
4750       unsigned char *pstart = ptr, *p = ptr;
4751
4752       if (! (coding->mode & CODING_MODE_LAST_BLOCK)
4753           && *(pend - 1) == '\r')
4754         {
4755           /* If the last character is CR, we can't handle it here
4756              because LF will be in the not-yet-decoded source text.
4757              Record that the CR is not yet processed.  */
4758           coding->spec.ccl.cr_carryover = 1;
4759           coding->produced--;
4760           coding->produced_char--;
4761           pend--;
4762         }
4763       while (ptr < pend)
4764         {
4765           if (*ptr == '\r')
4766             {
4767               if (ptr + 1 < pend && *(ptr + 1) == '\n')
4768                 {
4769                   *p++ = '\n';
4770                   ptr += 2;
4771                 }
4772               else
4773                 {
4774                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4775                     goto undo_eol_conversion;
4776                   *p++ = *ptr++;
4777                 }
4778             }
4779           else if (*ptr == '\n'
4780                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4781             goto undo_eol_conversion;
4782           else
4783             *p++ = *ptr++;
4784           continue;
4785
4786         undo_eol_conversion:
4787           /* We have faced with inconsistent EOL format at PTR.
4788              Convert all LFs before PTR back to CRLFs.  */
4789           for (p--, ptr--; p >= pstart; p--)
4790             {
4791               if (*p == '\n')
4792                 *ptr-- = '\n', *ptr-- = '\r';
4793               else
4794                 *ptr-- = *p;
4795             }
4796           /*  If carryover is recorded, cancel it because we don't
4797               convert CRLF anymore.  */
4798           if (coding->spec.ccl.cr_carryover)
4799             {
4800               coding->spec.ccl.cr_carryover = 0;
4801               coding->produced++;
4802               coding->produced_char++;
4803               pend++;
4804             }
4805           p = ptr = pend;
4806           coding->eol_type = CODING_EOL_LF;
4807           coding->symbol = saved_coding_symbol;
4808         }
4809       if (p < pend)
4810         {
4811           /* As each two-byte sequence CRLF was converted to LF, (PEND
4812              - P) is the number of deleted characters.  */
4813           coding->produced -= pend - p;
4814           coding->produced_char -= pend - p;
4815         }
4816     }
4817   else                  /* i.e. coding->eol_type == CODING_EOL_CR */
4818     {
4819       unsigned char *p = ptr;
4820
4821       for (; ptr < pend; ptr++)
4822         {
4823           if (*ptr == '\r')
4824             *ptr = '\n';
4825           else if (*ptr == '\n'
4826                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4827             {
4828               for (; p < ptr; p++)
4829                 {
4830                   if (*p == '\n')
4831                     *p = '\r';
4832                 }
4833               ptr = pend;
4834               coding->eol_type = CODING_EOL_LF;
4835               coding->symbol = saved_coding_symbol;
4836             }
4837         }
4838     }
4839 }
4840
4841 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
4842    decoding, it may detect coding system and format of end-of-line if
4843    those are not yet decided.  The source should be unibyte, the
4844    result is multibyte if CODING->dst_multibyte is nonzero, else
4845    unibyte.  */
4846
4847 int
4848 decode_coding (coding, source, destination, src_bytes, dst_bytes)
4849      struct coding_system *coding;
4850      const unsigned char *source;
4851      unsigned char *destination;
4852      int src_bytes, dst_bytes;
4853 {
4854   int extra = 0;
4855
4856   if (coding->type == coding_type_undecided)
4857     detect_coding (coding, source, src_bytes);
4858
4859   if (coding->eol_type == CODING_EOL_UNDECIDED
4860       && coding->type != coding_type_ccl)
4861     {
4862       detect_eol (coding, source, src_bytes);
4863       /* We had better recover the original eol format if we
4864          encounter an inconsistent eol format while decoding.  */
4865       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4866     }
4867
4868   coding->produced = coding->produced_char = 0;
4869   coding->consumed = coding->consumed_char = 0;
4870   coding->errors = 0;
4871   coding->result = CODING_FINISH_NORMAL;
4872
4873   switch (coding->type)
4874     {
4875     case coding_type_sjis:
4876       decode_coding_sjis_big5 (coding, source, destination,
4877                                src_bytes, dst_bytes, 1);
4878       break;
4879
4880     case coding_type_iso2022:
4881       decode_coding_iso2022 (coding, source, destination,
4882                              src_bytes, dst_bytes);
4883       break;
4884
4885     case coding_type_big5:
4886       decode_coding_sjis_big5 (coding, source, destination,
4887                                src_bytes, dst_bytes, 0);
4888       break;
4889
4890     case coding_type_emacs_mule:
4891       decode_coding_emacs_mule (coding, source, destination,
4892                                 src_bytes, dst_bytes);
4893       break;
4894
4895     case coding_type_ccl:
4896       if (coding->spec.ccl.cr_carryover)
4897         {
4898           /* Put the CR which was not processed by the previous call
4899              of decode_eol_post_ccl in DESTINATION.  It will be
4900              decoded together with the following LF by the call to
4901              decode_eol_post_ccl below.  */
4902           *destination = '\r';
4903           coding->produced++;
4904           coding->produced_char++;
4905           dst_bytes--;
4906           extra = coding->spec.ccl.cr_carryover;
4907         }
4908       ccl_coding_driver (coding, source, destination + extra,
4909                          src_bytes, dst_bytes, 0);
4910       if (coding->eol_type != CODING_EOL_LF)
4911         {
4912           coding->produced += extra;
4913           coding->produced_char += extra;
4914           decode_eol_post_ccl (coding, destination, coding->produced);
4915         }
4916       break;
4917
4918     default:
4919       decode_eol (coding, source, destination, src_bytes, dst_bytes);
4920     }
4921
4922   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
4923       && coding->mode & CODING_MODE_LAST_BLOCK
4924       && coding->consumed == src_bytes)
4925     coding->result = CODING_FINISH_NORMAL;
4926
4927   if (coding->mode & CODING_MODE_LAST_BLOCK
4928       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4929     {
4930       const unsigned char *src = source + coding->consumed;
4931       unsigned char *dst = destination + coding->produced;
4932
4933       src_bytes -= coding->consumed;
4934       coding->errors++;
4935       if (COMPOSING_P (coding))
4936         DECODE_COMPOSITION_END ('1');
4937       while (src_bytes--)
4938         {
4939           int c = *src++;
4940           dst += CHAR_STRING (c, dst);
4941           coding->produced_char++;
4942         }
4943       coding->consumed = coding->consumed_char = src - source;
4944       coding->produced = dst - destination;
4945       coding->result = CODING_FINISH_NORMAL;
4946     }
4947
4948   if (!coding->dst_multibyte)
4949     {
4950       coding->produced = str_as_unibyte (destination, coding->produced);
4951       coding->produced_char = coding->produced;
4952     }
4953
4954   return coding->result;
4955 }
4956
4957 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  The
4958    multibyteness of the source is CODING->src_multibyte, the
4959    multibyteness of the result is always unibyte.  */
4960
4961 int
4962 encode_coding (coding, source, destination, src_bytes, dst_bytes)
4963      struct coding_system *coding;
4964      const unsigned char *source;
4965      unsigned char *destination;
4966      int src_bytes, dst_bytes;
4967 {
4968   coding->produced = coding->produced_char = 0;
4969   coding->consumed = coding->consumed_char = 0;
4970   coding->errors = 0;
4971   coding->result = CODING_FINISH_NORMAL;
4972
4973   switch (coding->type)
4974     {
4975     case coding_type_sjis:
4976       encode_coding_sjis_big5 (coding, source, destination,
4977                                src_bytes, dst_bytes, 1);
4978       break;
4979
4980     case coding_type_iso2022:
4981       encode_coding_iso2022 (coding, source, destination,
4982                              src_bytes, dst_bytes);
4983       break;
4984
4985     case coding_type_big5:
4986       encode_coding_sjis_big5 (coding, source, destination,
4987                                src_bytes, dst_bytes, 0);
4988       break;
4989
4990     case coding_type_emacs_mule:
4991       encode_coding_emacs_mule (coding, source, destination,
4992                                 src_bytes, dst_bytes);
4993       break;
4994
4995     case coding_type_ccl:
4996       ccl_coding_driver (coding, source, destination,
4997                          src_bytes, dst_bytes, 1);
4998       break;
4999
5000     default:
5001       encode_eol (coding, source, destination, src_bytes, dst_bytes);
5002     }
5003
5004   if (coding->mode & CODING_MODE_LAST_BLOCK
5005       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
5006     {
5007       const unsigned char *src = source + coding->consumed;
5008       unsigned char *dst = destination + coding->produced;
5009
5010       if (coding->type == coding_type_iso2022)
5011         ENCODE_RESET_PLANE_AND_REGISTER;
5012       if (COMPOSING_P (coding))
5013         *dst++ = ISO_CODE_ESC, *dst++ = '1';
5014       if (coding->consumed < src_bytes)
5015         {
5016           int len = src_bytes - coding->consumed;
5017
5018           BCOPY_SHORT (src, dst, len);
5019           if (coding->src_multibyte)
5020             len = str_as_unibyte (dst, len);
5021           dst += len;
5022           coding->consumed = src_bytes;
5023         }
5024       coding->produced = coding->produced_char = dst - destination;
5025       coding->result = CODING_FINISH_NORMAL;
5026     }
5027
5028   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
5029       && coding->consumed == src_bytes)
5030     coding->result = CODING_FINISH_NORMAL;
5031
5032   return coding->result;
5033 }
5034
5035 /* Scan text in the region between *BEG and *END (byte positions),
5036    skip characters which we don't have to decode by coding system
5037    CODING at the head and tail, then set *BEG and *END to the region
5038    of the text we actually have to convert.  The caller should move
5039    the gap out of the region in advance if the region is from a
5040    buffer.
5041
5042    If STR is not NULL, *BEG and *END are indices into STR.  */
5043
5044 static void
5045 shrink_decoding_region (beg, end, coding, str)
5046      int *beg, *end;
5047      struct coding_system *coding;
5048      unsigned char *str;
5049 {
5050   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
5051   int eol_conversion;
5052   Lisp_Object translation_table;
5053
5054   if (coding->type == coding_type_ccl
5055       || coding->type == coding_type_undecided
5056       || coding->eol_type != CODING_EOL_LF
5057       || !NILP (coding->post_read_conversion)
5058       || coding->composing != COMPOSITION_DISABLED)
5059     {
5060       /* We can't skip any data.  */
5061       return;
5062     }
5063   if (coding->type == coding_type_no_conversion
5064       || coding->type == coding_type_raw_text
5065       || coding->type == coding_type_emacs_mule)
5066     {
5067       /* We need no conversion, but don't have to skip any data here.
5068          Decoding routine handles them effectively anyway.  */
5069       return;
5070     }
5071
5072   translation_table = coding->translation_table_for_decode;
5073   if (NILP (translation_table) && !NILP (Venable_character_translation))
5074     translation_table = Vstandard_translation_table_for_decode;
5075   if (CHAR_TABLE_P (translation_table))
5076     {
5077       int i;
5078       for (i = 0; i < 128; i++)
5079         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
5080           break;
5081       if (i < 128)
5082         /* Some ASCII character should be translated.  We give up
5083            shrinking.  */
5084         return;
5085     }
5086
5087   if (coding->heading_ascii >= 0)
5088     /* Detection routine has already found how much we can skip at the
5089        head.  */
5090     *beg += coding->heading_ascii;
5091
5092   if (str)
5093     {
5094       begp_orig = begp = str + *beg;
5095       endp_orig = endp = str + *end;
5096     }
5097   else
5098     {
5099       begp_orig = begp = BYTE_POS_ADDR (*beg);
5100       endp_orig = endp = begp + *end - *beg;
5101     }
5102
5103   eol_conversion = (coding->eol_type == CODING_EOL_CR
5104                     || coding->eol_type == CODING_EOL_CRLF);
5105
5106   switch (coding->type)
5107     {
5108     case coding_type_sjis:
5109     case coding_type_big5:
5110       /* We can skip all ASCII characters at the head.  */
5111       if (coding->heading_ascii < 0)
5112         {
5113           if (eol_conversion)
5114             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
5115           else
5116             while (begp < endp && *begp < 0x80) begp++;
5117         }
5118       /* We can skip all ASCII characters at the tail except for the
5119          second byte of SJIS or BIG5 code.  */
5120       if (eol_conversion)
5121         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
5122       else
5123         while (begp < endp && endp[-1] < 0x80) endp--;
5124       /* Do not consider LF as ascii if preceded by CR, since that
5125          confuses eol decoding. */
5126       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
5127         endp++;
5128       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
5129         endp++;
5130       break;
5131
5132     case coding_type_iso2022:
5133       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
5134         /* We can't skip any data.  */
5135         break;
5136       if (coding->heading_ascii < 0)
5137         {
5138           /* We can skip all ASCII characters at the head except for a
5139              few control codes.  */
5140           while (begp < endp && (c = *begp) < 0x80
5141                  && c != ISO_CODE_CR && c != ISO_CODE_SO
5142                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
5143                  && (!eol_conversion || c != ISO_CODE_LF))
5144             begp++;
5145         }
5146       switch (coding->category_idx)
5147         {
5148         case CODING_CATEGORY_IDX_ISO_8_1:
5149         case CODING_CATEGORY_IDX_ISO_8_2:
5150           /* We can skip all ASCII characters at the tail.  */
5151           if (eol_conversion)
5152             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
5153           else
5154             while (begp < endp && endp[-1] < 0x80) endp--;
5155           /* Do not consider LF as ascii if preceded by CR, since that
5156              confuses eol decoding. */
5157           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
5158             endp++;
5159           break;
5160
5161         case CODING_CATEGORY_IDX_ISO_7:
5162         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
5163           {
5164             /* We can skip all characters at the tail except for 8-bit
5165                codes and ESC and the following 2-byte at the tail.  */
5166             unsigned char *eight_bit = NULL;
5167
5168             if (eol_conversion)
5169               while (begp < endp
5170                      && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
5171                 {
5172                   if (!eight_bit && c & 0x80) eight_bit = endp;
5173                   endp--;
5174                 }
5175             else
5176               while (begp < endp
5177                      && (c = endp[-1]) != ISO_CODE_ESC)
5178                 {
5179                   if (!eight_bit && c & 0x80) eight_bit = endp;
5180                   endp--;
5181                 }
5182             /* Do not consider LF as ascii if preceded by CR, since that
5183                confuses eol decoding. */
5184             if (begp < endp && endp < endp_orig
5185                 && endp[-1] == '\r' && endp[0] == '\n')
5186               endp++;
5187             if (begp < endp && endp[-1] == ISO_CODE_ESC)
5188               {
5189                 if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
5190                   /* This is an ASCII designation sequence.  We can
5191                      surely skip the tail.  But, if we have
5192                      encountered an 8-bit code, skip only the codes
5193                      after that.  */
5194                   endp = eight_bit ? eight_bit : endp + 2;
5195                 else
5196                   /* Hmmm, we can't skip the tail.  */
5197                   endp = endp_orig;
5198               }
5199             else if (eight_bit)
5200               endp = eight_bit;
5201           }
5202         }
5203       break;
5204
5205     default:
5206       abort ();
5207     }
5208   *beg += begp - begp_orig;
5209   *end += endp - endp_orig;
5210   return;
5211 }
5212
5213 /* Like shrink_decoding_region but for encoding.

        Try to narrow the byte range [*BEG, *END) that is about to be
        encoded with CODING by skipping bytes below 0x80 at its head and
        tail.  *BEG and *END are adjusted in place; they are left
        unchanged when nothing can be skipped.

        If STR is non-NULL, *BEG and *END are byte offsets into STR.
        Otherwise the text is located with BYTE_POS_ADDR (*BEG).  */
5214
5215 static void
5216 shrink_encoding_region (beg, end, coding, str)
5217      int *beg, *end;
5218      struct coding_system *coding;
5219      unsigned char *str;
5220 {
5221   unsigned char *begp_orig, *begp, *endp_orig, *endp;
5222   int eol_conversion;
5223   Lisp_Object translation_table;
5224
5225   if (coding->type == coding_type_ccl
5226       || coding->eol_type == CODING_EOL_CRLF
5227       || coding->eol_type == CODING_EOL_CR
5228       || (coding->cmp_data && coding->cmp_data->used > 0))
5229     {
5230       /* We can't skip any data.  */
5231       return;
5232     }
5233   if (coding->type == coding_type_no_conversion
5234       || coding->type == coding_type_raw_text
5235       || coding->type == coding_type_emacs_mule
5236       || coding->type == coding_type_undecided)
5237     {
5238       /* We need no conversion, but don't have to skip any data here.
5239          Encoding routine handles them effectively anyway.  */
5240       return;
5241     }
5242
       /* Use the coding system's own translation table, or fall back to
          the standard one when character translation is enabled.  */
5243   translation_table = coding->translation_table_for_encode;
5244   if (NILP (translation_table) && !NILP (Venable_character_translation))
5245     translation_table = Vstandard_translation_table_for_encode;
5246   if (CHAR_TABLE_P (translation_table))
5247     {
5248       int i;
5249       for (i = 0; i < 128; i++)
5250         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
5251           break;
5252       if (i < 128)
5253         /* Some ASCII character should be translated.  We give up
5254            shrinking.  */
5255         return;
5256     }
5257
       /* Set BEGP/ENDP to the first and the one-past-last byte of the
          region; the *_ORIG copies are kept to compute the deltas.  */
5258   if (str)
5259     {
5260       begp_orig = begp = str + *beg;
5261       endp_orig = endp = str + *end;
5262     }
5263   else
5264     {
5265       begp_orig = begp = BYTE_POS_ADDR (*beg);
5266       endp_orig = endp = begp + *end - *beg;
5267     }
5268
       /* NOTE(review): CODING_EOL_CR and CODING_EOL_CRLF already caused
          an early return at the top of this function, so as written this
          is always 0 and the loop variants below that also stop at '\n'
          are never selected.  */
5269   eol_conversion = (coding->eol_type == CODING_EOL_CR
5270                     || coding->eol_type == CODING_EOL_CRLF);
5271
5272   /* Here, we don't have to check coding->pre_write_conversion because
5273      the caller is expected to have handled it already.  */
5274   switch (coding->type)
5275     {
5276     case coding_type_iso2022:
5277       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
5278         /* We can't skip any data.  */
5279         break;
5280       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
5281         {
               /* Scan over the leading bytes below 0x80, but only skip up
                  to the position just after the last '\n' seen in that
                  run (or nothing if there was none), so that the
                  remaining region starts at a beginning of line.  */
5282           unsigned char *bol = begp;
5283           while (begp < endp && *begp < 0x80)
5284             {
5285               begp++;
5286               if (begp[-1] == '\n')
5287                 bol = begp;
5288             }
5289           begp = bol;
5290           goto label_skip_tail;
5291         }
5292       /* fall down ... */
5293
5294     case coding_type_sjis:
5295     case coding_type_big5:
5296       /* We can skip all ASCII characters at the head and tail.  */
5297       if (eol_conversion)
5298         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
5299       else
5300         while (begp < endp && *begp < 0x80) begp++;
5301     label_skip_tail:
5302       if (eol_conversion)
5303         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
5304       else
5305         while (begp < endp && *(endp - 1) < 0x80) endp--;
5306       break;
5307
5308     default:
5309       abort ();
5310     }
5311
       /* Report how far each end of the region was moved.  */
5312   *beg += begp - begp_orig;
5313   *end += endp - endp_orig;
5314   return;
5315 }
5316
5317 /* As shrinking conversion region requires some overhead, we don't try
5318    shrinking if the length of conversion region is not greater than
5319    this value.  */
5320 static int shrink_conversion_region_threshhold = 1024;
5321
     /* Shrink the region [*BEG, *END) for CODING, but only when it is
        longer than shrink_conversion_region_threshhold.  BEG and END are
        pointers to int; STR is handed to the callee unchanged.  A nonzero
        ENCODEP selects shrink_encoding_region, zero selects
        shrink_decoding_region.  */
5322 #define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep)        \
5323   do {                                                                  \
5324     if (*(end) - *(beg) > shrink_conversion_region_threshhold)          \
5325       {                                                                 \
5326         if (encodep) shrink_encoding_region (beg, end, coding, str);    \
5327         else shrink_decoding_region (beg, end, coding, str);            \
5328       }                                                                 \
5329   } while (0)
5330
     /* Handler passed to record_unwind_protect by the conversion routines
        in this file.  Clear inhibit_pre_post_conversion so that
        pre-write/post-read conversion functions may run again, and set
        Vlast_coding_system_used to ARG (the value that was current when
        the handler was registered).  Always return Qnil.  */
5331 static Lisp_Object
5332 code_convert_region_unwind (arg)
5333      Lisp_Object arg;
5334 {
5335   inhibit_pre_post_conversion = 0;
5336   Vlast_coding_system_used = arg;
5337   return Qnil;
5338 }
5339
5340 /* Store information about all compositions in the range FROM and TO
5341    of OBJ in memory blocks pointed by CODING->cmp_data.  OBJ is a
5342    buffer or a string, defaults to the current buffer.

        Nothing is done when CODING->composing is COMPOSITION_DISABLED.
        Nothing is recorded either when the first composition found does
        not end within TO.  Recorded positions are relative to FROM.
        When something was recorded, CODING->cmp_data is rewound to the
        first memory block and CODING->cmp_data_start is reset to 0.  */
5343
5344 void
5345 coding_save_composition (coding, from, to, obj)
5346      struct coding_system *coding;
5347      int from, to;
5348      Lisp_Object obj;
5349 {
5350   Lisp_Object prop;
5351   int start, end;
5352
5353   if (coding->composing == COMPOSITION_DISABLED)
5354     return;
5355   if (!coding->cmp_data)
5356     coding_allocate_composition_data (coding, from);
5357   if (!find_composition (from, to, &start, &end, &prop, obj)
5358       || end > to)
5359     return;
       /* The composition found starts before FROM; look for the next one
          after its end instead.  */
5360   if (start < from
5361       && (!find_composition (end, to, &start, &end, &prop, obj)
5362           || end > to))
5363     return;
5364   coding->composing = COMPOSITION_NO;
5365   do
5366     {
5367       if (COMPOSITION_VALID_P (start, end, prop))
5368         {
5369           enum composition_method method = COMPOSITION_METHOD (prop);
               /* Get a new memory block if the current one may not have
                  room for one more bunch of composition data.  */
5370           if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
5371               >= COMPOSITION_DATA_SIZE)
5372             coding_allocate_composition_data (coding, from);
5373           /* For relative composition, we remember start and end
5374              positions, for the other compositions, we also remember
5375              components.  */
5376           CODING_ADD_COMPOSITION_START (coding, start - from, method);
5377           if (method != COMPOSITION_RELATIVE)
5378             {
5379               /* We must store all components.  They may be given as a
                      list, a vector, a string, or a single integer.  */
5380               Lisp_Object val, ch;
5381
5382               val = COMPOSITION_COMPONENTS (prop);
5383               if (CONSP (val))
5384                 while (CONSP (val))
5385                   {
5386                     ch = XCAR (val), val = XCDR (val);
5387                     CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5388                   }
5389               else if (VECTORP (val) || STRINGP (val))
5390                 {
5391                   int len = (VECTORP (val)
5392                              ? XVECTOR (val)->size : SCHARS (val));
5393                   int i;
5394                   for (i = 0; i < len; i++)
5395                     {
5396                       ch = (STRINGP (val)
5397                             ? Faref (val, make_number (i))
5398                             : XVECTOR (val)->contents[i]);
5399                       CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5400                     }
5401                 }
5402               else              /* INTEGERP (val) */
5403                 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val));
5404             }
5405           CODING_ADD_COMPOSITION_END (coding, end - from);
5406         }
5407       start = end;
5408     }
       /* Continue with the next composition as long as it ends within TO.  */
5409   while (start < to
5410          && find_composition (start, to, &start, &end, &prop, obj)
5411          && end <= to);
5412
5413   /* Make coding->cmp_data point to the first memory block.  */
5414   while (coding->cmp_data->prev)
5415     coding->cmp_data = coding->cmp_data->prev;
5416   coding->cmp_data_start = 0;
5417 }
5418
5419 /* Reflect the saved information about compositions to OBJ.
5420    CODING->cmp_data points to a memory block for the information.  OBJ
5421    is a buffer or a string, defaults to the current buffer.

        Every memory block in the chain is processed, starting from the
        first one.  As read here, each record in a block is laid out as:
          data[0]    length of the record (also the step to the next one),
          data[1..2] the two positions passed to compose_text,
          data[3]    the composition method,
          data[4..]  the components (absent for COMPOSITION_RELATIVE).
        Do nothing if CODING->cmp_data is NULL.  */
5422
5423 void
5424 coding_restore_composition (coding, obj)
5425      struct coding_system *coding;
5426      Lisp_Object obj;
5427 {
5428   struct composition_data *cmp_data = coding->cmp_data;
5429
5430   if (!cmp_data)
5431     return;
5432
       /* Rewind to the first memory block of the chain.  */
5433   while (cmp_data->prev)
5434     cmp_data = cmp_data->prev;
5435
5436   while (cmp_data)
5437     {
5438       int i;
5439
           /* Walk the records of this block; a non-positive length ends
              the walk.  */
5440       for (i = 0; i < cmp_data->used && cmp_data->data[i] > 0;
5441            i += cmp_data->data[i])
5442         {
5443           int *data = cmp_data->data + i;
5444           enum composition_method method = (enum composition_method) data[3];
5445           Lisp_Object components;
5446
5447           if (data[0] < 0 || i + data[0] > cmp_data->used)
5448             /* Invalid composition data.  */
5449             break;
5450
5451           if (method == COMPOSITION_RELATIVE)
5452             components = Qnil;
5453           else
5454             {
                   /* NOTE(review): LEN is not checked against the size of
                      ARGS (or against being negative) here; presumably the
                      code that wrote the record guarantees a sane length --
                      confirm.  */
5455               int len = data[0] - 4, j;
5456               Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5457
                   /* With rules, an even count is reduced by one before
                      building the component vector.  */
5458               if (method == COMPOSITION_WITH_RULE_ALTCHARS
5459                   && len % 2 == 0)
5460                 len --;
5461               for (j = 0; j < len; j++)
5462                 args[j] = make_number (data[4 + j]);
                   /* Build a string for COMPOSITION_WITH_ALTCHARS, a vector
                      for every other method that reaches this branch.  */
5463               components = (method == COMPOSITION_WITH_ALTCHARS
5464                             ? Fstring (len, args)
5465                             : Fvector (len, args));
5466             }
5467           compose_text (data[1], data[2], components, Qnil, obj);
5468         }
5469       cmp_data = cmp_data->next;
5470     }
5471 }
5472
5473 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
5474    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
5475    coding system CODING, and return the status code of code conversion
5476    (currently, this value has no meaning).
5477
5478    How many characters (and bytes) are converted to how many
5479    characters (and bytes) are recorded in members of the structure
5480    CODING.
5481
5482    If REPLACE is nonzero, we do various things as if the original text
5483    is deleted and a new text is inserted.  See the comments in
5484    replace_range (insdel.c) to know what we are doing.
5485
5486    If REPLACE is zero, it is assumed that the source text is unibyte.
5487    Otherwise, it is assumed that the source text is multibyte.

        The text is converted in place in the current buffer: the source
        is moved into the gap and the converted text is produced at the
        start of the gap, which is enlarged on demand.  On return
        CODING->consumed, consumed_char, produced and produced_char
        describe the whole conversion; every return statement of this
        function returns 0.  */
5488
5489 int
5490 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
5491      int from, from_byte, to, to_byte, encodep, replace;
5492      struct coding_system *coding;
5493 {
5494   int len = to - from, len_byte = to_byte - from_byte;
5495   int nchars_del = 0, nbytes_del = 0;
5496   int require, inserted, inserted_byte;
5497   int head_skip, tail_skip, total_skip = 0;
5498   Lisp_Object saved_coding_symbol;
5499   int first = 1;
5500   unsigned char *src, *dst;
5501   Lisp_Object deletion;
5502   int orig_point = PT, orig_len = len;
5503   int prev_Z;
5504   int multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
5505
5506   deletion = Qnil;
5507   saved_coding_symbol = coding->symbol;
5508
5509   if (from < PT && PT < to)
5510     {
5511       TEMP_SET_PT_BOTH (from, from_byte);
5512       orig_point = from;
5513     }
5514
5515   if (replace)
5516     {
5517       int saved_from = from;
5518       int saved_inhibit_modification_hooks;
5519
5520       prepare_to_modify_buffer (from, to, &from);
5521       if (saved_from != from)
5522         {
5523           to = from + len;
5524           from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
5525           len_byte = to_byte - from_byte;
5526         }
5527
5528       /* The code conversion routine can not preserve text properties
5529          for now.  So, we must remove all text properties in the
5530          region.  Here, we must suppress all modification hooks.  */
5531       saved_inhibit_modification_hooks = inhibit_modification_hooks;
5532       inhibit_modification_hooks = 1;
5533       Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
5534       inhibit_modification_hooks = saved_inhibit_modification_hooks;
5535     }
5536
5537   if (! encodep && CODING_REQUIRE_DETECTION (coding))
5538     {
5539       /* We must detect encoding of text and eol format.  */
5540
5541       if (from < GPT && to > GPT)
5542         move_gap_both (from, from_byte);
5543       if (coding->type == coding_type_undecided)
5544         {
5545           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
5546           if (coding->type == coding_type_undecided)
5547             {
5548               /* It seems that the text contains only ASCII, but we
5549                  should not leave it undecided because the deeper
5550                  decoding routine (decode_coding) tries to detect the
5551                  encodings again in vain.  */
5552               coding->type = coding_type_emacs_mule;
5553               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
5554               /* As emacs-mule decoder will handle composition, we
5555                  need this setting to allocate coding->cmp_data
5556                  later.  */
5557               coding->composing = COMPOSITION_NO;
5558             }
5559         }
5560       if (coding->eol_type == CODING_EOL_UNDECIDED
5561           && coding->type != coding_type_ccl)
5562         {
5563           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
5564           if (coding->eol_type == CODING_EOL_UNDECIDED)
5565             coding->eol_type = CODING_EOL_LF;
5566           /* We had better recover the original eol format if we
5567              encounter an inconsistent eol format while decoding.  */
5568           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5569         }
5570     }
5571
5572   /* Now we convert the text.  */
5573
5574   /* For encoding, we must process pre-write-conversion in advance.  */
5575   if (! inhibit_pre_post_conversion
5576       && encodep
5577       && SYMBOLP (coding->pre_write_conversion)
5578       && ! NILP (Ffboundp (coding->pre_write_conversion)))
5579     {
5580       /* The function in pre-write-conversion may put a new text in a
5581          new buffer.  */
5582       struct buffer *prev = current_buffer;
5583       Lisp_Object new;
5584
5585       record_unwind_protect (code_convert_region_unwind,
5586                              Vlast_coding_system_used);
5587       /* We should not call any more pre-write/post-read-conversion
5588          functions while this pre-write-conversion is running.  */
5589       inhibit_pre_post_conversion = 1;
5590       call2 (coding->pre_write_conversion,
5591              make_number (from), make_number (to));
5592       inhibit_pre_post_conversion = 0;
5593       /* Discard the unwind protect.  */
5594       specpdl_ptr--;
5595
5596       if (current_buffer != prev)
5597         {
               /* Replace the original region with the whole accessible
                  text of the new buffer, then kill that buffer.  */
5598           len = ZV - BEGV;
5599           new = Fcurrent_buffer ();
5600           set_buffer_internal_1 (prev);
5601           del_range_2 (from, from_byte, to, to_byte, 0);
5602           TEMP_SET_PT_BOTH (from, from_byte);
5603           insert_from_buffer (XBUFFER (new), 1, len, 0);
5604           Fkill_buffer (new);
5605           if (orig_point >= to)
5606             orig_point += len - orig_len;
5607           else if (orig_point > from)
5608             orig_point = from;
5609           orig_len = len;
5610           to = from + len;
5611           from_byte = CHAR_TO_BYTE (from);
5612           to_byte = CHAR_TO_BYTE (to);
5613           len_byte = to_byte - from_byte;
5614           TEMP_SET_PT_BOTH (from, from_byte);
5615         }
5616     }
5617
       /* Remember what is being replaced: the deleted text itself when
          undo information is kept, only its size otherwise.  */
5618   if (replace)
5619     {
5620       if (! EQ (current_buffer->undo_list, Qt))
5621         deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
5622       else
5623         {
5624           nchars_del = to - from;
5625           nbytes_del = to_byte - from_byte;
5626         }
5627     }
5628
5629   if (coding->composing != COMPOSITION_DISABLED)
5630     {
5631       if (encodep)
5632         coding_save_composition (coding, from, to, Fcurrent_buffer ());
5633       else
5634         coding_allocate_composition_data (coding, from);
5635     }
5636
5637   /* Try to skip the heading and tailing ASCIIs.  */
5638   if (coding->type != coding_type_ccl)
5639     {
5640       int from_byte_orig = from_byte, to_byte_orig = to_byte;
5641
5642       if (from < GPT && GPT < to)
5643         move_gap_both (from, from_byte);
5644       SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
           /* The whole region was skipped and nothing else remains to be
              done: report it as converted unchanged.  */
5645       if (from_byte == to_byte
5646           && (encodep || NILP (coding->post_read_conversion))
5647           && ! CODING_REQUIRE_FLUSHING (coding))
5648         {
5649           coding->produced = len_byte;
5650           coding->produced_char = len;
5651           if (!replace)
5652             /* We must record and adjust for this new text now.  */
5653             adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
5654           return 0;
5655         }
5656
           /* The skipped bytes are all below 0x80, so the same counts
              apply to characters and to bytes.  */
5657       head_skip = from_byte - from_byte_orig;
5658       tail_skip = to_byte_orig - to_byte;
5659       total_skip = head_skip + tail_skip;
5660       from += head_skip;
5661       to -= tail_skip;
5662       len -= total_skip; len_byte -= total_skip;
5663     }
5664
5665   /* For conversion, we must put the gap before the text in addition to
5666      making the gap larger for efficient decoding.  The required gap
5667      size starts from 2000 which is the magic number used in make_gap.
5668      But, after one batch of conversion, it will be incremented if we
5669      find that it is not enough.  */
5670   require = 2000;
5671
5672   if (GAP_SIZE  < require)
5673     make_gap (require - GAP_SIZE);
5674   move_gap_both (from, from_byte);
5675
5676   inserted = inserted_byte = 0;
5677
       /* Take the source text out of the buffer contents by making it
          the tail of the gap.  */
5678   GAP_SIZE += len_byte;
5679   ZV -= len;
5680   Z -= len;
5681   ZV_BYTE -= len_byte;
5682   Z_BYTE -= len_byte;
5683
5684   if (GPT - BEG < BEG_UNCHANGED)
5685     BEG_UNCHANGED = GPT - BEG;
5686   if (Z - GPT < END_UNCHANGED)
5687     END_UNCHANGED = Z - GPT;
5688
5689   if (!encodep && coding->src_multibyte)
5690     {
5691       /* Decoding routines expects that the source text is unibyte.
5692          We must convert 8-bit characters of multibyte form to
5693          unibyte.  */
5694       int len_byte_orig = len_byte;
5695       len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte);
5696       if (len_byte < len_byte_orig)
5697         safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte,
5698                     len_byte);
5699       coding->src_multibyte = 0;
5700     }
5701
5702   for (;;)
5703     {
5704       int result;
5705
5706       /* The buffer memory is now:
5707          +--------+converted-text+---------+-------original-text-------+---+
5708          |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
5709                   |<---------------------- GAP ----------------------->|  */
5710       src = GAP_END_ADDR - len_byte;
5711       dst = GPT_ADDR + inserted_byte;
5712
5713       if (encodep)
5714         result = encode_coding (coding, src, dst, len_byte, 0);
5715       else
5716         {
5717           if (coding->composing != COMPOSITION_DISABLED)
5718             coding->cmp_data->char_offset = from + inserted;
5719           result = decode_coding (coding, src, dst, len_byte, 0);
5720         }
5721
5722       /* The buffer memory is now:
5723          +--------+-------converted-text----+--+------original-text----+---+
5724          |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
5725                   |<---------------------- GAP ----------------------->|  */
5726
5727       inserted += coding->produced_char;
5728       inserted_byte += coding->produced;
5729       len_byte -= coding->consumed;
5730
5731       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5732         {
5733           coding_allocate_composition_data (coding, from + inserted);
5734           continue;
5735         }
5736
5737       src += coding->consumed;
5738       dst += coding->produced;
5739
5740       if (result == CODING_FINISH_NORMAL)
5741         {
5742           src += len_byte;
5743           break;
5744         }
5745       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
5746         {
5747           unsigned char *pend = dst, *p = pend - inserted_byte;
5748           Lisp_Object eol_type;
5749
5750           /* Encode LFs back to the original eol format (CR or CRLF).  */
5751           if (coding->eol_type == CODING_EOL_CR)
5752             {
5753               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
5754             }
5755           else
5756             {
5757               int count = 0;
5758
5759               while (p < pend) if (*p++ == '\n') count++;
5760               if (src - dst < count)
5761                 {
5762                   /* We don't have sufficient room for encoding LFs
5763                      back to CRLF.  We must record converted and
5764                      not-yet-converted text back to the buffer
5765                      content, enlarge the gap, then record them out of
5766                      the buffer contents again.  */
5767                   int add = len_byte + inserted_byte;
5768
5769                   GAP_SIZE -= add;
5770                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5771                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
5772                   make_gap (count - GAP_SIZE);
5773                   GAP_SIZE += add;
5774                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5775                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5776                   /* Don't forget to update SRC, DST, and PEND.  */
5777                   src = GAP_END_ADDR - len_byte;
5778                   dst = GPT_ADDR + inserted_byte;
5779                   pend = dst;
5780                 }
5781               inserted += count;
5782               inserted_byte += count;
5783               coding->produced += count;
                   /* Copy backward from the end, inserting a CR in front
                      of each LF, until all COUNT LFs are handled.  */
5784               p = dst = pend + count;
5785               while (count)
5786                 {
5787                   *--p = *--pend;
5788                   if (*p == '\n') count--, *--p = '\r';
5789                 }
5790             }
5791
5792           /* Suppress eol-format conversion in the further conversion.  */
5793           coding->eol_type = CODING_EOL_LF;
5794
5795           /* Set the coding system symbol to that for Unix-like EOL.  */
5796           eol_type = Fget (saved_coding_symbol, Qeol_type);
5797           if (VECTORP (eol_type)
5798               && XVECTOR (eol_type)->size == 3
5799               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
5800             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
5801           else
5802             coding->symbol = saved_coding_symbol;
5803
5804           continue;
5805         }
5806       if (len_byte <= 0)
5807         {
               /* All source bytes are consumed.  A CCL-based coding system
                  gets one more call with CODING_MODE_LAST_BLOCK set.  */
5808           if (coding->type != coding_type_ccl
5809               || coding->mode & CODING_MODE_LAST_BLOCK)
5810             break;
5811           coding->mode |= CODING_MODE_LAST_BLOCK;
5812           continue;
5813         }
5814       if (result == CODING_FINISH_INSUFFICIENT_SRC)
5815         {
5816           /* The source text ends in invalid codes.  Let's just
5817              make them valid buffer contents, and finish conversion.  */
5818           if (multibyte_p)
5819             {
5820               unsigned char *start = dst;
5821
5822               inserted += len_byte;
5823               while (len_byte--)
5824                 {
5825                   int c = *src++;
5826                   dst += CHAR_STRING (c, dst);
5827                 }
5828
5829               inserted_byte += dst - start;
5830             }
5831           else
5832             {
5833               inserted += len_byte;
5834               inserted_byte += len_byte;
5835               while (len_byte--)
5836                 *dst++ = *src++;
5837             }
5838           break;
5839         }
5840       if (result == CODING_FINISH_INTERRUPT)
5841         {
5842           /* The conversion procedure was interrupted by a user.  */
5843           break;
5844         }
5845       /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST  */
5846       if (coding->consumed < 1)
5847         {
5848           /* It's quite strange to require more memory without
5849              consuming any bytes.  Perhaps CCL program bug.  */
5850           break;
5851         }
5852       if (first)
5853         {
5854           /* We have just done the first batch of conversion which was
5855              stopped because of insufficient gap.  Let's reconsider the
5856              required gap size (i.e. SRC - DST) now.
5857
5858              We have converted ORIG bytes (== coding->consumed) into
5859              NEW bytes (coding->produced).  To convert the remaining
5860              LEN bytes, we may need REQUIRE bytes of gap, where:
5861                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
5862                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
5863              Here, we are sure that NEW >= ORIG.  */
5864           float ratio;
5865
5866           if (coding->produced <= coding->consumed)
5867             {
5868               /* This happens because of CCL-based coding system with
5869                  eol-type CRLF.  */
5870               require = 0;
5871             }
5872           else
5873             {
                   /* Divide in floating point.  An integer division here
                      would truncate the quotient (to 0 whenever
                      NEW < 2 * ORIG) and underestimate REQUIRE.  */
5874               ratio = (float) (coding->produced - coding->consumed) / coding->consumed;
5875               require = len_byte * ratio;
5876             }
5877           first = 0;
5878         }
5879       if ((src - dst) < (require + 2000))
5880         {
5881           /* See the comment above the previous call of make_gap.  */
5882           int add = len_byte + inserted_byte;
5883
5884           GAP_SIZE -= add;
5885           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5886           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5887           make_gap (require + 2000);
5888           GAP_SIZE += add;
5889           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5890           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5891         }
5892     }
5893   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
5894
5895   if (encodep && coding->dst_multibyte)
5896     {
5897       /* The output is unibyte.  We must convert 8-bit characters to
5898          multibyte form.  */
5899       if (inserted_byte * 2 > GAP_SIZE)
5900         {
5901           GAP_SIZE -= inserted_byte;
5902           ZV += inserted_byte; Z += inserted_byte;
5903           ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
5904           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5905           make_gap (inserted_byte - GAP_SIZE);
5906           GAP_SIZE += inserted_byte;
5907           ZV -= inserted_byte; Z -= inserted_byte;
5908           ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte;
5909           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5910         }
5911       inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
5912     }
5913
5914   /* If we shrank the conversion area, adjust it now.  */
5915   if (total_skip > 0)
5916     {
5917       if (tail_skip > 0)
5918         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
5919       inserted += total_skip; inserted_byte += total_skip;
5920       GAP_SIZE += total_skip;
5921       GPT -= head_skip; GPT_BYTE -= head_skip;
5922       ZV -= total_skip; ZV_BYTE -= total_skip;
5923       Z -= total_skip; Z_BYTE -= total_skip;
5924       from -= head_skip; from_byte -= head_skip;
5925       to += tail_skip; to_byte += tail_skip;
5926     }
5927
       /* INSERTED is recomputed from the change of Z, since the adjust
          routines may alter the character count.  */
5928   prev_Z = Z;
5929   if (! EQ (current_buffer->undo_list, Qt))
5930     adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
5931   else
5932     adjust_after_replace_noundo (from, from_byte, nchars_del, nbytes_del,
5933                                  inserted, inserted_byte);
5934   inserted = Z - prev_Z;
5935
5936   if (!encodep && coding->cmp_data && coding->cmp_data->used)
5937     coding_restore_composition (coding, Fcurrent_buffer ());
5938   coding_free_composition_data (coding);
5939
5940   if (! inhibit_pre_post_conversion
5941       && ! encodep && ! NILP (coding->post_read_conversion))
5942     {
5943       Lisp_Object val;
5944       Lisp_Object saved_coding_system;
5945
5946       if (from != PT)
5947         TEMP_SET_PT_BOTH (from, from_byte);
5948       prev_Z = Z;
5949       record_unwind_protect (code_convert_region_unwind,
5950                              Vlast_coding_system_used);
5951       saved_coding_system = Vlast_coding_system_used;
5952       Vlast_coding_system_used = coding->symbol;
5953       /* We should not call any more pre-write/post-read-conversion
5954          functions while this post-read-conversion is running.  */
5955       inhibit_pre_post_conversion = 1;
5956       val = call1 (coding->post_read_conversion, make_number (inserted));
5957       inhibit_pre_post_conversion = 0;
5958       coding->symbol = Vlast_coding_system_used;
5959       Vlast_coding_system_used = saved_coding_system;
5960       /* Discard the unwind protect.  */
5961       specpdl_ptr--;
5962       CHECK_NUMBER (val);
5963       inserted += Z - prev_Z;
5964     }
5965
       /* Restore point, shifting it by the change of length when it was
          at or after the end of the converted text.  */
5966   if (orig_point >= from)
5967     {
5968       if (orig_point >= from + orig_len)
5969         orig_point += inserted - orig_len;
5970       else
5971         orig_point = from;
5972       TEMP_SET_PT (orig_point);
5973     }
5974
5975   if (replace)
5976     {
5977       signal_after_change (from, to - from, inserted);
5978       update_compositions (from, from + inserted, CHECK_BORDER);
5979     }
5980
5981   {
5982     coding->consumed = to_byte - from_byte;
5983     coding->consumed_char = to - from;
5984     coding->produced = inserted_byte;
5985     coding->produced_char = inserted;
5986   }
5987
5988   return 0;
5989 }
5990
     /* Run a conversion function of CODING on the text of STR and return
        the resulting text as a new string.  If ENCODEP is nonzero, call
        CODING->pre_write_conversion with the bounds of the text;
        otherwise call CODING->post_read_conversion with the text length,
        with Vlast_coding_system_used bound to CODING->symbol around the
        call (CODING->symbol is updated from it afterwards).

        The work is done in the buffer " *code-converting-work*", whose
        multibyteness is set to that of STR.  The previously current
        buffer is restored through the unwind-protect stack.  */
5991 Lisp_Object
5992 run_pre_post_conversion_on_str (str, coding, encodep)
5993      Lisp_Object str;
5994      struct coding_system *coding;
5995      int encodep;
5996 {
5997   int count = SPECPDL_INDEX ();
5998   struct gcpro gcpro1, gcpro2;
5999   int multibyte = STRING_MULTIBYTE (str);
6000   Lisp_Object buffer;
6001   struct buffer *buf;
6002   Lisp_Object old_deactivate_mark;
6003
       /* Both handlers are run by the final unbind_to.  */
6004   record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
6005   record_unwind_protect (code_convert_region_unwind,
6006                          Vlast_coding_system_used);
6007   /* It is not crucial to specbind this.  */
6008   old_deactivate_mark = Vdeactivate_mark;
6009   GCPRO2 (str, old_deactivate_mark);
6010
6011   buffer = Fget_buffer_create (build_string (" *code-converting-work*"));
6012   buf = XBUFFER (buffer);
6013
       /* Reset the state of the (possibly reused) work buffer.  */
6014   delete_all_overlays (buf);
6015   buf->directory = current_buffer->directory;
6016   buf->read_only = Qnil;
6017   buf->filename = Qnil;
6018   buf->undo_list = Qt;
6019   eassert (buf->overlays_before == NULL);
6020   eassert (buf->overlays_after == NULL);
6021
6022   set_buffer_internal (buf);
6023   /* We must insert the contents of STR as is without
6024      unibyte<->multibyte conversion.  For that, we adjust the
6025      multibyteness of the working buffer to that of STR.  */
6026   Ferase_buffer ();
6027   buf->enable_multibyte_characters = multibyte ? Qt : Qnil;
6028
6029   insert_from_string (str, 0, 0,
6030                       SCHARS (str), SBYTES (str), 0);
6031   UNGCPRO;
       /* Keep the conversion function from triggering further
          pre-write/post-read conversions while it runs.  */
6032   inhibit_pre_post_conversion = 1;
6033   if (encodep)
6034     call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
6035   else
6036     {
6037       Vlast_coding_system_used = coding->symbol;
6038       TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
6039       call1 (coding->post_read_conversion, make_number (Z - BEG));
6040       coding->symbol = Vlast_coding_system_used;
6041     }
6042   inhibit_pre_post_conversion = 0;
6043   Vdeactivate_mark = old_deactivate_mark;
6044   str = make_buffer_string (BEG, Z, 1);
6045   return unbind_to (count, str);
6046 }
6047
6048 Lisp_Object
6049 decode_coding_string (str, coding, nocopy)
6050      Lisp_Object str;
6051      struct coding_system *coding;
6052      int nocopy;
6053 {
6054   int len;
6055   struct conversion_buffer buf;
6056   int from, to_byte;
6057   Lisp_Object saved_coding_symbol;
6058   int result;
6059   int require_decoding;
6060   int shrinked_bytes = 0;
6061   Lisp_Object newstr;
6062   int consumed, consumed_char, produced, produced_char;
6063
6064   from = 0;
6065   to_byte = SBYTES (str);
6066
6067   saved_coding_symbol = coding->symbol;
6068   coding->src_multibyte = STRING_MULTIBYTE (str);
6069   coding->dst_multibyte = 1;
6070   if (CODING_REQUIRE_DETECTION (coding))
6071     {
6072       /* See the comments in code_convert_region.  */
6073       if (coding->type == coding_type_undecided)
6074         {
6075           detect_coding (coding, SDATA (str), to_byte);
6076           if (coding->type == coding_type_undecided)
6077             {
6078               coding->type = coding_type_emacs_mule;
6079               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
6080               /* As emacs-mule decoder will handle composition, we
6081                  need this setting to allocate coding->cmp_data
6082                  later.  */
6083               coding->composing = COMPOSITION_NO;
6084             }
6085         }
6086       if (coding->eol_type == CODING_EOL_UNDECIDED
6087           && coding->type != coding_type_ccl)
6088         {
6089           saved_coding_symbol = coding->symbol;
6090           detect_eol (coding, SDATA (str), to_byte);
6091           if (coding->eol_type == CODING_EOL_UNDECIDED)
6092             coding->eol_type = CODING_EOL_LF;
6093           /* We had better recover the original eol format if we
6094              encounter an inconsistent eol format while decoding.  */
6095           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
6096         }
6097     }
6098
6099   if (coding->type == coding_type_no_conversion
6100       || coding->type == coding_type_raw_text)
6101     coding->dst_multibyte = 0;
6102
6103   require_decoding = CODING_REQUIRE_DECODING (coding);
6104
6105   if (STRING_MULTIBYTE (str))
6106     {
6107       /* Decoding routines expect the source text to be unibyte.  */
6108       str = Fstring_as_unibyte (str);
6109       to_byte = SBYTES (str);
6110       nocopy = 1;
6111       coding->src_multibyte = 0;
6112     }
6113
6114   /* Try to skip the heading and tailing ASCIIs.  */
6115   if (require_decoding && coding->type != coding_type_ccl)
6116     {
6117       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
6118                                 0);
6119       if (from == to_byte)
6120         require_decoding = 0;
6121       shrinked_bytes = from + (SBYTES (str) - to_byte);
6122     }
6123
6124   if (!require_decoding
6125       && !(SYMBOLP (coding->post_read_conversion)
6126            && !NILP (Ffboundp (coding->post_read_conversion))))
6127     {
6128       coding->consumed = SBYTES (str);
6129       coding->consumed_char = SCHARS (str);
6130       if (coding->dst_multibyte)
6131         {
6132           str = Fstring_as_multibyte (str);
6133           nocopy = 1;
6134         }
6135       coding->produced = SBYTES (str);
6136       coding->produced_char = SCHARS (str);
6137       return (nocopy ? str : Fcopy_sequence (str));
6138     }
6139
6140   if (coding->composing != COMPOSITION_DISABLED)
6141     coding_allocate_composition_data (coding, from);
6142   len = decoding_buffer_size (coding, to_byte - from);
6143   allocate_conversion_buffer (buf, len);
6144
6145   consumed = consumed_char = produced = produced_char = 0;
6146   while (1)
6147     {
6148       result = decode_coding (coding, SDATA (str) + from + consumed,
6149                               buf.data + produced, to_byte - from - consumed,
6150                               buf.size - produced);
6151       consumed += coding->consumed;
6152       consumed_char += coding->consumed_char;
6153       produced += coding->produced;
6154       produced_char += coding->produced_char;
6155       if (result == CODING_FINISH_NORMAL
6156           || (result == CODING_FINISH_INSUFFICIENT_SRC
6157               && coding->consumed == 0))
6158         break;
6159       if (result == CODING_FINISH_INSUFFICIENT_CMP)
6160         coding_allocate_composition_data (coding, from + produced_char);
6161       else if (result == CODING_FINISH_INSUFFICIENT_DST)
6162         extend_conversion_buffer (&buf);
6163       else if (result == CODING_FINISH_INCONSISTENT_EOL)
6164         {
6165           Lisp_Object eol_type;
6166
6167           /* Recover the original EOL format.  */
6168           if (coding->eol_type == CODING_EOL_CR)
6169             {
6170               unsigned char *p;
6171               for (p = buf.data; p < buf.data + produced; p++)
6172                 if (*p == '\n') *p = '\r';
6173             }
6174           else if (coding->eol_type == CODING_EOL_CRLF)
6175             {
6176               int num_eol = 0;
6177               unsigned char *p0, *p1;
6178               for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++)
6179                 if (*p0 == '\n') num_eol++;
6180               if (produced + num_eol >= buf.size)
6181                 extend_conversion_buffer (&buf);
6182               for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;)
6183                 {
6184                   *--p1 = *--p0;
6185                   if (*p0 == '\n') *--p1 = '\r';
6186                 }
6187               produced += num_eol;
6188               produced_char += num_eol;
6189             }
6190           /* Suppress eol-format conversion in the further conversion.  */
6191           coding->eol_type = CODING_EOL_LF;
6192
6193           /* Set the coding system symbol to that for Unix-like EOL.  */
6194           eol_type = Fget (saved_coding_symbol, Qeol_type);
6195           if (VECTORP (eol_type)
6196               && XVECTOR (eol_type)->size == 3
6197               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
6198             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
6199           else
6200             coding->symbol = saved_coding_symbol;
6201
6202
6203         }
6204     }
6205
6206   coding->consumed = consumed;
6207   coding->consumed_char = consumed_char;
6208   coding->produced = produced;
6209   coding->produced_char = produced_char;
6210
6211   if (coding->dst_multibyte)
6212     newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes,
6213                                            produced + shrinked_bytes);
6214   else
6215     newstr = make_uninit_string (produced + shrinked_bytes);
6216   if (from > 0)
6217     STRING_COPYIN (newstr, 0, SDATA (str), from);
6218   STRING_COPYIN (newstr, from, buf.data, produced);
6219   if (shrinked_bytes > from)
6220     STRING_COPYIN (newstr, from + produced,
6221                    SDATA (str) + to_byte,
6222                    shrinked_bytes - from);
6223   free_conversion_buffer (&buf);
6224
6225   if (coding->cmp_data && coding->cmp_data->used)
6226     coding_restore_composition (coding, newstr);
6227   coding_free_composition_data (coding);
6228
6229   if (SYMBOLP (coding->post_read_conversion)
6230       && !NILP (Ffboundp (coding->post_read_conversion)))
6231     newstr = run_pre_post_conversion_on_str (newstr, coding, 0);
6232
6233   return newstr;
6234 }
6235
6236 Lisp_Object
6237 encode_coding_string (str, coding, nocopy)
6238      Lisp_Object str;
6239      struct coding_system *coding;
6240      int nocopy;
6241 {
6242   int len;
6243   struct conversion_buffer buf;
6244   int from, to, to_byte;
6245   int result;
6246   int shrinked_bytes = 0;
6247   Lisp_Object newstr;
6248   int consumed, consumed_char, produced, produced_char;
6249
6250   if (SYMBOLP (coding->pre_write_conversion)
6251       && !NILP (Ffboundp (coding->pre_write_conversion)))
6252     str = run_pre_post_conversion_on_str (str, coding, 1);
6253
6254   from = 0;
6255   to = SCHARS (str);
6256   to_byte = SBYTES (str);
6257
6258   /* Encoding routines determine the multibyteness of the source text
6259      by coding->src_multibyte.  */
6260   coding->src_multibyte = STRING_MULTIBYTE (str);
6261   coding->dst_multibyte = 0;
6262   if (! CODING_REQUIRE_ENCODING (coding))
6263     {
6264       coding->consumed = SBYTES (str);
6265       coding->consumed_char = SCHARS (str);
6266       if (STRING_MULTIBYTE (str))
6267         {
6268           str = Fstring_as_unibyte (str);
6269           nocopy = 1;
6270         }
6271       coding->produced = SBYTES (str);
6272       coding->produced_char = SCHARS (str);
6273       return (nocopy ? str : Fcopy_sequence (str));
6274     }
6275
6276   if (coding->composing != COMPOSITION_DISABLED)
6277     coding_save_composition (coding, from, to, str);
6278
6279   /* Try to skip the heading and tailing ASCIIs.  */
6280   if (coding->type != coding_type_ccl)
6281     {
6282       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
6283                                 1);
6284       if (from == to_byte)
6285         return (nocopy ? str : Fcopy_sequence (str));
6286       shrinked_bytes = from + (SBYTES (str) - to_byte);
6287     }
6288
6289   len = encoding_buffer_size (coding, to_byte - from);
6290   allocate_conversion_buffer (buf, len);
6291
6292   consumed = consumed_char = produced = produced_char = 0;
6293   while (1)
6294     {
6295       result = encode_coding (coding, SDATA (str) + from + consumed,
6296                               buf.data + produced, to_byte - from - consumed,
6297                               buf.size - produced);
6298       consumed += coding->consumed;
6299       consumed_char += coding->consumed_char;
6300       produced += coding->produced;
6301       produced_char += coding->produced_char;
6302       if (result == CODING_FINISH_NORMAL
6303           || (result == CODING_FINISH_INSUFFICIENT_SRC
6304               && coding->consumed == 0))
6305         break;
6306       /* Now result should be CODING_FINISH_INSUFFICIENT_DST.  */
6307       extend_conversion_buffer (&buf);
6308     }
6309
6310   coding->consumed = consumed;
6311   coding->consumed_char = consumed_char;
6312   coding->produced = produced;
6313   coding->produced_char = produced_char;
6314
6315   newstr = make_uninit_string (produced + shrinked_bytes);
6316   if (from > 0)
6317     STRING_COPYIN (newstr, 0, SDATA (str), from);
6318   STRING_COPYIN (newstr, from, buf.data, produced);
6319   if (shrinked_bytes > from)
6320     STRING_COPYIN (newstr, from + produced,
6321                    SDATA (str) + to_byte,
6322                    shrinked_bytes - from);
6323
6324   free_conversion_buffer (&buf);
6325   coding_free_composition_data (coding);
6326
6327   return newstr;
6328 }
6329
6330 \f
6331 #ifdef emacs
6332 /*** 8. Emacs Lisp library functions ***/
6333
6334 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6335        doc: /* Return t if OBJECT is nil or a coding-system.
6336 See the documentation of `make-coding-system' for information
6337 about coding-system objects.  */)
6338      (obj)
6339      Lisp_Object obj;
6340 {
6341   if (NILP (obj))
6342     return Qt;
6343   if (!SYMBOLP (obj))
6344     return Qnil;
6345   if (! NILP (Fget (obj, Qcoding_system_define_form)))
6346     return Qt;
6347   /* Get coding-spec vector for OBJ.  */
6348   obj = Fget (obj, Qcoding_system);
6349   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
6350           ? Qt : Qnil);
6351 }
6352
6353 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6354        Sread_non_nil_coding_system, 1, 1, 0,
6355        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
6356      (prompt)
6357      Lisp_Object prompt;
6358 {
6359   Lisp_Object val;
6360   do
6361     {
6362       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6363                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6364     }
6365   while (SCHARS (val) == 0);
6366   return (Fintern (val, Qnil));
6367 }
6368
6369 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6370        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6371 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.  */)
6372      (prompt, default_coding_system)
6373      Lisp_Object prompt, default_coding_system;
6374 {
6375   Lisp_Object val;
6376   if (SYMBOLP (default_coding_system))
6377     default_coding_system = SYMBOL_NAME (default_coding_system);
6378   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6379                           Qt, Qnil, Qcoding_system_history,
6380                           default_coding_system, Qnil);
6381   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
6382 }
6383
6384 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6385        1, 1, 0,
6386        doc: /* Check validity of CODING-SYSTEM.
6387 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
6388 It is valid if it is a symbol with a non-nil `coding-system' property.
6389 The value of property should be a vector of length 5.  */)
6390      (coding_system)
6391      Lisp_Object coding_system;
6392 {
6393   Lisp_Object define_form;
6394
6395   define_form = Fget (coding_system, Qcoding_system_define_form);
6396   if (! NILP (define_form))
6397     {
6398       Fput (coding_system, Qcoding_system_define_form, Qnil);
6399       safe_eval (define_form);
6400     }
6401   if (!NILP (Fcoding_system_p (coding_system)))
6402     return coding_system;
6403   while (1)
6404     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6405 }
6406 \f
6407 Lisp_Object
6408 detect_coding_system (src, src_bytes, highest, multibytep)
6409      const unsigned char *src;
6410      int src_bytes, highest;
6411      int multibytep;
6412 {
6413   int coding_mask, eol_type;
6414   Lisp_Object val, tmp;
6415   int dummy;
6416
6417   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy, multibytep);
6418   eol_type  = detect_eol_type (src, src_bytes, &dummy);
6419   if (eol_type == CODING_EOL_INCONSISTENT)
6420     eol_type = CODING_EOL_UNDECIDED;
6421
6422   if (!coding_mask)
6423     {
6424       val = Qundecided;
6425       if (eol_type != CODING_EOL_UNDECIDED)
6426         {
6427           Lisp_Object val2;
6428           val2 = Fget (Qundecided, Qeol_type);
6429           if (VECTORP (val2))
6430             val = XVECTOR (val2)->contents[eol_type];
6431         }
6432       return (highest ? val : Fcons (val, Qnil));
6433     }
6434
6435   /* At first, gather possible coding systems in VAL.  */
6436   val = Qnil;
6437   for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp))
6438     {
6439       Lisp_Object category_val, category_index;
6440
6441       category_index = Fget (XCAR (tmp), Qcoding_category_index);
6442       category_val = Fsymbol_value (XCAR (tmp));
6443       if (!NILP (category_val)
6444           && NATNUMP (category_index)
6445           && (coding_mask & (1 << XFASTINT (category_index))))
6446         {
6447           val = Fcons (category_val, val);
6448           if (highest)
6449             break;
6450         }
6451     }
6452   if (!highest)
6453     val = Fnreverse (val);
6454
6455   /* Then, replace the elements with subsidiary coding systems.  */
6456   for (tmp = val; CONSP (tmp); tmp = XCDR (tmp))
6457     {
6458       if (eol_type != CODING_EOL_UNDECIDED
6459           && eol_type != CODING_EOL_INCONSISTENT)
6460         {
6461           Lisp_Object eol;
6462           eol = Fget (XCAR (tmp), Qeol_type);
6463           if (VECTORP (eol))
6464             XSETCAR (tmp, XVECTOR (eol)->contents[eol_type]);
6465         }
6466     }
6467   return (highest ? XCAR (val) : val);
6468 }
6469
6470 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6471        2, 3, 0,
6472        doc: /* Detect how the byte sequence in the region is encoded.
6473 Return a list of possible coding systems used on decoding a byte
6474 sequence containing the bytes in the region between START and END when
6475 the coding system `undecided' is specified.  The list is ordered by
6476 priority decided in the current language environment.
6477
6478 If only ASCII characters are found, it returns a list of single element
6479 `undecided' or its subsidiary coding system according to a detected
6480 end-of-line format.
6481
6482 If optional argument HIGHEST is non-nil, return the coding system of
6483 highest priority.  */)
6484      (start, end, highest)
6485      Lisp_Object start, end, highest;
6486 {
6487   int from, to;
6488   int from_byte, to_byte;
6489   int include_anchor_byte = 0;
6490
6491   CHECK_NUMBER_COERCE_MARKER (start);
6492   CHECK_NUMBER_COERCE_MARKER (end);
6493
6494   validate_region (&start, &end);
6495   from = XINT (start), to = XINT (end);
6496   from_byte = CHAR_TO_BYTE (from);
6497   to_byte = CHAR_TO_BYTE (to);
6498
6499   if (from < GPT && to >= GPT)
6500     move_gap_both (to, to_byte);
6501   /* If we an anchor byte `\0' follows the region, we include it in
6502      the detecting source.  Then code detectors can handle the tailing
6503      byte sequence more accurately.
6504
6505      Fix me: This is not a perfect solution.  It is better that we
6506      add one more argument, say LAST_BLOCK, to all detect_coding_XXX.
6507   */
6508   if (to == Z || (to == GPT && GAP_SIZE > 0))
6509     include_anchor_byte = 1;
6510   return detect_coding_system (BYTE_POS_ADDR (from_byte),
6511                                to_byte - from_byte + include_anchor_byte,
6512                                !NILP (highest),
6513                                !NILP (current_buffer
6514                                       ->enable_multibyte_characters));
6515 }
6516
6517 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6518        1, 2, 0,
6519        doc: /* Detect how the byte sequence in STRING is encoded.
6520 Return a list of possible coding systems used on decoding a byte
6521 sequence containing the bytes in STRING when the coding system
6522 `undecided' is specified.  The list is ordered by priority decided in
6523 the current language environment.
6524
6525 If only ASCII characters are found, it returns a list of single element
6526 `undecided' or its subsidiary coding system according to a detected
6527 end-of-line format.
6528
6529 If optional argument HIGHEST is non-nil, return the coding system of
6530 highest priority.  */)
6531      (string, highest)
6532      Lisp_Object string, highest;
6533 {
6534   CHECK_STRING (string);
6535
6536   return detect_coding_system (SDATA (string),
6537                                /* "+ 1" is to include the anchor byte
6538                                   `\0'.  With this, code detectors can
6539                                   handle the tailing bytes more
6540                                   accurately.  */
6541                                SBYTES (string) + 1,
6542                                !NILP (highest),
6543                                STRING_MULTIBYTE (string));
6544 }
6545
6546 /*  Subroutine for Fsafe_coding_systems_region_internal.
6547
6548     Return a list of coding systems that safely encode the multibyte
6549     text between P and PEND.  SAFE_CODINGS, if non-nil, is an alist of
6550     possible coding systems.  If it is nil, it means that we have not
6551     yet found any coding systems.
6552
6553     WORK_TABLE is a copy of the char-table Vchar_coding_system_table.  An
6554     element of WORK_TABLE is set to t once the element is looked up.
6555
6556     If a non-ASCII single byte char is found, set
6557     *single_byte_char_found to 1.  */
6558
6559 static Lisp_Object
6560 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
6561      unsigned char *p, *pend;
6562      Lisp_Object safe_codings, work_table;
6563      int *single_byte_char_found;
6564 {
6565   int c, len;
6566   Lisp_Object val, ch;
6567   Lisp_Object prev, tail;
6568
6569   while (p < pend)
6570     {
6571       c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
6572       p += len;
6573       if (ASCII_BYTE_P (c))
6574         /* We can ignore ASCII characters here.  */
6575         continue;
6576       if (SINGLE_BYTE_CHAR_P (c))
6577         *single_byte_char_found = 1;
6578       if (NILP (safe_codings))
6579         /* Already all coding systems are excluded.  But, we can't
6580            terminate the loop here because non-ASCII single-byte char
6581            must be found.  */
6582         continue;
6583       /* Check the safe coding systems for C.  */
6584       ch = make_number (c);
6585       val = Faref (work_table, ch);
6586       if (EQ (val, Qt))
6587         /* This element was already checked.  Ignore it.  */
6588         continue;
6589       /* Remember that we checked this element.  */
6590       Faset (work_table, ch, Qt);
6591
6592       for (prev = tail = safe_codings; CONSP (tail); tail = XCDR (tail))
6593         {
6594           Lisp_Object elt, translation_table, hash_table, accept_latin_extra;
6595           int encodable;
6596
6597           elt = XCAR (tail);
6598           if (CONSP (XCDR (elt)))
6599             {
6600               /* This entry has this format now:
6601                  ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
6602                           ACCEPT-LATIN-EXTRA ) */
6603               val = XCDR (elt);
6604               encodable = ! NILP (Faref (XCAR (val), ch));
6605               if (! encodable)
6606                 {
6607                   val = XCDR (val);
6608                   translation_table = XCAR (val);
6609                   hash_table = XCAR (XCDR (val));
6610                   accept_latin_extra = XCAR (XCDR (XCDR (val)));
6611                 }
6612             }
6613           else
6614             {
6615               /* This entry has this format now: ( CODING . SAFE-CHARS) */
6616               encodable = ! NILP (Faref (XCDR (elt), ch));
6617               if (! encodable)
6618                 {
6619                   /* Transform the format to:
6620                      ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
6621                        ACCEPT-LATIN-EXTRA )  */
6622                   val = Fget (XCAR (elt), Qcoding_system);
6623                   translation_table
6624                     = Fplist_get (AREF (val, 3),
6625                                   Qtranslation_table_for_encode);
6626                   if (SYMBOLP (translation_table))
6627                     translation_table = Fget (translation_table,
6628                                               Qtranslation_table);
6629                   hash_table
6630                     = (CHAR_TABLE_P (translation_table)
6631                        ? XCHAR_TABLE (translation_table)->extras[1]
6632                        : Qnil);
6633                   accept_latin_extra
6634                     = ((EQ (AREF (val, 0), make_number (2))
6635                         && VECTORP (AREF (val, 4)))
6636                        ? AREF (AREF (val, 4), 16)
6637                        : Qnil);
6638                   XSETCAR (tail, list5 (XCAR (elt), XCDR (elt),
6639                                         translation_table, hash_table,
6640                                         accept_latin_extra));
6641                 }
6642             }
6643
6644           if (! encodable
6645               && ((CHAR_TABLE_P (translation_table)
6646                    && ! NILP (Faref (translation_table, ch)))
6647                   || (HASH_TABLE_P (hash_table)
6648                       && ! NILP (Fgethash (ch, hash_table, Qnil)))
6649                   || (SINGLE_BYTE_CHAR_P (c)
6650                       && ! NILP (accept_latin_extra)
6651                       && VECTORP (Vlatin_extra_code_table)
6652                       && ! NILP (AREF (Vlatin_extra_code_table, c)))))
6653             encodable = 1;
6654           if (encodable)
6655             prev = tail;
6656           else
6657             {
6658               /* Exclude this coding system from SAFE_CODINGS.  */
6659               if (EQ (tail, safe_codings))
6660                 safe_codings = XCDR (safe_codings);
6661               else
6662                 XSETCDR (prev, XCDR (tail));
6663             }
6664         }
6665     }
6666   return safe_codings;
6667 }
6668
6669 DEFUN ("find-coding-systems-region-internal",
6670        Ffind_coding_systems_region_internal,
6671        Sfind_coding_systems_region_internal, 2, 2, 0,
6672        doc: /* Internal use only.  */)
6673      (start, end)
6674      Lisp_Object start, end;
6675 {
6676   Lisp_Object work_table, safe_codings;
6677   int non_ascii_p = 0;
6678   int single_byte_char_found = 0;
6679   const unsigned char *p1, *p1end, *p2, *p2end, *p;
6680
6681   if (STRINGP (start))
6682     {
6683       if (!STRING_MULTIBYTE (start))
6684         return Qt;
6685       p1 = SDATA (start), p1end = p1 + SBYTES (start);
6686       p2 = p2end = p1end;
6687       if (SCHARS (start) != SBYTES (start))
6688         non_ascii_p = 1;
6689     }
6690   else
6691     {
6692       int from, to, stop;
6693
6694       CHECK_NUMBER_COERCE_MARKER (start);
6695       CHECK_NUMBER_COERCE_MARKER (end);
6696       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6697         args_out_of_range (start, end);
6698       if (NILP (current_buffer->enable_multibyte_characters))
6699         return Qt;
6700       from = CHAR_TO_BYTE (XINT (start));
6701       to = CHAR_TO_BYTE (XINT (end));
6702       stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
6703       p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
6704       if (stop == to)
6705         p2 = p2end = p1end;
6706       else
6707         p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
6708       if (XINT (end) - XINT (start) != to - from)
6709         non_ascii_p = 1;
6710     }
6711
6712   if (!non_ascii_p)
6713     {
6714       /* We are sure that the text contains no multibyte character.
6715          Check if it contains eight-bit-graphic.  */
6716       p = p1;
6717       for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
6718       if (p == p1end)
6719         {
6720           for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
6721           if (p == p2end)
6722             return Qt;
6723         }
6724     }
6725
6726   /* The text contains non-ASCII characters.  */
6727
6728   work_table = Fmake_char_table (Qchar_coding_system, Qnil);
6729   safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars));
6730
6731   safe_codings = find_safe_codings (p1, p1end, safe_codings, work_table,
6732                                     &single_byte_char_found);
6733   if (p2 < p2end)
6734     safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
6735                                       &single_byte_char_found);
6736   if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars)))
6737     safe_codings = Qt;
6738   else
6739     {
6740       /* Turn safe_codings to a list of coding systems... */
6741       Lisp_Object val;
6742
6743       if (single_byte_char_found)
6744         /* ... and append these for eight-bit chars.  */
6745         val = Fcons (Qraw_text,
6746                      Fcons (Qemacs_mule, Fcons (Qno_conversion, Qnil)));
6747       else
6748         /* ... and append generic coding systems.  */
6749         val = Fcopy_sequence (XCAR (Vcoding_system_safe_chars));
6750
6751       for (; CONSP (safe_codings); safe_codings = XCDR (safe_codings))
6752         val = Fcons (XCAR (XCAR (safe_codings)), val);
6753       safe_codings = val;
6754     }
6755
6756   return safe_codings;
6757 }
6758
6759
6760 /* Search from position POS for such characters that are unencodable
6761    accoding to SAFE_CHARS, and return a list of their positions.  P
6762    points where in the memory the character at POS exists.  Limit the
6763    search at PEND or when Nth unencodable characters are found.
6764
6765    If SAFE_CHARS is a char table, an element for an unencodable
6766    character is nil.
6767
6768    If SAFE_CHARS is nil, all non-ASCII characters are unencodable.
6769
6770    Otherwise, SAFE_CHARS is t, and only eight-bit-contrl and
6771    eight-bit-graphic characters are unencodable.  */
6772
6773 static Lisp_Object
6774 unencodable_char_position (safe_chars, pos, p, pend, n)
6775      Lisp_Object safe_chars;
6776      int pos;
6777      unsigned char *p, *pend;
6778      int n;
6779 {
6780   Lisp_Object pos_list;
6781
6782   pos_list = Qnil;
6783   while (p < pend)
6784     {
6785       int len;
6786       int c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, len);
6787
6788       if (c >= 128
6789           && (CHAR_TABLE_P (safe_chars)
6790               ? NILP (CHAR_TABLE_REF (safe_chars, c))
6791               : (NILP (safe_chars) || c < 256)))
6792         {
6793           pos_list = Fcons (make_number (pos), pos_list);
6794           if (--n <= 0)
6795             break;
6796         }
6797       pos++;
6798       p += len;
6799     }
6800   return Fnreverse (pos_list);
6801 }
6802
6803
6804 DEFUN ("unencodable-char-position", Funencodable_char_position,
6805        Sunencodable_char_position, 3, 5, 0,
6806        doc: /*
6807 Return position of first un-encodable character in a region.
6808 START and END specfiy the region and CODING-SYSTEM specifies the
6809 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
6810
6811 If optional 4th argument COUNT is non-nil, it specifies at most how
6812 many un-encodable characters to search.  In this case, the value is a
6813 list of positions.
6814
6815 If optional 5th argument STRING is non-nil, it is a string to search
6816 for un-encodable characters.  In that case, START and END are indexes
6817 to the string.  */)
6818      (start, end, coding_system, count, string)
6819      Lisp_Object start, end, coding_system, count, string;
6820 {
6821   int n;
6822   Lisp_Object safe_chars;
6823   struct coding_system coding;
6824   Lisp_Object positions;
6825   int from, to;
6826   unsigned char *p, *pend;
6827
6828   if (NILP (string))
6829     {
6830       validate_region (&start, &end);
6831       from = XINT (start);
6832       to = XINT (end);
6833       if (NILP (current_buffer->enable_multibyte_characters))
6834         return Qnil;
6835       p = CHAR_POS_ADDR (from);
6836       if (to == GPT)
6837         pend = GPT_ADDR;
6838       else
6839         pend = CHAR_POS_ADDR (to);
6840     }
6841   else
6842     {
6843       CHECK_STRING (string);
6844       CHECK_NATNUM (start);
6845       CHECK_NATNUM (end);
6846       from = XINT (start);
6847       to = XINT (end);
6848       if (from > to
6849           || to > SCHARS (string))
6850         args_out_of_range_3 (string, start, end);
6851       if (! STRING_MULTIBYTE (string))
6852         return Qnil;
6853       p = SDATA (string) + string_char_to_byte (string, from);
6854       pend = SDATA (string) + string_char_to_byte (string, to);
6855     }
6856
6857   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
6858
6859   if (NILP (count))
6860     n = 1;
6861   else
6862     {
6863       CHECK_NATNUM (count);
6864       n = XINT (count);
6865     }
6866
6867   if (coding.type == coding_type_no_conversion
6868       || coding.type == coding_type_raw_text)
6869     return Qnil;
6870
6871   if (coding.type == coding_type_undecided)
6872     safe_chars = Qnil;
6873   else
6874     safe_chars = coding_safe_chars (coding_system);
6875
6876   if (STRINGP (string)
6877       || from >= GPT || to <= GPT)
6878     positions = unencodable_char_position (safe_chars, from, p, pend, n);
6879   else
6880     {
6881       Lisp_Object args[2];
6882
6883       args[0] = unencodable_char_position (safe_chars, from, p, GPT_ADDR, n);
6884       n -= XINT (Flength (args[0]));
6885       if (n <= 0)
6886         positions = args[0];
6887       else
6888         {
6889           args[1] = unencodable_char_position (safe_chars, GPT, GAP_END_ADDR,
6890                                                pend, n);
6891           positions = Fappend (2, args);
6892         }
6893     }
6894
6895   return  (NILP (count) ? Fcar (positions) : positions);
6896 }
6897
6898
6899 Lisp_Object
6900 code_convert_region1 (start, end, coding_system, encodep)
6901      Lisp_Object start, end, coding_system;
6902      int encodep;
6903 {
6904   struct coding_system coding;
6905   int from, to;
6906
6907   CHECK_NUMBER_COERCE_MARKER (start);
6908   CHECK_NUMBER_COERCE_MARKER (end);
6909   CHECK_SYMBOL (coding_system);
6910
6911   validate_region (&start, &end);
6912   from = XFASTINT (start);
6913   to = XFASTINT (end);
6914
6915   if (NILP (coding_system))
6916     return make_number (to - from);
6917
6918   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6919     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
6920
6921   coding.mode |= CODING_MODE_LAST_BLOCK;
6922   coding.src_multibyte = coding.dst_multibyte
6923     = !NILP (current_buffer->enable_multibyte_characters);
6924   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
6925                        &coding, encodep, 1);
6926   Vlast_coding_system_used = coding.symbol;
6927   return make_number (coding.produced_char);
6928 }
6929
6930 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
6931        3, 3, "r\nzCoding system: ",
6932        doc: /* Decode the current region from the specified coding system.
6933 When called from a program, takes three arguments:
6934 START, END, and CODING-SYSTEM.  START and END are buffer positions.
6935 This function sets `last-coding-system-used' to the precise coding system
6936 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6937 not fully specified.)
6938 It returns the length of the decoded text.  */)
6939      (start, end, coding_system)
6940      Lisp_Object start, end, coding_system;
6941 {
6942   return code_convert_region1 (start, end, coding_system, 0);
6943 }
6944
6945 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
6946        3, 3, "r\nzCoding system: ",
6947        doc: /* Encode the current region into the specified coding system.
6948 When called from a program, takes three arguments:
6949 START, END, and CODING-SYSTEM.  START and END are buffer positions.
6950 This function sets `last-coding-system-used' to the precise coding system
6951 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6952 not fully specified.)
6953 It returns the length of the encoded text.  */)
6954      (start, end, coding_system)
6955      Lisp_Object start, end, coding_system;
6956 {
6957   return code_convert_region1 (start, end, coding_system, 1);
6958 }
6959
6960 Lisp_Object
6961 code_convert_string1 (string, coding_system, nocopy, encodep)
6962      Lisp_Object string, coding_system, nocopy;
6963      int encodep;
6964 {
6965   struct coding_system coding;
6966
6967   CHECK_STRING (string);
6968   CHECK_SYMBOL (coding_system);
6969
6970   if (NILP (coding_system))
6971     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
6972
6973   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6974     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
6975
6976   coding.mode |= CODING_MODE_LAST_BLOCK;
6977   string = (encodep
6978             ? encode_coding_string (string, &coding, !NILP (nocopy))
6979             : decode_coding_string (string, &coding, !NILP (nocopy)));
6980   Vlast_coding_system_used = coding.symbol;
6981
6982   return string;
6983 }
6984
6985 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
6986        2, 3, 0,
6987        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
6988 Optional arg NOCOPY non-nil means it is OK to return STRING itself
6989 if the decoding operation is trivial.
6990 This function sets `last-coding-system-used' to the precise coding system
6991 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6992 not fully specified.)  */)
6993      (string, coding_system, nocopy)
6994      Lisp_Object string, coding_system, nocopy;
6995 {
6996   return code_convert_string1 (string, coding_system, nocopy, 0);
6997 }
6998
6999 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
7000        2, 3, 0,
7001        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
7002 Optional arg NOCOPY non-nil means it is OK to return STRING itself
7003 if the encoding operation is trivial.
7004 This function sets `last-coding-system-used' to the precise coding system
7005 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
7006 not fully specified.)  */)
7007      (string, coding_system, nocopy)
7008      Lisp_Object string, coding_system, nocopy;
7009 {
7010   return code_convert_string1 (string, coding_system, nocopy, 1);
7011 }
7012
7013 /* Encode or decode STRING according to CODING_SYSTEM.
7014    Do not set Vlast_coding_system_used.
7015
7016    This function is called only from macros DECODE_FILE and
7017    ENCODE_FILE, thus we ignore character composition.  */
7018
7019 Lisp_Object
7020 code_convert_string_norecord (string, coding_system, encodep)
7021      Lisp_Object string, coding_system;
7022      int encodep;
7023 {
7024   struct coding_system coding;
7025
7026   CHECK_STRING (string);
7027   CHECK_SYMBOL (coding_system);
7028
7029   if (NILP (coding_system))
7030     return string;
7031
7032   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
7033     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
7034
7035   coding.composing = COMPOSITION_DISABLED;
7036   coding.mode |= CODING_MODE_LAST_BLOCK;
7037   return (encodep
7038           ? encode_coding_string (string, &coding, 1)
7039           : decode_coding_string (string, &coding, 1));
7040 }
7041 \f
7042 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
7043        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
7044 Return the corresponding character.  */)
7045      (code)
7046      Lisp_Object code;
7047 {
7048   unsigned char c1, c2, s1, s2;
7049   Lisp_Object val;
7050
7051   CHECK_NUMBER (code);
7052   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
7053   if (s1 == 0)
7054     {
7055       if (s2 < 0x80)
7056         XSETFASTINT (val, s2);
7057       else if (s2 >= 0xA0 || s2 <= 0xDF)
7058         XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0));
7059       else
7060         error ("Invalid Shift JIS code: %x", XFASTINT (code));
7061     }
7062   else
7063     {
7064       if ((s1 < 0x80 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF)
7065           || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
7066         error ("Invalid Shift JIS code: %x", XFASTINT (code));
7067       DECODE_SJIS (s1, s2, c1, c2);
7068       XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2));
7069     }
7070   return val;
7071 }
7072
7073 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
7074        doc: /* Encode a Japanese character CHAR to shift_jis encoding.
7075 Return the corresponding code in SJIS.  */)
7076      (ch)
7077      Lisp_Object ch;
7078 {
7079   int charset, c1, c2, s1, s2;
7080   Lisp_Object val;
7081
7082   CHECK_NUMBER (ch);
7083   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
7084   if (charset == CHARSET_ASCII)
7085     {
7086       val = ch;
7087     }
7088   else if (charset == charset_jisx0208
7089            && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
7090     {
7091       ENCODE_SJIS (c1, c2, s1, s2);
7092       XSETFASTINT (val, (s1 << 8) | s2);
7093     }
7094   else if (charset == charset_katakana_jisx0201
7095            && c1 > 0x20 && c2 < 0xE0)
7096     {
7097       XSETFASTINT (val, c1 | 0x80);
7098     }
7099   else
7100     error ("Can't encode to shift_jis: %d", XFASTINT (ch));
7101   return val;
7102 }
7103
7104 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
7105        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
7106 Return the corresponding character.  */)
7107      (code)
7108      Lisp_Object code;
7109 {
7110   int charset;
7111   unsigned char b1, b2, c1, c2;
7112   Lisp_Object val;
7113
7114   CHECK_NUMBER (code);
7115   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
7116   if (b1 == 0)
7117     {
7118       if (b2 >= 0x80)
7119         error ("Invalid BIG5 code: %x", XFASTINT (code));
7120       val = code;
7121     }
7122   else
7123     {
7124       if ((b1 < 0xA1 || b1 > 0xFE)
7125           || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
7126         error ("Invalid BIG5 code: %x", XFASTINT (code));
7127       DECODE_BIG5 (b1, b2, charset, c1, c2);
7128       XSETFASTINT (val, MAKE_CHAR (charset, c1, c2));
7129     }
7130   return val;
7131 }
7132
7133 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
7134        doc: /* Encode the Big5 character CHAR to BIG5 coding system.
7135 Return the corresponding character code in Big5.  */)
7136      (ch)
7137      Lisp_Object ch;
7138 {
7139   int charset, c1, c2, b1, b2;
7140   Lisp_Object val;
7141
7142   CHECK_NUMBER (ch);
7143   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
7144   if (charset == CHARSET_ASCII)
7145     {
7146       val = ch;
7147     }
7148   else if ((charset == charset_big5_1
7149             && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
7150            || (charset == charset_big5_2
7151                && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
7152     {
7153       ENCODE_BIG5 (charset, c1, c2, b1, b2);
7154       XSETFASTINT (val, (b1 << 8) | b2);
7155     }
7156   else
7157     error ("Can't encode to Big5: %d", XFASTINT (ch));
7158   return val;
7159 }
7160 \f
7161 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
7162        Sset_terminal_coding_system_internal, 1, 1, 0,
7163        doc: /* Internal use only.  */)
7164      (coding_system)
7165      Lisp_Object coding_system;
7166 {
7167   CHECK_SYMBOL (coding_system);
7168   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
7169   /* We had better not send unsafe characters to terminal.  */
7170   terminal_coding.mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR;
7171   /* Character composition should be disabled.  */
7172   terminal_coding.composing = COMPOSITION_DISABLED;
7173   /* Error notification should be suppressed.  */
7174   terminal_coding.suppress_error = 1;
7175   terminal_coding.src_multibyte = 1;
7176   terminal_coding.dst_multibyte = 0;
7177   return Qnil;
7178 }
7179
7180 DEFUN ("set-safe-terminal-coding-system-internal", Fset_safe_terminal_coding_system_internal,
7181        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
7182        doc: /* Internal use only.  */)
7183      (coding_system)
7184      Lisp_Object coding_system;
7185 {
7186   CHECK_SYMBOL (coding_system);
7187   setup_coding_system (Fcheck_coding_system (coding_system),
7188                        &safe_terminal_coding);
7189   /* Character composition should be disabled.  */
7190   safe_terminal_coding.composing = COMPOSITION_DISABLED;
7191   /* Error notification should be suppressed.  */
7192   terminal_coding.suppress_error = 1;
7193   safe_terminal_coding.src_multibyte = 1;
7194   safe_terminal_coding.dst_multibyte = 0;
7195   return Qnil;
7196 }
7197
7198 DEFUN ("terminal-coding-system", Fterminal_coding_system,
7199        Sterminal_coding_system, 0, 0, 0,
7200        doc: /* Return coding system specified for terminal output.  */)
7201      ()
7202 {
7203   return terminal_coding.symbol;
7204 }
7205
7206 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
7207        Sset_keyboard_coding_system_internal, 1, 1, 0,
7208        doc: /* Internal use only.  */)
7209      (coding_system)
7210      Lisp_Object coding_system;
7211 {
7212   CHECK_SYMBOL (coding_system);
7213   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
7214   /* Character composition should be disabled.  */
7215   keyboard_coding.composing = COMPOSITION_DISABLED;
7216   return Qnil;
7217 }
7218
7219 DEFUN ("keyboard-coding-system", Fkeyboard_coding_system,
7220        Skeyboard_coding_system, 0, 0, 0,
7221        doc: /* Return coding system specified for decoding keyboard input.  */)
7222      ()
7223 {
7224   return keyboard_coding.symbol;
7225 }
7226
7227 \f
7228 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7229        Sfind_operation_coding_system,  1, MANY, 0,
7230        doc: /* Choose a coding system for an operation based on the target name.
7231 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7232 DECODING-SYSTEM is the coding system to use for decoding
7233 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7234 for encoding (in case OPERATION does encoding).
7235
7236 The first argument OPERATION specifies an I/O primitive:
7237   For file I/O, `insert-file-contents' or `write-region'.
7238   For process I/O, `call-process', `call-process-region', or `start-process'.
7239   For network I/O, `open-network-stream'.
7240
7241 The remaining arguments should be the same arguments that were passed
7242 to the primitive.  Depending on which primitive, one of those arguments
7243 is selected as the TARGET.  For example, if OPERATION does file I/O,
7244 whichever argument specifies the file name is TARGET.
7245
7246 TARGET has a meaning which depends on OPERATION:
7247   For file I/O, TARGET is a file name.
7248   For process I/O, TARGET is a process name.
7249   For network I/O, TARGET is a service name or a port number
7250
7251 This function looks up what specified for TARGET in,
7252 `file-coding-system-alist', `process-coding-system-alist',
7253 or `network-coding-system-alist' depending on OPERATION.
7254 They may specify a coding system, a cons of coding systems,
7255 or a function symbol to call.
7256 In the last case, we call the function with one argument,
7257 which is a list of all the arguments given to this function.
7258
7259 usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
7260      (nargs, args)
7261      int nargs;
7262      Lisp_Object *args;
7263 {
7264   Lisp_Object operation, target_idx, target, val;
7265   register Lisp_Object chain;
7266
7267   if (nargs < 2)
7268     error ("Too few arguments");
7269   operation = args[0];
7270   if (!SYMBOLP (operation)
7271       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
7272     error ("Invalid first argument");
7273   if (nargs < 1 + XINT (target_idx))
7274     error ("Too few arguments for operation: %s",
7275            SDATA (SYMBOL_NAME (operation)));
7276   /* For write-region, if the 6th argument (i.e. VISIT, the 5th
7277      argument to write-region) is string, it must be treated as a
7278      target file name.  */
7279   if (EQ (operation, Qwrite_region)
7280       && nargs > 5
7281       && STRINGP (args[5]))
7282     target_idx = make_number (4);
7283   target = args[XINT (target_idx) + 1];
7284   if (!(STRINGP (target)
7285         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
7286     error ("Invalid argument %d", XINT (target_idx) + 1);
7287
7288   chain = ((EQ (operation, Qinsert_file_contents)
7289             || EQ (operation, Qwrite_region))
7290            ? Vfile_coding_system_alist
7291            : (EQ (operation, Qopen_network_stream)
7292               ? Vnetwork_coding_system_alist
7293               : Vprocess_coding_system_alist));
7294   if (NILP (chain))
7295     return Qnil;
7296
7297   for (; CONSP (chain); chain = XCDR (chain))
7298     {
7299       Lisp_Object elt;
7300       elt = XCAR (chain);
7301
7302       if (CONSP (elt)
7303           && ((STRINGP (target)
7304                && STRINGP (XCAR (elt))
7305                && fast_string_match (XCAR (elt), target) >= 0)
7306               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
7307         {
7308           val = XCDR (elt);
7309           /* Here, if VAL is both a valid coding system and a valid
7310              function symbol, we return VAL as a coding system.  */
7311           if (CONSP (val))
7312             return val;
7313           if (! SYMBOLP (val))
7314             return Qnil;
7315           if (! NILP (Fcoding_system_p (val)))
7316             return Fcons (val, val);
7317           if (! NILP (Ffboundp (val)))
7318             {
7319               val = call1 (val, Flist (nargs, args));
7320               if (CONSP (val))
7321                 return val;
7322               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7323                 return Fcons (val, val);
7324             }
7325           return Qnil;
7326         }
7327     }
7328   return Qnil;
7329 }
7330
7331 DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
7332        Supdate_coding_systems_internal, 0, 0, 0,
7333        doc: /* Update internal database for ISO2022 and CCL based coding systems.
7334 When values of any coding categories are changed, you must
7335 call this function.  */)
7336      ()
7337 {
7338   int i;
7339
7340   for (i = CODING_CATEGORY_IDX_EMACS_MULE; i < CODING_CATEGORY_IDX_MAX; i++)
7341     {
7342       Lisp_Object val;
7343
7344       val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[i]);
7345       if (!NILP (val))
7346         {
7347           if (! coding_system_table[i])
7348             coding_system_table[i] = ((struct coding_system *)
7349                                       xmalloc (sizeof (struct coding_system)));
7350           setup_coding_system (val, coding_system_table[i]);
7351         }
7352       else if (coding_system_table[i])
7353         {
7354           xfree (coding_system_table[i]);
7355           coding_system_table[i] = NULL;
7356         }
7357     }
7358
7359   return Qnil;
7360 }
7361
7362 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
7363        Sset_coding_priority_internal, 0, 0, 0,
7364        doc: /* Update internal database for the current value of `coding-category-list'.
7365 This function is internal use only.  */)
7366      ()
7367 {
7368   int i = 0, idx;
7369   Lisp_Object val;
7370
7371   val = Vcoding_category_list;
7372
7373   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
7374     {
7375       if (! SYMBOLP (XCAR (val)))
7376         break;
7377       idx = XFASTINT (Fget (XCAR (val), Qcoding_category_index));
7378       if (idx >= CODING_CATEGORY_IDX_MAX)
7379         break;
7380       coding_priorities[i++] = (1 << idx);
7381       val = XCDR (val);
7382     }
7383   /* If coding-category-list is valid and contains all coding
7384      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
7385      the following code saves Emacs from crashing.  */
7386   while (i < CODING_CATEGORY_IDX_MAX)
7387     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
7388
7389   return Qnil;
7390 }
7391
7392 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7393        Sdefine_coding_system_internal, 1, 1, 0,
7394        doc: /* Register CODING-SYSTEM as a base coding system.
7395 This function is internal use only.  */)
7396      (coding_system)
7397      Lisp_Object coding_system;
7398 {
7399   Lisp_Object safe_chars, slot;
7400
7401   if (NILP (Fcheck_coding_system (coding_system)))
7402     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
7403   safe_chars = coding_safe_chars (coding_system);
7404   if (! EQ (safe_chars, Qt) && ! CHAR_TABLE_P (safe_chars))
7405     error ("No valid safe-chars property for %s",
7406            SDATA (SYMBOL_NAME (coding_system)));
7407   if (EQ (safe_chars, Qt))
7408     {
7409       if (NILP (Fmemq (coding_system, XCAR (Vcoding_system_safe_chars))))
7410         XSETCAR (Vcoding_system_safe_chars,
7411                  Fcons (coding_system, XCAR (Vcoding_system_safe_chars)));
7412     }
7413   else
7414     {
7415       slot = Fassq (coding_system, XCDR (Vcoding_system_safe_chars));
7416       if (NILP (slot))
7417         XSETCDR (Vcoding_system_safe_chars,
7418                  nconc2 (XCDR (Vcoding_system_safe_chars),
7419                          Fcons (Fcons (coding_system, safe_chars), Qnil)));
7420       else
7421         XSETCDR (slot, safe_chars);
7422     }
7423   return Qnil;
7424 }
7425
7426 #endif /* emacs */
7427
7428 \f
7429 /*** 9. Post-amble ***/
7430
7431 void
7432 init_coding_once ()
7433 {
7434   int i;
7435
7436   /* Emacs' internal format specific initialize routine.  */
7437   for (i = 0; i <= 0x20; i++)
7438     emacs_code_class[i] = EMACS_control_code;
7439   emacs_code_class[0x0A] = EMACS_linefeed_code;
7440   emacs_code_class[0x0D] = EMACS_carriage_return_code;
7441   for (i = 0x21 ; i < 0x7F; i++)
7442     emacs_code_class[i] = EMACS_ascii_code;
7443   emacs_code_class[0x7F] = EMACS_control_code;
7444   for (i = 0x80; i < 0xFF; i++)
7445     emacs_code_class[i] = EMACS_invalid_code;
7446   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
7447   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
7448   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
7449   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
7450
7451   /* ISO2022 specific initialize routine.  */
7452   for (i = 0; i < 0x20; i++)
7453     iso_code_class[i] = ISO_control_0;
7454   for (i = 0x21; i < 0x7F; i++)
7455     iso_code_class[i] = ISO_graphic_plane_0;
7456   for (i = 0x80; i < 0xA0; i++)
7457     iso_code_class[i] = ISO_control_1;
7458   for (i = 0xA1; i < 0xFF; i++)
7459     iso_code_class[i] = ISO_graphic_plane_1;
7460   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
7461   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
7462   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
7463   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
7464   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
7465   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
7466   iso_code_class[ISO_CODE_ESC] = ISO_escape;
7467   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
7468   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
7469   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
7470
7471   setup_coding_system (Qnil, &keyboard_coding);
7472   setup_coding_system (Qnil, &terminal_coding);
7473   setup_coding_system (Qnil, &safe_terminal_coding);
7474   setup_coding_system (Qnil, &default_buffer_file_coding);
7475
7476   bzero (coding_system_table, sizeof coding_system_table);
7477
7478   bzero (ascii_skip_code, sizeof ascii_skip_code);
7479   for (i = 0; i < 128; i++)
7480     ascii_skip_code[i] = 1;
7481
7482 #if defined (MSDOS) || defined (WINDOWSNT)
7483   system_eol_type = CODING_EOL_CRLF;
7484 #else
7485   system_eol_type = CODING_EOL_LF;
7486 #endif
7487
7488   inhibit_pre_post_conversion = 0;
7489 }
7490
7491 #ifdef emacs
7492
7493 void
7494 syms_of_coding ()
7495 {
7496   Qtarget_idx = intern ("target-idx");
7497   staticpro (&Qtarget_idx);
7498
7499   Qcoding_system_history = intern ("coding-system-history");
7500   staticpro (&Qcoding_system_history);
7501   Fset (Qcoding_system_history, Qnil);
7502
7503   /* Target FILENAME is the first argument.  */
7504   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
7505   /* Target FILENAME is the third argument.  */
7506   Fput (Qwrite_region, Qtarget_idx, make_number (2));
7507
7508   Qcall_process = intern ("call-process");
7509   staticpro (&Qcall_process);
7510   /* Target PROGRAM is the first argument.  */
7511   Fput (Qcall_process, Qtarget_idx, make_number (0));
7512
7513   Qcall_process_region = intern ("call-process-region");
7514   staticpro (&Qcall_process_region);
7515   /* Target PROGRAM is the third argument.  */
7516   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
7517
7518   Qstart_process = intern ("start-process");
7519   staticpro (&Qstart_process);
7520   /* Target PROGRAM is the third argument.  */
7521   Fput (Qstart_process, Qtarget_idx, make_number (2));
7522
7523   Qopen_network_stream = intern ("open-network-stream");
7524   staticpro (&Qopen_network_stream);
7525   /* Target SERVICE is the fourth argument.  */
7526   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
7527
7528   Qcoding_system = intern ("coding-system");
7529   staticpro (&Qcoding_system);
7530
7531   Qeol_type = intern ("eol-type");
7532   staticpro (&Qeol_type);
7533
7534   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
7535   staticpro (&Qbuffer_file_coding_system);
7536
7537   Qpost_read_conversion = intern ("post-read-conversion");
7538   staticpro (&Qpost_read_conversion);
7539
7540   Qpre_write_conversion = intern ("pre-write-conversion");
7541   staticpro (&Qpre_write_conversion);
7542
7543   Qno_conversion = intern ("no-conversion");
7544   staticpro (&Qno_conversion);
7545
7546   Qundecided = intern ("undecided");
7547   staticpro (&Qundecided);
7548
7549   Qcoding_system_p = intern ("coding-system-p");
7550   staticpro (&Qcoding_system_p);
7551
7552   Qcoding_system_error = intern ("coding-system-error");
7553   staticpro (&Qcoding_system_error);
7554
7555   Fput (Qcoding_system_error, Qerror_conditions,
7556         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
7557   Fput (Qcoding_system_error, Qerror_message,
7558         build_string ("Invalid coding system"));
7559
7560   Qcoding_category = intern ("coding-category");
7561   staticpro (&Qcoding_category);
7562   Qcoding_category_index = intern ("coding-category-index");
7563   staticpro (&Qcoding_category_index);
7564
7565   Vcoding_category_table
7566     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
7567   staticpro (&Vcoding_category_table);
7568   {
7569     int i;
7570     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
7571       {
7572         XVECTOR (Vcoding_category_table)->contents[i]
7573           = intern (coding_category_name[i]);
7574         Fput (XVECTOR (Vcoding_category_table)->contents[i],
7575               Qcoding_category_index, make_number (i));
7576       }
7577   }
7578
7579   Vcoding_system_safe_chars = Fcons (Qnil, Qnil);
7580   staticpro (&Vcoding_system_safe_chars);
7581
7582   Qtranslation_table = intern ("translation-table");
7583   staticpro (&Qtranslation_table);
7584   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
7585
7586   Qtranslation_table_id = intern ("translation-table-id");
7587   staticpro (&Qtranslation_table_id);
7588
7589   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
7590   staticpro (&Qtranslation_table_for_decode);
7591
7592   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
7593   staticpro (&Qtranslation_table_for_encode);
7594
7595   Qsafe_chars = intern ("safe-chars");
7596   staticpro (&Qsafe_chars);
7597
7598   Qchar_coding_system = intern ("char-coding-system");
7599   staticpro (&Qchar_coding_system);
7600
7601   /* Intern this now in case it isn't already done.
7602      Setting this variable twice is harmless.
7603      But don't staticpro it here--that is done in alloc.c.  */
7604   Qchar_table_extra_slots = intern ("char-table-extra-slots");
7605   Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
7606   Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (0));
7607
7608   Qvalid_codes = intern ("valid-codes");
7609   staticpro (&Qvalid_codes);
7610
7611   Qemacs_mule = intern ("emacs-mule");
7612   staticpro (&Qemacs_mule);
7613
7614   Qraw_text = intern ("raw-text");
7615   staticpro (&Qraw_text);
7616
7617   Qutf_8 = intern ("utf-8");
7618   staticpro (&Qutf_8);
7619
7620   Qcoding_system_define_form = intern ("coding-system-define-form");
7621   staticpro (&Qcoding_system_define_form);
7622
7623   defsubr (&Scoding_system_p);
7624   defsubr (&Sread_coding_system);
7625   defsubr (&Sread_non_nil_coding_system);
7626   defsubr (&Scheck_coding_system);
7627   defsubr (&Sdetect_coding_region);
7628   defsubr (&Sdetect_coding_string);
7629   defsubr (&Sfind_coding_systems_region_internal);
7630   defsubr (&Sunencodable_char_position);
7631   defsubr (&Sdecode_coding_region);
7632   defsubr (&Sencode_coding_region);
7633   defsubr (&Sdecode_coding_string);
7634   defsubr (&Sencode_coding_string);
7635   defsubr (&Sdecode_sjis_char);
7636   defsubr (&Sencode_sjis_char);
7637   defsubr (&Sdecode_big5_char);
7638   defsubr (&Sencode_big5_char);
7639   defsubr (&Sset_terminal_coding_system_internal);
7640   defsubr (&Sset_safe_terminal_coding_system_internal);
7641   defsubr (&Sterminal_coding_system);
7642   defsubr (&Sset_keyboard_coding_system_internal);
7643   defsubr (&Skeyboard_coding_system);
7644   defsubr (&Sfind_operation_coding_system);
7645   defsubr (&Supdate_coding_systems_internal);
7646   defsubr (&Sset_coding_priority_internal);
7647   defsubr (&Sdefine_coding_system_internal);
7648
7649   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
7650                doc: /* List of coding systems.
7651
7652 Do not alter the value of this variable manually.  This variable should be
7653 updated by the functions `make-coding-system' and
7654 `define-coding-system-alias'.  */);
7655   Vcoding_system_list = Qnil;
7656
7657   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
7658                doc: /* Alist of coding system names.
7659 Each element is one element list of coding system name.
7660 This variable is given to `completing-read' as TABLE argument.
7661
7662 Do not alter the value of this variable manually.  This variable should be
7663 updated by the functions `make-coding-system' and
7664 `define-coding-system-alias'.  */);
7665   Vcoding_system_alist = Qnil;
7666
7667   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
7668                doc: /* List of coding-categories (symbols) ordered by priority.
7669
7670 On detecting a coding system, Emacs tries code detection algorithms
7671 associated with each coding-category one by one in this order.  When
7672 one algorithm agrees with a byte sequence of source text, the coding
7673 system bound to the corresponding coding-category is selected.  */);
7674   {
7675     int i;
7676
7677     Vcoding_category_list = Qnil;
7678     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
7679       Vcoding_category_list
7680         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
7681                  Vcoding_category_list);
7682   }
7683
7684   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
7685                doc: /* Specify the coding system for read operations.
7686 It is useful to bind this variable with `let', but do not set it globally.
7687 If the value is a coding system, it is used for decoding on read operation.
7688 If not, an appropriate element is used from one of the coding system alists:
7689 There are three such tables, `file-coding-system-alist',
7690 `process-coding-system-alist', and `network-coding-system-alist'.  */);
7691   Vcoding_system_for_read = Qnil;
7692
7693   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
7694                doc: /* Specify the coding system for write operations.
7695 Programs bind this variable with `let', but you should not set it globally.
7696 If the value is a coding system, it is used for encoding of output,
7697 when writing it to a file and when sending it to a file or subprocess.
7698
7699 If this does not specify a coding system, an appropriate element
7700 is used from one of the coding system alists:
7701 There are three such tables, `file-coding-system-alist',
7702 `process-coding-system-alist', and `network-coding-system-alist'.
7703 For output to files, if the above procedure does not specify a coding system,
7704 the value of `buffer-file-coding-system' is used.  */);
7705   Vcoding_system_for_write = Qnil;
7706
7707   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
7708                doc: /* Coding system used in the latest file or process I/O.
7709 Also set by `encode-coding-region', `decode-coding-region',
7710 `encode-coding-string' and `decode-coding-string'.  */);
7711   Vlast_coding_system_used = Qnil;
7712
7713   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
7714                doc: /* *Non-nil means always inhibit code conversion of end-of-line format.
7715 See info node `Coding Systems' and info node `Text and Binary' concerning
7716 such conversion.  */);
7717   inhibit_eol_conversion = 0;
7718
7719   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
7720                doc: /* Non-nil means process buffer inherits coding system of process output.
7721 Bind it to t if the process output is to be treated as if it were a file
7722 read from some filesystem.  */);
7723   inherit_process_coding_system = 0;
7724
7725   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
7726                doc: /* Alist to decide a coding system to use for a file I/O operation.
7727 The format is ((PATTERN . VAL) ...),
7728 where PATTERN is a regular expression matching a file name,
7729 VAL is a coding system, a cons of coding systems, or a function symbol.
7730 If VAL is a coding system, it is used for both decoding and encoding
7731 the file contents.
7732 If VAL is a cons of coding systems, the car part is used for decoding,
7733 and the cdr part is used for encoding.
7734 If VAL is a function symbol, the function must return a coding system
7735 or a cons of coding systems which are used as above.  The function gets
7736 the arguments with which `find-operation-coding-system' was called.
7737
7738 See also the function `find-operation-coding-system'
7739 and the variable `auto-coding-alist'.  */);
7740   Vfile_coding_system_alist = Qnil;
7741
7742   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
7743     doc: /* Alist to decide a coding system to use for a process I/O operation.
7744 The format is ((PATTERN . VAL) ...),
7745 where PATTERN is a regular expression matching a program name,
7746 VAL is a coding system, a cons of coding systems, or a function symbol.
7747 If VAL is a coding system, it is used for both decoding what is received
7748 from the program and encoding what is sent to the program.
7749 If VAL is a cons of coding systems, the car part is used for decoding,
7750 and the cdr part is used for encoding.
7751 If VAL is a function symbol, the function must return a coding system
7752 or a cons of coding systems which are used as above.
7753
7754 See also the function `find-operation-coding-system'.  */);
7755   Vprocess_coding_system_alist = Qnil;
7756
7757   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
7758     doc: /* Alist to decide a coding system to use for a network I/O operation.
7759 The format is ((PATTERN . VAL) ...),
7760 where PATTERN is a regular expression matching a network service name
7761 or is a port number to connect to,
7762 VAL is a coding system, a cons of coding systems, or a function symbol.
7763 If VAL is a coding system, it is used for both decoding what is received
7764 from the network stream and encoding what is sent to the network stream.
7765 If VAL is a cons of coding systems, the car part is used for decoding,
7766 and the cdr part is used for encoding.
7767 If VAL is a function symbol, the function must return a coding system
7768 or a cons of coding systems which are used as above.
7769
7770 See also the function `find-operation-coding-system'.  */);
7771   Vnetwork_coding_system_alist = Qnil;
7772
7773   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
7774                doc: /* Coding system to use with system messages.
7775 Also used for decoding keyboard input on X Window system.  */);
7776   Vlocale_coding_system = Qnil;
7777
7778   /* The eol mnemonics are reset in startup.el system-dependently.  */
7779   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
7780                doc: /* *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
7781   eol_mnemonic_unix = build_string (":");
7782
7783   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
7784                doc: /* *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
7785   eol_mnemonic_dos = build_string ("\\");
7786
7787   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
7788                doc: /* *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
7789   eol_mnemonic_mac = build_string ("/");
7790
7791   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
7792                doc: /* *String displayed in mode line when end-of-line format is not yet determined.  */);
7793   eol_mnemonic_undecided = build_string (":");
7794
7795   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
7796                doc: /* *Non-nil enables character translation while encoding and decoding.  */);
7797   Venable_character_translation = Qt;
7798
7799   DEFVAR_LISP ("standard-translation-table-for-decode",
7800                &Vstandard_translation_table_for_decode,
7801                doc: /* Table for translating characters while decoding.  */);
7802   Vstandard_translation_table_for_decode = Qnil;
7803
7804   DEFVAR_LISP ("standard-translation-table-for-encode",
7805                &Vstandard_translation_table_for_encode,
7806                doc: /* Table for translating characters while encoding.  */);
7807   Vstandard_translation_table_for_encode = Qnil;
7808
7809   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
7810                doc: /* Alist of charsets vs revision numbers.
7811 While encoding, if a charset (car part of an element) is found,
7812 designate it with the escape sequence identifying revision (cdr part of the element).  */);
7813   Vcharset_revision_alist = Qnil;
7814
7815   DEFVAR_LISP ("default-process-coding-system",
7816                &Vdefault_process_coding_system,
7817                doc: /* Cons of coding systems used for process I/O by default.
7818 The car part is used for decoding a process output,
7819 the cdr part is used for encoding a text to be sent to a process.  */);
7820   Vdefault_process_coding_system = Qnil;
7821
7822   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
7823                doc: /* Table of extra Latin codes in the range 128..159 (inclusive).
7824 This is a vector of length 256.
7825 If Nth element is non-nil, the existence of code N in a file
7826 \(or output of subprocess) doesn't prevent it from being detected as
7827 a coding system of ISO 2022 variant which has a flag
7828 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
7829 or reading output of a subprocess.
7830 Only 128th through 159th elements have a meaning.  */);
7831   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
7832
7833   DEFVAR_LISP ("select-safe-coding-system-function",
7834                &Vselect_safe_coding_system_function,
7835                doc: /* Function to call to select safe coding system for encoding a text.
7836
7837 If set, this function is called to force a user to select a proper
7838 coding system which can encode the text in the case that a default
7839 coding system used in each operation can't encode the text.
7840
7841 The default value is `select-safe-coding-system' (which see).  */);
7842   Vselect_safe_coding_system_function = Qnil;
7843
7844   DEFVAR_BOOL ("coding-system-require-warning",
7845                &coding_system_require_warning,
7846                doc: /* Internal use only.
7847 If non-nil, on writing a file, `select-safe-coding-system-function' is
7848 called even if `coding-system-for-write' is non-nil.  The command
7849 `universal-coding-system-argument' binds this variable to t temporarily.  */);
7850   coding_system_require_warning = 0;
7851
7852
7853   DEFVAR_BOOL ("inhibit-iso-escape-detection",
7854                &inhibit_iso_escape_detection,
7855                doc: /* If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
7856
7857 By default, on reading a file, Emacs tries to detect how the text is
7858 encoded.  This code detection is sensitive to escape sequences.  If
7859 the sequence is valid as ISO2022, the code is determined as one of
7860 the ISO2022 encodings, and the file is decoded by the corresponding
7861 coding system (e.g. `iso-2022-7bit').
7862
7863 However, there may be a case that you want to read escape sequences in
7864 a file as is.  In such a case, you can set this variable to non-nil.
7865 Then, as the code detection ignores any escape sequences, no file is
7866 detected as encoded in some ISO2022 encoding.  The result is that all
7867 escape sequences become visible in a buffer.
7868
7869 The default value is nil, and it is strongly recommended not to change
7870 it.  That is because many Emacs Lisp source files that contain
7871 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
7872 in Emacs's distribution, and they won't be decoded correctly on
7873 reading if you suppress escape sequence detection.
7874
7875 The other way to read escape sequences in a file without decoding is
7876 to explicitly specify some coding system that doesn't use ISO2022's
7877 escape sequence (e.g. `latin-1') on reading by \\[universal-coding-system-argument].  */);
7878   inhibit_iso_escape_detection = 0;
7879
7880   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
7881                doc: /* Char table for translating self-inserting characters.
7882 This is applied to the result of input methods, not their input.  See also
7883 `keyboard-translate-table'.  */);
7884     Vtranslation_table_for_input = Qnil;
7885 }
7886
7887 /* Return the strerror message for ERROR_NUMBER.  When
7888    `locale-coding-system' is non-nil, the message is first run through
7889    code_convert_string_norecord and the result points into that Lisp
7890    string's data (NOTE(review): confirm its lifetime suits callers).  */
7891 char *
7892 emacs_strerror (error_number)
7893      int error_number;
7894 {
7895   char *message;
7896   Lisp_Object converted;
7897
7898   synchronize_system_messages_locale ();
7899   message = strerror (error_number);
7900   if (NILP (Vlocale_coding_system))
7901     return message;
7902   converted = code_convert_string_norecord (build_string (message),
7903                                             Vlocale_coding_system, 0);
7904   return (char *) SDATA (converted);
7905 }
7906
7907 #endif /* emacs */
7908
7909 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
7910    (do not change this comment) */