src/coding.c

   1 /* Coding system handler (conversion, detection, etc.).
   2    Copyright (C) 1995, 1997, 1998, 2002 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4    Copyright (C) 2001,2002  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2, or (at your option)
  11 any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs; see the file COPYING.  If not, write to
  20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  21 Boston, MA 02111-1307, USA.  */
  22
  23 /*** TABLE OF CONTENTS ***
  24
  25   0. General comments
  26   1. Preamble
  27   2. Emacs' internal format (emacs-mule) handlers
  28   3. ISO2022 handlers
  29   4. Shift-JIS and BIG5 handlers
  30   5. CCL handlers
  31   6. End-of-line handlers
  32   7. C library functions
  33   8. Emacs Lisp library functions
  34   9. Post-amble
  35
  36 */
  37
  38 /*** 0. General comments ***/
  39
  40
  41 /*** GENERAL NOTE on CODING SYSTEMS ***
  42
  43   A coding system is an encoding mechanism for one or more character
  44   sets.  Here's a list of coding systems which Emacs can handle.  When
  45   we say "decode", it means converting some other coding system to
  46   Emacs' internal format (emacs-mule), and when we say "encode",
  47   it means converting the coding system emacs-mule to some other
  48   coding system.
  49
  50   0. Emacs' internal format (emacs-mule)
  51
  52   Emacs itself holds a multi-lingual character in buffers and strings
  53   in a special format.  Details are described in section 2.
  54
  55   1. ISO2022
  56
  57   The most famous coding system for multiple character sets.  X's
  58   Compound Text, various EUCs (Extended Unix Code), and coding
  59   systems used in Internet communication such as ISO-2022-JP are
  60   all variants of ISO2022.  Details are described in section 3.
  61
  62   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  63
  64   A coding system to encode character sets: ASCII, JISX0201, and
  65   JISX0208.  Widely used for PC's in Japan.  Details are described in
  66   section 4.
  67
  68   3. BIG5
  69
  70   A coding system to encode the character sets ASCII and Big5.  Widely
  71   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  72   described in section 4.  In this file, when we write "BIG5"
  73   (all uppercase), we mean the coding system, and when we write
  74   "Big5" (capitalized), we mean the character set.
  75
  76   4. Raw text
  77
  78   A coding system for text containing random 8-bit code.  Emacs does
  79   no code conversion on such text except for end-of-line format.
  80
  81   5. Other
  82
  83   If a user wants to read/write text encoded in a coding system not
  84   listed above, he can supply a decoder and an encoder for it as CCL
  85   (Code Conversion Language) programs.  Emacs executes the CCL program
  86   while reading/writing.
  87
  88   Emacs represents a coding system by a Lisp symbol that has a property
  89   `coding-system'.  But, before actually using the coding system, the
  90   information about it is set in a structure of type `struct
  91   coding_system' for rapid processing.  See section 6 for more details.
  92
  93 */
  94
  95 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  96
  97   How end-of-line of text is encoded depends on the operating system.
  98   For instance, Unix's format is just one byte of `line-feed' code,
  99   whereas DOS's format is two-byte sequence of `carriage-return' and
 100   `line-feed' codes.  MacOS's format is usually one byte of
 101   `carriage-return'.
 102
 103   Since text character encoding and end-of-line encoding are
 104   independent, any coding system described above can have any
 105   end-of-line format.  So Emacs has information about end-of-line
 106   format in each coding-system.  See section 6 for more details.
 107
 108 */
 109
 110 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 111
 112   These functions check if a text between SRC and SRC_END is encoded
 113   in the coding system category XXX.  Each returns an integer value in
 114   which appropriate flag bits for the category XXX are set.  The flag
 115   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 116   template for these functions.  If MULTIBYTEP is nonzero, 8-bit codes
 117   of the range 0x80..0x9F are in multibyte form.  */
 118 #if 0
 119 int
 120 detect_coding_emacs_mule (src, src_end, multibytep)
 121      unsigned char *src, *src_end;
 122      int multibytep;
 123 {
 124   ...
 125 }
 126 #endif
 127
 128 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 129
 130   These functions decode SRC_BYTES length of unibyte text at SOURCE
 131   encoded in CODING to Emacs' internal format.  The resulting
 132   multibyte text goes to a place pointed to by DESTINATION, the length
 133   of which should not exceed DST_BYTES.
 134
 135   These functions set the information about original and decoded texts
 136   in the members `produced', `produced_char', `consumed', and
 137   `consumed_char' of the structure *CODING.  They also set the member
 138   `result' to one of CODING_FINISH_XXX indicating how the decoding
 139   finished.
 140
 141   DST_BYTES zero means that the source area and destination area are
 142   overlapped, which means that we can produce a decoded text until it
 143   reaches the head of the not-yet-decoded source text.
 144
 145   Below is a template for these functions.  */
 146 #if 0
 147 static void
 148 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 149      struct coding_system *coding;
 150      unsigned char *source, *destination;
 151      int src_bytes, dst_bytes;
 152 {
 153   ...
 154 }
 155 #endif
 156
 157 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 158
 159   These functions encode SRC_BYTES length text at SOURCE from Emacs'
 160   internal multibyte format to CODING.  The resulting unibyte text
 161   goes to a place pointed to by DESTINATION, the length of which
 162   should not exceed DST_BYTES.
 163
 164   These functions set the information about original and encoded texts
 165   in the members `produced', `produced_char', `consumed', and
 166   `consumed_char' of the structure *CODING.  They also set the member
 167   `result' to one of CODING_FINISH_XXX indicating how the encoding
 168   finished.
 169
 170   DST_BYTES zero means that the source area and destination area are
 171   overlapped, which means that we can produce encoded text until it
 172   reaches the head of the not-yet-encoded source text.
 173
 174   Below is a template for these functions.  */
 175 #if 0
 176 static void
 177 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 178      struct coding_system *coding;
 179      unsigned char *source, *destination;
 180      int src_bytes, dst_bytes;
 181 {
 182   ...
 183 }
 184 #endif
 185
 186 /*** COMMONLY USED MACROS ***/
 187
 188 /* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
 189    get one and two bytes from the source text respectively.
 190    If there are not enough bytes in the source, they set
 191    coding->result to CODING_FINISH_INSUFFICIENT_SRC and jump to
 192    `label_end_of_loop' without consuming anything.  The caller
 193    should set variables `coding', `src' and `src_end' to appropriate
 194    pointers in advance, and must provide the label.  These macros
        are called from decoding routines `decode_coding_XXX', thus it
        is assumed that the source text is unibyte.  */
 195
 196 #define ONE_MORE_BYTE(c1)                                       \
 197   do {                                                          \
 198     if (src >= src_end)                                         \
 199       {                                                         \
 200         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 201         goto label_end_of_loop;                                 \
 202       }                                                         \
 203     c1 = *src++;                                                \
 204   } while (0)
 205
     /* TWO_MORE_BYTES needs both bytes to be available (src + 1 <
        src_end); when only one byte is left, that byte is not
        consumed.  */
 206 #define TWO_MORE_BYTES(c1, c2)                                  \
 207   do {                                                          \
 208     if (src + 1 >= src_end)                                     \
 209       {                                                         \
 210         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 211         goto label_end_of_loop;                                 \
 212       }                                                         \
 213     c1 = *src++;                                                \
 214     c2 = *src++;                                                \
 215   } while (0)
 216
 217
 218 /* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte
 219    form if MULTIBYTEP is nonzero: a byte equal to
        LEADING_CODE_8_BIT_CONTROL is then taken as the first byte of a
        two-byte form, and C1 is set to the following byte minus 0x20.

        If the source ends before a byte that has to be read, set
        coding->result to CODING_FINISH_INSUFFICIENT_SRC and jump to
        `label_end_of_loop'.  This also covers a source that ends right
        after LEADING_CODE_8_BIT_CONTROL; in that case the leading byte
        has already been consumed and C1 holds it.

        The caller should set variables `coding', `src' and `src_end' in
        advance, and must provide the label.  */
 220
 221 #define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep)           \
 222   do {                                                          \
 223     if (src >= src_end)                                         \
 224       {                                                         \
 225         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 226         goto label_end_of_loop;                                 \
 227       }                                                         \
 228     c1 = *src++;                                                \
 229     if ((multibytep) && c1 == LEADING_CODE_8_BIT_CONTROL)       \
 230       {                                                         \
             /* Never fetch the trailing byte from beyond SRC_END.  */ \
             if (src >= src_end)                                     \
               {                                                     \
                 coding->result = CODING_FINISH_INSUFFICIENT_SRC;    \
                 goto label_end_of_loop;                             \
               }                                                     \
             c1 = *src++ - 0x20;                                     \
           }                                                         \
 231   } while (0)
 232
 233 /* Set C to the next character of the source text pointed to by `src'
 234    and advance `src' past it.  If no source bytes remain, set
 235    coding->result to CODING_FINISH_INSUFFICIENT_SRC and jump to
 236    `label_end_of_loop'.  The caller should set variables `coding',
 237    `src', `src_end', and `translation_table' appropriately in
 238    advance.  This macro is used in encoding routines
 239    `encode_coding_XXX', thus it assumes that the source text is in
 240    multibyte form except for 8-bit characters.  8-bit characters are
 241    in multibyte form if coding->src_multibyte is nonzero, else they
        are represented by a single byte.

        If `translation_table' is non-nil, C is passed through
        translate_char before `src' is advanced.  */
 242
 243 #define ONE_MORE_CHAR(c)                                        \
 244   do {                                                          \
 245     int len = src_end - src;                                    \
 246     int bytes;                                                  \
 247     if (len <= 0)                                               \
 248       {                                                         \
 249         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
 250         goto label_end_of_loop;                                 \
 251       }                                                         \
 252     if (coding->src_multibyte                                   \
 253         || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes))        \
 254       c = STRING_CHAR_AND_LENGTH (src, len, bytes);             \
 255     else                                                        \
 256       c = *src, bytes = 1;                                      \
 257     if (!NILP (translation_table))                              \
 258       c = translate_char (translation_table, c, -1, 0, 0);      \
 259     src += bytes;                                               \
 260   } while (0)
 261
 262
 263 /* Produce a multibyte form of character C to `dst'.  If there's not
 264    enough space at `dst', set coding->result to
        CODING_FINISH_INSUFFICIENT_DST and jump to `label_end_of_loop'.
        The space limit is `dst_end', or `src' when `dst_bytes' is zero
        (source and destination areas overlap).
 265
 266    C is written to `dst', and coding->produced_char incremented, when
 267    no composition is in progress or when the composition method is
 268    COMPOSITION_RELATIVE or COMPOSITION_WITH_RULE.  For any other
 269    method C is not written to `dst'.

        While composing with a method other than COMPOSITION_RELATIVE, C
        is also recorded in coding->cmp_data->data (via
        CODING_ADD_COMPOSITION_COMPONENT), and
        coding->composition_rule_follows is set to nonzero unless the
        method is COMPOSITION_WITH_ALTCHARS.
 270
 271    This macro is used in decoding routines.  */
 272
 273 #define EMIT_CHAR(c)                                                    \
 274   do {                                                                  \
 275     if (! COMPOSING_P (coding)                                          \
 276         || coding->composing == COMPOSITION_RELATIVE                    \
 277         || coding->composing == COMPOSITION_WITH_RULE)                  \
 278       {                                                                 \
 279         int bytes = CHAR_BYTES (c);                                     \
 280         if ((dst + bytes) > (dst_bytes ? dst_end : src))                \
 281           {                                                             \
 282             coding->result = CODING_FINISH_INSUFFICIENT_DST;            \
 283             goto label_end_of_loop;                                     \
 284           }                                                             \
 285         dst += CHAR_STRING (c, dst);                                    \
 286         coding->produced_char++;                                        \
 287       }                                                                 \
 288                                                                         \
 289     if (COMPOSING_P (coding)                                            \
 290         && coding->composing != COMPOSITION_RELATIVE)                   \
 291       {                                                                 \
 292         CODING_ADD_COMPOSITION_COMPONENT (coding, c);                   \
 293         coding->composition_rule_follows                                \
 294           = coding->composing != COMPOSITION_WITH_ALTCHARS;             \
 295       }                                                                 \
 296   } while (0)
 297
 298
     /* Store the single byte C at `dst' and advance `dst'.  If `dst' has
        already reached its limit (`dst_end', or `src' when `dst_bytes'
        is zero), set coding->result to CODING_FINISH_INSUFFICIENT_DST
        and jump to `label_end_of_loop' instead.  The caller provides
        `coding', `src', `dst', `dst_end', `dst_bytes' and the label.  */
 299 #define EMIT_ONE_BYTE(c)                                        \
 300   do {                                                          \
 301     if (dst >= (dst_bytes ? dst_end : src))                     \
 302       {                                                         \
 303         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 304         goto label_end_of_loop;                                 \
 305       }                                                         \
 306     *dst++ = c;                                                 \
 307   } while (0)
 308
     /* Store the bytes C1 and C2, in that order, at `dst' and advance
        `dst' by two.  If fewer than two bytes of room remain before the
        limit (`dst_end', or `src' when `dst_bytes' is zero), store
        nothing, set coding->result to CODING_FINISH_INSUFFICIENT_DST and
        jump to `label_end_of_loop'.  */
 309 #define EMIT_TWO_BYTES(c1, c2)                                  \
 310   do {                                                          \
 311     if (dst + 2 > (dst_bytes ? dst_end : src))                  \
 312       {                                                         \
 313         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 314         goto label_end_of_loop;                                 \
 315       }                                                         \
 316     *dst++ = c1, *dst++ = c2;                                   \
 317   } while (0)
 318
     /* Copy the bytes in the range FROM (inclusive) to TO (exclusive) to
        `dst', advancing `dst'.  FROM must be a modifiable pointer
        variable: it is incremented by the copy loop and equals TO
        afterwards.  If the whole range does not fit before the limit
        (`dst_end', or `src' when `dst_bytes' is zero), copy nothing, set
        coding->result to CODING_FINISH_INSUFFICIENT_DST and jump to
        `label_end_of_loop'.  The arguments are not parenthesized in the
        expansion, so pass plain pointer names.  */
 319 #define EMIT_BYTES(from, to)                                    \
 320   do {                                                          \
 321     if (dst + (to - from) > (dst_bytes ? dst_end : src))        \
 322       {                                                         \
 323         coding->result = CODING_FINISH_INSUFFICIENT_DST;        \
 324         goto label_end_of_loop;                                 \
 325       }                                                         \
 326     while (from < to)                                           \
 327       *dst++ = *from++;                                         \
 328   } while (0)
 329
 330 \f
 331 /*** 1. Preamble ***/
 332
 333 #ifdef emacs
 334 #include <config.h>
 335 #endif
 336
 337 #include <stdio.h>
 338
 339 #ifdef emacs
 340
 341 #include "lisp.h"
 342 #include "buffer.h"
 343 #include "charset.h"
 344 #include "composite.h"
 345 #include "ccl.h"
 346 #include "coding.h"
 347 #include "window.h"
 348
 349 #else  /* not emacs */
 350
 351 #include "mulelib.h"
 352
 353 #endif /* not emacs */
 354
     /* Lisp objects named Qxxx.  Within the visible part of this file,
        coding_safe_chars passes Qcoding_system to Fget as a property
        name and Qsafe_chars to Fplist_get as a property-list key.
        NOTE(review): presumably all of these are symbols set up by this
        file's initialization code, which is not visible here --
        confirm.  */
 355 Lisp_Object Qcoding_system, Qeol_type;
 356 Lisp_Object Qbuffer_file_coding_system;
 357 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 358 Lisp_Object Qno_conversion, Qundecided;
 359 Lisp_Object Qcoding_system_history;
 360 Lisp_Object Qsafe_chars;
 361 Lisp_Object Qvalid_codes;
 362
 363 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 364 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 365 Lisp_Object Qstart_process, Qopen_network_stream;
 366 Lisp_Object Qtarget_idx;
 367
     /* NOTE(review): presumably holds a Lisp function used to select a
        safe coding system; its use is not visible here -- confirm.  */
 368 Lisp_Object Vselect_safe_coding_system_function;
 369
     /* NOTE(review): purpose not evident from the visible part of this
        file -- confirm where this flag is read.  */
 370 int coding_system_require_warning;
 371
 372 /* Mnemonic string for each format of end-of-line.  */
 373 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 374 /* Mnemonic string to indicate that the format of end-of-line is not
 375    yet decided.  */
 376 Lisp_Object eol_mnemonic_undecided;
 377
 378 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 379    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 380 int system_eol_type;
 381
 382 #ifdef emacs
 383
 384 /* Information about which coding system is safe for which chars.
 385    The value has the form (GENERIC-LIST . NON-GENERIC-ALIST).
 386
 387    GENERIC-LIST is a list of generic coding systems which can encode
 388    any character.
 389
 390    NON-GENERIC-ALIST is an alist of non-generic coding systems vs the
 391    corresponding char table that contains safe chars.  */
 392 Lisp_Object Vcoding_system_safe_chars;
 393
     /* NOTE(review): presumably the list and the alist of defined coding
        systems; they are filled in outside the visible part of this
        file -- confirm.  */
 394 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 395
 396 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 397
 398 /* Coding system emacs-mule and raw-text are for converting only
 399    end-of-line format.  */
 400 Lisp_Object Qemacs_mule, Qraw_text;
 401
 402 /* Coding-systems are passed between Emacs Lisp programs and C internal
 403    routines by the following three variables.  */
 404 /* Coding-system for reading files and receiving data from process.  */
 405 Lisp_Object Vcoding_system_for_read;
 406 /* Coding-system for writing files and sending data to process.  */
 407 Lisp_Object Vcoding_system_for_write;
 408 /* Coding-system actually used in the latest I/O.  */
 409 Lisp_Object Vlast_coding_system_used;
 410
 411 /* A vector of length 256 which contains information about special
 412    Latin codes (especially for dealing with Microsoft codes).  */
 413 Lisp_Object Vlatin_extra_code_table;
 414
 415 /* Flag to inhibit code conversion of end-of-line format.  */
 416 int inhibit_eol_conversion;
 417
 418 /* Flag to inhibit ISO2022 escape sequence detection.  */
 419 int inhibit_iso_escape_detection;
 420
 421 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 422 int inherit_process_coding_system;
 423
 424 /* Coding system to be used to encode text for terminal display.  */
 425 struct coding_system terminal_coding;
 426
 427 /* Coding system to be used to encode text for terminal display when
 428    terminal coding system is nil.  */
 429 struct coding_system safe_terminal_coding;
 430
 431 /* Coding system of what is sent from terminal keyboard.  */
 432 struct coding_system keyboard_coding;
 433
 434 /* Default coding system to be used to write a file.  */
 435 struct coding_system default_buffer_file_coding;
 436
     /* NOTE(review): presumably alists used to choose a coding system for
        file, process, and network I/O respectively; their format is not
        visible here -- confirm.  */
 437 Lisp_Object Vfile_coding_system_alist;
 438 Lisp_Object Vprocess_coding_system_alist;
 439 Lisp_Object Vnetwork_coding_system_alist;
 440
     /* NOTE(review): presumably the coding system associated with the
        current locale -- confirm.  */
 441 Lisp_Object Vlocale_coding_system;
 442
 443 #endif /* emacs */
 444
     /* NOTE(review): presumably the symbols `coding-category' and
        `coding-category-index'; they are not used in the visible part of
        this file -- confirm.  */
 445 Lisp_Object Qcoding_category, Qcoding_category_index;
 446
 447 /* List of symbols `coding-category-xxx' ordered by priority.  */
 448 Lisp_Object Vcoding_category_list;
 449
 450 /* Table of coding categories (Lisp symbols).  */
 451 Lisp_Object Vcoding_category_table;
 452
 453 /* Table of the symbol name for each coding category.  The initializer
        lists 15 names.  NOTE(review): the order presumably has to match
        the CODING_CATEGORY_IDX_XXX index values, which are declared
        outside this file -- confirm before reordering.  */
 454 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
 455   "coding-category-emacs-mule",
 456   "coding-category-sjis",
 457   "coding-category-iso-7",
 458   "coding-category-iso-7-tight",
 459   "coding-category-iso-8-1",
 460   "coding-category-iso-8-2",
 461   "coding-category-iso-7-else",
 462   "coding-category-iso-8-else",
 463   "coding-category-ccl",
 464   "coding-category-big5",
 465   "coding-category-utf-8",
 466   "coding-category-utf-16-be",
 467   "coding-category-utf-16-le",
 468   "coding-category-raw-text",
 469   "coding-category-binary"
 470 };
 471
 472 /* Table of pointers to the coding system corresponding to each coding
 473    category.  */
 474 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 475
 476 /* Table of coding category masks.  The Nth element is the mask for the
 477    coding category whose priority is N.  */
 478 static
 479 int coding_priorities[CODING_CATEGORY_IDX_MAX];
 480
 481 /* Flag to tell whether we look up a translation table on character
 482    code conversion.  */
 483 Lisp_Object Venable_character_translation;
 484 /* Standard translation table to look up on decoding (reading).  */
 485 Lisp_Object Vstandard_translation_table_for_decode;
 486 /* Standard translation table to look up on encoding (writing).  */
 487 Lisp_Object Vstandard_translation_table_for_encode;
 488
     /* NOTE(review): presumably symbols naming translation-table
        properties; they are not used in the visible part of this file --
        confirm.  */
 489 Lisp_Object Qtranslation_table;
 490 Lisp_Object Qtranslation_table_id;
 491 Lisp_Object Qtranslation_table_for_decode;
 492 Lisp_Object Qtranslation_table_for_encode;
 493
 494 /* Alist of charsets vs revision number.  */
 495 Lisp_Object Vcharset_revision_alist;
 496
 497 /* Default coding systems used for process I/O.  */
 498 Lisp_Object Vdefault_process_coding_system;
 499
 500 /* Char table for translating Quail and self-inserting input.  */
 501 Lisp_Object Vtranslation_table_for_input;
 502
 503 /* Global flag to tell that we can't call post-read-conversion and
 504    pre-write-conversion functions.  Usually the value is zero, but it
 505    is set to 1 temporarily while such functions are running.  This is
 506    to avoid infinitely recursive calls.  */
 507 static int inhibit_pre_post_conversion;
 508
 509 Lisp_Object Qchar_coding_system;
 510
 511 /* Return the `safe-chars' property of CODING_SYSTEM (symbol) if that
 512    property is a char table; otherwise return Qt.

        CODING_SYSTEM is not validated: the value of its `coding-system'
        property is used as a vector whose slot 3 holds the property
        list that is searched for `safe-chars'.  */
 513
 514 Lisp_Object
 515 coding_safe_chars (coding_system)
 516      Lisp_Object coding_system;
 517 {
 518   Lisp_Object coding_spec, plist, safe_chars;
 519
 520   coding_spec = Fget (coding_system, Qcoding_system);
 521   plist = XVECTOR (coding_spec)->contents[3];
 522   safe_chars = Fplist_get (plist, Qsafe_chars);
 523   return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
 524 }
 525
     /* Nonzero if character C is safe according to SAFE_CHARS, i.e. if
        SAFE_CHARS is Qt or the entry for C in the char table SAFE_CHARS
        is non-nil.  SAFE_CHARS is expected to be a value returned by
        coding_safe_chars.  */
 526 #define CODING_SAFE_CHAR_P(safe_chars, c) \
 527   (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c)))
 528
 529 \f
 530 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 531
 532 /* Emacs' internal format for representation of multiple character
 533    sets is a kind of multi-byte encoding, i.e. characters are
 534    represented by variable-length sequences of one-byte codes.
 535
 536    ASCII characters and control characters (e.g. `tab', `newline') are
 537    represented by one-byte sequences which are their ASCII codes, in
 538    the range 0x00 through 0x7F.
 539
 540    8-bit characters of the range 0x80..0x9F are represented by
 541    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
 542    code + 0x20).
 543
 544    8-bit characters of the range 0xA0..0xFF are represented by
 545    one-byte sequences which are their 8-bit code.
 546
 547    The other characters are represented by a sequence of `base
 548    leading-code', optional `extended leading-code', and one or two
 549    `position-code's.  The length of the sequence is determined by the
 550    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
 551    whereas extended leading-code and position-code take the range 0xA0
 552    through 0xFF.  See `charset.h' for more details about leading-code
 553    and position-code.
 554
 555    --- CODE RANGE of Emacs' internal format ---
 556    character set        range
 557    -------------        -----
 558    ascii                0x00..0x7F
 559    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
 560    eight-bit-graphic    0xA0..0xFF
 561    ELSE                 0x81..0x9D + [0xA0..0xFF]+
 562    ---------------------------------------------
 563
 564    As this is the internal character representation, the format is
 565    usually not used externally (i.e. in a file or in a data sent to a
 566    process).  But, it is possible to have a text externally in this
 567    format (i.e. by encoding by the coding system `emacs-mule').
 568
 569    In that case, a sequence of one-byte codes has a slightly different
 570    form.
 571
 572    Firstly, all characters in eight-bit-control are represented by
 573    one-byte sequences which are their 8-bit code.
 574
 575    Next, character composition data are represented by a byte
 576    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
 577    where,
 578         METHOD is 0xF0 plus one of the composition methods (enum
 579         composition_method),
 580
 581         BYTES is 0xA0 plus the byte length of this composition data,
 582
 583         CHARS is 0xA0 plus the number of characters composed by this
 584         data,
 585
 586         COMPONENTs are characters in multibyte form or composition
 587         rules encoded as two bytes of ASCII codes.
 588
 589    In addition, for backward compatibility, the following formats are
 590    also recognized as composition data on decoding.
 591
 592    0x80 MSEQ ...
 593    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
 594
 595    Here,
 596         MSEQ is a multibyte form, but in one of these special formats:
 597           ASCII: 0xA0 ASCII_CODE+0x80,
 598           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
 599         RULE is a one byte code of the range 0xA0..0xF0 that
 600         represents a composition rule.
 601   */
 602
     /* Table of 256 emacs_code_class_type values.  NOTE(review):
        presumably indexed by byte value and filled in by initialization
        code outside the visible part of this file -- confirm.  */
 603 enum emacs_code_class_type emacs_code_class[256];
 604
 605 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 606    Check whether the text between SRC and SRC_END is encoded in Emacs'
 607    internal format.  If it is, return CODING_CATEGORY_MASK_EMACS_MULE,
        else return 0.  MULTIBYTEP is passed on to
        ONE_MORE_BYTE_CHECK_MULTIBYTE.

        The text is rejected as soon as ISO_CODE_ESC, ISO_CODE_SI or
        ISO_CODE_SO is seen, or when a byte in the range 0x81..0x9F does
        not start a sequence accepted by UNIBYTE_STR_AS_MULTIBYTE_P.
        Reaching the end of the source without such a rejection counts as
        a match.  */
 608
 609 static int
 610 detect_coding_emacs_mule (src, src_end, multibytep)
 611       unsigned char *src, *src_end;
 612       int multibytep;
 613 {
 614   unsigned char c;
 615   int composing = 0;
 616   /* Dummy for ONE_MORE_BYTE.  The byte-fetching macro stores into
        coding->result, so it needs some object to write to; the stored
        value is never read here.  */
 617   struct coding_system dummy_coding;
 618   struct coding_system *coding = &dummy_coding;
 619
 620   while (1)
 621     {
           /* Jumps to label_end_of_loop when the source is exhausted.  */
 622       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
 623
 624       if (composing)
 625         {
               /* Inside an old-style composition sequence: map the
                  component byte back before classifying it below.  A
                  byte below 0xA0 ends the sequence, 0xA0 introduces a
                  component whose next byte has its high bit cleared,
                  and any other byte has 0x20 subtracted.  */
 626           if (c < 0xA0)
 627             composing = 0;
 628           else if (c == 0xA0)
 629             {
 630               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
 631               c &= 0x7F;
 632             }
 633           else
 634             c -= 0x20;
 635         }
 636
 637       if (c < 0x20)
 638         {
               /* These control codes rule out this category.  */
 639           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 640             return 0;
 641         }
 642       else if (c >= 0x80 && c < 0xA0)
 643         {
 644           if (c == 0x80)
 645             /* Old leading code for a composite character.  */
 646             composing = 1;
 647           else
 648             {
                   /* Validate the sequence starting at the byte just
                      consumed and, if it is accepted, skip all of its
                      BYTES bytes.  */
 649               unsigned char *src_base = src - 1;
 650               int bytes;
 651
 652               if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base,
 653                                                bytes))
 654                 return 0;
 655               src = src_base + bytes;
 656             }
 657         }
 658     }
       /* Reached only through the `goto' in the byte-fetching macro, i.e.
          when the source ran out without a rejection.  */
 659  label_end_of_loop:
 660   return CODING_CATEGORY_MASK_EMACS_MULE;
 661 }
 662
 663
 664 /* Record the starting position START and METHOD of one composition.
        This opens a new four-slot entry at the current end of
        CODING->cmp_data->data and remembers its index in
        CODING->cmp_data_start:

          data[0] = -1 (overwritten by CODING_ADD_COMPOSITION_END)
          data[1] = cmp_data->char_offset + START
          data[2] is left untouched here (set by CODING_ADD_COMPOSITION_END)
          data[3] = METHOD

        cmp_data->used is advanced by 4.  No check is made here that the
        data array has room for the entry.  */
 665
 666 #define CODING_ADD_COMPOSITION_START(coding, start, method)     \
 667   do {                                                          \
 668     struct composition_data *cmp_data = coding->cmp_data;       \
 669     int *data = cmp_data->data + cmp_data->used;                \
 670     coding->cmp_data_start = cmp_data->used;                    \
 671     data[0] = -1;                                               \
 672     data[1] = cmp_data->char_offset + start;                    \
 673     data[3] = (int) method;                                     \
 674     cmp_data->used += 4;                                        \
 675   } while (0)
 676
 677 /* Record the ending position END of the current composition.  In the
        entry that starts at index CODING->cmp_data_start, slot 0 is set
        to the entry's length (cmp_data->used - cmp_data_start) and
        slot 2 to cmp_data->char_offset + END.  */
 678
 679 #define CODING_ADD_COMPOSITION_END(coding, end)                 \
 680   do {                                                          \
 681     struct composition_data *cmp_data = coding->cmp_data;       \
 682     int *data = cmp_data->data + coding->cmp_data_start;        \
 683     data[0] = cmp_data->used - coding->cmp_data_start;          \
 684     data[2] = cmp_data->char_offset + end;                      \
 685   } while (0)
 686
 687 /* Record one COMPONENT (alternate character or composition rule) by
        appending it to CODING->cmp_data->data.  If the current entry
        thereby reaches COMPOSITION_DATA_MAX_BUNCH_LENGTH slots, the
        composition is closed at CODING->produced_char with
        CODING_ADD_COMPOSITION_END and CODING->composing is reset to
        COMPOSITION_NO.  */
 688
 689 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component)             \
 690   do {                                                                  \
 691     coding->cmp_data->data[coding->cmp_data->used++] = component;       \
 692     if (coding->cmp_data->used - coding->cmp_data_start                 \
 693         == COMPOSITION_DATA_MAX_BUNCH_LENGTH)                           \
 694       {                                                                 \
 695         CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
 696         coding->composing = COMPOSITION_NO;                             \
 697       }                                                                 \
 698   } while (0)
 699
 700
 701 /* Get one byte from the data pointed to by SRC and increment SRC.  If
 702    SRC is not less than SRC_END, return -1 without incrementing SRC.
        `src' and `src_end' are the caller's variables of those names;
        the macro takes no arguments.  */
 703
 704 #define SAFE_ONE_MORE_BYTE() (src >= src_end ? -1 : *src++)
 705
 706
 707 /* Decode a character represented as a component of composition
 708    sequence of Emacs 20 style at SRC.  Set C to that character, store
 709    its multibyte form sequence at P, and set P to the end of that
 710    sequence.  If no valid character is found, set C to -1.

        C must be a signed integer variable (it receives -1), and P a
        pointer to a writable byte buffer.  The caller's `src' and
        `src_end' are used through SAFE_ONE_MORE_BYTE.

        Two component forms are accepted:
          ASCII: 0xA0 followed by ASCII_CODE+0x80,
          other: LEADING_CODE+0x20 followed by the remaining bytes.
        A first byte for which CHAR_HEAD_P holds is not a component and
        yields -1.

        Note that in the second form bytes are stored at P before the
        sequence is validated, so P may have been advanced even when C
        ends up as -1.  */
 711
 712 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p)                \
 713   do {                                                          \
 714     int bytes;                                                  \
 715                                                                 \
 716     c = SAFE_ONE_MORE_BYTE ();                                  \
 717     if (c < 0)                                                  \
 718       break;                                                    \
 719     if (CHAR_HEAD_P (c))                                        \
 720       c = -1;                                                   \
 721     else if (c == 0xA0)                                         \
 722       {                                                         \
 723         c = SAFE_ONE_MORE_BYTE ();                              \
 724         if (c < 0xA0)                                           \
 725           c = -1;                                               \
 726         else                                                    \
 727           {                                                     \
                 /* The byte is ASCII_CODE+0x80; recover ASCII_CODE.  */ \
 728             c -= 0x80;                                          \
 729             *p++ = c;                                           \
 730           }                                                     \
 731       }                                                         \
 732     else if (BASE_LEADING_CODE_P (c - 0x20))                    \
 733       {                                                         \
 734         unsigned char *p0 = p;                                  \
 735                                                                 \
 736         c -= 0x20;                                              \
 737         *p++ = c;                                               \
 738         bytes = BYTES_BY_CHAR_HEAD (c);                         \
 739         while (--bytes)                                         \
 740           {                                                     \
 741             c = SAFE_ONE_MORE_BYTE ();                          \
 742             if (c < 0)                                          \
 743               break;                                            \
 744             *p++ = c;                                           \
 745           }                                                     \
             /* Accept only if the collected bytes form one valid    \
                multibyte sequence.  */                              \
 746         if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes))     \
 747           c = STRING_CHAR (p0, bytes);                          \
 748         else                                                    \
 749           c = -1;                                               \
 750       }                                                         \
 751     else                                                        \
 752       c = -1;                                                   \
 753   } while (0)
 754
 755
 756 /* Decode a composition rule represented as a component of composition
 757    sequence of Emacs 20 style at SRC.  Set C to the rule.  If no
 758    valid rule is found, set C to -1.

        A rule is one byte in the range 0xA0..0xF0.  After subtracting
        0xA0 the value is split as GREF * 9 + NREF and re-encoded with
        COMPOSITION_ENCODE_RULE.  The byte is fetched with
        SAFE_ONE_MORE_BYTE; `src', `src_end', `gref' and `nref' must be
        variables of the enclosing scope.  */
 759
 760 #define DECODE_EMACS_MULE_COMPOSITION_RULE(c)           \
 761   do {                                                  \
 762     c = SAFE_ONE_MORE_BYTE ();                          \
 763     c -= 0xA0;                                          \
 764     if (c < 0 || c >= 81)                               \
 765       c = -1;                                           \
 766     else                                                \
 767       {                                                 \
 768         gref = c / 9, nref = c % 9;                     \
 769         c = COMPOSITION_ENCODE_RULE (gref, nref);       \
 770       }                                                 \
 771   } while (0)
 772
 773
 774 /* Decode composition sequence encoded by `emacs-mule' at the source
 775    pointed by SRC.  SRC_END is the end of source.  Store information
 776    of the composition in CODING->cmp_data.
 777
 778    For backward compatibility, decode also a composition sequence of
 779    Emacs 20 style.  In that case, the composition sequence contains
 780    characters that should be extracted into a buffer or string.  Store
 781    those characters at *DESTINATION in multibyte form.
 782
 783    If we encounter an invalid byte sequence, return 0.
 784    If we encounter an insufficient source or destination, or
 785    insufficient space in CODING->cmp_data, return -1.
 786    Otherwise, return consumed bytes in the source.
 787
        The current style sequence handled here is:
          0x80, 0xF0 + METHOD, 0xA0 + LENGTH, 0xA0 + NCHARS, COMPONENT ...
        where LENGTH is the byte length of the whole sequence and NCHARS
        is the number of composed characters.  With the rule-based
        methods, every other component is a rule written as the two
        bytes 32 + GREF and 32 + NREF.

        ONE_MORE_BYTE presumably jumps to label_end_of_loop (and thus
        makes this function return -1) when the source is exhausted --
        TODO confirm against its definition.
 788 */
 789 static INLINE int
 790 decode_composition_emacs_mule (coding, src, src_end,
 791                                destination, dst_end, dst_bytes)
 792      struct coding_system *coding;
 793      unsigned char *src, *src_end, **destination, *dst_end;
 794      int dst_bytes;
 795 {
 796   unsigned char *dst = *destination;
 797   int method, data_len, nchars;
 798   unsigned char *src_base = src++;  /* Skip the introducing byte.  */
 799   /* Store components of composition.  */
 800   int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH];
 801   int ncomponent;
 802   /* Store multibyte form of characters to be composed.  This is for
 803      Emacs 20 style composition sequence.  */
 804   unsigned char buf[MAX_COMPOSITION_COMPONENTS * MAX_MULTIBYTE_LENGTH];
 805   unsigned char *bufp = buf;
 806   int c, i, gref, nref;
 807
       /* Make sure one more bunch of composition data can be recorded.  */
 808   if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
 809       >= COMPOSITION_DATA_SIZE)
 810     {
 811       coding->result = CODING_FINISH_INSUFFICIENT_CMP;
 812       return -1;
 813     }
 814
 815   ONE_MORE_BYTE (c);
 816   if (c - 0xF0 >= COMPOSITION_RELATIVE
 817            && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS)
 818     {
 819       int with_rule;
 820
 821       method = c - 0xF0;
 822       with_rule = (method == COMPOSITION_WITH_RULE
 823                    || method == COMPOSITION_WITH_RULE_ALTCHARS);
 824       ONE_MORE_BYTE (c);
 825       data_len = c - 0xA0;
 826       if (data_len < 4
 827           || src_base + data_len > src_end)
 828         return 0;
 829       ONE_MORE_BYTE (c);
 830       nchars = c - 0xA0;
           /* Validate the decoded count, not the raw byte: any byte up to
              0xA0 would otherwise yield a zero or negative NCHARS.  */
 831       if (nchars < 1)
 832         return 0;
 833       for (ncomponent = 0; src < src_base + data_len; ncomponent++)
 834         {
 835           /* If it is longer than this, it can't be valid.  */
 836           if (ncomponent >= COMPOSITION_DATA_MAX_BUNCH_LENGTH)
 837             return 0;
 838
 839           if (ncomponent % 2 && with_rule)
 840             {
 841               ONE_MORE_BYTE (gref);
 842               gref -= 32;
 843               ONE_MORE_BYTE (nref);
 844               nref -= 32;
 845               c = COMPOSITION_ENCODE_RULE (gref, nref);
 846             }
 847           else
 848             {
 849               int bytes;
 850               if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
 851                 c = STRING_CHAR (src, bytes);
 852               else
 853                 c = *src, bytes = 1;
 854               src += bytes;
 855             }
 856           component[ncomponent] = c;
 857         }
 858     }
 859   else
 860     {
 861       /* This may be an old Emacs 20 style format.  See the comment at
 862          the section 2 of this file.  */
 863       while (src < src_end && !CHAR_HEAD_P (*src)) src++;
 864       if (src == src_end
 865           && !(coding->mode & CODING_MODE_LAST_BLOCK))
 866         goto label_end_of_loop;
 867
           /* Limit decoding to the bytes before the next character head
              and restart just after the introducing byte.  */
 868       src_end = src;
 869       src = src_base + 1;
 870       if (c < 0xC0)
 871         {
 872           method = COMPOSITION_RELATIVE;
 873           for (ncomponent = 0; ncomponent < MAX_COMPOSITION_COMPONENTS;)
 874             {
 875               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 876               if (c < 0)
 877                 break;
 878               component[ncomponent++] = c;
 879             }
 880           if (ncomponent < 2)
 881             return 0;
 882           nchars = ncomponent;
 883         }
 884       else if (c == 0xFF)
 885         {
 886           method = COMPOSITION_WITH_RULE;
 887           src++;
 888           DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 889           if (c < 0)
 890             return 0;
 891           component[0] = c;
 892           for (ncomponent = 1;
 893                ncomponent < MAX_COMPOSITION_COMPONENTS * 2 - 1;)
 894             {
 895               DECODE_EMACS_MULE_COMPOSITION_RULE (c);
 896               if (c < 0)
 897                 break;
 898               component[ncomponent++] = c;
 899               DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp);
 900               if (c < 0)
 901                 break;
 902               component[ncomponent++] = c;
 903             }
 904           if (ncomponent < 3)
 905             return 0;
 906           nchars = (ncomponent + 1) / 2;
 907         }
 908       else
 909         return 0;
 910     }
 911
       /* Record the composition only when the characters extracted into
          BUF (Emacs 20 style only) fit in the destination.  */
 912   if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src))
 913     {
 914       CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method);
 915       for (i = 0; i < ncomponent; i++)
 916         CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]);
 917       CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars);
 918       if (buf < bufp)
 919         {
 920           unsigned char *p = buf;
 921           EMIT_BYTES (p, bufp);
 922           *destination += bufp - buf;
 923           coding->produced_char += nchars;
 924         }
 925       return (src - src_base);
 926     }
 927  label_end_of_loop:
 928   return -1;
 929 }
 930
 931 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".

        Copy SRC_BYTES bytes of emacs-mule text at SOURCE to DESTINATION,
        converting end-of-line according to CODING->eol_type, decoding
        composition sequences (introduced by 0x80) when CODING->cmp_data
        is non-null, and converting each byte that does not start a valid
        multibyte sequence with CHAR_STRING.  If DST_BYTES is zero, the
        destination is limited by the current source position instead of
        DESTINATION + DST_BYTES.

        On return CODING->consumed, CODING->consumed_char,
        CODING->produced and CODING->produced_char are updated, and
        CODING->result is set when the loop had to stop early.  */
 932
 933 static void
 934 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
 935      struct coding_system *coding;
 936      unsigned char *source, *destination;
 937      int src_bytes, dst_bytes;
 938 {
 939   unsigned char *src = source;
 940   unsigned char *src_end = source + src_bytes;
 941   unsigned char *dst = destination;
 942   unsigned char *dst_end = destination + dst_bytes;
 943   /* SRC_BASE remembers the start position in source in each loop.
 944      The loop will be exited when there's not enough source code, or
 945      when there's not enough destination area to produce a
 946      character.  */
 947   unsigned char *src_base;
 948
 949   coding->produced_char = 0;
 950   while ((src_base = src) < src_end)
 951     {
 952       unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
 953       int bytes;
 954
 955       if (*src == '\r')
 956         {
 957           int c = *src++;
 958
               /* Exactly one byte is stored below.  Never write past the
                  end of a separate destination buffer; stopping here
                  leaves SRC_BASE at this '\r' so it is not counted as
                  consumed.  */
               if (dst_bytes && dst >= dst_end)
                 {
                   coding->result = CODING_FINISH_INSUFFICIENT_DST;
                   break;
                 }
 959           if (coding->eol_type == CODING_EOL_CR)
 960             c = '\n';
 961           else if (coding->eol_type == CODING_EOL_CRLF)
 962             {
 963               ONE_MORE_BYTE (c);
 964               if (c != '\n')
 965                 {
 966                   src--;
 967                   c = '\r';
 968                 }
 969             }
 970           *dst++ = c;
 971           coding->produced_char++;
 972           continue;
 973         }
 974       else if (*src == '\n')
 975         {
 976           if ((coding->eol_type == CODING_EOL_CR
 977                || coding->eol_type == CODING_EOL_CRLF)
 978               && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
 979             {
 980               coding->result = CODING_FINISH_INCONSISTENT_EOL;
 981               goto label_end_of_loop;
 982             }
               /* Same guard as for '\r': one byte is stored below.  */
               if (dst_bytes && dst >= dst_end)
                 {
                   coding->result = CODING_FINISH_INSUFFICIENT_DST;
                   break;
                 }
 983           *dst++ = *src++;
 984           coding->produced_char++;
 985           continue;
 986         }
 987       else if (*src == 0x80 && coding->cmp_data)
 988         {
 989           /* Start of composition data.  */
 990           int consumed  = decode_composition_emacs_mule (coding, src, src_end,
 991                                                          &dst, dst_end,
 992                                                          dst_bytes);
 993           if (consumed < 0)
 994             goto label_end_of_loop;
 995           else if (consumed > 0)
 996             {
 997               src += consumed;
 998               continue;
 999             }
               /* CONSUMED is 0: not a valid composition sequence.  Treat
                  the 0x80 as an ordinary byte.  */
1000           bytes = CHAR_STRING (*src, tmp);
1001           p = tmp;
1002           src++;
1003         }
1004       else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
1005         {
1006           p = src;
1007           src += bytes;
1008         }
1009       else
1010         {
1011           bytes = CHAR_STRING (*src, tmp);
1012           p = tmp;
1013           src++;
1014         }
1015       if (dst + bytes >= (dst_bytes ? dst_end : src))
1016         {
1017           coding->result = CODING_FINISH_INSUFFICIENT_DST;
1018           break;
1019         }
1020       while (bytes--) *dst++ = *p++;
1021       coding->produced_char++;
1022     }
1023  label_end_of_loop:
1024   coding->consumed = coding->consumed_char = src_base - source;
1025   coding->produced = dst - destination;
1026 }
1027
1028
1029 /* Encode composition data stored at DATA into a special byte sequence
1030    starting by 0x80.  Update CODING->cmp_data_start and maybe
1031    CODING->cmp_data for the next call.

        The sequence is 0x80, 0xF0 + METHOD, 0xA0 + the byte length of
        the whole sequence, 0xA0 + the number of composed characters,
        then the components.  With the rule-based methods each rule is
        written between components as the two bytes 0x20 + GREF and
        0x20 + NREF.

        DATA[0] is the number of elements used by this composition,
        DATA[2] - DATA[1] is the number of composed characters, DATA[3]
        is the method and DATA[4]... are the components.

        This macro relies on `dst', `dst_end', `dst_bytes', `src' and
        `label_end_of_loop' of the enclosing function.  If the
        destination cannot hold the sequence plus 4 more bytes, it sets
        CODING->result to CODING_FINISH_INSUFFICIENT_DST and jumps to
        label_end_of_loop without storing anything.  */
1032
1033 #define ENCODE_COMPOSITION_EMACS_MULE(coding, data)                     \
1034   do {                                                                  \
1035     unsigned char buf[1024], *p0 = buf, *p;                             \
1036     int len = data[0];                                                  \
1037     int i;                                                              \
1038                                                                         \
1039     buf[0] = 0x80;                                                      \
1040     buf[1] = 0xF0 + data[3];    /* METHOD */                            \
1041     buf[3] = 0xA0 + (data[2] - data[1]); /* COMPOSED-CHARS */           \
1042     p = buf + 4;                                                        \
1043     if (data[3] == COMPOSITION_WITH_RULE                                \
1044         || data[3] == COMPOSITION_WITH_RULE_ALTCHARS)                   \
1045       {                                                                 \
1046         p += CHAR_STRING (data[4], p);                                  \
1047         for (i = 5; i < len; i += 2)                                    \
1048           {                                                             \
1049             int gref, nref;                                             \
1050              COMPOSITION_DECODE_RULE (data[i], gref, nref);             \
1051             *p++ = 0x20 + gref;                                         \
1052             *p++ = 0x20 + nref;                                         \
1053             p += CHAR_STRING (data[i + 1], p);                          \
1054           }                                                             \
1055       }                                                                 \
1056     else                                                                \
1057       {                                                                 \
1058         for (i = 4; i < len; i++)                                       \
1059           p += CHAR_STRING (data[i], p);                                \
1060       }                                                                 \
1061     buf[2] = 0xA0 + (p - buf);  /* COMPONENTS-BYTES */                  \
1062                                                                         \
1063     if (dst + (p - buf) + 4 > (dst_bytes ? dst_end : src))              \
1064       {                                                                 \
1065         coding->result = CODING_FINISH_INSUFFICIENT_DST;                \
1066         goto label_end_of_loop;                                         \
1067       }                                                                 \
1068     while (p0 < p)                                                      \
1069       *dst++ = *p0++;                                                   \
1070     coding->cmp_data_start += data[0];                                  \
1071     if (coding->cmp_data_start == coding->cmp_data->used                \
1072         && coding->cmp_data->next)                                      \
1073       {                                                                 \
1074         coding->cmp_data = coding->cmp_data->next;                      \
1075         coding->cmp_data_start = 0;                                     \
1076       }                                                                 \
1077   } while (0)
1078
1079
1080 static void encode_eol P_ ((struct coding_system *, const unsigned char *,
1081                             unsigned char *, int, int));
1082
     /* Encode SRC_BYTES bytes of text at SOURCE into emacs-mule at
        DESTINATION.  When CODING carries no composition data, simply
        delegate to encode_eol.  Otherwise copy the characters one by one,
        converting '\n' according to CODING->eol_type, and emit the
        encoded form of each recorded composition (see
        ENCODE_COMPOSITION_EMACS_MULE) just before the character at which
        it starts.

        The main loop has no exit condition of its own: it is left through
        label_end_of_loop, which ENCODE_COMPOSITION_EMACS_MULE jumps to
        when the destination is too small (presumably ONE_MORE_CHAR and
        the EMIT_XXX macros jump there too -- TODO confirm against their
        definitions).

        NOTE(review): CODING->consumed_char is read and incremented here
        but never reset; presumably the caller initializes it.  */
1083 static void
1084 encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
1085      struct coding_system *coding;
1086      unsigned char *source, *destination;
1087      int src_bytes, dst_bytes;
1088 {
1089   unsigned char *src = source;
1090   unsigned char *src_end = source + src_bytes;
1091   unsigned char *dst = destination;
1092   unsigned char *dst_end = destination + dst_bytes;
1093   unsigned char *src_base;
1094   int c;
1095   int char_offset;
1096   int *data;
1097
       /* Not used directly below; presumably referenced by ONE_MORE_CHAR
          -- TODO confirm.  */
1098   Lisp_Object translation_table;
1099
1100   translation_table = Qnil;
1101
1102   /* Optimization for the case that there's no composition.  */
1103   if (!coding->cmp_data || coding->cmp_data->used == 0)
1104     {
1105       encode_eol (coding, source, destination, src_bytes, dst_bytes);
1106       return;
1107     }
1108
1109   char_offset = coding->cmp_data->char_offset;
1110   data = coding->cmp_data->data + coding->cmp_data_start;
1111   while (1)
1112     {
1113       src_base = src;
1114
1115       /* If SRC starts a composition, encode the information about the
1116          composition in advance.  */
1117       if (coding->cmp_data_start < coding->cmp_data->used
1118           && char_offset + coding->consumed_char == data[1])
1119         {
1120           ENCODE_COMPOSITION_EMACS_MULE (coding, data);
               /* The macro may have advanced CODING->cmp_data and
                  CODING->cmp_data_start; reload the cached values.  */
1121           char_offset = coding->cmp_data->char_offset;
1122           data = coding->cmp_data->data + coding->cmp_data_start;
1123         }
1124
1125       ONE_MORE_CHAR (c);
1126       if (c == '\n' && (coding->eol_type == CODING_EOL_CRLF
1127                         || coding->eol_type == CODING_EOL_CR))
1128         {
1129           if (coding->eol_type == CODING_EOL_CRLF)
1130             EMIT_TWO_BYTES ('\r', c);
1131           else
1132             EMIT_ONE_BYTE ('\r');
1133         }
1134       else if (SINGLE_BYTE_CHAR_P (c))
1135         EMIT_ONE_BYTE (c);
1136       else
1137         EMIT_BYTES (src_base, src);
1138       coding->consumed_char++;
1139     }
1140  label_end_of_loop:
1141   coding->consumed = src_base - source;
1142   coding->produced = coding->produced_char = dst - destination;
1143   return;
1144 }
1145
1146 \f
1147 /*** 3. ISO2022 handlers ***/
1148
1149 /* The following note describes the coding system ISO2022 briefly.
1150    Since the intention of this note is to help understand the
1151    functions in this file, some parts are NOT ACCURATE or are OVERLY
1152    SIMPLIFIED.  For thorough understanding, please refer to the
1153    original document of ISO2022.  This is equivalent to the standard
1154    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
1155
1156    ISO2022 provides many mechanisms to encode several character sets
1157    in 7-bit and 8-bit environments.  For 7-bit environments, all text
1158    is encoded using bytes less than 128.  This may make the encoded
1159    text a little bit longer, but the text passes more easily through
1160    several types of gateway, some of which strip off the MSB (Most
1161    Significant Bit).
1162
1163    There are two kinds of character sets: control character sets and
1164    graphic character sets.  The former contain control characters such
1165    as `newline' and `escape' to provide control functions (control
1166    functions are also provided by escape sequences).  The latter
1167    contain graphic characters such as 'A' and '-'.  Emacs recognizes
1168    two control character sets and many graphic character sets.
1169
1170    Graphic character sets are classified into one of the following
1171    four classes, according to the number of bytes (DIMENSION) and
1172    number of characters in one dimension (CHARS) of the set:
1173    - DIMENSION1_CHARS94
1174    - DIMENSION1_CHARS96
1175    - DIMENSION2_CHARS94
1176    - DIMENSION2_CHARS96
1177
1178    In addition, each character set is assigned an identification tag,
1179    unique for each set, called the "final character" (denoted as <F>
1180    hereafter).  The <F> of each character set is decided by ECMA(*)
1181    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
1182    (0x30..0x3F are for private use only).
1183
1184    Note (*): ECMA = European Computer Manufacturers Association
1185
1186    Here are examples of graphic character sets [NAME(<F>)]:
1187         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
1188         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
1189         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
1190         o DIMENSION2_CHARS96 -- none for the moment
1191
1192    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
1193         C0 [0x00..0x1F] -- control character plane 0
1194         GL [0x20..0x7F] -- graphic character plane 0
1195         C1 [0x80..0x9F] -- control character plane 1
1196         GR [0xA0..0xFF] -- graphic character plane 1
1197
1198    A control character set is directly designated and invoked to C0 or
1199    C1 by an escape sequence.  The most common case is that:
1200    - ISO646's  control character set is designated/invoked to C0, and
1201    - ISO6429's control character set is designated/invoked to C1,
1202    and usually these designations/invocations are omitted in encoded
1203    text.  In a 7-bit environment, only C0 can be used, and a control
1204    character for C1 is encoded by an appropriate escape sequence to
1205    fit into the environment.  All control characters for C1 are
1206    defined to have corresponding escape sequences.
1207
1208    A graphic character set is at first designated to one of four
1209    graphic registers (G0 through G3), then these graphic registers are
1210    invoked to GL or GR.  These designations and invocations can be
1211    done independently.  The most common case is that G0 is invoked to
1212    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
1213    these invocations and designations are omitted in encoded text.
1214    In a 7-bit environment, only GL can be used.
1215
1216    When a graphic character set of CHARS94 is invoked to GL, codes
1217    0x20 and 0x7F of the GL area work as control characters SPACE and
1218    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
1219    be used.
1220
1221    There are two ways of invocation: locking-shift and single-shift.
1222    With locking-shift, the invocation lasts until the next different
1223    invocation, whereas with single-shift, the invocation affects the
1224    following character only and doesn't affect the locking-shift
1225    state.  Invocations are done by the following control characters or
1226    escape sequences:
1227
1228    ----------------------------------------------------------------------
1229    abbrev  function                  cntrl escape seq   description
1230    ----------------------------------------------------------------------
1231    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
1232    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
1233    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
1234    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
1235    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
1236    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
1237    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
1238    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
1239    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
1240    ----------------------------------------------------------------------
1241    (*) These are not used by any known coding system.
1242
1243    Control characters for these functions are defined by macros
1244    ISO_CODE_XXX in `coding.h'.
1245
1246    Designations are done by the following escape sequences:
1247    ----------------------------------------------------------------------
1248    escape sequence      description
1249    ----------------------------------------------------------------------
1250    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
1251    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
1252    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
1253    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
1254    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
1255    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
1256    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
1257    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
1258    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
1259    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
1260    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
1261    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
1262    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
1263    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
1264    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
1265    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
1266    ----------------------------------------------------------------------
1267
1268    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
1269    of dimension 1, chars 94, and final character <F>, etc...
1270
1271    Note (*): Although these designations are not allowed in ISO2022,
1272    Emacs accepts them on decoding, and produces them on encoding
1273    CHARS96 character sets in a coding system which is characterized as
1274    7-bit environment, non-locking-shift, and non-single-shift.
1275
1276    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
1277    '(' can be omitted.  We refer to this as "short-form" hereafter.
1278
1279    Now you may notice that there are a lot of ways of encoding the
1280    same multilingual text in ISO2022.  Actually, there exist many
1281    coding systems such as Compound Text (used in X11's inter client
1282    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
1283    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
1284    localized platforms), and all of these are variants of ISO2022.
1285
1286    In addition to the above, Emacs handles two more kinds of escape
1287    sequences: ISO6429's direction specification and Emacs' private
1288    sequence for specifying character composition.
1289
1290    ISO6429's direction specification takes the following form:
1291         o CSI ']'      -- end of the current direction
1292         o CSI '0' ']'  -- end of the current direction
1293         o CSI '1' ']'  -- start of left-to-right text
1294         o CSI '2' ']'  -- start of right-to-left text
1295    The control character CSI (0x9B: control sequence introducer) is
1296    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
1297
1298    Character composition specification takes the following form:
1299         o ESC '0' -- start relative composition
1300         o ESC '1' -- end composition
1301         o ESC '2' -- start rule-base composition (*)
1302         o ESC '3' -- start relative composition with alternate chars  (**)
1303         o ESC '4' -- start rule-base composition with alternate chars  (**)
1304   Since these are not standard escape sequences of any ISO standard,
1305   the use of them with these meanings is restricted to Emacs only.
1306
1307   (*) This form is used only in Emacs 20.5 and older versions,
1308   but the newer versions can safely decode it.
1309   (**) This form is used only in Emacs 21.1 and newer versions,
1310   and the older versions can't decode it.
1311
1312   Here's a list of example usages of these composition escape
1313   sequences (categorized by `enum composition_method').
1314
1315   COMPOSITION_RELATIVE:
1316         ESC 0 CHAR [ CHAR ] ESC 1
1317   COMPOSITION_WITH_RULE:
1318         ESC 2 CHAR [ RULE CHAR ] ESC 1
1319   COMPOSITION_WITH_ALTCHARS:
1320         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
1321   COMPOSITION_WITH_RULE_ALTCHARS:
1322         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
1323
1324 enum iso_code_class_type iso_code_class[256];
1325
     /* Helper predicates for detecting ISO2022 categories.  IDX is an
        index into coding_system_table (a CODING_CATEGORY_IDX_XXX value).
        An element of that table may be NULL, so every predicate checks
        the element before dereferencing it and yields zero for a missing
        coding system.  */

     /* Nonzero if the coding system at IDX exists, either CHARSET is
        CHARSET_ASCII or C is a safe character of that coding system, and
        the coding system has a requested designation for CHARSET.  As a
        side effect this may assign to `safe_chars' of the enclosing
        scope.  */
1326 #define CHARSET_OK(idx, charset, c)                                     \
1327   (coding_system_table[idx]                                             \
1328    && (charset == CHARSET_ASCII                                         \
1329        || (safe_chars = coding_safe_chars (coding_system_table[idx]->symbol), \
1330            CODING_SAFE_CHAR_P (safe_chars, c)))                         \
1331    && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
1332                                               charset)                  \
1333        != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
1334
     /* Nonzero if the coding system at IDX exists and its initial
        designation for graphic register 1 is non-negative.  */
1335 #define SHIFT_OUT_OK(idx) \
1336   (coding_system_table[idx]                                             \
        && (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) \
            >= 0))
1337
     /* Nonzero if the coding system at IDX exists and composition is not
        disabled for it.  */
1338 #define COMPOSITION_OK(idx)     \
1339   (coding_system_table[idx]                                             \
        && coding_system_table[idx]->composing != COMPOSITION_DISABLED)
1340
1341 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1342    Check if a text is encoded in ISO2022.  If it is, return an
1343    integer in which the appropriate flag bits among:
1344         CODING_CATEGORY_MASK_ISO_7
1345         CODING_CATEGORY_MASK_ISO_7_TIGHT
1346         CODING_CATEGORY_MASK_ISO_8_1
1347         CODING_CATEGORY_MASK_ISO_8_2
1348         CODING_CATEGORY_MASK_ISO_7_ELSE
1349         CODING_CATEGORY_MASK_ISO_8_ELSE
1350    are set.  If a code which should never appear in ISO2022 is found,
1351    returns 0.  */
1352
1353 static int
1354 detect_coding_iso2022 (src, src_end, multibytep)
1355      unsigned char *src, *src_end;
1356      int multibytep;
1357 {
1358   int mask = CODING_CATEGORY_MASK_ISO;
1359   int mask_found = 0;
1360   int reg[4], shift_out = 0, single_shifting = 0;
1361   int c, c1, charset;
1362   /* Dummy for ONE_MORE_BYTE.  */
1363   struct coding_system dummy_coding;
1364   struct coding_system *coding = &dummy_coding;
1365   Lisp_Object safe_chars;
1366
1367   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
1368   while (mask && src < src_end)
1369     {
1370       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1371     retry:
1372       switch (c)
1373         {
1374         case ISO_CODE_ESC:
1375           if (inhibit_iso_escape_detection)
1376             break;
1377           single_shifting = 0;
1378           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1379           if (c >= '(' && c <= '/')
1380             {
1381               /* Designation sequence for a charset of dimension 1.  */
1382               ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1383               if (c1 < ' ' || c1 >= 0x80
1384                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
1385                 /* Invalid designation sequence.  Just ignore.  */
1386                 break;
1387               reg[(c - '(') % 4] = charset;
1388             }
1389           else if (c == '$')
1390             {
1391               /* Designation sequence for a charset of dimension 2.  */
1392               ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1393               if (c >= '@' && c <= 'B')
1394                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
1395                 reg[0] = charset = iso_charset_table[1][0][c];
1396               else if (c >= '(' && c <= '/')
1397                 {
1398                   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
1399                   if (c1 < ' ' || c1 >= 0x80
1400                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
1401                     /* Invalid designation sequence.  Just ignore.  */
1402                     break;
1403                   reg[(c - '(') % 4] = charset;
1404                 }
1405               else
1406                 /* Invalid designation sequence.  Just ignore.  */
1407                 break;
1408             }
1409           else if (c == 'N' || c == 'O')
1410             {
1411               /* ESC <Fe> for SS2 or SS3.  */
1412               mask &= CODING_CATEGORY_MASK_ISO_7_ELSE;
1413               break;
1414             }
1415           else if (c >= '0' && c <= '4')
1416             {
1417               /* ESC <Fp> for start/end composition.  */
1418               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7))
1419                 mask_found |= CODING_CATEGORY_MASK_ISO_7;
1420               else
1421                 mask &= ~CODING_CATEGORY_MASK_ISO_7;
1422               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT))
1423                 mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
1424               else
1425                 mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
1426               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_1))
1427                 mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1428               else
1429                 mask &= ~CODING_CATEGORY_MASK_ISO_8_1;
1430               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_2))
1431                 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1432               else
1433                 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1434               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_ELSE))
1435                 mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
1436               else
1437                 mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
1438               if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_ELSE))
1439                 mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
1440               else
1441                 mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
1442               break;
1443             }
1444           else
1445             /* Invalid escape sequence.  Just ignore.  */
1446             break;
1447
1448           /* We found a valid designation sequence for CHARSET.  */
1449           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
1450           c = MAKE_CHAR (charset, 0, 0);
1451           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
1452             mask_found |= CODING_CATEGORY_MASK_ISO_7;
1453           else
1454             mask &= ~CODING_CATEGORY_MASK_ISO_7;
1455           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
1456             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
1457           else
1458             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
1459           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
1460             mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
1461           else
1462             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
1463           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
1464             mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
1465           else
1466             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
1467           break;
1468
1469         case ISO_CODE_SO:
1470           if (inhibit_iso_escape_detection)
1471             break;
1472           single_shifting = 0;
1473           if (shift_out == 0
1474               && (reg[1] >= 0
1475                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
1476                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
1477             {
1478               /* Locking shift out.  */
1479               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1480               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1481             }
1482           break;
1483
1484         case ISO_CODE_SI:
1485           if (inhibit_iso_escape_detection)
1486             break;
1487           single_shifting = 0;
1488           if (shift_out == 1)
1489             {
1490               /* Locking shift in.  */
1491               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
1492               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
1493             }
1494           break;
1495
1496         case ISO_CODE_CSI:
1497           single_shifting = 0;
1498         case ISO_CODE_SS2:
1499         case ISO_CODE_SS3:
1500           {
1501             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
1502
1503             if (inhibit_iso_escape_detection)
1504               break;
1505             if (c != ISO_CODE_CSI)
1506               {
1507                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1508                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1509                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1510                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1511                     & CODING_FLAG_ISO_SINGLE_SHIFT)
1512                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1513                 single_shifting = 1;
1514               }
1515             if (VECTORP (Vlatin_extra_code_table)
1516                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1517               {
1518                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1519                     & CODING_FLAG_ISO_LATIN_EXTRA)
1520                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1521                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1522                     & CODING_FLAG_ISO_LATIN_EXTRA)
1523                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1524               }
1525             mask &= newmask;
1526             mask_found |= newmask;
1527           }
1528           break;
1529
1530         default:
1531           if (c < 0x80)
1532             {
1533               single_shifting = 0;
1534               break;
1535             }
1536           else if (c < 0xA0)
1537             {
1538               single_shifting = 0;
1539               if (VECTORP (Vlatin_extra_code_table)
1540                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
1541                 {
1542                   int newmask = 0;
1543
1544                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
1545                       & CODING_FLAG_ISO_LATIN_EXTRA)
1546                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
1547                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
1548                       & CODING_FLAG_ISO_LATIN_EXTRA)
1549                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
1550                   mask &= newmask;
1551                   mask_found |= newmask;
1552                 }
1553               else
1554                 return 0;
1555             }
1556           else
1557             {
1558               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
1559                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
1560               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
1561               /* Check the length of succeeding codes of the range
1562                  0xA0..0FF.  If the byte length is odd, we exclude
1563                  CODING_CATEGORY_MASK_ISO_8_2.  We can check this only
1564                  when we are not single shifting.  */
1565               if (!single_shifting
1566                   && mask & CODING_CATEGORY_MASK_ISO_8_2)
1567                 {
1568                   int i = 1;
1569
1570                   c = -1;
1571                   while (src < src_end)
1572                     {
1573                       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
1574                       if (c < 0xA0)
1575                         break;
1576                       i++;
1577                     }
1578
1579                   if (i & 1 && src < src_end)
1580                     mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
1581                   else
1582                     mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
1583                   if (c >= 0)
1584                     /* This means that we have read one extra byte.  */
1585                     goto retry;
1586                 }
1587             }
1588           break;
1589         }
1590     }
1591  label_end_of_loop:
1592   return (mask & mask_found);
1593 }
1594
1595 /* Build the character whose charset is CHARSET, whose 1st position
1596    code is C1 and whose 2nd position code is C2, and yield its
1597    character code.  If the variable `translation_table' is non-nil,
1598    yield the code translated through that table instead.  */
1599
1600 #define DECODE_ISO_CHARACTER(charset, c1, c2)                   \
1601   (! NILP (translation_table)                                   \
1602    ? translate_char (translation_table, -1, charset, c1, c2)    \
1603    : MAKE_CHAR (charset, c1, c2))
1604
1605 /* Set designation state into CODING.

        Look up the charset identified by DIMENSION, CHARS and
        FINAL_CHAR and, if it is acceptable, record it as designated to
        graphic register REG of CODING.  The designation is accepted
        when the charset exists and either CODING requests that charset
        for REG or the charset's representative character passes
        CODING_SAFE_CHAR_P against `safe_chars'.  Otherwise REG is
        remembered in last_invalid_designation_register and control
        jumps to label_invalid_code.  A FINAL_CHAR below '0' or not
        below 128 also jumps to label_invalid_code.

        This macro uses the variables `coding' and `safe_chars' and the
        label `label_invalid_code' of the place where it is expanded,
        and declares its own `charset' and `c' inside the do-block.
        REG and FINAL_CHAR are evaluated more than once.  */
1606 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
1607   do {                                                                     \
1608     int charset, c;                                                        \
1609                                                                            \
1610     if (final_char < '0' || final_char >= 128)                             \
1611       goto label_invalid_code;                                             \
1612     charset = ISO_CHARSET_TABLE (make_number (dimension),                  \
1613                                  make_number (chars),                      \
1614                                  make_number (final_char));                \
1615     c = MAKE_CHAR (charset, 0, 0);                                         \
1616     if (charset >= 0                                                       \
1617         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
1618             || CODING_SAFE_CHAR_P (safe_chars, c)))                        \
1619       {                                                                    \
1620         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
1621             && reg == 0                                                    \
1622             && charset == CHARSET_ASCII)                                   \
1623           {                                                                \
1624             /* We should insert this designation sequence as is so         \
1625                that it is surely written back to a file.  */               \
1626             coding->spec.iso2022.last_invalid_designation_register = -1;   \
1627             goto label_invalid_code;                                       \
1628           }                                                                \
1629         coding->spec.iso2022.last_invalid_designation_register = -1;       \
             /* When CODING_MODE_DIRECTION is set and CHARSET has a reverse     \
                charset, designate that one instead.  */                        \
1630         if ((coding->mode & CODING_MODE_DIRECTION)                         \
1631             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
1632           charset = CHARSET_REVERSE_CHARSET (charset);                     \
1633         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
1634       }                                                                    \
1635     else                                                                   \
1636       {                                                                    \
1637         coding->spec.iso2022.last_invalid_designation_register = reg;      \
1638         goto label_invalid_code;                                           \
1639       }                                                                    \
1640   } while (0)
1641
1642 /* Allocate a fresh composition_data block for CODING, recording
1643    CHAR_OFFSET in it, and append it to the chain of existing blocks.  */
1644
1645 void
1646 coding_allocate_composition_data (coding, char_offset)
1647      struct coding_system *coding;
1648      int char_offset;
1649 {
1650   struct composition_data *block;
1651
1652   block = (struct composition_data *) xmalloc (sizeof *block);
1653   block->next = NULL;
1654   block->prev = coding->cmp_data;
1655   block->used = 0;
1656   block->char_offset = char_offset;
1657   if (block->prev != NULL)
1658     block->prev->next = block;
1659   coding->cmp_data = block;
1660   coding->cmp_data_start = 0;
1661 }
1662
1663 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
1664    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
1665    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
1666    ESC 3 : altchar composition :  ESC 3 ALT ... ESC 0 CHAR ... ESC 1
1667    ESC 4 : alt&rule composition : ESC 4 ALT RULE .. ALT ESC 0 CHAR ... ESC 1

        C1 is the byte that followed ESC.

        - If coding->composing is COMPOSITION_DISABLED, ESC and C1
          (masked with 0x7f) are stored at DST and counted as two
          produced characters.
        - If no composition is in progress, a new one is started with
          the method selected by C1.  When coding->cmp_data is missing
          or lacks room, coding->result is set to
          CODING_FINISH_INSUFFICIENT_CMP and control jumps to
          label_end_of_loop instead.
        - If a composition is already in progress with one of the
          ALTCHARS methods, the method is switched to
          COMPOSITION_RELATIVE.

        This macro uses `coding', `dst' and the label
        `label_end_of_loop' of the place where it is expanded.  It does
        not compare DST against the end of the destination itself.
1668   */
1669
1670 #define DECODE_COMPOSITION_START(c1)                                       \
1671   do {                                                                     \
1672     if (coding->composing == COMPOSITION_DISABLED)                         \
1673       {                                                                    \
1674         *dst++ = ISO_CODE_ESC;                                             \
1675         *dst++ = c1 & 0x7f;                                                \
1676         coding->produced_char += 2;                                        \
1677       }                                                                    \
1678     else if (!COMPOSING_P (coding))                                        \
1679       {                                                                    \
1680         /* This is surely the start of a composition.  We must be sure     \
1681            that coding->cmp_data has enough space to store the             \
1682            information about the composition.  If not, terminate the       \
1683            current decoding loop, allocate one more memory block for       \
1684            coding->cmp_data in the caller, then start the decoding         \
1685            loop again.  We can't allocate memory here directly because     \
1686            it may cause buffer/string relocation.  */                      \
1687         if (!coding->cmp_data                                              \
1688             || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
1689                 >= COMPOSITION_DATA_SIZE))                                 \
1690           {                                                                \
1691             coding->result = CODING_FINISH_INSUFFICIENT_CMP;               \
1692             goto label_end_of_loop;                                        \
1693           }                                                                \
1694         coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE              \
1695                              : c1 == '2' ? COMPOSITION_WITH_RULE           \
1696                              : c1 == '3' ? COMPOSITION_WITH_ALTCHARS       \
1697                              : COMPOSITION_WITH_RULE_ALTCHARS);            \
1698         CODING_ADD_COMPOSITION_START (coding, coding->produced_char,       \
1699                                       coding->composing);                  \
1700         coding->composition_rule_follows = 0;                              \
1701       }                                                                    \
1702     else                                                                   \
1703       {                                                                    \
1704         /* We are already handling a composition.  If the method is        \
1705            the following two, the codes following the current escape       \
1706            sequence are actual characters stored in a buffer.  */          \
1707         if (coding->composing == COMPOSITION_WITH_ALTCHARS                 \
1708             || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)        \
1709           {                                                                \
1710             coding->composing = COMPOSITION_RELATIVE;                      \
1711             coding->composition_rule_follows = 0;                          \
1712           }                                                                \
1713       }                                                                    \
1714   } while (0)
1715
1716 /* Handle composition end sequence ESC 1.

        C1 is the byte that followed ESC.  If no composition is in
        progress, ESC and C1 are stored at DST and counted as two
        produced characters.  Otherwise the end of the composition is
        recorded at the current coding->produced_char and
        coding->composing is reset to COMPOSITION_NO.

        This macro uses `coding' and `dst' of the place where it is
        expanded.  */
1717
1718 #define DECODE_COMPOSITION_END(c1)                                      \
1719   do {                                                                  \
1720     if (! COMPOSING_P (coding))                                         \
1721       {                                                                 \
1722         *dst++ = ISO_CODE_ESC;                                          \
1723         *dst++ = c1;                                                    \
1724         coding->produced_char += 2;                                     \
1725       }                                                                 \
1726     else                                                                \
1727       {                                                                 \
1728         CODING_ADD_COMPOSITION_END (coding, coding->produced_char);     \
1729         coding->composing = COMPOSITION_NO;                             \
1730       }                                                                 \
1731   } while (0)
1732
1733 /* Decode a composition rule from the byte C1 (and maybe one more byte
1734    from SRC) and store one encoded composition rule in
1735    coding->cmp_data.

        C1 must be a modifiable variable: 32 is subtracted from it in
        place.  A resulting value below 81 is the old one-byte format,
        split into two references by dividing by 9, with the value 4
        mapped to 10 in each.  A value from 81 to 92 is the new
        two-byte format; the second byte is read with ONE_MORE_BYTE
        into the variable `c2' of the place of expansion.  For any
        other value the stored rule is 0.  Afterwards
        coding->composition_rule_follows is cleared.  */
1736
1737 #define DECODE_COMPOSITION_RULE(c1)                                     \
1738   do {                                                                  \
1739     int rule = 0;                                                       \
1740     (c1) -= 32;                                                         \
1741     if (c1 < 81)                /* old format (before ver.21) */        \
1742       {                                                                 \
1743         int gref = (c1) / 9;                                            \
1744         int nref = (c1) % 9;                                            \
1745         if (gref == 4) gref = 10;                                       \
1746         if (nref == 4) nref = 10;                                       \
1747         rule = COMPOSITION_ENCODE_RULE (gref, nref);                    \
1748       }                                                                 \
1749     else if (c1 < 93)           /* new format (after ver.21) */         \
1750       {                                                                 \
1751         ONE_MORE_BYTE (c2);                                             \
1752         rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);              \
1753       }                                                                 \
1754     CODING_ADD_COMPOSITION_COMPONENT (coding, rule);                    \
1755     coding->composition_rule_follows = 0;                               \
1756   } while (0)
1757
1758
1759 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".

        Decode the SRC_BYTES bytes at SOURCE as ISO2022 text according to
        CODING, writing the result to DESTINATION, whose size is
        DST_BYTES.  The designation, invocation, direction and
        composition state kept in CODING is updated as escape sequences
        and shift codes are met.

        On return, coding->result holds the finish status,
        coding->consumed and coding->consumed_char hold the number of
        source bytes processed, measured up to the start of the loop
        iteration in which decoding stopped, and coding->produced holds
        the number of bytes written.  Each invalid code increments
        coding->errors and its first byte is emitted as is.  */
1760
1761 static void
1762 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1763      struct coding_system *coding;
1764      unsigned char *source, *destination;
1765      int src_bytes, dst_bytes;
1766 {
1767   unsigned char *src = source;
1768   unsigned char *src_end = source + src_bytes;
1769   unsigned char *dst = destination;
1770   unsigned char *dst_end = destination + dst_bytes;
1771   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1772   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1773   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1774   /* SRC_BASE remembers the start position in source in each loop.
1775      The loop will be exited when there's not enough source code
1776      (within macro ONE_MORE_BYTE), or when there's not enough
1777      destination area to produce a character (within macro
1778      EMIT_CHAR).  */
1779   unsigned char *src_base;
1780   int c, charset;
1781   Lisp_Object translation_table;
1782   Lisp_Object safe_chars;
1783
1784   safe_chars = coding_safe_chars (coding->symbol);
1785
       /* Use no translation table when character translation is disabled;
          otherwise use the one of CODING, or the standard one if CODING
          has none.  */
1786   if (NILP (Venable_character_translation))
1787     translation_table = Qnil;
1788   else
1789     {
1790       translation_table = coding->translation_table_for_decode;
1791       if (NILP (translation_table))
1792         translation_table = Vstandard_translation_table_for_decode;
1793     }
1794
1795   coding->result = CODING_FINISH_NORMAL;
1796
1797   while (1)
1798     {
1799       int c1, c2;
1800
1801       src_base = src;
1802       ONE_MORE_BYTE (c1);
1803
1804       /* We produce no character or one character.  */
1805       switch (iso_code_class [c1])
1806         {
1807         case ISO_0x20_or_0x7F:
1808           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1809             {
1810               DECODE_COMPOSITION_RULE (c1);
1811               continue;
1812             }
1813           if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
1814             {
1815               /* This is SPACE or DEL.  */
1816               charset = CHARSET_ASCII;
1817               break;
1818             }
1819           /* This is a graphic character, we fall down ...  */
1820
1821         case ISO_graphic_plane_0:
1822           if (COMPOSING_P (coding) && coding->composition_rule_follows)
1823             {
1824               DECODE_COMPOSITION_RULE (c1);
1825               continue;
1826             }
1827           charset = charset0;
1828           break;
1829
1830         case ISO_0xA0_or_0xFF:
1831           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1832               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1833             goto label_invalid_code;
1834           /* This is a graphic character, we fall down ... */
1835
1836         case ISO_graphic_plane_1:
1837           if (charset1 < 0)
1838             goto label_invalid_code;
1839           charset = charset1;
1840           break;
1841
1842         case ISO_control_0:
1843           if (COMPOSING_P (coding))
1844             DECODE_COMPOSITION_END ('1');
1845
1846           /* All ISO2022 control characters in this class have the
1847              same representation in Emacs internal format.  */
1848           if (c1 == '\n'
1849               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1850               && (coding->eol_type == CODING_EOL_CR
1851                   || coding->eol_type == CODING_EOL_CRLF))
1852             {
1853               coding->result = CODING_FINISH_INCONSISTENT_EOL;
1854               goto label_end_of_loop;
1855             }
1856           charset = CHARSET_ASCII;
1857           break;
1858
1859         case ISO_control_1:
1860           if (COMPOSING_P (coding))
1861             DECODE_COMPOSITION_END ('1');
1862           goto label_invalid_code;
1863
1864         case ISO_carriage_return:
1865           if (COMPOSING_P (coding))
1866             DECODE_COMPOSITION_END ('1');
1867
               /* With eol type CR, a CR becomes a newline.  With eol type
                  CRLF, a CR followed by LF yields that LF alone; otherwise
                  the byte just read is pushed back and the CR is kept.  */
1868           if (coding->eol_type == CODING_EOL_CR)
1869             c1 = '\n';
1870           else if (coding->eol_type == CODING_EOL_CRLF)
1871             {
1872               ONE_MORE_BYTE (c1);
1873               if (c1 != ISO_CODE_LF)
1874                 {
1875                   src--;
1876                   c1 = '\r';
1877                 }
1878             }
1879           charset = CHARSET_ASCII;
1880           break;
1881
1882         case ISO_shift_out:
1883           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1884               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1885             goto label_invalid_code;
1886           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1887           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1888           continue;
1889
1890         case ISO_shift_in:
1891           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1892             goto label_invalid_code;
1893           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1894           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1895           continue;
1896
1897         case ISO_single_shift_2_7:
1898         case ISO_single_shift_2:
1899           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1900             goto label_invalid_code;
1901           /* SS2 is handled as an escape sequence of ESC 'N' */
1902           c1 = 'N';
1903           goto label_escape_sequence;
1904
1905         case ISO_single_shift_3:
1906           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1907             goto label_invalid_code;
1908           /* SS3 is handled as an escape sequence of ESC 'O' */
1909           c1 = 'O';
1910           goto label_escape_sequence;
1911
1912         case ISO_control_sequence_introducer:
1913           /* CSI is handled as an escape sequence of ESC '[' ...  */
1914           c1 = '[';
1915           goto label_escape_sequence;
1916
1917         case ISO_escape:
1918           ONE_MORE_BYTE (c1);
1919         label_escape_sequence:
1920           /* Escape sequences handled by Emacs are invocation,
1921              designation, direction specification, and character
1922              composition specification.  */
1923           switch (c1)
1924             {
1925             case '&':           /* revision of following character set */
                   /* The revision byte must be in '@'..'~' and be followed
                      by another ESC, whose sequence is then processed.  */
1926               ONE_MORE_BYTE (c1);
1927               if (!(c1 >= '@' && c1 <= '~'))
1928                 goto label_invalid_code;
1929               ONE_MORE_BYTE (c1);
1930               if (c1 != ISO_CODE_ESC)
1931                 goto label_invalid_code;
1932               ONE_MORE_BYTE (c1);
1933               goto label_escape_sequence;
1934
1935             case '$':           /* designation of 2-byte character set */
1936               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1937                 goto label_invalid_code;
1938               ONE_MORE_BYTE (c1);
1939               if (c1 >= '@' && c1 <= 'B')
1940                 {       /* designation of JISX0208.1978, GB2312.1980,
1941                            or JISX0208.1980 */
1942                   DECODE_DESIGNATION (0, 2, 94, c1);
1943                 }
1944               else if (c1 >= 0x28 && c1 <= 0x2B)
1945                 {       /* designation of DIMENSION2_CHARS94 character set */
1946                   ONE_MORE_BYTE (c2);
1947                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1948                 }
1949               else if (c1 >= 0x2C && c1 <= 0x2F)
1950                 {       /* designation of DIMENSION2_CHARS96 character set */
1951                   ONE_MORE_BYTE (c2);
1952                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1953                 }
1954               else
1955                 goto label_invalid_code;
1956               /* We must update these variables now.  */
1957               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1958               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1959               continue;
1960
1961             case 'n':           /* invocation of locking-shift-2 */
1962               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1963                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1964                 goto label_invalid_code;
1965               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1966               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1967               continue;
1968
1969             case 'o':           /* invocation of locking-shift-3 */
1970               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1971                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1972                 goto label_invalid_code;
1973               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1974               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1975               continue;
1976
1977             case 'N':           /* invocation of single-shift-2 */
1978               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1979                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1980                 goto label_invalid_code;
1981               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
                   /* The next byte is the 1st position code of the shifted
                      character; bytes below 0x20 and in 0x80..0x9F are
                      rejected.  */
1982               ONE_MORE_BYTE (c1);
1983               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1984                 goto label_invalid_code;
1985               break;
1986
1987             case 'O':           /* invocation of single-shift-3 */
1988               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1989                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1990                 goto label_invalid_code;
1991               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1992               ONE_MORE_BYTE (c1);
1993               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
1994                 goto label_invalid_code;
1995               break;
1996
1997             case '0': case '2': case '3': case '4': /* start composition */
1998               DECODE_COMPOSITION_START (c1);
1999               continue;
2000
2001             case '1':           /* end composition */
2002               DECODE_COMPOSITION_END (c1);
2003               continue;
2004
2005             case '[':           /* specification of direction */
2006               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
2007                 goto label_invalid_code;
2008               /* For the moment, nested direction is not supported.
2009                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
2010                  left-to-right, and nonzero means right-to-left.  */
2011               ONE_MORE_BYTE (c1);
2012               switch (c1)
2013                 {
2014                 case ']':       /* end of the current direction */
2015                   coding->mode &= ~CODING_MODE_DIRECTION;
2016                   /* NOTE(review): there is no `break' here, so control
                          falls into the '0'/'1' case below, which reads one
                          more byte and requires it to be ']'.  Confirm that
                          this fall-through is intended.  */
2017                 case '0':       /* end of the current direction */
2018                 case '1':       /* start of left-to-right direction */
2019                   ONE_MORE_BYTE (c1);
2020                   if (c1 == ']')
2021                     coding->mode &= ~CODING_MODE_DIRECTION;
2022                   else
2023                     goto label_invalid_code;
2024                   break;
2025
2026                 case '2':       /* start of right-to-left direction */
2027                   ONE_MORE_BYTE (c1);
2028                   if (c1 == ']')
2029                     coding->mode |= CODING_MODE_DIRECTION;
2030                   else
2031                     goto label_invalid_code;
2032                   break;
2033
2034                 default:
2035                   goto label_invalid_code;
2036                 }
2037               continue;
2038
2039             default:
2040               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
2041                 goto label_invalid_code;
2042               if (c1 >= 0x28 && c1 <= 0x2B)
2043                 {       /* designation of DIMENSION1_CHARS94 character set */
2044                   ONE_MORE_BYTE (c2);
2045                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
2046                 }
2047               else if (c1 >= 0x2C && c1 <= 0x2F)
2048                 {       /* designation of DIMENSION1_CHARS96 character set */
2049                   ONE_MORE_BYTE (c2);
2050                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
2051                 }
2052               else
2053                 goto label_invalid_code;
2054               /* We must update these variables now.  */
2055               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
2056               charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
2057               continue;
2058             }
2059         }
2060
2061       /* Now we know CHARSET and 1st position code C1 of a character.
2062          Produce a multibyte sequence for that character while getting
2063          2nd position code C2 if necessary.  */
2064       if (CHARSET_DIMENSION (charset) == 2)
2065         {
2066           ONE_MORE_BYTE (c2);
2067           if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0)
2068             /* C2 is not in a valid range.  */
2069             goto label_invalid_code;
2070         }
           /* NOTE(review): when CHARSET is not of dimension 2, C2 has not
              been assigned on this path; presumably it is ignored for such
              charsets -- confirm.  */
2071       c = DECODE_ISO_CHARACTER (charset, c1, c2);
2072       EMIT_CHAR (c);
2073       continue;
2074
           /* Count the error, close any composition in progress, then go
              back to the start of the offending sequence and emit its first
              byte as is.  */
2075     label_invalid_code:
2076       coding->errors++;
2077       if (COMPOSING_P (coding))
2078         DECODE_COMPOSITION_END ('1');
2079       src = src_base;
2080       c = *src++;
2081       EMIT_CHAR (c);
2082     }
2083
       /* The consumed count is taken from SRC_BASE, not SRC, so bytes read
          in the iteration that was interrupted are not counted.  */
2084  label_end_of_loop:
2085   coding->consumed = coding->consumed_char = src_base - source;
2086   coding->produced = dst - destination;
2087   return;
2088 }
2089
2090
2091 /* ISO2022 encoding stuff.  */
2092
2093 /*
   For encoding, it is not enough to say just "ISO2022"; we have to
   specify more details.  In Emacs, each ISO2022 coding system
   variant has the following specifications:
2097         1. Initial designation to G0 through G3.
2098         2. Allows short-form designation?
2099         3. ASCII should be designated to G0 before control characters?
2100         4. ASCII should be designated to G0 at end of line?
2101         5. 7-bit environment or 8-bit environment?
2102         6. Use locking-shift?
2103         7. Use Single-shift?
2104    And the following two are only for Japanese:
2105         8. Use ASCII in place of JIS0201-1976-Roman?
2106         9. Use JISX0208-1983 in place of JISX0208-1978?
2107    These specifications are encoded in `coding->flags' as flag bits
2108    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
2109    details.
2110 */
2111
/* Emit at DST the escape sequence that designates CHARSET to graphic
   register REG, advance DST past it, and record the designation in
   CODING.  If the revision number CODING holds for CHARSET is below
   255, the sequence is preceded by `ESC & <'@' + revision>'.  The
   intermediate character is left out (short form) only for a
   multi-byte 94-character set whose <final-char> is '@', 'A', or
   'B', when REG is 0 and CODING has CODING_FLAG_ISO_SHORT_FORM.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    const unsigned char desig_final = CHARSET_ISO_FINAL_CHAR (charset); \
    const int desig_revision                                            \
      = CODING_SPEC_ISO_REVISION_NUMBER (coding, charset);              \
    const int desig_multibyte = CHARSET_DIMENSION (charset) != 1;       \
    const int desig_is_94 = CHARSET_CHARS (charset) == 94;              \
    /* Intermediate characters for registers G0..G3.  */                \
    const char *desig_intermediates = desig_is_94 ? "()*+" : ",-./";    \
                                                                        \
    if (desig_revision < 255)                                           \
      {                                                                 \
        *dst++ = ISO_CODE_ESC;                                          \
        *dst++ = '&';                                                   \
        *dst++ = '@' + desig_revision;                                  \
      }                                                                 \
    *dst++ = ISO_CODE_ESC;                                              \
    if (desig_multibyte)                                                \
      *dst++ = '$';                                                     \
    /* Everything except the short form carries an intermediate.  */    \
    if (! (desig_multibyte && desig_is_94                               \
           && ((coding)->flags & CODING_FLAG_ISO_SHORT_FORM)            \
           && (reg) == 0                                                \
           && desig_final >= '@' && desig_final <= 'B'))                \
      *dst++ = (unsigned char) desig_intermediates[reg];                \
    *dst++ = desig_final;                                               \
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = (charset);              \
  } while (0)
2154
/* The following two macros produce the code for an ISO2022
   single-shift function (single-shift-2 and single-shift-3): an
   escape sequence when CODING is limited to seven bits, a single
   control character otherwise.  Both also flag CODING as being in
   the single-shifting state.  */

#define ENCODE_SINGLE_SHIFT_2                                   \
  do {                                                          \
    if ((coding->flags & CODING_FLAG_ISO_SEVEN_BITS) == 0)      \
      *dst++ = ISO_CODE_SS2;                                    \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'N';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)
2167
/* Produce the code for single-shift-3 (`ESC O' when CODING is limited
   to seven bits, ISO_CODE_SS3 otherwise) and flag CODING as being in
   the single-shifting state.  */
#define ENCODE_SINGLE_SHIFT_3                                   \
  do {                                                          \
    if ((coding->flags & CODING_FLAG_ISO_SEVEN_BITS) == 0)      \
      *dst++ = ISO_CODE_SS3;                                    \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'O';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)
2176
/* The following four macros produce the code (a control character or
   an escape sequence) for an ISO2022 locking-shift function
   (shift-in, shift-out, locking-shift-2, and locking-shift-3).  Each
   one records in CODING that graphic plane 0 now has graphic
   register 0, 1, 2, or 3 respectively invoked to it.  */

#define ENCODE_SHIFT_IN                         \
  do {                                          \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
    *dst++ = ISO_CODE_SI;                       \
  } while (0)

#define ENCODE_SHIFT_OUT                        \
  do {                                          \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
    *dst++ = ISO_CODE_SO;                       \
  } while (0)

#define ENCODE_LOCKING_SHIFT_2                  \
  do {                                          \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \
    *dst++ = ISO_CODE_ESC;                      \
    *dst++ = 'n';                               \
  } while (0)

#define ENCODE_LOCKING_SHIFT_3                  \
  do {                                          \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \
    *dst++ = ISO_CODE_ESC;                      \
    *dst++ = 'o';                               \
  } while (0)
2204
/* Produce the byte for a DIMENSION1 character of character set
   CHARSET whose position-code is C1.  If CHARSET is not invoked to
   either graphic plane yet, the necessary designation and invocation
   sequences are produced first.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        /* A single-shift is pending; it covers just this one           \
           character.  */                                               \
        *dst++ = ((coding->flags & CODING_FLAG_ISO_SEVEN_BITS)          \
                  ? ((c1) & 0x7F) : ((c1) | 0x80));                     \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))         \
      {                                                                 \
        *dst++ = (c1) & 0x7F;                                           \
        break;                                                          \
      }                                                                 \
    if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))         \
      {                                                                 \
        *dst++ = (c1) | 0x80;                                           \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not invoked to any graphic plane.  Invoke it          \
       (designating it to some graphic register first if needed),       \
       then go round the loop again to produce the character.  */       \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
2237
/* Produce the two bytes for a DIMENSION2 character of character set
   CHARSET whose position-codes are C1 and C2.  If CHARSET is not
   invoked to either graphic plane yet, the necessary designation and
   invocation sequences are produced first.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        /* A single-shift is pending; it covers just this one           \
           character.  */                                               \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          {                                                             \
            *dst++ = (c1) & 0x7F;                                       \
            *dst++ = (c2) & 0x7F;                                       \
          }                                                             \
        else                                                            \
          {                                                             \
            *dst++ = (c1) | 0x80;                                       \
            *dst++ = (c2) | 0x80;                                       \
          }                                                             \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))         \
      {                                                                 \
        *dst++ = (c1) & 0x7F;                                           \
        *dst++ = (c2) & 0x7F;                                           \
        break;                                                          \
      }                                                                 \
    if ((charset) == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))         \
      {                                                                 \
        *dst++ = (c1) | 0x80;                                           \
        *dst++ = (c2) | 0x80;                                           \
        break;                                                          \
      }                                                                 \
    /* CHARSET is not invoked to any graphic plane.  Invoke it          \
       (designating it to some graphic register first if needed),       \
       then go round the loop again to produce the character.  */       \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
2270
/* Produce codes for character C.  C is split into its charset and
   position-codes.  When the charset is not defined, the
   position-codes are written out as they are (the second one only if
   it is not negative).  Otherwise the character is encoded as a
   DIMENSION1 or DIMENSION2 character, after substituting
   latin-jisx0201 for ASCII under CODING_FLAG_ISO_USE_ROMAN, or
   jisx0208-1978 for jisx0208 under CODING_FLAG_ISO_USE_OLDJIS.  */
#define ENCODE_ISO_CHARACTER(c)                                         \
  do {                                                                  \
    int iso_charset, iso_c1, iso_c2;                                    \
                                                                        \
    SPLIT_CHAR (c, iso_charset, iso_c1, iso_c2);                        \
    if (! CHARSET_DEFINED_P (iso_charset))                              \
      {                                                                 \
        *dst++ = iso_c1;                                                \
        if (iso_c2 >= 0)                                                \
          *dst++ = iso_c2;                                              \
        break;                                                          \
      }                                                                 \
    if (CHARSET_DIMENSION (iso_charset) != 1)                           \
      {                                                                 \
        if ((coding->flags & CODING_FLAG_ISO_USE_OLDJIS)                \
            && iso_charset == charset_jisx0208)                         \
          iso_charset = charset_jisx0208_1978;                          \
        ENCODE_ISO_CHARACTER_DIMENSION2 (iso_charset, iso_c1, iso_c2);  \
        break;                                                          \
      }                                                                 \
    if ((coding->flags & CODING_FLAG_ISO_USE_ROMAN)                     \
        && iso_charset == CHARSET_ASCII)                                \
      iso_charset = charset_latin_jisx0201;                             \
    ENCODE_ISO_CHARACTER_DIMENSION1 (iso_charset, iso_c1);              \
  } while (0)
2300
2301
/* Character C is not to be encoded as is.  Encode
   CODING_INHIBIT_CHARACTER_SUBSTITUTION in its place: once, or twice
   when the width of C's charset is greater than 1.  */

#define ENCODE_UNSAFE_CHARACTER(c)                                      \
  do {                                                                  \
    ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);       \
    if (! (CHARSET_WIDTH (CHAR_CHARSET (c)) <= 1))                      \
      {                                                                 \
        ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);   \
      }                                                                 \
  } while (0)
2310
2311
2312 /* Produce designation and invocation codes at a place pointed by DST
2313    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
2314    Return new DST.  */
2315
2316 unsigned char *
2317 encode_invocation_designation (charset, coding, dst)
2318      int charset;
2319      struct coding_system *coding;
2320      unsigned char *dst;
2321 {
2322   int reg;                      /* graphic register number */
2323
2324   /* At first, check designations.  */
2325   for (reg = 0; reg < 4; reg++)
2326     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
2327       break;
2328
2329   if (reg >= 4)
2330     {
2331       /* CHARSET is not yet designated to any graphic registers.  */
2332       /* At first check the requested designation.  */
2333       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2334       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
2335         /* Since CHARSET requests no special designation, designate it
2336            to graphic register 0.  */
2337         reg = 0;
2338
2339       ENCODE_DESIGNATION (charset, reg, coding);
2340     }
2341
2342   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
2343       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
2344     {
2345       /* Since the graphic register REG is not invoked to any graphic
2346          planes, invoke it to graphic plane 0.  */
2347       switch (reg)
2348         {
2349         case 0:                 /* graphic register 0 */
2350           ENCODE_SHIFT_IN;
2351           break;
2352
2353         case 1:                 /* graphic register 1 */
2354           ENCODE_SHIFT_OUT;
2355           break;
2356
2357         case 2:                 /* graphic register 2 */
2358           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2359             ENCODE_SINGLE_SHIFT_2;
2360           else
2361             ENCODE_LOCKING_SHIFT_2;
2362           break;
2363
2364         case 3:                 /* graphic register 3 */
2365           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
2366             ENCODE_SINGLE_SHIFT_3;
2367           else
2368             ENCODE_LOCKING_SHIFT_3;
2369           break;
2370         }
2371     }
2372
2373   return dst;
2374 }
2375
/* Produce the 2-byte code for the encoded composition rule RULE:
   RULE is decoded into two reference values, which are written out
   offset by 32 + 81 and by 32 respectively.  */

#define ENCODE_COMPOSITION_RULE(rule)                           \
  do {                                                          \
    int rule_gref, rule_nref;                                   \
    COMPOSITION_DECODE_RULE (rule, rule_gref, rule_nref);       \
    dst[0] = 32 + 81 + rule_gref;                               \
    dst[1] = 32 + rule_nref;                                    \
    dst += 2;                                                   \
  } while (0)
2385
/* Produce the escape sequence that starts a composition (ESC 0,
   ESC 3, or ESC 4).  DATA points to an array of integers describing
   the composition (see the comment in coding.h for its format);
   DATA[3] becomes the current composition method of CODING.  Unless
   that method is COMPOSITION_RELATIVE, CODING is also prepared for
   producing the components that follow the four header elements of
   DATA.  */

#define ENCODE_COMPOSITION_START(coding, data)                          \
  do {                                                                  \
    (coding)->composing = (data)[3];                                    \
    *dst++ = ISO_CODE_ESC;                                              \
    if ((coding)->composing != COMPOSITION_RELATIVE)                    \
      {                                                                 \
        if ((coding)->composing == COMPOSITION_WITH_ALTCHARS)           \
          *dst++ = '3';                                                 \
        else                                                            \
          *dst++ = '4';                                                 \
        (coding)->cmp_data_index = (coding)->cmp_data_start + 4;        \
        (coding)->composition_rule_follows = 0;                         \
      }                                                                 \
    else                                                                \
      *dst++ = '0';                                                     \
  } while (0)
2405
/* Produce the escape sequence (ESC 1) that ends the current
   composition, and step CODING past the composition's data, whose
   length is DATA[0].  When that exhausts the current block of
   composition data and another block follows, move on to it.  */

#define ENCODE_COMPOSITION_END(coding, data)                            \
  do {                                                                  \
    dst[0] = ISO_CODE_ESC;                                              \
    dst[1] = '1';                                                       \
    dst += 2;                                                           \
    (coding)->composing = COMPOSITION_NO;                               \
    (coding)->cmp_data_start += (data)[0];                              \
    if ((coding)->cmp_data_start == (coding)->cmp_data->used)           \
      {                                                                 \
        if ((coding)->cmp_data->next)                                   \
          {                                                             \
            (coding)->cmp_data = (coding)->cmp_data->next;              \
            (coding)->cmp_data_start = 0;                               \
          }                                                             \
      }                                                                 \
  } while (0)
2421
/* Produce the composition start sequence ESC 0 and switch CODING to
   COMPOSITION_RELATIVE.  Here the sequence does not start a new
   composition; it marks that the components (alternate chars and
   composition rules) of the composition have just been produced and
   that the actual text follows in SRC.  */

#define ENCODE_COMPOSITION_FAKE_START(coding)           \
  do {                                                  \
    (coding)->composing = COMPOSITION_RELATIVE;         \
    dst[0] = ISO_CODE_ESC;                              \
    dst[1] = '0';                                       \
    dst += 2;                                           \
  } while (0)
2433
/* This macro and the two ENCODE_DIRECTION_XXX macros produce codes
   for indicating direction of text.

   Produce the control sequence introducer: the escape sequence
   `ESC [' when the coding system is limited to seven bits, the single
   control character ISO_CODE_CSI otherwise.  CODING_FLAG_ISO_SEVEN_BITS
   is tested as one bit of `coding->flags', the same way the
   single-shift macros test it, so that other flags being set does
   not select the 8-bit form by mistake.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
  do {                                                  \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
      {                                                 \
        *dst++ = ISO_CODE_ESC;                          \
        *dst++ = '[';                                   \
      }                                                 \
    else                                                \
      *dst++ = ISO_CODE_CSI;                            \
  } while (0)
2443
/* Produce the control sequence for the start of right-to-left text:
   the control sequence introducer followed by `2]'.
   ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro, so it
   is used as a statement here and not called with an argument.  */
#define ENCODE_DIRECTION_R2L                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '2';                               \
    *dst++ = ']';                               \
  } while (0)
2446
/* Produce the control sequence that switches the text direction back
   to left-to-right: the control sequence introducer followed by
   `0]'.  ENCODE_CONTROL_SEQUENCE_INTRODUCER is an object-like macro,
   so it is used as a statement here and not called with an
   argument.  */
#define ENCODE_DIRECTION_L2R                    \
  do {                                          \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;         \
    *dst++ = '0';                               \
    *dst++ = ']';                               \
  } while (0)
2449
/* Produce the codes that bring the graphic planes and registers back
   to their initial state: a shift-in if graphic plane 0 does not
   have register 0 invoked, then a designation for each register
   whose initial designation is not negative and differs from the
   current one.  */
#define ENCODE_RESET_PLANE_AND_REGISTER                                     \
  do {                                                                      \
    int reset_reg = 0;                                                      \
    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
      ENCODE_SHIFT_IN;                                                      \
    while (reset_reg < 4)                                                   \
      {                                                                     \
        if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reset_reg) >= 0    \
            && (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reset_reg)     \
                != CODING_SPEC_ISO_DESIGNATION (coding, reset_reg)))        \
          ENCODE_DESIGNATION                                                \
            (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reset_reg),       \
             reset_reg, coding);                                            \
        reset_reg++;                                                        \
      }                                                                     \
  } while (0)
2464
2465 /* Produce designation sequences of charsets in the line started from
2466    SRC to a place pointed by DST, and return updated DST.
2467
2468    If the current block ends before any end-of-line, we may fail to
2469    find all the necessary designations.  */
2470
2471 static unsigned char *
2472 encode_designation_at_bol (coding, translation_table, src, src_end, dst)
2473      struct coding_system *coding;
2474      Lisp_Object translation_table;
2475      unsigned char *src, *src_end, *dst;
2476 {
2477   int charset, c, found = 0, reg;
2478   /* Table of charsets to be designated to each graphic register.  */
2479   int r[4];
2480
2481   for (reg = 0; reg < 4; reg++)
2482     r[reg] = -1;
2483
2484   while (found < 4)
2485     {
2486       ONE_MORE_CHAR (c);
2487       if (c == '\n')
2488         break;
2489
2490       charset = CHAR_CHARSET (c);
2491       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
2492       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
2493         {
2494           found++;
2495           r[reg] = charset;
2496         }
2497     }
2498
2499  label_end_of_loop:
2500   if (found)
2501     {
2502       for (reg = 0; reg < 4; reg++)
2503         if (r[reg] >= 0
2504             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
2505           ENCODE_DESIGNATION (r[reg], reg, coding);
2506     }
2507
2508   return dst;
2509 }
2510
2511 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
2512
2513 static void
2514 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
2515      struct coding_system *coding;
2516      unsigned char *source, *destination;
2517      int src_bytes, dst_bytes;
2518 {
2519   unsigned char *src = source;
2520   unsigned char *src_end = source + src_bytes;
2521   unsigned char *dst = destination;
2522   unsigned char *dst_end = destination + dst_bytes;
2523   /* Since the maximum bytes produced by each loop is 20, we subtract 19
2524      from DST_END to assure overflow checking is necessary only at the
2525      head of loop.  */
2526   unsigned char *adjusted_dst_end = dst_end - 19;
2527   /* SRC_BASE remembers the start position in source in each loop.
2528      The loop will be exited when there's not enough source text to
2529      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
2530      there's not enough destination area to produce encoded codes
2531      (within macro EMIT_BYTES).  */
2532   unsigned char *src_base;
2533   int c;
2534   Lisp_Object translation_table;
2535   Lisp_Object safe_chars;
2536
2537   safe_chars = coding_safe_chars (coding->symbol);
2538
2539   if (NILP (Venable_character_translation))
2540     translation_table = Qnil;
2541   else
2542     {
2543       translation_table = coding->translation_table_for_encode;
2544       if (NILP (translation_table))
2545         translation_table = Vstandard_translation_table_for_encode;
2546     }
2547
2548   coding->consumed_char = 0;
2549   coding->errors = 0;
2550   while (1)
2551     {
2552       src_base = src;
2553
2554       if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
2555         {
2556           coding->result = CODING_FINISH_INSUFFICIENT_DST;
2557           break;
2558         }
2559
2560       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
2561           && CODING_SPEC_ISO_BOL (coding))
2562         {
2563           /* We have to produce designation sequences if any now.  */
2564           dst = encode_designation_at_bol (coding, translation_table,
2565                                            src, src_end, dst);
2566           CODING_SPEC_ISO_BOL (coding) = 0;
2567         }
2568
2569       /* Check composition start and end.  */
2570       if (coding->composing != COMPOSITION_DISABLED
2571           && coding->cmp_data_start < coding->cmp_data->used)
2572         {
2573           struct composition_data *cmp_data = coding->cmp_data;
2574           int *data = cmp_data->data + coding->cmp_data_start;
2575           int this_pos = cmp_data->char_offset + coding->consumed_char;
2576
2577           if (coding->composing == COMPOSITION_RELATIVE)
2578             {
2579               if (this_pos == data[2])
2580                 {
2581                   ENCODE_COMPOSITION_END (coding, data);
2582                   cmp_data = coding->cmp_data;
2583                   data = cmp_data->data + coding->cmp_data_start;
2584                 }
2585             }
2586           else if (COMPOSING_P (coding))
2587             {
2588               /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHAR  */
2589               if (coding->cmp_data_index == coding->cmp_data_start + data[0])
2590                 /* We have consumed components of the composition.
2591                    What follows in SRC is the composition's base
2592                    text.  */
2593                 ENCODE_COMPOSITION_FAKE_START (coding);
2594               else
2595                 {
2596                   int c = cmp_data->data[coding->cmp_data_index++];
2597                   if (coding->composition_rule_follows)
2598                     {
2599                       ENCODE_COMPOSITION_RULE (c);
2600                       coding->composition_rule_follows = 0;
2601                     }
2602                   else
2603                     {
2604                       if (coding->flags & CODING_FLAG_ISO_SAFE
2605                           && ! CODING_SAFE_CHAR_P (safe_chars, c))
2606                         ENCODE_UNSAFE_CHARACTER (c);
2607                       else
2608                         ENCODE_ISO_CHARACTER (c);
2609                       if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
2610                         coding->composition_rule_follows = 1;
2611                     }
2612                   continue;
2613                 }
2614             }
2615           if (!COMPOSING_P (coding))
2616             {
2617               if (this_pos == data[1])
2618                 {
2619                   ENCODE_COMPOSITION_START (coding, data);
2620                   continue;
2621                 }
2622             }
2623         }
2624
2625       ONE_MORE_CHAR (c);
2626
2627       /* Now encode the character C.  */
2628       if (c < 0x20 || c == 0x7F)
2629         {
2630           if (c == '\r')
2631             {
2632               if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2633                 {
2634                   if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2635                     ENCODE_RESET_PLANE_AND_REGISTER;
2636                   *dst++ = c;
2637                   continue;
2638                 }
2639               /* fall down to treat '\r' as '\n' ...  */
2640               c = '\n';
2641             }
2642           if (c == '\n')
2643             {
2644               if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
2645                 ENCODE_RESET_PLANE_AND_REGISTER;
2646               if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
2647                 bcopy (coding->spec.iso2022.initial_designation,
2648                        coding->spec.iso2022.current_designation,
2649                        sizeof coding->spec.iso2022.initial_designation);
2650               if (coding->eol_type == CODING_EOL_LF
2651                   || coding->eol_type == CODING_EOL_UNDECIDED)
2652                 *dst++ = ISO_CODE_LF;
2653               else if (coding->eol_type == CODING_EOL_CRLF)
2654                 *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
2655               else
2656                 *dst++ = ISO_CODE_CR;
2657               CODING_SPEC_ISO_BOL (coding) = 1;
2658             }
2659           else
2660             {
2661               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
2662                 ENCODE_RESET_PLANE_AND_REGISTER;
2663               *dst++ = c;
2664             }
2665         }
2666       else if (ASCII_BYTE_P (c))
2667         ENCODE_ISO_CHARACTER (c);
2668       else if (SINGLE_BYTE_CHAR_P (c))
2669         {
2670           *dst++ = c;
2671           coding->errors++;
2672         }
2673       else if (coding->flags & CODING_FLAG_ISO_SAFE
2674                && ! CODING_SAFE_CHAR_P (safe_chars, c))
2675         ENCODE_UNSAFE_CHARACTER (c);
2676       else
2677         ENCODE_ISO_CHARACTER (c);
2678
2679       coding->consumed_char++;
2680     }
2681
2682  label_end_of_loop:
2683   coding->consumed = src_base - source;
2684   coding->produced = coding->produced_char = dst - destination;
2685 }
2686
2687 \f
2688 /*** 4. SJIS and BIG5 handlers ***/
2689
2690 /* Although SJIS and BIG5 are not ISO coding systems, they are used
2691    quite widely.  So, for the moment, Emacs supports them in the bare
2692    C code.  But, in the future, they may be supported only by CCL.  */
2693
2694 /* SJIS is a coding system encoding three character sets: ASCII, right
2695    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
2696    as is.  A character of charset katakana-jisx0201 is encoded by
2697    "position-code + 0x80".  A character of charset japanese-jisx0208
   is encoded in two bytes, but the two position-codes are divided and
   shifted so that they fit in the ranges below.
2700
2701    --- CODE RANGE of SJIS ---
2702    (character set)      (range)
2703    ASCII                0x00 .. 0x7F
2704    KATAKANA-JISX0201    0xA1 .. 0xDF
2705    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
2706             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
2707    -------------------------------
2708
2709 */
2710
2711 /* BIG5 is a coding system encoding two character sets: ASCII and
2712    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
2713    character set and is encoded in two bytes.
2714
2715    --- CODE RANGE of BIG5 ---
2716    (character set)      (range)
2717    ASCII                0x00 .. 0x7F
2718    Big5 (1st byte)      0xA1 .. 0xFE
2719         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2720    --------------------------
2721
2722    Since the number of characters in Big5 is larger than maximum
2723    characters in Emacs' charset (96x96), it can't be handled as one
2724    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2725    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2726    contains frequently used characters and the latter contains less
2727    frequently used characters.  */
2728
/* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
   are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
   C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
   format.  CHARSET is `charset_big5_1' or `charset_big5_2'.

   Every argument is parenthesized in the expansions, so a caller may
   pass an arbitrary expression (e.g. `raw & 0xFF').  Arguments are
   evaluated more than once, so they must be free of side effects.
   All input arguments are read before the position-code outputs are
   assigned, so an output may name the same variable as an input, as
   in DECODE_BIG5 (c1, c2, charset, c1, c2).  */

/* Number of Big5 characters which have the same code in 1st byte:
   63 with a 2nd byte in 0x40..0x7E plus 94 with a 2nd byte in
   0xA1..0xFE.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* Convert the BIG5 byte pair (B1, B2) into CHARSET and the
   position-codes C1 and C2.  TEMP is the linear index of the
   character counted from the code 0xA1 0x40; indices belonging to
   1st bytes of 0xC9 and above are rebased to zero and assigned to
   `charset_big5_2'.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = (((b1) - 0xA1) * BIG5_SAME_ROW                                  \
         + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));                         \
    if ((b1) < 0xC9)                                                    \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                          \
      }                                                                 \
    (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
    (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
  } while (0)

/* Inverse of DECODE_BIG5: convert CHARSET and the position-codes C1
   and C2 into the BIG5 byte pair (B1, B2).  A column index below 0x3F
   maps to a 2nd byte in 0x40..0x7E, the others to 0xA1..0xFE.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int temp                                                   \
      = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);                  \
    if ((charset) == charset_big5_2)                                    \
      temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
    (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
    (b2) = temp % BIG5_SAME_ROW;                                        \
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
  } while (0)
2761
2762 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2763    Check if a text is encoded in SJIS.  If it is, return
2764    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2765
2766 static int
2767 detect_coding_sjis (src, src_end, multibytep)
2768      unsigned char *src, *src_end;
2769      int multibytep;
2770 {
2771   int c;
2772   /* Dummy for ONE_MORE_BYTE.  */
2773   struct coding_system dummy_coding;
2774   struct coding_system *coding = &dummy_coding;
2775
2776   while (1)
2777     {
2778       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2779       if (c < 0x80)
2780         continue;
2781       if (c == 0x80 || c == 0xA0 || c > 0xEF)
2782         return 0;
2783       if (c <= 0x9F || c >= 0xE0)
2784         {
2785           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2786           if (c < 0x40 || c == 0x7F || c > 0xFC)
2787             return 0;
2788         }
2789     }
2790  label_end_of_loop:
2791   return CODING_CATEGORY_MASK_SJIS;
2792 }
2793
2794 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2795    Check if a text is encoded in BIG5.  If it is, return
2796    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2797
2798 static int
2799 detect_coding_big5 (src, src_end, multibytep)
2800      unsigned char *src, *src_end;
2801      int multibytep;
2802 {
2803   int c;
2804   /* Dummy for ONE_MORE_BYTE.  */
2805   struct coding_system dummy_coding;
2806   struct coding_system *coding = &dummy_coding;
2807
2808   while (1)
2809     {
2810       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2811       if (c < 0x80)
2812         continue;
2813       if (c < 0xA1 || c > 0xFE)
2814         return 0;
2815       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2816       if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE)
2817         return 0;
2818     }
2819  label_end_of_loop:
2820   return CODING_CATEGORY_MASK_BIG5;
2821 }
2822
2823 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2824    Check if a text is encoded in UTF-8.  If it is, return
2825    CODING_CATEGORY_MASK_UTF_8, else return 0.  */
2826
/* Predicates classifying a single octet C of UTF-8 text.  An octet
   below 0x80 is a complete one-octet sequence.  The other predicates
   compare the high-order bits of C against a fixed pattern: 10xxxxxx
   for an extra (non-leading) octet, and 110xxxxx, 1110xxxx, ...,
   1111110x for the leading octet of a 2-, 3-, ..., 6-octet
   sequence.  */

/* Nonzero if the bits of C selected by MASK are equal to BITS.  */
#define UTF_8_OCTET_MATCH_P(c, mask, bits) (((c) & (mask)) == (bits))

#define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
#define UTF_8_EXTRA_OCTET_P(c)     UTF_8_OCTET_MATCH_P (c, 0xC0, 0x80)
#define UTF_8_2_OCTET_LEADING_P(c) UTF_8_OCTET_MATCH_P (c, 0xE0, 0xC0)
#define UTF_8_3_OCTET_LEADING_P(c) UTF_8_OCTET_MATCH_P (c, 0xF0, 0xE0)
#define UTF_8_4_OCTET_LEADING_P(c) UTF_8_OCTET_MATCH_P (c, 0xF8, 0xF0)
#define UTF_8_5_OCTET_LEADING_P(c) UTF_8_OCTET_MATCH_P (c, 0xFC, 0xF8)
#define UTF_8_6_OCTET_LEADING_P(c) UTF_8_OCTET_MATCH_P (c, 0xFE, 0xFC)
2834
2835 static int
2836 detect_coding_utf_8 (src, src_end, multibytep)
2837      unsigned char *src, *src_end;
2838      int multibytep;
2839 {
2840   unsigned char c;
2841   int seq_maybe_bytes;
2842   /* Dummy for ONE_MORE_BYTE.  */
2843   struct coding_system dummy_coding;
2844   struct coding_system *coding = &dummy_coding;
2845
2846   while (1)
2847     {
2848       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2849       if (UTF_8_1_OCTET_P (c))
2850         continue;
2851       else if (UTF_8_2_OCTET_LEADING_P (c))
2852         seq_maybe_bytes = 1;
2853       else if (UTF_8_3_OCTET_LEADING_P (c))
2854         seq_maybe_bytes = 2;
2855       else if (UTF_8_4_OCTET_LEADING_P (c))
2856         seq_maybe_bytes = 3;
2857       else if (UTF_8_5_OCTET_LEADING_P (c))
2858         seq_maybe_bytes = 4;
2859       else if (UTF_8_6_OCTET_LEADING_P (c))
2860         seq_maybe_bytes = 5;
2861       else
2862         return 0;
2863
2864       do
2865         {
2866           ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
2867           if (!UTF_8_EXTRA_OCTET_P (c))
2868             return 0;
2869           seq_maybe_bytes--;
2870         }
2871       while (seq_maybe_bytes > 0);
2872     }
2873
2874  label_end_of_loop:
2875   return CODING_CATEGORY_MASK_UTF_8;
2876 }
2877
2878 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2879    Check if a text starts with a UTF-16 byte order mark.  If the
2880    first two bytes are 0xFE 0xFF, return CODING_CATEGORY_MASK_UTF_16_BE;
2881    if they are 0xFF 0xFE, return CODING_CATEGORY_MASK_UTF_16_LE;
2882    else return 0.  */
2883
/* Nonzero if the 16-bit value VAL is 0xFFFE or 0xFFFF.  */
#define UTF_16_INVALID_P(val)   \
  (((val) == 0xFFFE)            \
   || ((val) == 0xFFFF))

/* Nonzero if the 16-bit value VAL is a high surrogate, i.e. in the
   range 0xD800..0xDBFF.  The six high-order bits identify the range,
   so they all have to take part in the comparison.  */
#define UTF_16_HIGH_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xD800)

/* Nonzero if the 16-bit value VAL is a low surrogate, i.e. in the
   range 0xDC00..0xDFFF.  */
#define UTF_16_LOW_SURROGATE_P(val) \
  (((val) & 0xFC00) == 0xDC00)
2893
2894 static int
2895 detect_coding_utf_16 (src, src_end, multibytep)
2896      unsigned char *src, *src_end;
2897      int multibytep;
2898 {
2899   unsigned char c1, c2;
2900   /* Dummy for ONE_MORE_BYTE_CHECK_MULTIBYTE.  */
2901   struct coding_system dummy_coding;
2902   struct coding_system *coding = &dummy_coding;
2903
2904   ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
2905   ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep);
2906
2907   if ((c1 == 0xFF) && (c2 == 0xFE))
2908     return CODING_CATEGORY_MASK_UTF_16_LE;
2909   else if ((c1 == 0xFE) && (c2 == 0xFF))
2910     return CODING_CATEGORY_MASK_UTF_16_BE;
2911
2912  label_end_of_loop:
2913   return 0;
2914 }
2915
2916 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2917    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
2918
2919 static void
2920 decode_coding_sjis_big5 (coding, source, destination,
2921                          src_bytes, dst_bytes, sjis_p)
2922      struct coding_system *coding;
2923      unsigned char *source, *destination;
2924      int src_bytes, dst_bytes;
2925      int sjis_p;
2926 {
2927   unsigned char *src = source;
2928   unsigned char *src_end = source + src_bytes;
2929   unsigned char *dst = destination;
2930   unsigned char *dst_end = destination + dst_bytes;
2931   /* SRC_BASE remembers the start position in source in each loop.
2932      The loop will be exited when there's not enough source code
2933      (within macro ONE_MORE_BYTE), or when there's not enough
2934      destination area to produce a character (within macro
2935      EMIT_CHAR).  */
2936   unsigned char *src_base;
2937   Lisp_Object translation_table;
2938
2939   if (NILP (Venable_character_translation))
2940     translation_table = Qnil;
2941   else
2942     {
2943       translation_table = coding->translation_table_for_decode;
2944       if (NILP (translation_table))
2945         translation_table = Vstandard_translation_table_for_decode;
2946     }
2947
2948   coding->produced_char = 0;
2949   while (1)
2950     {
2951       int c, charset, c1, c2;
2952
2953       src_base = src;
2954       ONE_MORE_BYTE (c1);
2955
2956       if (c1 < 0x80)
2957         {
2958           charset = CHARSET_ASCII;
2959           if (c1 < 0x20)
2960             {
2961               if (c1 == '\r')
2962                 {
2963                   if (coding->eol_type == CODING_EOL_CRLF)
2964                     {
2965                       ONE_MORE_BYTE (c2);
2966                       if (c2 == '\n')
2967                         c1 = c2;
2968                       else
2969                         /* To process C2 again, SRC is subtracted by 1.  */
2970                         src--;
2971                     }
2972                   else if (coding->eol_type == CODING_EOL_CR)
2973                     c1 = '\n';
2974                 }
2975               else if (c1 == '\n'
2976                        && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2977                        && (coding->eol_type == CODING_EOL_CR
2978                            || coding->eol_type == CODING_EOL_CRLF))
2979                 {
2980                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
2981                   goto label_end_of_loop;
2982                 }
2983             }
2984         }
2985       else
2986         {
2987           if (sjis_p)
2988             {
2989               if (c1 == 0x80 || c1 == 0xA0 || c1 > 0xEF)
2990                 goto label_invalid_code;
2991               if (c1 <= 0x9F || c1 >= 0xE0)
2992                 {
2993                   /* SJIS -> JISX0208 */
2994                   ONE_MORE_BYTE (c2);
2995                   if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
2996                     goto label_invalid_code;
2997                   DECODE_SJIS (c1, c2, c1, c2);
2998                   charset = charset_jisx0208;
2999                 }
3000               else
3001                 /* SJIS -> JISX0201-Kana */
3002                 charset = charset_katakana_jisx0201;
3003             }
3004           else
3005             {
3006               /* BIG5 -> Big5 */
3007               if (c1 < 0xA0 || c1 > 0xFE)
3008                 goto label_invalid_code;
3009               ONE_MORE_BYTE (c2);
3010               if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
3011                 goto label_invalid_code;
3012               DECODE_BIG5 (c1, c2, charset, c1, c2);
3013             }
3014         }
3015
3016       c = DECODE_ISO_CHARACTER (charset, c1, c2);
3017       EMIT_CHAR (c);
3018       continue;
3019
3020     label_invalid_code:
3021       coding->errors++;
3022       src = src_base;
3023       c = *src++;
3024       EMIT_CHAR (c);
3025     }
3026
3027  label_end_of_loop:
3028   coding->consumed = coding->consumed_char = src_base - source;
3029   coding->produced = dst - destination;
3030   return;
3031 }
3032
3033 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
3034    This function can encode charsets `ascii', `katakana-jisx0201',
3035    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
3036    are sure that all these charsets are registered as official charset
3037    (i.e. do not have extended leading-codes).  Characters of other
3038    charsets are produced without any encoding.  If SJIS_P is 1, encode
3039    SJIS text, else encode BIG5 text.  */
3040
3041 static void
3042 encode_coding_sjis_big5 (coding, source, destination,
3043                          src_bytes, dst_bytes, sjis_p)
3044      struct coding_system *coding;
3045      unsigned char *source, *destination;
3046      int src_bytes, dst_bytes;
3047      int sjis_p;
3048 {
3049   unsigned char *src = source;
3050   unsigned char *src_end = source + src_bytes;
3051   unsigned char *dst = destination;
3052   unsigned char *dst_end = destination + dst_bytes;
3053   /* SRC_BASE remembers the start position in source in each loop.
3054      The loop will be exited when there's not enough source text to
3055      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3056      there's not enough destination area to produce encoded codes
3057      (within macro EMIT_BYTES).  */
3058   unsigned char *src_base;
3059   Lisp_Object translation_table;
3060
3061   if (NILP (Venable_character_translation))
3062     translation_table = Qnil;
3063   else
3064     {
3065       translation_table = coding->translation_table_for_encode;
3066       if (NILP (translation_table))
3067         translation_table = Vstandard_translation_table_for_encode;
3068     }
3069
3070   while (1)
3071     {
3072       int c, charset, c1, c2;
3073
3074       src_base = src;
3075       ONE_MORE_CHAR (c);
3076
3077       /* Now encode the character C.  */
3078       if (SINGLE_BYTE_CHAR_P (c))
3079         {
3080           switch (c)
3081             {
3082             case '\r':
3083               if (!(coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
3084                 {
3085                   EMIT_ONE_BYTE (c);
3086                   break;
3087                 }
3088               c = '\n';
3089             case '\n':
3090               if (coding->eol_type == CODING_EOL_CRLF)
3091                 {
3092                   EMIT_TWO_BYTES ('\r', c);
3093                   break;
3094                 }
3095               else if (coding->eol_type == CODING_EOL_CR)
3096                 c = '\r';
3097             default:
3098               EMIT_ONE_BYTE (c);
3099             }
3100         }
3101       else
3102         {
3103           SPLIT_CHAR (c, charset, c1, c2);
3104           if (sjis_p)
3105             {
3106               if (charset == charset_jisx0208
3107                   || charset == charset_jisx0208_1978)
3108                 {
3109                   ENCODE_SJIS (c1, c2, c1, c2);
3110                   EMIT_TWO_BYTES (c1, c2);
3111                 }
3112               else if (charset == charset_katakana_jisx0201)
3113                 EMIT_ONE_BYTE (c1 | 0x80);
3114               else if (charset == charset_latin_jisx0201)
3115                 EMIT_ONE_BYTE (c1);
3116               else
3117                 /* There's no way other than producing the internal
3118                    codes as is.  */
3119                 EMIT_BYTES (src_base, src);
3120             }
3121           else
3122             {
3123               if (charset == charset_big5_1 || charset == charset_big5_2)
3124                 {
3125                   ENCODE_BIG5 (charset, c1, c2, c1, c2);
3126                   EMIT_TWO_BYTES (c1, c2);
3127                 }
3128               else
3129                 /* There's no way other than producing the internal
3130                    codes as is.  */
3131                 EMIT_BYTES (src_base, src);
3132             }
3133         }
3134       coding->consumed_char++;
3135     }
3136
3137  label_end_of_loop:
3138   coding->consumed = src_base - source;
3139   coding->produced = coding->produced_char = dst - destination;
3140 }
3141
3142 \f
3143 /*** 5. CCL handlers ***/
3144
3145 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3146    Check if a text is encoded in a coding system of which
3147    encoder/decoder are written in CCL program.  If it is, return
3148    CODING_CATEGORY_MASK_CCL, else return 0.  */
3149
3150 static int
3151 detect_coding_ccl (src, src_end, multibytep)
3152      unsigned char *src, *src_end;
3153      int multibytep;
3154 {
3155   unsigned char *valid;
3156   int c;
3157   /* Dummy for ONE_MORE_BYTE.  */
3158   struct coding_system dummy_coding;
3159   struct coding_system *coding = &dummy_coding;
3160
3161   /* No coding system is assigned to coding-category-ccl.  */
3162   if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
3163     return 0;
3164
3165   valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
3166   while (1)
3167     {
3168       ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
3169       if (! valid[c])
3170         return 0;
3171     }
3172  label_end_of_loop:
3173   return CODING_CATEGORY_MASK_CCL;
3174 }
3175
3176 \f
3177 /*** 6. End-of-line handlers ***/
3178
3179 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3180
3181 static void
3182 decode_eol (coding, source, destination, src_bytes, dst_bytes)
3183      struct coding_system *coding;
3184      unsigned char *source, *destination;
3185      int src_bytes, dst_bytes;
3186 {
3187   unsigned char *src = source;
3188   unsigned char *dst = destination;
3189   unsigned char *src_end = src + src_bytes;
3190   unsigned char *dst_end = dst + dst_bytes;
3191   Lisp_Object translation_table;
3192   /* SRC_BASE remembers the start position in source in each loop.
3193      The loop will be exited when there's not enough source code
3194      (within macro ONE_MORE_BYTE), or when there's not enough
3195      destination area to produce a character (within macro
3196      EMIT_CHAR).  */
3197   unsigned char *src_base;
3198   int c;
3199
3200   translation_table = Qnil;
3201   switch (coding->eol_type)
3202     {
3203     case CODING_EOL_CRLF:
3204       while (1)
3205         {
3206           src_base = src;
3207           ONE_MORE_BYTE (c);
3208           if (c == '\r')
3209             {
3210               ONE_MORE_BYTE (c);
3211               if (c != '\n')
3212                 {
3213                   src--;
3214                   c = '\r';
3215                 }
3216             }
3217           else if (c == '\n'
3218                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
3219             {
3220               coding->result = CODING_FINISH_INCONSISTENT_EOL;
3221               goto label_end_of_loop;
3222             }
3223           EMIT_CHAR (c);
3224         }
3225       break;
3226
3227     case CODING_EOL_CR:
3228       while (1)
3229         {
3230           src_base = src;
3231           ONE_MORE_BYTE (c);
3232           if (c == '\n')
3233             {
3234               if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
3235                 {
3236                   coding->result = CODING_FINISH_INCONSISTENT_EOL;
3237                   goto label_end_of_loop;
3238                 }
3239             }
3240           else if (c == '\r')
3241             c = '\n';
3242           EMIT_CHAR (c);
3243         }
3244       break;
3245
3246     default:                    /* no need for EOL handling */
3247       while (1)
3248         {
3249           src_base = src;
3250           ONE_MORE_BYTE (c);
3251           EMIT_CHAR (c);
3252         }
3253     }
3254
3255  label_end_of_loop:
3256   coding->consumed = coding->consumed_char = src_base - source;
3257   coding->produced = dst - destination;
3258   return;
3259 }
3260
3261 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
3262    format of end-of-line according to `coding->eol_type'.  It also
3263    convert multibyte form 8-bit characters to unibyte if
3264    CODING->src_multibyte is nonzero.  If `coding->mode &
3265    CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
3266    also means end-of-line.  */
3267
3268 static void
3269 encode_eol (coding, source, destination, src_bytes, dst_bytes)
3270      struct coding_system *coding;
3271      const unsigned char *source;
3272      unsigned char *destination;
3273      int src_bytes, dst_bytes;
3274 {
3275   const unsigned char *src = source;
3276   unsigned char *dst = destination;
3277   const unsigned char *src_end = src + src_bytes;
3278   unsigned char *dst_end = dst + dst_bytes;
3279   Lisp_Object translation_table;
3280   /* SRC_BASE remembers the start position in source in each loop.
3281      The loop will be exited when there's not enough source text to
3282      analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
3283      there's not enough destination area to produce encoded codes
3284      (within macro EMIT_BYTES).  */
3285   const unsigned char *src_base;
3286   unsigned char *tmp;
3287   int c;
3288   int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
3289
3290   translation_table = Qnil;
3291   if (coding->src_multibyte
3292       && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL)
3293     {
3294       src_end--;
3295       src_bytes--;
3296       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
3297     }
3298
3299   if (coding->eol_type == CODING_EOL_CRLF)
3300     {
3301       while (src < src_end)
3302         {
3303           src_base = src;
3304           c = *src++;
3305           if (c >= 0x20)
3306             EMIT_ONE_BYTE (c);
3307           else if (c == '\n' || (c == '\r' && selective_display))
3308             EMIT_TWO_BYTES ('\r', '\n');
3309           else
3310             EMIT_ONE_BYTE (c);
3311         }
3312       src_base = src;
3313     label_end_of_loop:
3314       ;
3315     }
3316   else
3317     {
3318       if (!dst_bytes || src_bytes <= dst_bytes)
3319         {
3320           safe_bcopy (src, dst, src_bytes);
3321           src_base = src_end;
3322           dst += src_bytes;
3323         }
3324       else
3325         {
3326           if (coding->src_multibyte
3327               && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL)
3328             dst_bytes--;
3329           safe_bcopy (src, dst, dst_bytes);
3330           src_base = src + dst_bytes;
3331           dst = destination + dst_bytes;
3332           coding->result = CODING_FINISH_INSUFFICIENT_DST;
3333         }
3334       if (coding->eol_type == CODING_EOL_CR)
3335         {
3336           for (tmp = destination; tmp < dst; tmp++)
3337             if (*tmp == '\n') *tmp = '\r';
3338         }
3339       else if (selective_display)
3340         {
3341           for (tmp = destination; tmp < dst; tmp++)
3342             if (*tmp == '\r') *tmp = '\n';
3343         }
3344     }
3345   if (coding->src_multibyte)
3346     dst = destination + str_as_unibyte (destination, dst - destination);
3347
3348   coding->consumed = src_base - source;
3349   coding->produced = dst - destination;
3350   coding->produced_char = coding->produced;
3351 }
3352
3353 \f
3354 /*** 7. C library functions ***/
3355
3356 /* In Emacs Lisp, a coding system is represented by a Lisp symbol which
3357    has a property `coding-system'.  The value of this property is a
3358    vector of length 5 (called the coding-vector).  Among elements of
3359    this vector, the first (element[0]) and the fifth (element[4])
3360    carry important information for decoding/encoding.  Before
3361    decoding/encoding, this information should be set in fields of a
3362    structure of type `coding_system'.
3363
3364    The value of the property `coding-system' can be a symbol of another
3365    subsidiary coding-system.  In that case, Emacs gets coding-vector
3366    from that symbol.
3367
3368    `element[0]' contains information to be set in `coding->type'.  The
3369    value and its meaning is as follows:
3370
3371    0 -- coding_type_emacs_mule
3372    1 -- coding_type_sjis
3373    2 -- coding_type_iso2022
3374    3 -- coding_type_big5
3375    4 -- coding_type_ccl encoder/decoder written in CCL
3376    nil -- coding_type_no_conversion
3377    t -- coding_type_undecided (automatic conversion on decoding,
3378                                no-conversion on encoding)
3379
3380    `element[4]' contains information to be set in `coding->flags' and
3381    `coding->spec'.  The meaning varies by `coding->type'.
3382
3383    If `coding->type' is `coding_type_iso2022', element[4] is a vector
3384    of length 32 (of which the first 17 sub-elements are used now).
3385    Meanings of these sub-elements are:
3386
3387    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
3388         If the value is an integer of valid charset, the charset is
3389         assumed to be designated to graphic register N initially.
3390
3391         If the value is minus, it is a minus value of charset which
3392         reserves graphic register N, which means that the charset is
3393         not designated initially but should be designated to graphic
3394         register N just before encoding a character in that charset.
3395
3396         If the value is nil, graphic register N is never used on
3397         encoding.
3398
3399    sub-element[N] where N is 4 through 16: to be set in `coding->flags'
3400         Each value takes t or nil.  See the section ISO2022 of
3401         `coding.h' for more information.
3402
3403    If `coding->type' is `coding_type_big5', element[4] is t to denote
3404    BIG5-ETen or nil to denote BIG5-HKU.
3405
3406    If `coding->type' takes the other value, element[4] is ignored.
3407
3408    Emacs Lisp's coding systems also carry information about format of
3409    end-of-line in a value of property `eol-type'.  If the value is
3410    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
3411    means CODING_EOL_CR.  If it is not integer, it should be a vector
3412    of subsidiary coding systems of which property `eol-type' has one
3413    of the above values.
3414
3415 */
3416
3417 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
3418    and set it in CODING.  If CODING_SYSTEM_SYMBOL is invalid, CODING
3419    is setup so that no conversion is necessary and return -1, else
3420    return 0.  */
3421
3422 int
3423 setup_coding_system (coding_system, coding)
3424      Lisp_Object coding_system;
3425      struct coding_system *coding;
3426 {
3427   Lisp_Object coding_spec, coding_type, eol_type, plist;
3428   Lisp_Object val;
3429
3430   /* At first, zero clear all members.  */
3431   bzero (coding, sizeof (struct coding_system));
3432
3433   /* Initialize some fields required for all kinds of coding systems.  */
3434   coding->symbol = coding_system;
3435   coding->heading_ascii = -1;
3436   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
3437   coding->composing = COMPOSITION_DISABLED;
3438   coding->cmp_data = NULL;
3439
3440   if (NILP (coding_system))
3441     goto label_invalid_coding_system;
3442
3443   coding_spec = Fget (coding_system, Qcoding_system);
3444
3445   if (!VECTORP (coding_spec)
3446       || XVECTOR (coding_spec)->size != 5
3447       || !CONSP (XVECTOR (coding_spec)->contents[3]))
3448     goto label_invalid_coding_system;
3449
3450   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
3451   if (VECTORP (eol_type))
3452     {
3453       coding->eol_type = CODING_EOL_UNDECIDED;
3454       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
3455     }
3456   else if (XFASTINT (eol_type) == 1)
3457     {
3458       coding->eol_type = CODING_EOL_CRLF;
3459       coding->common_flags
3460         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3461     }
3462   else if (XFASTINT (eol_type) == 2)
3463     {
3464       coding->eol_type = CODING_EOL_CR;
3465       coding->common_flags
3466         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3467     }
3468   else
3469     coding->eol_type = CODING_EOL_LF;
3470
3471   coding_type = XVECTOR (coding_spec)->contents[0];
3472   /* Try short cut.  */
3473   if (SYMBOLP (coding_type))
3474     {
3475       if (EQ (coding_type, Qt))
3476         {
3477           coding->type = coding_type_undecided;
3478           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
3479         }
3480       else
3481         coding->type = coding_type_no_conversion;
3482       /* Initialize this member.  Any thing other than
3483          CODING_CATEGORY_IDX_UTF_16_BE and
3484          CODING_CATEGORY_IDX_UTF_16_LE are ok because they have
3485          special treatment in detect_eol.  */
3486       coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
3487
3488       return 0;
3489     }
3490
3491   /* Get values of coding system properties:
3492      `post-read-conversion', `pre-write-conversion',
3493      `translation-table-for-decode', `translation-table-for-encode'.  */
3494   plist = XVECTOR (coding_spec)->contents[3];
3495   /* Pre & post conversion functions should be disabled if
3496      inhibit_eol_conversion is nonzero.  This is the case that a code
3497      conversion function is called while those functions are running.  */
3498   if (! inhibit_pre_post_conversion)
3499     {
3500       coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
3501       coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
3502     }
3503   val = Fplist_get (plist, Qtranslation_table_for_decode);
3504   if (SYMBOLP (val))
3505     val = Fget (val, Qtranslation_table_for_decode);
3506   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
3507   val = Fplist_get (plist, Qtranslation_table_for_encode);
3508   if (SYMBOLP (val))
3509     val = Fget (val, Qtranslation_table_for_encode);
3510   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
3511   val = Fplist_get (plist, Qcoding_category);
3512   if (!NILP (val))
3513     {
3514       val = Fget (val, Qcoding_category_index);
3515       if (INTEGERP (val))
3516         coding->category_idx = XINT (val);
3517       else
3518         goto label_invalid_coding_system;
3519     }
3520   else
3521     goto label_invalid_coding_system;
3522
3523   /* If the coding system has non-nil `composition' property, enable
3524      composition handling.  */
3525   val = Fplist_get (plist, Qcomposition);
3526   if (!NILP (val))
3527     coding->composing = COMPOSITION_NO;
3528
3529   switch (XFASTINT (coding_type))
3530     {
3531     case 0:
3532       coding->type = coding_type_emacs_mule;
3533       coding->common_flags
3534         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3535       if (!NILP (coding->post_read_conversion))
3536         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
3537       if (!NILP (coding->pre_write_conversion))
3538         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
3539       break;
3540
3541     case 1:
3542       coding->type = coding_type_sjis;
3543       coding->common_flags
3544         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3545       break;
3546
3547     case 2:
3548       coding->type = coding_type_iso2022;
3549       coding->common_flags
3550         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3551       {
3552         Lisp_Object val, temp;
3553         Lisp_Object *flags;
3554         int i, charset, reg_bits = 0;
3555
3556         val = XVECTOR (coding_spec)->contents[4];
3557
3558         if (!VECTORP (val) || XVECTOR (val)->size != 32)
3559           goto label_invalid_coding_system;
3560
3561         flags = XVECTOR (val)->contents;
3562         coding->flags
3563           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
3564              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
3565              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
3566              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
3567              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
3568              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
3569              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
3570              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
3571              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
3572              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
3573              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3574              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
3575              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
3576              );
3577
3578         /* Invoke graphic register 0 to plane 0.  */
3579         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
3580         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
3581         CODING_SPEC_ISO_INVOCATION (coding, 1)
3582           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
3583         /* Not single shifting at first.  */
3584         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
3585         /* Beginning of buffer should also be regarded as bol. */
3586         CODING_SPEC_ISO_BOL (coding) = 1;
3587
3588         for (charset = 0; charset <= MAX_CHARSET; charset++)
3589           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
3590         val = Vcharset_revision_alist;
3591         while (CONSP (val))
3592           {
3593             charset = get_charset_id (Fcar_safe (XCAR (val)));
3594             if (charset >= 0
3595                 && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp))
3596                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
3597               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
3598             val = XCDR (val);
3599           }
3600
3601         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
3602            FLAGS[REG] can be one of below:
3603                 integer CHARSET: CHARSET occupies register I,
3604                 t: designate nothing to REG initially, but can be used
3605                   by any charsets,
3606                 list of integer, nil, or t: designate the first
3607                   element (if integer) to REG initially, the remaining
3608                   elements (if integer) is designated to REG on request,
3609                   if an element is t, REG can be used by any charsets,
3610                 nil: REG is never used.  */
3611         for (charset = 0; charset <= MAX_CHARSET; charset++)
3612           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3613             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
3614         for (i = 0; i < 4; i++)
3615           {
3616             if ((INTEGERP (flags[i])
3617                  && (charset = XINT (flags[i]), CHARSET_VALID_P (charset)))
3618                 || (charset = get_charset_id (flags[i])) >= 0)
3619               {
3620                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3621                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
3622               }
3623             else if (EQ (flags[i], Qt))
3624               {
3625                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3626                 reg_bits |= 1 << i;
3627                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3628               }
3629             else if (CONSP (flags[i]))
3630               {
3631                 Lisp_Object tail;
3632                 tail = flags[i];
3633
3634                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
3635                 if ((INTEGERP (XCAR (tail))
3636                      && (charset = XINT (XCAR (tail)),
3637                          CHARSET_VALID_P (charset)))
3638                     || (charset = get_charset_id (XCAR (tail))) >= 0)
3639                   {
3640                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
3641                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
3642                   }
3643                 else
3644                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3645                 tail = XCDR (tail);
3646                 while (CONSP (tail))
3647                   {
3648                     if ((INTEGERP (XCAR (tail))
3649                          && (charset = XINT (XCAR (tail)),
3650                              CHARSET_VALID_P (charset)))
3651                         || (charset = get_charset_id (XCAR (tail))) >= 0)
3652                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3653                         = i;
3654                     else if (EQ (XCAR (tail), Qt))
3655                       reg_bits |= 1 << i;
3656                     tail = XCDR (tail);
3657                   }
3658               }
3659             else
3660               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
3661
3662             CODING_SPEC_ISO_DESIGNATION (coding, i)
3663               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3664           }
3665
3666         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3667           {
3668             /* REG 1 can be used only by locking shift in 7-bit env.  */
3669             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3670               reg_bits &= ~2;
3671             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3672               /* Without any shifting, only REG 0 and 1 can be used.  */
3673               reg_bits &= 3;
3674           }
3675
3676         if (reg_bits)
3677           for (charset = 0; charset <= MAX_CHARSET; charset++)
3678             {
3679               if (CHARSET_DEFINED_P (charset)
3680                   && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3681                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
3682                 {
3683                   /* There exist some default graphic registers to be
3684                      used by CHARSET.  */
3685
3686                   /* We had better avoid designating a charset of
3687                      CHARS96 to REG 0 as far as possible.  */
3688                   if (CHARSET_CHARS (charset) == 96)
3689                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3690                       = (reg_bits & 2
3691                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3692                   else
3693                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3694                       = (reg_bits & 1
3695                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3696                 }
3697             }
3698       }
3699       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3700       coding->spec.iso2022.last_invalid_designation_register = -1;
3701       break;
3702
3703     case 3:
3704       coding->type = coding_type_big5;
3705       coding->common_flags
3706         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3707       coding->flags
3708         = (NILP (XVECTOR (coding_spec)->contents[4])
3709            ? CODING_FLAG_BIG5_HKU
3710            : CODING_FLAG_BIG5_ETEN);
3711       break;
3712
3713     case 4:
3714       coding->type = coding_type_ccl;
3715       coding->common_flags
3716         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3717       {
3718         val = XVECTOR (coding_spec)->contents[4];
3719         if (! CONSP (val)
3720             || setup_ccl_program (&(coding->spec.ccl.decoder),
3721                                   XCAR (val)) < 0
3722             || setup_ccl_program (&(coding->spec.ccl.encoder),
3723                                   XCDR (val)) < 0)
3724           goto label_invalid_coding_system;
3725
3726         bzero (coding->spec.ccl.valid_codes, 256);
3727         val = Fplist_get (plist, Qvalid_codes);
3728         if (CONSP (val))
3729           {
3730             Lisp_Object this;
3731
3732             for (; CONSP (val); val = XCDR (val))
3733               {
3734                 this = XCAR (val);
3735                 if (INTEGERP (this)
3736                     && XINT (this) >= 0 && XINT (this) < 256)
3737                   coding->spec.ccl.valid_codes[XINT (this)] = 1;
3738                 else if (CONSP (this)
3739                          && INTEGERP (XCAR (this))
3740                          && INTEGERP (XCDR (this)))
3741                   {
3742                     int start = XINT (XCAR (this));
3743                     int end = XINT (XCDR (this));
3744
3745                     if (start >= 0 && start <= end && end < 256)
3746                       while (start <= end)
3747                         coding->spec.ccl.valid_codes[start++] = 1;
3748                   }
3749               }
3750           }
3751       }
3752       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3753       coding->spec.ccl.cr_carryover = 0;
3754       coding->spec.ccl.eight_bit_carryover[0] = 0;
3755       break;
3756
3757     case 5:
3758       coding->type = coding_type_raw_text;
3759       break;
3760
3761     default:
3762       goto label_invalid_coding_system;
3763     }
3764   return 0;
3765
3766  label_invalid_coding_system:
3767   coding->type = coding_type_no_conversion;
3768   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3769   coding->common_flags = 0;
3770   coding->eol_type = CODING_EOL_LF;
3771   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3772   return -1;
3773 }
3774
3775 /* Free memory blocks allocated for storing composition information.  */
3776
3777 void
3778 coding_free_composition_data (coding)
3779      struct coding_system *coding;
3780 {
3781   struct composition_data *cmp_data = coding->cmp_data, *next;
3782
3783   if (!cmp_data)
3784     return;
3785   /* Memory blocks are chained.  At first, rewind to the first, then,
3786      free blocks one by one.  */
3787   while (cmp_data->prev)
3788     cmp_data = cmp_data->prev;
3789   while (cmp_data)
3790     {
3791       next = cmp_data->next;
3792       xfree (cmp_data);
3793       cmp_data = next;
3794     }
3795   coding->cmp_data = NULL;
3796 }
3797
3798 /* Set `char_offset' member of all memory blocks pointed by
3799    coding->cmp_data to POS.  */
3800
3801 void
3802 coding_adjust_composition_offset (coding, pos)
3803      struct coding_system *coding;
3804      int pos;
3805 {
3806   struct composition_data *cmp_data;
3807
3808   for (cmp_data = coding->cmp_data; cmp_data; cmp_data = cmp_data->next)
3809     cmp_data->char_offset = pos;
3810 }
3811
3812 /* Setup raw-text or one of its subsidiaries in the structure
3813    coding_system CODING according to the already setup value eol_type
3814    in CODING.  CODING should be setup for some coding system in
3815    advance.  */
3816
3817 void
3818 setup_raw_text_coding_system (coding)
3819      struct coding_system *coding;
3820 {
3821   if (coding->type != coding_type_raw_text)
3822     {
3823       coding->symbol = Qraw_text;
3824       coding->type = coding_type_raw_text;
3825       if (coding->eol_type != CODING_EOL_UNDECIDED)
3826         {
3827           Lisp_Object subsidiaries;
3828           subsidiaries = Fget (Qraw_text, Qeol_type);
3829
3830           if (VECTORP (subsidiaries)
3831               && XVECTOR (subsidiaries)->size == 3)
3832             coding->symbol
3833               = XVECTOR (subsidiaries)->contents[coding->eol_type];
3834         }
3835       setup_coding_system (coding->symbol, coding);
3836     }
3837   return;
3838 }
3839
3840 /* Emacs has a mechanism to automatically detect a coding system if it
3841    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3842    it's impossible to distinguish some coding systems accurately
3843    because they use the same range of codes.  So, at first, coding
3844    systems are categorized into the following categories:
3845
3846    o coding-category-emacs-mule
3847
3848         The category for a coding system which has the same code range
3849         as Emacs' internal format.  Assigned the coding-system (Lisp
3850         symbol) `emacs-mule' by default.
3851
3852    o coding-category-sjis
3853
3854         The category for a coding system which has the same code range
3855         as SJIS.  Assigned the coding-system (Lisp
3856         symbol) `japanese-shift-jis' by default.
3857
3858    o coding-category-iso-7
3859
3860         The category for a coding system which has the same code range
3861         as ISO2022 of 7-bit environment.  This doesn't use any locking
3862         shift and single shift functions.  This can encode/decode all
3863         charsets.  Assigned the coding-system (Lisp symbol)
3864         `iso-2022-7bit' by default.
3865
3866    o coding-category-iso-7-tight
3867
3868         Same as coding-category-iso-7 except that this can
3869         encode/decode only the specified charsets.
3870
3871    o coding-category-iso-8-1
3872
3873         The category for a coding system which has the same code range
3874         as ISO2022 of 8-bit environment and graphic plane 1 used only
3875         for DIMENSION1 charset.  This doesn't use any locking shift
3876         and single shift functions.  Assigned the coding-system (Lisp
3877         symbol) `iso-latin-1' by default.
3878
3879    o coding-category-iso-8-2
3880
3881         The category for a coding system which has the same code range
3882         as ISO2022 of 8-bit environment and graphic plane 1 used only
3883         for DIMENSION2 charset.  This doesn't use any locking shift
3884         and single shift functions.  Assigned the coding-system (Lisp
3885         symbol) `japanese-iso-8bit' by default.
3886
3887    o coding-category-iso-7-else
3888
3889         The category for a coding system which has the same code range
3890         as ISO2022 of 7-bit environment but uses locking shift or
3891         single shift functions.  Assigned the coding-system (Lisp
3892         symbol) `iso-2022-7bit-lock' by default.
3893
3894    o coding-category-iso-8-else
3895
3896         The category for a coding system which has the same code range
3897         as ISO2022 of 8-bit environment but uses locking shift or
3898         single shift functions.  Assigned the coding-system (Lisp
3899         symbol) `iso-2022-8bit-ss2' by default.
3900
3901    o coding-category-big5
3902
3903         The category for a coding system which has the same code range
3904         as BIG5.  Assigned the coding-system (Lisp symbol)
3905         `cn-big5' by default.
3906
3907    o coding-category-utf-8
3908
3909         The category for a coding system which has the same code range
3910         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
3911         symbol) `utf-8' by default.
3912
3913    o coding-category-utf-16-be
3914
3915         The category for a coding system in which a text has an
3916         Unicode signature (cf. Unicode Standard) in the order of BIG
3917         endian at the head.  Assigned the coding-system (Lisp symbol)
3918         `utf-16-be' by default.
3919
3920    o coding-category-utf-16-le
3921
3922         The category for a coding system in which a text has an
3923         Unicode signature (cf. Unicode Standard) in the order of
3924         LITTLE endian at the head.  Assigned the coding-system (Lisp
3925         symbol) `utf-16-le' by default.
3926
3927    o coding-category-ccl
3928
3929         The category for a coding system of which encoder/decoder is
3930         written in CCL programs.  The default value is nil, i.e., no
3931         coding system is assigned.
3932
3933    o coding-category-binary
3934
3935         The category for a coding system not categorized in any of the
3936         above.  Assigned the coding-system (Lisp symbol)
3937         `no-conversion' by default.
3938
3939    Each of them is a Lisp symbol and the value is an actual
3940    `coding-system' (this is also a Lisp symbol) assigned by a user.
3941    What Emacs does actually is to detect a category of coding system.
3942    Then, it uses a `coding-system' assigned to it.  If Emacs can't
3943    decide a single possible category, it selects a category of the
3944    highest priority.  Priorities of categories are also specified by a
3945    user in a Lisp variable `coding-category-list'.
3946
3947 */
3948
3949 static
3950 int ascii_skip_code[256];
3951
3952 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3953    If it detects possible coding systems, return an integer in which
3954    appropriate flag bits are set.  Flag bits are defined by macros
3955    CODING_CATEGORY_MASK_XXX in `coding.h'.  If PRIORITIES is non-NULL,
3956    it should point the table `coding_priorities'.  In that case, only
3957    the flag bit for a coding system of the highest priority is set in
3958    the returned value.  If MULTIBYTEP is nonzero, 8-bit codes of the
3959    range 0x80..0x9F are in multibyte form.
3960
3961    How many ASCII characters are at the head is returned as *SKIP.  */
3962
3963 static int
3964 detect_coding_mask (source, src_bytes, priorities, skip, multibytep)
3965      unsigned char *source;
3966      int src_bytes, *priorities, *skip;
3967      int multibytep;
3968 {
3969   register unsigned char c;
3970   unsigned char *src = source, *src_end = source + src_bytes;
3971   unsigned int mask, utf16_examined_p, iso2022_examined_p;
3972   int i;
3973
3974   /* At first, skip all ASCII characters and control characters except
3975      for three ISO2022 specific control characters.  */
3976   ascii_skip_code[ISO_CODE_SO] = 0;
3977   ascii_skip_code[ISO_CODE_SI] = 0;
3978   ascii_skip_code[ISO_CODE_ESC] = 0;
3979
3980  label_loop_detect_coding:
3981   while (src < src_end && ascii_skip_code[*src]) src++;
3982   *skip = src - source;
3983
3984   if (src >= src_end)
3985     /* We found nothing other than ASCII.  There's nothing to do.  */
3986     return 0;
3987
3988   c = *src;
3989   /* The text seems to be encoded in some multilingual coding system.
3990      Now, try to find in which coding system the text is encoded.  */
3991   if (c < 0x80)
3992     {
3993       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3994       /* C is an ISO2022 specific control code of C0.  */
3995       mask = detect_coding_iso2022 (src, src_end, multibytep);
3996       if (mask == 0)
3997         {
3998           /* No valid ISO2022 code follows C.  Try again.  */
3999           src++;
4000           if (c == ISO_CODE_ESC)
4001             ascii_skip_code[ISO_CODE_ESC] = 1;
4002           else
4003             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
4004           goto label_loop_detect_coding;
4005         }
4006       if (priorities)
4007         {
4008           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
4009             {
4010               if (mask & priorities[i])
4011                 return priorities[i];
4012             }
4013           return CODING_CATEGORY_MASK_RAW_TEXT;
4014         }
4015     }
4016   else
4017     {
4018       int try;
4019
4020       if (multibytep && c == LEADING_CODE_8_BIT_CONTROL)
4021         c = src[1] - 0x20;
4022
4023       if (c < 0xA0)
4024         {
4025           /* C is the first byte of SJIS character code,
4026              or a leading-code of Emacs' internal format (emacs-mule),
4027              or the first byte of UTF-16.  */
4028           try = (CODING_CATEGORY_MASK_SJIS
4029                   | CODING_CATEGORY_MASK_EMACS_MULE
4030                   | CODING_CATEGORY_MASK_UTF_16_BE
4031                   | CODING_CATEGORY_MASK_UTF_16_LE);
4032
4033           /* Or, if C is a special latin extra code,
4034              or is an ISO2022 specific control code of C1 (SS2 or SS3),
4035              or is an ISO2022 control-sequence-introducer (CSI),
4036              we should also consider the possibility of ISO2022 codings.  */
4037           if ((VECTORP (Vlatin_extra_code_table)
4038                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
4039               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
4040               || (c == ISO_CODE_CSI
4041                   && (src < src_end
4042                       && (*src == ']'
4043                           || ((*src == '0' || *src == '1' || *src == '2')
4044                               && src + 1 < src_end
4045                               && src[1] == ']')))))
4046             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
4047                      | CODING_CATEGORY_MASK_ISO_8BIT);
4048         }
4049       else
4050         /* C is a character of ISO2022 in graphic plane right,
4051            or a SJIS's 1-byte character code (i.e. JISX0201),
4052            or the first byte of BIG5's 2-byte code,
4053            or the first byte of UTF-8/16.  */
4054         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
4055                 | CODING_CATEGORY_MASK_ISO_8BIT
4056                 | CODING_CATEGORY_MASK_SJIS
4057                 | CODING_CATEGORY_MASK_BIG5
4058                 | CODING_CATEGORY_MASK_UTF_8
4059                 | CODING_CATEGORY_MASK_UTF_16_BE
4060                 | CODING_CATEGORY_MASK_UTF_16_LE);
4061
4062       /* Or, we may have to consider the possibility of CCL.  */
4063       if (coding_system_table[CODING_CATEGORY_IDX_CCL]
4064           && (coding_system_table[CODING_CATEGORY_IDX_CCL]
4065               ->spec.ccl.valid_codes)[c])
4066         try |= CODING_CATEGORY_MASK_CCL;
4067
4068       mask = 0;
4069       utf16_examined_p = iso2022_examined_p = 0;
4070       if (priorities)
4071         {
4072           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
4073             {
4074               if (!iso2022_examined_p
4075                   && (priorities[i] & try & CODING_CATEGORY_MASK_ISO))
4076                 {
4077                   mask |= detect_coding_iso2022 (src, src_end, multibytep);
4078                   iso2022_examined_p = 1;
4079                 }
4080               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
4081                 mask |= detect_coding_sjis (src, src_end, multibytep);
4082               else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8)
4083                 mask |= detect_coding_utf_8 (src, src_end, multibytep);
4084               else if (!utf16_examined_p
4085                        && (priorities[i] & try &
4086                            CODING_CATEGORY_MASK_UTF_16_BE_LE))
4087                 {
4088                   mask |= detect_coding_utf_16 (src, src_end, multibytep);
4089                   utf16_examined_p = 1;
4090                 }
4091               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
4092                 mask |= detect_coding_big5 (src, src_end, multibytep);
4093               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
4094                 mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4095               else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL)
4096                 mask |= detect_coding_ccl (src, src_end, multibytep);
4097               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
4098                 mask |= CODING_CATEGORY_MASK_RAW_TEXT;
4099               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
4100                 mask |= CODING_CATEGORY_MASK_BINARY;
4101               if (mask & priorities[i])
4102                 return priorities[i];
4103             }
4104           return CODING_CATEGORY_MASK_RAW_TEXT;
4105         }
4106       if (try & CODING_CATEGORY_MASK_ISO)
4107         mask |= detect_coding_iso2022 (src, src_end, multibytep);
4108       if (try & CODING_CATEGORY_MASK_SJIS)
4109         mask |= detect_coding_sjis (src, src_end, multibytep);
4110       if (try & CODING_CATEGORY_MASK_BIG5)
4111         mask |= detect_coding_big5 (src, src_end, multibytep);
4112       if (try & CODING_CATEGORY_MASK_UTF_8)
4113         mask |= detect_coding_utf_8 (src, src_end, multibytep);
4114       if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE)
4115         mask |= detect_coding_utf_16 (src, src_end, multibytep);
4116       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
4117         mask |= detect_coding_emacs_mule (src, src_end, multibytep);
4118       if (try & CODING_CATEGORY_MASK_CCL)
4119         mask |= detect_coding_ccl (src, src_end, multibytep);
4120     }
4121   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
4122 }
4123
4124 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
4125    The information of the detected coding system is set in CODING.  */
4126
4127 void
4128 detect_coding (coding, src, src_bytes)
4129      struct coding_system *coding;
4130      const unsigned char *src;
4131      int src_bytes;
4132 {
4133   unsigned int idx;
4134   int skip, mask;
4135   Lisp_Object val;
4136
4137   val = Vcoding_category_list;
4138   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip,
4139                              coding->src_multibyte);
4140   coding->heading_ascii = skip;
4141
4142   if (!mask) return;
4143
4144   /* We found a single coding system of the highest priority in MASK.  */
4145   idx = 0;
4146   while (mask && ! (mask & 1)) mask >>= 1, idx++;
4147   if (! mask)
4148     idx = CODING_CATEGORY_IDX_RAW_TEXT;
4149
4150   val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[idx]);
4151
4152   if (coding->eol_type != CODING_EOL_UNDECIDED)
4153     {
4154       Lisp_Object tmp;
4155
4156       tmp = Fget (val, Qeol_type);
4157       if (VECTORP (tmp))
4158         val = XVECTOR (tmp)->contents[coding->eol_type];
4159     }
4160
4161   /* Setup this new coding system while preserving some slots.  */
4162   {
4163     int src_multibyte = coding->src_multibyte;
4164     int dst_multibyte = coding->dst_multibyte;
4165
4166     setup_coding_system (val, coding);
4167     coding->src_multibyte = src_multibyte;
4168     coding->dst_multibyte = dst_multibyte;
4169     coding->heading_ascii = skip;
4170   }
4171 }
4172
/* Detect how end-of-line of a text of length SRC_BYTES pointed by
   SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
   CODING_EOL_CR, CODING_EOL_INCONSISTENT (when the end-of-lines seen
   disagree), and CODING_EOL_UNDECIDED (when no end-of-line is found).
   At most MAX_EOL_CHECK_COUNT end-of-lines are examined.

   How many non-eol characters are at the head is returned as *SKIP,
   i.e. the offset of the first end-of-line, or SRC_BYTES when the
   text contains none.  */

#define MAX_EOL_CHECK_COUNT 3

static int
detect_eol_type (source, src_bytes, skip)
     const unsigned char *source;
     int src_bytes, *skip;
{
  const unsigned char *src = source, *src_end = src + src_bytes;
  unsigned char c;
  int total = 0;                /* How many end-of-lines are found so far.  */
  int eol_type = CODING_EOL_UNDECIDED;
  int this_eol_type;

  *skip = 0;

  while (src < src_end && total < MAX_EOL_CHECK_COUNT)
    {
      c = *src++;
      if (c == '\n' || c == '\r')
        {
          /* Record the offset of the first end-of-line only.  TOTAL
             (not *SKIP == 0) tells whether one was already seen, so
             an end-of-line at offset 0 is not overwritten later.  */
          if (total == 0)
            *skip = src - 1 - source;
          total++;
          if (c == '\n')
            this_eol_type = CODING_EOL_LF;
          else if (src >= src_end || *src != '\n')
            this_eol_type = CODING_EOL_CR;
          else
            this_eol_type = CODING_EOL_CRLF, src++;

          if (eol_type == CODING_EOL_UNDECIDED)
            /* This is the first end-of-line.  */
            eol_type = this_eol_type;
          else if (eol_type != this_eol_type)
            {
              /* The found type is different from what found before.  */
              eol_type = CODING_EOL_INCONSISTENT;
              break;
            }
        }
    }

  /* No end-of-line at all: the whole text is non-eol characters.  */
  if (total == 0)
    *skip = src_end - source;
  return eol_type;
}
4225
/* Like detect_eol_type, but detect EOL type in 2-octet
   big-endian/little-endian format for coding systems utf-16-be and
   utf-16-le.  BIG_ENDIAN_P nonzero means the first octet of each pair
   is the most significant one.  *SKIP receives the byte offset of the
   first end-of-line pair, or SRC_BYTES when none is found.  */

static int
detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p)
     const unsigned char *source;
     int src_bytes, *skip, big_endian_p;
{
  const unsigned char *src = source, *src_end = src + src_bytes;
  unsigned int c1, c2;
  int total = 0;                /* How many end-of-lines are found so far.  */
  int eol_type = CODING_EOL_UNDECIDED;
  int this_eol_type;
  int msb, lsb;

  /* Offsets of the high and low octets within each 2-octet unit.  */
  if (big_endian_p)
    msb = 0, lsb = 1;
  else
    msb = 1, lsb = 0;

  *skip = 0;

  while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT)
    {
      c1 = (src[msb] << 8) | (src[lsb]);
      src += 2;

      if (c1 == '\n' || c1 == '\r')
        {
          /* Record the offset of the first end-of-line only.  TOTAL
             (not *SKIP == 0) tells whether one was already seen, so
             an end-of-line at offset 0 is not overwritten later.  */
          if (total == 0)
            *skip = src - 2 - source;
          total++;
          if (c1 == '\n')
            {
              this_eol_type = CODING_EOL_LF;
            }
          else
            {
              if ((src + 1) >= src_end)
                {
                  /* No complete unit follows the CR.  */
                  this_eol_type = CODING_EOL_CR;
                }
              else
                {
                  c2 = (src[msb] << 8) | (src[lsb]);
                  if (c2 == '\n')
                    this_eol_type = CODING_EOL_CRLF, src += 2;
                  else
                    this_eol_type = CODING_EOL_CR;
                }
            }

          if (eol_type == CODING_EOL_UNDECIDED)
            /* This is the first end-of-line.  */
            eol_type = this_eol_type;
          else if (eol_type != this_eol_type)
            {
              /* The found type is different from what found before.  */
              eol_type = CODING_EOL_INCONSISTENT;
              break;
            }
        }
    }

  /* No end-of-line at all: the whole text is non-eol characters.  */
  if (total == 0)
    *skip = src_end - source;
  return eol_type;
}
4295
4296 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
4297    is encoded.  If it detects an appropriate format of end-of-line, it
4298    sets the information in *CODING.  */
4299
4300 void
4301 detect_eol (coding, src, src_bytes)
4302      struct coding_system *coding;     /* Updated in place when an EOL type is found.  */
4303      const unsigned char *src;         /* Text to examine.  */
4304      int src_bytes;                    /* Number of bytes at SRC.  */
4305 {
4306   Lisp_Object val;
4307   int skip;                     /* Filled in by the detector called below.  */
4308   int eol_type;
4309
4310   switch (coding->category_idx) /* UTF-16 categories use the 2-octet detector.  */
4311     {
4312     case CODING_CATEGORY_IDX_UTF_16_BE:
4313       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 1);
4314       break;
4315     case CODING_CATEGORY_IDX_UTF_16_LE:
4316       eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 0);
4317       break;
4318     default:
4319       eol_type = detect_eol_type (src, src_bytes, &skip);
4320       break;
4321     }
4322
4323   if (coding->heading_ascii > skip) /* Keep the smaller value in both places.  */
4324     coding->heading_ascii = skip;
4325   else
4326     skip = coding->heading_ascii;
4327
4328   if (eol_type == CODING_EOL_UNDECIDED) /* Nothing detected; leave CODING alone.  */
4329     return;
4330   if (eol_type == CODING_EOL_INCONSISTENT)
4331     {
4332 #if 0
4333       /* This code is suppressed until we find a better way to
4334          distinguish raw text file and binary file.  */
4335
4336       /* If we have already detected that the coding is raw-text, the
4337          coding should actually be no-conversion.  */
4338       if (coding->type == coding_type_raw_text)
4339         {
4340           setup_coding_system (Qno_conversion, coding);
4341           return;
4342         }
4343       /* Else, let's decode only text code anyway.  */
4344 #endif /* 0 */
4345       eol_type = CODING_EOL_LF; /* Inconsistent EOLs are handled as LF.  */
4346     }
4347
4348   val = Fget (coding->symbol, Qeol_type);
4349   if (VECTORP (val) && XVECTOR (val)->size == 3) /* VAL is indexed by EOL_TYPE below.  */
4350     {
4351       int src_multibyte = coding->src_multibyte; /* These three fields are saved ...  */
4352       int dst_multibyte = coding->dst_multibyte;
4353       struct composition_data *cmp_data = coding->cmp_data;
4354
4355       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
4356       coding->src_multibyte = src_multibyte; /* ... and put back after the setup.  */
4357       coding->dst_multibyte = dst_multibyte;
4358       coding->heading_ascii = skip;
4359       coding->cmp_data = cmp_data;
4360     }
4361 }
4362
4363 #define CONVERSION_BUFFER_EXTRA_ROOM 256 /* Slack bytes added to buffer size estimates.  */
4364 /* Factor applied to the source size when decoding by CODING (a pointer): 3 for ISO-2022, the CCL decoder's own value, else 2.  */
4365 #define DECODING_BUFFER_MAG(coding)                     \
4366   ((coding)->type == coding_type_iso2022                \
4367    ? 3                                                  \
4368    : ((coding)->type == coding_type_ccl                 \
4369       ? (coding)->spec.ccl.decoder.buf_magnification    \
4370       : 2))
4371
4372 /* Estimate an upper bound, in bytes, on the buffer space needed to
4373    decode SRC_BYTES bytes of text that is encoded in CODING.  */
4374
4375 int
4376 decoding_buffer_size (coding, src_bytes)
4377      struct coding_system *coding;
4378      int src_bytes;
4379 {
4380   int needed = src_bytes * DECODING_BUFFER_MAG (coding) + CONVERSION_BUFFER_EXTRA_ROOM;
4381   return needed;
4382 }
4383
4384 /* Estimate an upper bound, in bytes, on the buffer space needed to
4385    encode SRC_BYTES bytes of text into CODING.  */
4386
4387 int
4388 encoding_buffer_size (coding, src_bytes)
4389      struct coding_system *coding;
4390      int src_bytes;
4391 {
4392   /* A CCL coding system supplies its own factor; any other one that
4393      requires encoding gets 3; otherwise the factor is 1.  */
4394   int factor
4395     = (coding->type == coding_type_ccl
4396        ? coding->spec.ccl.encoder.buf_magnification
4397        : (CODING_REQUIRE_ENCODING (coding)
4398           ? 3
4399           : 1));
4400
4401   return (src_bytes * factor + CONVERSION_BUFFER_EXTRA_ROOM);
4402 }
4403
4404 /* Working buffer for code conversion.  */
4405 struct conversion_buffer
4406 {
4407   int size;                     /* number of bytes allocated at DATA.  */
4408   int on_stack;                 /* 1 if allocated by alloca, 0 if by xmalloc.  */
4409   unsigned char *data;          /* start of the allocated storage.  */
4410 };
4411
4412 /* Don't use alloca for allocating memory space larger than this, lest
4413    we overflow their stack.  */
4414 #define MAX_ALLOCA 16*1024
4415
4416 /* Allocate LEN bytes for BUF (a struct conversion_buffer lvalue): alloca below MAX_ALLOCA, else xmalloc.  LEN is evaluated several times.  */
4417 #define allocate_conversion_buffer(buf, len)            \
4418   do {                                                  \
4419     if ((len) < MAX_ALLOCA)                             \
4420       {                                                 \
4421         (buf).data = (unsigned char *) alloca (len);    \
4422         (buf).on_stack = 1;                             \
4423       }                                                 \
4424     else                                                \
4425       {                                                 \
4426         (buf).data = (unsigned char *) xmalloc (len);   \
4427         (buf).on_stack = 0;                             \
4428       }                                                 \
4429     (buf).size = (len);                                 \
4430   } while (0)
4431
4432 /* Grow the storage of *BUF to twice its current size, keeping its contents.  */
4433 static void
4434 extend_conversion_buffer (buf)
4435      struct conversion_buffer *buf;
4436 {
4437   int doubled = buf->size * 2;
4438   unsigned char *grown;
4439   if (!buf->on_stack)
4440     grown = (unsigned char *) xrealloc (buf->data, doubled);
4441   else
4442     {
4443       grown = (unsigned char *) xmalloc (doubled); /* Move from alloca'ed storage to the heap.  */
4444       bcopy (buf->data, grown, buf->size);
4445       buf->on_stack = 0;
4446     }
4447   buf->data = grown;
4448   buf->size = doubled;
4449 }
4450
4451 /* Release the storage of BUF with xfree; storage marked on_stack is left alone.  */
4452 static void
4453 free_conversion_buffer (buf)
4454      struct conversion_buffer *buf;
4455 {
4456   if (buf->on_stack == 0)
4457     xfree (buf->data);
4458 }
4459 /* Run CODING's CCL encoder (ENCODEP nonzero) or decoder over SOURCE, record the counts in CODING, and return CODING->result.  */
4460 int
4461 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
4462      struct coding_system *coding;
4463      unsigned char *source, *destination;
4464      int src_bytes, dst_bytes, encodep;
4465 {
4466   struct ccl_program *ccl
4467     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
4468   unsigned char *dst = destination; /* Where ccl_driver will start writing.  */
4469
4470   ccl->suppress_error = coding->suppress_error;
4471   ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4472   if (encodep)
4473     {
4474       /* On encoding, EOL format is converted within ccl_driver.  For
4475          that, set up the proper information in the structure CCL.  */
4476       ccl->eol_type = coding->eol_type;
4477       if (ccl->eol_type ==CODING_EOL_UNDECIDED)
4478         ccl->eol_type = CODING_EOL_LF;
4479       ccl->cr_consumed = coding->spec.ccl.cr_carryover;
4480     }
4481   ccl->multibyte = coding->src_multibyte;
4482   if (coding->spec.ccl.eight_bit_carryover[0] != 0)
4483     {
4484       /* Move the zero-terminated carryover bytes to DESTINATION.  */
4485       unsigned char *p = coding->spec.ccl.eight_bit_carryover;
4486       while (*p)
4487         *dst++ = *p++;
4488       coding->spec.ccl.eight_bit_carryover[0] = 0;
4489       if (dst_bytes)            /* A zero DST_BYTES is passed on unchanged.  */
4490         dst_bytes -= dst - destination;
4491     }
4492
4493   coding->produced = (ccl_driver (ccl, source, dst, src_bytes, dst_bytes,
4494                                   &(coding->consumed))
4495                       + dst - destination); /* Count the moved carryover bytes too.  */
4496
4497   if (encodep)
4498     {
4499       coding->produced_char = coding->produced;
4500       coding->spec.ccl.cr_carryover = ccl->cr_consumed;
4501     }
4502   else if (!ccl->eight_bit_control)
4503     {
4504       /* The produced bytes form a valid multibyte sequence.  */
4505       coding->produced_char
4506         = multibyte_chars_in_text (destination, coding->produced);
4507       coding->spec.ccl.eight_bit_carryover[0] = 0;
4508     }
4509   else
4510     {
4511       /* On decoding, the destination should always be multibyte.  But
4512          the CCL program might have generated an invalid multibyte
4513          sequence.  Here we make such a sequence valid as
4514          multibyte.  */
4515       int bytes                 /* Length argument handed to str_as_multibyte below.  */
4516         = dst_bytes ? dst_bytes : source + coding->consumed - destination;
4517
4518       if ((coding->consumed < src_bytes
4519            || !ccl->last_block)
4520           && coding->produced >= 1
4521           && destination[coding->produced - 1] >= 0x80)
4522         {
4523           /* We should not convert the trailing 8-bit codes to
4524              multibyte form even if they don't form a valid
4525              multibyte sequence.  They may form a valid sequence in
4526              the next call.  */
4527           int carryover = 0;    /* Number of trailing bytes to hold back (0 to 3).  */
4528
4529           if (destination[coding->produced - 1] < 0xA0)
4530             carryover = 1;
4531           else if (coding->produced >= 2)
4532             {
4533               if (destination[coding->produced - 2] >= 0x80)
4534                 {
4535                   if (destination[coding->produced - 2] < 0xA0)
4536                     carryover = 2;
4537                   else if (coding->produced >= 3
4538                            && destination[coding->produced - 3] >= 0x80
4539                            && destination[coding->produced - 3] < 0xA0)
4540                     carryover = 3;
4541                 }
4542             }
4543           if (carryover > 0)
4544             {
4545               BCOPY_SHORT (destination + coding->produced - carryover,
4546                            coding->spec.ccl.eight_bit_carryover,
4547                            carryover);
4548               coding->spec.ccl.eight_bit_carryover[carryover] = 0; /* Zero-terminate.  */
4549               coding->produced -= carryover;
4550             }
4551         }
4552       coding->produced = str_as_multibyte (destination, bytes,
4553                                            coding->produced,
4554                                            &(coding->produced_char));
4555     }
4556
4557   switch (ccl->status)          /* Translate the CCL status into a CODING_FINISH_* code.  */
4558     {
4559     case CCL_STAT_SUSPEND_BY_SRC:
4560       coding->result = CODING_FINISH_INSUFFICIENT_SRC;
4561       break;
4562     case CCL_STAT_SUSPEND_BY_DST:
4563       coding->result = CODING_FINISH_INSUFFICIENT_DST;
4564       break;
4565     case CCL_STAT_QUIT:
4566     case CCL_STAT_INVALID_CMD:
4567       coding->result = CODING_FINISH_INTERRUPT;
4568       break;
4569     default:
4570       coding->result = CODING_FINISH_NORMAL;
4571       break;
4572     }
4573   return coding->result;
4574 }
4575
4576 /* Decode EOL format of the text at PTR of BYTES length destructively
4577    according to CODING->eol_type, detecting the type first if it is
4578    still undecided.  This is called after the CCL program produced a
4579    decoded text at PTR.  If we do CRLF->LF conversion, update
4580    CODING->produced and CODING->produced_char.  BYTES may be zero.  */
4581 static void
4582 decode_eol_post_ccl (coding, ptr, bytes)
4583      struct coding_system *coding;
4584      unsigned char *ptr;
4585      int bytes;
4586 {
4587   Lisp_Object val, saved_coding_symbol;
4588   unsigned char *pend = ptr + bytes;
4589   int dummy;
4590
4591   /* Remember the current coding system symbol.  We set it back when
4592      an inconsistent EOL is found so that `last-coding-system-used' is
4593      set to the coding system that doesn't specify EOL conversion.  */
4594   saved_coding_symbol = coding->symbol;
4595
4596   coding->spec.ccl.cr_carryover = 0;
4597   if (coding->eol_type == CODING_EOL_UNDECIDED)
4598     {
4599       /* Here, to avoid the call of setup_coding_system, we directly
4600          call detect_eol_type.  */
4601       coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
4602       if (coding->eol_type == CODING_EOL_INCONSISTENT)
4603         coding->eol_type = CODING_EOL_LF;
4604       if (coding->eol_type != CODING_EOL_UNDECIDED)
4605         {
4606           val = Fget (coding->symbol, Qeol_type);
4607           if (VECTORP (val) && XVECTOR (val)->size == 3)
4608             coding->symbol = XVECTOR (val)->contents[coding->eol_type];
4609         }
4610       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4611     }
4612
4613   if (coding->eol_type == CODING_EOL_LF
4614       || coding->eol_type == CODING_EOL_UNDECIDED)
4615     {
4616       /* We have nothing to do.  */
4617       ptr = pend;
4618     }
4619   else if (coding->eol_type == CODING_EOL_CRLF)
4620     {
4621       unsigned char *pstart = ptr, *p = ptr; /* P is the write position.  */
4622
4623       if (! (coding->mode & CODING_MODE_LAST_BLOCK)
4624           && ptr < pend && *(pend - 1) == '\r') /* Never look before PTR when BYTES is 0.  */
4625         {
4626           /* If the last character is CR, we can't handle it here
4627              because LF will be in the not-yet-decoded source text.
4628              Record that the CR is not yet processed.  */
4629           coding->spec.ccl.cr_carryover = 1;
4630           coding->produced--;
4631           coding->produced_char--;
4632           pend--;
4633         }
4634       while (ptr < pend)
4635         {
4636           if (*ptr == '\r')
4637             {
4638               if (ptr + 1 < pend && *(ptr + 1) == '\n')
4639                 {
4640                   *p++ = '\n';
4641                   ptr += 2;
4642                 }
4643               else
4644                 {
4645                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4646                     goto undo_eol_conversion;
4647                   *p++ = *ptr++;
4648                 }
4649             }
4650           else if (*ptr == '\n'
4651                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4652             goto undo_eol_conversion;
4653           else
4654             *p++ = *ptr++;
4655           continue;
4656
4657         undo_eol_conversion:
4658           /* We are faced with an inconsistent EOL format at PTR.
4659              Convert all LFs before PTR back to CRLFs, working backward.  */
4660           for (p--, ptr--; p >= pstart; p--)
4661             {
4662               if (*p == '\n')
4663                 *ptr-- = '\n', *ptr-- = '\r';
4664               else
4665                 *ptr-- = *p;
4666             }
4667           /*  If carryover is recorded, cancel it because we don't
4668               convert CRLF anymore.  */
4669           if (coding->spec.ccl.cr_carryover)
4670             {
4671               coding->spec.ccl.cr_carryover = 0;
4672               coding->produced++;
4673               coding->produced_char++;
4674               pend++;
4675             }
4676           p = ptr = pend;       /* Ends the loop; nothing counts as deleted below.  */
4677           coding->eol_type = CODING_EOL_LF;
4678           coding->symbol = saved_coding_symbol;
4679         }
4680       if (p < pend)
4681         {
4682           /* As each two-byte sequence CRLF was converted to LF, (PEND
4683              - P) is the number of deleted characters.  */
4684           coding->produced -= pend - p;
4685           coding->produced_char -= pend - p;
4686         }
4687     }
4688   else                  /* i.e. coding->eol_type == CODING_EOL_CR */
4689     {
4690       unsigned char *p = ptr;   /* Start of the text, for undoing the conversion.  */
4691
4692       for (; ptr < pend; ptr++)
4693         {
4694           if (*ptr == '\r')
4695             *ptr = '\n';
4696           else if (*ptr == '\n'
4697                    && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
4698             {
4699               for (; p < ptr; p++) /* Turn the LFs written so far back into CRs.  */
4700                 {
4701                   if (*p == '\n')
4702                     *p = '\r';
4703                 }
4704               ptr = pend;
4705               coding->eol_type = CODING_EOL_LF;
4706               coding->symbol = saved_coding_symbol;
4707             }
4708         }
4709     }
4710 }
4711
4712 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
4713    decoding, it may detect coding system and format of end-of-line if
4714    those are not yet decided.  The source should be unibyte, the
4715    result is multibyte if CODING->dst_multibyte is nonzero, else
4716    unibyte.  The return value is CODING->result.  */
4717
4718 int
4719 decode_coding (coding, source, destination, src_bytes, dst_bytes)
4720      struct coding_system *coding;
4721      const unsigned char *source;
4722      unsigned char *destination;
4723      int src_bytes, dst_bytes;
4724 {
4725   int extra = 0;                /* Nonzero if a carried-over CR is put at DESTINATION[0].  */
4726
4727   if (coding->type == coding_type_undecided)
4728     detect_coding (coding, source, src_bytes);
4729
4730   if (coding->eol_type == CODING_EOL_UNDECIDED
4731       && coding->type != coding_type_ccl) /* For CCL, decode_eol_post_ccl is used below.  */
4732     {
4733       detect_eol (coding, source, src_bytes);
4734       /* We had better recover the original eol format if we
4735          encounter an inconsistent eol format while decoding.  */
4736       coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4737     }
4738
4739   coding->produced = coding->produced_char = 0;
4740   coding->consumed = coding->consumed_char = 0;
4741   coding->errors = 0;
4742   coding->result = CODING_FINISH_NORMAL;
4743
4744   switch (coding->type)         /* Dispatch to the decoder for this coding type.  */
4745     {
4746     case coding_type_sjis:
4747       decode_coding_sjis_big5 (coding, source, destination,
4748                                src_bytes, dst_bytes, 1);
4749       break;
4750
4751     case coding_type_iso2022:
4752       decode_coding_iso2022 (coding, source, destination,
4753                              src_bytes, dst_bytes);
4754       break;
4755
4756     case coding_type_big5:
4757       decode_coding_sjis_big5 (coding, source, destination,
4758                                src_bytes, dst_bytes, 0);
4759       break;
4760
4761     case coding_type_emacs_mule:
4762       decode_coding_emacs_mule (coding, source, destination,
4763                                 src_bytes, dst_bytes);
4764       break;
4765
4766     case coding_type_ccl:
4767       if (coding->spec.ccl.cr_carryover)
4768         {
4769           /* Put the CR which was not processed by the previous call
4770              of decode_eol_post_ccl in DESTINATION.  It will be
4771              decoded together with the following LF by the call to
4772              decode_eol_post_ccl below.  */
4773           *destination = '\r';
4774           coding->produced++;
4775           coding->produced_char++;
4776           dst_bytes--;
4777           extra = coding->spec.ccl.cr_carryover;
4778         }
4779       ccl_coding_driver (coding, source, destination + extra,
4780                          src_bytes, dst_bytes, 0);
4781       if (coding->eol_type != CODING_EOL_LF)
4782         {
4783           coding->produced += extra; /* Count the CR stored in front of the CCL output.  */
4784           coding->produced_char += extra;
4785           decode_eol_post_ccl (coding, destination, coding->produced);
4786         }
4787       break;
4788
4789     default:
4790       decode_eol (coding, source, destination, src_bytes, dst_bytes);
4791     }
4792
4793   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC /* All of the last block was consumed: normal end.  */
4794       && coding->mode & CODING_MODE_LAST_BLOCK
4795       && coding->consumed == src_bytes)
4796     coding->result = CODING_FINISH_NORMAL;
4797
4798   if (coding->mode & CODING_MODE_LAST_BLOCK /* Last block with undecoded bytes left over.  */
4799       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4800     {
4801       const unsigned char *src = source + coding->consumed;
4802       unsigned char *dst = destination + coding->produced;
4803
4804       src_bytes -= coding->consumed;
4805       coding->errors++;
4806       if (COMPOSING_P (coding))
4807         DECODE_COMPOSITION_END ('1');
4808       while (src_bytes--)       /* Store each remaining byte as a character via CHAR_STRING.  */
4809         {
4810           int c = *src++;
4811           dst += CHAR_STRING (c, dst);
4812           coding->produced_char++;
4813         }
4814       coding->consumed = coding->consumed_char = src - source;
4815       coding->produced = dst - destination;
4816       coding->result = CODING_FINISH_NORMAL;
4817     }
4818
4819   if (!coding->dst_multibyte)   /* The caller asked for unibyte output.  */
4820     {
4821       coding->produced = str_as_unibyte (destination, coding->produced);
4822       coding->produced_char = coding->produced;
4823     }
4824
4825   return coding->result;
4826 }
4827
4828 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  The
4829    multibyteness of the source is CODING->src_multibyte, the
4830    multibyteness of the result is always unibyte.  Returns CODING->result.  */
4831
4832 int
4833 encode_coding (coding, source, destination, src_bytes, dst_bytes)
4834      struct coding_system *coding;
4835      const unsigned char *source;
4836      unsigned char *destination;
4837      int src_bytes, dst_bytes;
4838 {
4839   coding->produced = coding->produced_char = 0;
4840   coding->consumed = coding->consumed_char = 0;
4841   coding->errors = 0;
4842   coding->result = CODING_FINISH_NORMAL;
4843
4844   switch (coding->type)         /* Dispatch to the encoder for this coding type.  */
4845     {
4846     case coding_type_sjis:
4847       encode_coding_sjis_big5 (coding, source, destination,
4848                                src_bytes, dst_bytes, 1);
4849       break;
4850
4851     case coding_type_iso2022:
4852       encode_coding_iso2022 (coding, source, destination,
4853                              src_bytes, dst_bytes);
4854       break;
4855
4856     case coding_type_big5:
4857       encode_coding_sjis_big5 (coding, source, destination,
4858                                src_bytes, dst_bytes, 0);
4859       break;
4860
4861     case coding_type_emacs_mule:
4862       encode_coding_emacs_mule (coding, source, destination,
4863                                 src_bytes, dst_bytes);
4864       break;
4865
4866     case coding_type_ccl:
4867       ccl_coding_driver (coding, source, destination,
4868                          src_bytes, dst_bytes, 1);
4869       break;
4870
4871     default:
4872       encode_eol (coding, source, destination, src_bytes, dst_bytes);
4873     }
4874
4875   if (coding->mode & CODING_MODE_LAST_BLOCK /* Last block that the encoder could not finish.  */
4876       && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
4877     {
4878       const unsigned char *src = source + coding->consumed;
4879       unsigned char *dst = destination + coding->produced;
4880
4881       if (coding->type == coding_type_iso2022)
4882         ENCODE_RESET_PLANE_AND_REGISTER;
4883       if (COMPOSING_P (coding)) /* Emit ESC '1' while a composition is still open.  */
4884         *dst++ = ISO_CODE_ESC, *dst++ = '1';
4885       if (coding->consumed < src_bytes)
4886         {
4887           int len = src_bytes - coding->consumed; /* Unconsumed bytes, copied as they are.  */
4888
4889           BCOPY_SHORT (src, dst, len);
4890           if (coding->src_multibyte)
4891             len = str_as_unibyte (dst, len);
4892           dst += len;
4893           coding->consumed = src_bytes;
4894         }
4895       coding->produced = coding->produced_char = dst - destination;
4896       coding->result = CODING_FINISH_NORMAL;
4897     }
4898
4899   if (coding->result == CODING_FINISH_INSUFFICIENT_SRC /* Everything was consumed: normal end.  */
4900       && coding->consumed == src_bytes)
4901     coding->result = CODING_FINISH_NORMAL;
4902
4903   return coding->result;
4904 }
4905
4906 /* Scan text in the region between *BEG and *END (byte positions),
4907    skip characters which we don't have to decode by coding system
4908    CODING at the head and tail, then set *BEG and *END to the region
4909    of the text we actually have to convert.  The caller should move
4910    the gap out of the region in advance if the region is from a
4911    buffer.  *BEG and *END are left untouched when nothing can be skipped.
4912
4913    If STR is not NULL, *BEG and *END are indices into STR.  */
4914
4915 static void
4916 shrink_decoding_region (beg, end, coding, str)
4917      int *beg, *end;
4918      struct coding_system *coding;
4919      unsigned char *str;
4920 {
4921   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
4922   int eol_conversion;
4923   Lisp_Object translation_table;
4924
4925   if (coding->type == coding_type_ccl
4926       || coding->type == coding_type_undecided
4927       || coding->eol_type != CODING_EOL_LF
4928       || !NILP (coding->post_read_conversion)
4929       || coding->composing != COMPOSITION_DISABLED)
4930     {
4931       /* We can't skip any data.  */
4932       return;
4933     }
4934   if (coding->type == coding_type_no_conversion
4935       || coding->type == coding_type_raw_text
4936       || coding->type == coding_type_emacs_mule)
4937     {
4938       /* We need no conversion, but don't have to skip any data here.
4939          Decoding routine handles them effectively anyway.  */
4940       return;
4941     }
4942
4943   translation_table = coding->translation_table_for_decode;
4944   if (NILP (translation_table) && !NILP (Venable_character_translation))
4945     translation_table = Vstandard_translation_table_for_decode;
4946   if (CHAR_TABLE_P (translation_table))
4947     {
4948       int i;
4949       for (i = 0; i < 128; i++)
4950         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
4951           break;
4952       if (i < 128)
4953         /* Some ASCII character should be translated.  We give up
4954            shrinking.  */
4955         return;
4956     }
4957
4958   if (coding->heading_ascii >= 0)
4959     /* Detection routine has already found how much we can skip at the
4960        head.  */
4961     *beg += coding->heading_ascii;
4962
4963   if (str)
4964     {
4965       begp_orig = begp = str + *beg;
4966       endp_orig = endp = str + *end;
4967     }
4968   else
4969     {
4970       begp_orig = begp = BYTE_POS_ADDR (*beg);
4971       endp_orig = endp = begp + *end - *beg;
4972     }
4973
4974   eol_conversion = (coding->eol_type == CODING_EOL_CR /* Always 0 here: non-LF types returned above.  */
4975                     || coding->eol_type == CODING_EOL_CRLF);
4976
4977   switch (coding->type)
4978     {
4979     case coding_type_sjis:
4980     case coding_type_big5:
4981       /* We can skip all ASCII characters at the head.  */
4982       if (coding->heading_ascii < 0)
4983         {
4984           if (eol_conversion)
4985             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
4986           else
4987             while (begp < endp && *begp < 0x80) begp++;
4988         }
4989       /* We can skip all ASCII characters at the tail except for the
4990          second byte of SJIS or BIG5 code.  */
4991       if (eol_conversion)
4992         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
4993       else
4994         while (begp < endp && endp[-1] < 0x80) endp--;
4995       /* Do not consider LF as ascii if preceded by CR, since that
4996          confuses eol decoding. */
4997       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
4998         endp++;
4999       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80) /* Keep the possible second byte.  */
5000         endp++;
5001       break;
5002
5003     case coding_type_iso2022:
5004       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
5005         /* We can't skip any data.  */
5006         break;
5007       if (coding->heading_ascii < 0)
5008         {
5009           /* We can skip all ASCII characters at the head except for a
5010              few control codes.  */
5011           while (begp < endp && (c = *begp) < 0x80
5012                  && c != ISO_CODE_CR && c != ISO_CODE_SO
5013                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
5014                  && (!eol_conversion || c != ISO_CODE_LF))
5015             begp++;
5016         }
5017       switch (coding->category_idx)
5018         {
5019         case CODING_CATEGORY_IDX_ISO_8_1:
5020         case CODING_CATEGORY_IDX_ISO_8_2:
5021           /* We can skip all ASCII characters at the tail.  */
5022           if (eol_conversion)
5023             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
5024           else
5025             while (begp < endp && endp[-1] < 0x80) endp--;
5026           /* Do not consider LF as ascii if preceded by CR, since that
5027              confuses eol decoding. */
5028           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
5029             endp++;
5030           break;
5031
5032         case CODING_CATEGORY_IDX_ISO_7:
5033         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
5034           {
5035             /* We can skip all characters at the tail except for 8-bit
5036                codes and ESC and the following 2-byte at the tail.  */
5037             unsigned char *eight_bit = NULL; /* Just after the last 8-bit byte seen, if any.  */
5038
5039             if (eol_conversion)
5040               while (begp < endp
5041                      && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
5042                 {
5043                   if (!eight_bit && c & 0x80) eight_bit = endp;
5044                   endp--;
5045                 }
5046             else
5047               while (begp < endp
5048                      && (c = endp[-1]) != ISO_CODE_ESC)
5049                 {
5050                   if (!eight_bit && c & 0x80) eight_bit = endp;
5051                   endp--;
5052                 }
5053             /* Do not consider LF as ascii if preceded by CR, since that
5054                confuses eol decoding. */
5055             if (begp < endp && endp < endp_orig
5056                 && endp[-1] == '\r' && endp[0] == '\n')
5057               endp++;
5058             if (begp < endp && endp[-1] == ISO_CODE_ESC)
5059               {
5060                 if (endp + 1 < endp_orig && endp[0] == '(' && endp[1] == 'B')
5061                   /* The two bytes after the ESC make an ASCII designation
5062                      sequence.  We can surely skip the tail.  But, if we have
5063                      encountered an 8-bit code, skip only the codes
5064                      after that.  */
5065                   endp = eight_bit ? eight_bit : endp + 2;
5066                 else
5067                   /* Hmmm, we can't skip the tail.  */
5068                   endp = endp_orig;
5069               }
5070             else if (eight_bit)
5071               endp = eight_bit;
5072           }
5073         }
5074       break;
5075
5076     default:
5077       abort ();
5078     }
5079   *beg += begp - begp_orig;     /* Report how far each end of the region moved.  */
5080   *end += endp - endp_orig;
5081   return;
5082 }
5083
5084 /* Like shrink_decoding_region but for encoding: narrow *BEG and *END to the text that really needs encoding by CODING.  */
5085
5086 static void
5087 shrink_encoding_region (beg, end, coding, str)
5088      int *beg, *end;
5089      struct coding_system *coding;
5090      unsigned char *str;
5091 {
5092   unsigned char *begp_orig, *begp, *endp_orig, *endp;
5093   int eol_conversion;
5094   Lisp_Object translation_table;
5095
5096   if (coding->type == coding_type_ccl
5097       || coding->eol_type == CODING_EOL_CRLF
5098       || coding->eol_type == CODING_EOL_CR
5099       || (coding->cmp_data && coding->cmp_data->used > 0))
5100     {
5101       /* We can't skip any data.  */
5102       return;
5103     }
5104   if (coding->type == coding_type_no_conversion
5105       || coding->type == coding_type_raw_text
5106       || coding->type == coding_type_emacs_mule
5107       || coding->type == coding_type_undecided)
5108     {
5109       /* We need no conversion, but don't have to skip any data here.
5110          Encoding routine handles them effectively anyway.  */
5111       return;
5112     }
5113
5114   translation_table = coding->translation_table_for_encode;
5115   if (NILP (translation_table) && !NILP (Venable_character_translation))
5116     translation_table = Vstandard_translation_table_for_encode;
5117   if (CHAR_TABLE_P (translation_table))
5118     {
5119       int i;
5120       for (i = 0; i < 128; i++)
5121         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
5122           break;
5123       if (i < 128)
5124         /* Some ASCII character should be translated.  We give up
5125            shrinking.  */
5126         return;
5127     }
5128
5129   if (str)                      /* *BEG and *END are indices into STR.  */
5130     {
5131       begp_orig = begp = str + *beg;
5132       endp_orig = endp = str + *end;
5133     }
5134   else                          /* Otherwise they are located with BYTE_POS_ADDR.  */
5135     {
5136       begp_orig = begp = BYTE_POS_ADDR (*beg);
5137       endp_orig = endp = begp + *end - *beg;
5138     }
5139
5140   eol_conversion = (coding->eol_type == CODING_EOL_CR /* Always 0 here: CR and CRLF returned above.  */
5141                     || coding->eol_type == CODING_EOL_CRLF);
5142
5143   /* Here, we don't have to check coding->pre_write_conversion because
5144      the caller is expected to have handled it already.  */
5145   switch (coding->type)
5146     {
5147     case coding_type_iso2022:
5148       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
5149         /* We can't skip any data.  */
5150         break;
5151       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
5152         {
5153           unsigned char *bol = begp; /* Start of the last line reached while skipping.  */
5154           while (begp < endp && *begp < 0x80)
5155             {
5156               begp++;
5157               if (begp[-1] == '\n')
5158                 bol = begp;
5159             }
5160           begp = bol;           /* Skip whole ASCII lines only.  */
5161           goto label_skip_tail;
5162         }
5163       /* fall down ... */
5164
5165     case coding_type_sjis:
5166     case coding_type_big5:
5167       /* We can skip all ASCII characters at the head and tail.  */
5168       if (eol_conversion)
5169         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
5170       else
5171         while (begp < endp && *begp < 0x80) begp++;
5172     label_skip_tail:
5173       if (eol_conversion)
5174         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
5175       else
5176         while (begp < endp && *(endp - 1) < 0x80) endp--;
5177       break;
5178
5179     default:
5180       abort ();
5181     }
5182
5183   *beg += begp - begp_orig;     /* Report how far each end of the region moved.  */
5184   *end += endp - endp_orig;
5185   return;
5186 }
5187
5188 /* As shrinking conversion region requires some overhead, we don't try
5189    shrinking if the length of conversion region is not greater than
5190    this value.  */
5191 static int shrink_conversion_region_threshhold = 1024;
5192 /* Shrink the region *BEG..*END with the encoding variant if ENCODEP is nonzero, else with the decoding variant.  */
5193 #define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep)        \
5194   do {                                                                  \
5195     if (*(end) - *(beg) > shrink_conversion_region_threshhold)          \
5196       {                                                                 \
5197         if (encodep) shrink_encoding_region (beg, end, coding, str);    \
5198         else shrink_decoding_region (beg, end, coding, str);            \
5199       }                                                                 \
5200   } while (0)
5201 /* Store ARG in Vlast_coding_system_used, clear inhibit_pre_post_conversion, and return Qnil.  */
5202 static Lisp_Object
5203 code_convert_region_unwind (arg)
5204      Lisp_Object arg;
5205 {
5206   Vlast_coding_system_used = arg;
5207   inhibit_pre_post_conversion = 0;
5208   return Qnil;
5209 }
5210
5211 /* Store information about all compositions in the range FROM and TO
5212    of OBJ in memory blocks pointed by CODING->cmp_data.  OBJ is a
5213    buffer or a string, defaults to the current buffer.  */
5214
5215 void
5216 coding_save_composition (coding, from, to, obj)
5217      struct coding_system *coding;
5218      int from, to;
5219      Lisp_Object obj;
5220 {
5221   Lisp_Object prop;
5222   int start, end;
5223
5224   if (coding->composing == COMPOSITION_DISABLED)
5225     return;
5226   if (!coding->cmp_data)
5227     coding_allocate_composition_data (coding, from);
5228   if (!find_composition (from, to, &start, &end, &prop, obj)
5229       || end > to)
5230     return;
5231   if (start < from
5232       && (!find_composition (end, to, &start, &end, &prop, obj)
5233           || end > to))
5234     return;
5235   coding->composing = COMPOSITION_NO;
5236   do
5237     {
5238       if (COMPOSITION_VALID_P (start, end, prop))
5239         {
5240           enum composition_method method = COMPOSITION_METHOD (prop);
5241           if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH
5242               >= COMPOSITION_DATA_SIZE)
5243             coding_allocate_composition_data (coding, from);
5244           /* For relative composition, we remember start and end
5245              positions, for the other compositions, we also remember
5246              components.  */
5247           CODING_ADD_COMPOSITION_START (coding, start - from, method);
5248           if (method != COMPOSITION_RELATIVE)
5249             {
5250               /* We must store a*/
5251               Lisp_Object val, ch;
5252
5253               val = COMPOSITION_COMPONENTS (prop);
5254               if (CONSP (val))
5255                 while (CONSP (val))
5256                   {
5257                     ch = XCAR (val), val = XCDR (val);
5258                     CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5259                   }
5260               else if (VECTORP (val) || STRINGP (val))
5261                 {
5262                   int len = (VECTORP (val)
5263                              ? XVECTOR (val)->size : SCHARS (val));
5264                   int i;
5265                   for (i = 0; i < len; i++)
5266                     {
5267                       ch = (STRINGP (val)
5268                             ? Faref (val, make_number (i))
5269                             : XVECTOR (val)->contents[i]);
5270                       CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch));
5271                     }
5272                 }
5273               else              /* INTEGERP (val) */
5274                 CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val));
5275             }
5276           CODING_ADD_COMPOSITION_END (coding, end - from);
5277         }
5278       start = end;
5279     }
5280   while (start < to
5281          && find_composition (start, to, &start, &end, &prop, obj)
5282          && end <= to);
5283
5284   /* Make coding->cmp_data point to the first memory block.  */
5285   while (coding->cmp_data->prev)
5286     coding->cmp_data = coding->cmp_data->prev;
5287   coding->cmp_data_start = 0;
5288 }
5289
5290 /* Reflect the saved information about compositions to OBJ.
5291    CODING->cmp_data points to a memory block for the information.  OBJ
5292    is a buffer or a string, defaults to the current buffer.  */
5293
5294 void
5295 coding_restore_composition (coding, obj)
5296      struct coding_system *coding;
5297      Lisp_Object obj;
5298 {
5299   struct composition_data *cmp_data = coding->cmp_data;
5300
5301   if (!cmp_data)
5302     return;
5303
5304   while (cmp_data->prev)
5305     cmp_data = cmp_data->prev;
5306
5307   while (cmp_data)
5308     {
5309       int i;
5310
5311       for (i = 0; i < cmp_data->used && cmp_data->data[i] > 0;
5312            i += cmp_data->data[i])
5313         {
5314           int *data = cmp_data->data + i;
5315           enum composition_method method = (enum composition_method) data[3];
5316           Lisp_Object components;
5317
5318           if (method == COMPOSITION_RELATIVE)
5319             components = Qnil;
5320           else
5321             {
5322               int len = data[0] - 4, j;
5323               Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5324
5325               if (method == COMPOSITION_WITH_RULE_ALTCHARS
5326                   && len % 2 == 0)
5327                 len --;
5328               for (j = 0; j < len; j++)
5329                 args[j] = make_number (data[4 + j]);
5330               components = (method == COMPOSITION_WITH_ALTCHARS
5331                             ? Fstring (len, args) : Fvector (len, args));
5332             }
5333           compose_text (data[1], data[2], components, Qnil, obj);
5334         }
5335       cmp_data = cmp_data->next;
5336     }
5337 }
5338
5339 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
5340    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
5341    coding system CODING, and return the status code of code conversion
5342    (currently, this value has no meaning).
5343
5344    How many characters (and bytes) are converted to how many
5345    characters (and bytes) are recorded in members of the structure
5346    CODING.
5347
5348    If REPLACE is nonzero, we do various things as if the original text
5349    is deleted and a new text is inserted.  See the comments in
5350    replace_range (insdel.c) to know what we are doing.
5351
5352    If REPLACE is zero, it is assumed that the source text is unibyte.
5353    Otherwise, it is assumed that the source text is multibyte.  */
5354
5355 int
5356 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
5357      int from, from_byte, to, to_byte, encodep, replace;
5358      struct coding_system *coding;
5359 {
5360   int len = to - from, len_byte = to_byte - from_byte;
5361   int nchars_del = 0, nbytes_del = 0;
5362   int require, inserted, inserted_byte;
5363   int head_skip, tail_skip, total_skip = 0;
5364   Lisp_Object saved_coding_symbol;
5365   int first = 1;
5366   unsigned char *src, *dst;
5367   Lisp_Object deletion;
5368   int orig_point = PT, orig_len = len;
5369   int prev_Z;
5370   int multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
5371
5372   deletion = Qnil;
5373   saved_coding_symbol = coding->symbol;
5374
5375   if (from < PT && PT < to)
5376     {
5377       TEMP_SET_PT_BOTH (from, from_byte);
5378       orig_point = from;
5379     }
5380
5381   if (replace)
5382     {
5383       int saved_from = from;
5384       int saved_inhibit_modification_hooks;
5385
5386       prepare_to_modify_buffer (from, to, &from);
5387       if (saved_from != from)
5388         {
5389           to = from + len;
5390           from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
5391           len_byte = to_byte - from_byte;
5392         }
5393
5394       /* The code conversion routine can not preserve text properties
5395          for now.  So, we must remove all text properties in the
5396          region.  Here, we must suppress all modification hooks.  */
5397       saved_inhibit_modification_hooks = inhibit_modification_hooks;
5398       inhibit_modification_hooks = 1;
5399       Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
5400       inhibit_modification_hooks = saved_inhibit_modification_hooks;
5401     }
5402
5403   if (! encodep && CODING_REQUIRE_DETECTION (coding))
5404     {
5405       /* We must detect encoding of text and eol format.  */
5406
5407       if (from < GPT && to > GPT)
5408         move_gap_both (from, from_byte);
5409       if (coding->type == coding_type_undecided)
5410         {
5411           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
5412           if (coding->type == coding_type_undecided)
5413             {
5414               /* It seems that the text contains only ASCII, but we
5415                  should not leave it undecided because the deeper
5416                  decoding routine (decode_coding) tries to detect the
5417                  encodings again in vain.  */
5418               coding->type = coding_type_emacs_mule;
5419               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
5420               /* As emacs-mule decoder will handle composition, we
5421                  need this setting to allocate coding->cmp_data
5422                  later.  */
5423               coding->composing = COMPOSITION_NO;
5424             }
5425         }
5426       if (coding->eol_type == CODING_EOL_UNDECIDED
5427           && coding->type != coding_type_ccl)
5428         {
5429           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
5430           if (coding->eol_type == CODING_EOL_UNDECIDED)
5431             coding->eol_type = CODING_EOL_LF;
5432           /* We had better recover the original eol format if we
5433              encounter an inconsistent eol format while decoding.  */
5434           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5435         }
5436     }
5437
5438   /* Now we convert the text.  */
5439
5440   /* For encoding, we must process pre-write-conversion in advance.  */
5441   if (! inhibit_pre_post_conversion
5442       && encodep
5443       && SYMBOLP (coding->pre_write_conversion)
5444       && ! NILP (Ffboundp (coding->pre_write_conversion)))
5445     {
5446       /* The function in pre-write-conversion may put a new text in a
5447          new buffer.  */
5448       struct buffer *prev = current_buffer;
5449       Lisp_Object new;
5450
5451       record_unwind_protect (code_convert_region_unwind,
5452                              Vlast_coding_system_used);
5453       /* We should not call any more pre-write/post-read-conversion
5454          functions while this pre-write-conversion is running.  */
5455       inhibit_pre_post_conversion = 1;
5456       call2 (coding->pre_write_conversion,
5457              make_number (from), make_number (to));
5458       inhibit_pre_post_conversion = 0;
5459       /* Discard the unwind protect.  */
5460       specpdl_ptr--;
5461
5462       if (current_buffer != prev)
5463         {
5464           len = ZV - BEGV;
5465           new = Fcurrent_buffer ();
5466           set_buffer_internal_1 (prev);
5467           del_range_2 (from, from_byte, to, to_byte, 0);
5468           TEMP_SET_PT_BOTH (from, from_byte);
5469           insert_from_buffer (XBUFFER (new), 1, len, 0);
5470           Fkill_buffer (new);
5471           if (orig_point >= to)
5472             orig_point += len - orig_len;
5473           else if (orig_point > from)
5474             orig_point = from;
5475           orig_len = len;
5476           to = from + len;
5477           from_byte = CHAR_TO_BYTE (from);
5478           to_byte = CHAR_TO_BYTE (to);
5479           len_byte = to_byte - from_byte;
5480           TEMP_SET_PT_BOTH (from, from_byte);
5481         }
5482     }
5483
5484   if (replace)
5485     {
5486       if (! EQ (current_buffer->undo_list, Qt))
5487         deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
5488       else
5489         {
5490           nchars_del = to - from;
5491           nbytes_del = to_byte - from_byte;
5492         }
5493     }
5494
5495   if (coding->composing != COMPOSITION_DISABLED)
5496     {
5497       if (encodep)
5498         coding_save_composition (coding, from, to, Fcurrent_buffer ());
5499       else
5500         coding_allocate_composition_data (coding, from);
5501     }
5502
5503   /* Try to skip the heading and tailing ASCIIs.  */
5504   if (coding->type != coding_type_ccl)
5505     {
5506       int from_byte_orig = from_byte, to_byte_orig = to_byte;
5507
5508       if (from < GPT && GPT < to)
5509         move_gap_both (from, from_byte);
5510       SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
5511       if (from_byte == to_byte
5512           && (encodep || NILP (coding->post_read_conversion))
5513           && ! CODING_REQUIRE_FLUSHING (coding))
5514         {
5515           coding->produced = len_byte;
5516           coding->produced_char = len;
5517           if (!replace)
5518             /* We must record and adjust for this new text now.  */
5519             adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
5520           return 0;
5521         }
5522
5523       head_skip = from_byte - from_byte_orig;
5524       tail_skip = to_byte_orig - to_byte;
5525       total_skip = head_skip + tail_skip;
5526       from += head_skip;
5527       to -= tail_skip;
5528       len -= total_skip; len_byte -= total_skip;
5529     }
5530
5531   /* For conversion, we must put the gap before the text in addition to
5532      making the gap larger for efficient decoding.  The required gap
5533      size starts from 2000 which is the magic number used in make_gap.
5534      But, after one batch of conversion, it will be incremented if we
5535      find that it is not enough .  */
5536   require = 2000;
5537
5538   if (GAP_SIZE  < require)
5539     make_gap (require - GAP_SIZE);
5540   move_gap_both (from, from_byte);
5541
5542   inserted = inserted_byte = 0;
5543
5544   GAP_SIZE += len_byte;
5545   ZV -= len;
5546   Z -= len;
5547   ZV_BYTE -= len_byte;
5548   Z_BYTE -= len_byte;
5549
5550   if (GPT - BEG < BEG_UNCHANGED)
5551     BEG_UNCHANGED = GPT - BEG;
5552   if (Z - GPT < END_UNCHANGED)
5553     END_UNCHANGED = Z - GPT;
5554
5555   if (!encodep && coding->src_multibyte)
5556     {
5557       /* Decoding routines expects that the source text is unibyte.
5558          We must convert 8-bit characters of multibyte form to
5559          unibyte.  */
5560       int len_byte_orig = len_byte;
5561       len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte);
5562       if (len_byte < len_byte_orig)
5563         safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte,
5564                     len_byte);
5565       coding->src_multibyte = 0;
5566     }
5567
5568   for (;;)
5569     {
5570       int result;
5571
5572       /* The buffer memory is now:
5573          +--------+converted-text+---------+-------original-text-------+---+
5574          |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
5575                   |<---------------------- GAP ----------------------->|  */
5576       src = GAP_END_ADDR - len_byte;
5577       dst = GPT_ADDR + inserted_byte;
5578
5579       if (encodep)
5580         result = encode_coding (coding, src, dst, len_byte, 0);
5581       else
5582         {
5583           if (coding->composing != COMPOSITION_DISABLED)
5584             coding->cmp_data->char_offset = from + inserted;
5585           result = decode_coding (coding, src, dst, len_byte, 0);
5586         }
5587
5588       /* The buffer memory is now:
5589          +--------+-------converted-text----+--+------original-text----+---+
5590          |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
5591                   |<---------------------- GAP ----------------------->|  */
5592
5593       inserted += coding->produced_char;
5594       inserted_byte += coding->produced;
5595       len_byte -= coding->consumed;
5596
5597       if (result == CODING_FINISH_INSUFFICIENT_CMP)
5598         {
5599           coding_allocate_composition_data (coding, from + inserted);
5600           continue;
5601         }
5602
5603       src += coding->consumed;
5604       dst += coding->produced;
5605
5606       if (result == CODING_FINISH_NORMAL)
5607         {
5608           src += len_byte;
5609           break;
5610         }
5611       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
5612         {
5613           unsigned char *pend = dst, *p = pend - inserted_byte;
5614           Lisp_Object eol_type;
5615
5616           /* Encode LFs back to the original eol format (CR or CRLF).  */
5617           if (coding->eol_type == CODING_EOL_CR)
5618             {
5619               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
5620             }
5621           else
5622             {
5623               int count = 0;
5624
5625               while (p < pend) if (*p++ == '\n') count++;
5626               if (src - dst < count)
5627                 {
5628                   /* We don't have sufficient room for encoding LFs
5629                      back to CRLF.  We must record converted and
5630                      not-yet-converted text back to the buffer
5631                      content, enlarge the gap, then record them out of
5632                      the buffer contents again.  */
5633                   int add = len_byte + inserted_byte;
5634
5635                   GAP_SIZE -= add;
5636                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5637                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
5638                   make_gap (count - GAP_SIZE);
5639                   GAP_SIZE += add;
5640                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5641                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5642                   /* Don't forget to update SRC, DST, and PEND.  */
5643                   src = GAP_END_ADDR - len_byte;
5644                   dst = GPT_ADDR + inserted_byte;
5645                   pend = dst;
5646                 }
5647               inserted += count;
5648               inserted_byte += count;
5649               coding->produced += count;
5650               p = dst = pend + count;
5651               while (count)
5652                 {
5653                   *--p = *--pend;
5654                   if (*p == '\n') count--, *--p = '\r';
5655                 }
5656             }
5657
5658           /* Suppress eol-format conversion in the further conversion.  */
5659           coding->eol_type = CODING_EOL_LF;
5660
5661           /* Set the coding system symbol to that for Unix-like EOL.  */
5662           eol_type = Fget (saved_coding_symbol, Qeol_type);
5663           if (VECTORP (eol_type)
5664               && XVECTOR (eol_type)->size == 3
5665               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
5666             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
5667           else
5668             coding->symbol = saved_coding_symbol;
5669
5670           continue;
5671         }
5672       if (len_byte <= 0)
5673         {
5674           if (coding->type != coding_type_ccl
5675               || coding->mode & CODING_MODE_LAST_BLOCK)
5676             break;
5677           coding->mode |= CODING_MODE_LAST_BLOCK;
5678           continue;
5679         }
5680       if (result == CODING_FINISH_INSUFFICIENT_SRC)
5681         {
5682           /* The source text ends in invalid codes.  Let's just
5683              make them valid buffer contents, and finish conversion.  */
5684           if (multibyte_p)
5685             {
5686               unsigned char *start = dst;
5687
5688               inserted += len_byte;
5689               while (len_byte--)
5690                 {
5691                   int c = *src++;
5692                   dst += CHAR_STRING (c, dst);
5693                 }
5694
5695               inserted_byte += dst - start;
5696             }
5697           else
5698             {
5699               inserted += len_byte;
5700               inserted_byte += len_byte;
5701               while (len_byte--)
5702                 *dst++ = *src++;
5703             }
5704           break;
5705         }
5706       if (result == CODING_FINISH_INTERRUPT)
5707         {
5708           /* The conversion procedure was interrupted by a user.  */
5709           break;
5710         }
5711       /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST  */
5712       if (coding->consumed < 1)
5713         {
5714           /* It's quite strange to require more memory without
5715              consuming any bytes.  Perhaps CCL program bug.  */
5716           break;
5717         }
5718       if (first)
5719         {
5720           /* We have just done the first batch of conversion which was
5721              stopped because of insufficient gap.  Let's reconsider the
5722              required gap size (i.e. SRT - DST) now.
5723
5724              We have converted ORIG bytes (== coding->consumed) into
5725              NEW bytes (coding->produced).  To convert the remaining
5726              LEN bytes, we may need REQUIRE bytes of gap, where:
5727                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
5728                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
5729              Here, we are sure that NEW >= ORIG.  */
5730           float ratio;
5731
5732           if (coding->produced <= coding->consumed)
5733             {
5734               /* This happens because of CCL-based coding system with
5735                  eol-type CRLF.  */
5736               require = 0;
5737             }
5738           else
5739             {
5740               ratio = (coding->produced - coding->consumed) / coding->consumed;
5741               require = len_byte * ratio;
5742             }
5743           first = 0;
5744         }
5745       if ((src - dst) < (require + 2000))
5746         {
5747           /* See the comment above the previous call of make_gap.  */
5748           int add = len_byte + inserted_byte;
5749
5750           GAP_SIZE -= add;
5751           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
5752           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5753           make_gap (require + 2000);
5754           GAP_SIZE += add;
5755           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
5756           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5757         }
5758     }
5759   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
5760
5761   if (encodep && coding->dst_multibyte)
5762     {
5763       /* The output is unibyte.  We must convert 8-bit characters to
5764          multibyte form.  */
5765       if (inserted_byte * 2 > GAP_SIZE)
5766         {
5767           GAP_SIZE -= inserted_byte;
5768           ZV += inserted_byte; Z += inserted_byte;
5769           ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
5770           GPT += inserted_byte; GPT_BYTE += inserted_byte;
5771           make_gap (inserted_byte - GAP_SIZE);
5772           GAP_SIZE += inserted_byte;
5773           ZV -= inserted_byte; Z -= inserted_byte;
5774           ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte;
5775           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
5776         }
5777       inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
5778     }
5779
5780   /* If we shrank the conversion area, adjust it now.  */
5781   if (total_skip > 0)
5782     {
5783       if (tail_skip > 0)
5784         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
5785       inserted += total_skip; inserted_byte += total_skip;
5786       GAP_SIZE += total_skip;
5787       GPT -= head_skip; GPT_BYTE -= head_skip;
5788       ZV -= total_skip; ZV_BYTE -= total_skip;
5789       Z -= total_skip; Z_BYTE -= total_skip;
5790       from -= head_skip; from_byte -= head_skip;
5791       to += tail_skip; to_byte += tail_skip;
5792     }
5793
5794   prev_Z = Z;
5795   if (! EQ (current_buffer->undo_list, Qt))
5796     adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
5797   else
5798     adjust_after_replace_noundo (from, from_byte, nchars_del, nbytes_del,
5799                                  inserted, inserted_byte);
5800   inserted = Z - prev_Z;
5801
5802   if (!encodep && coding->cmp_data && coding->cmp_data->used)
5803     coding_restore_composition (coding, Fcurrent_buffer ());
5804   coding_free_composition_data (coding);
5805
5806   if (! inhibit_pre_post_conversion
5807       && ! encodep && ! NILP (coding->post_read_conversion))
5808     {
5809       Lisp_Object val;
5810       Lisp_Object saved_coding_system;
5811
5812       if (from != PT)
5813         TEMP_SET_PT_BOTH (from, from_byte);
5814       prev_Z = Z;
5815       record_unwind_protect (code_convert_region_unwind,
5816                              Vlast_coding_system_used);
5817       saved_coding_system = Vlast_coding_system_used;
5818       Vlast_coding_system_used = coding->symbol;
5819       /* We should not call any more pre-write/post-read-conversion
5820          functions while this post-read-conversion is running.  */
5821       inhibit_pre_post_conversion = 1;
5822       val = call1 (coding->post_read_conversion, make_number (inserted));
5823       inhibit_pre_post_conversion = 0;
5824       coding->symbol = Vlast_coding_system_used;
5825       Vlast_coding_system_used = saved_coding_system;
5826       /* Discard the unwind protect.  */
5827       specpdl_ptr--;
5828       CHECK_NUMBER (val);
5829       inserted += Z - prev_Z;
5830     }
5831
5832   if (orig_point >= from)
5833     {
5834       if (orig_point >= from + orig_len)
5835         orig_point += inserted - orig_len;
5836       else
5837         orig_point = from;
5838       TEMP_SET_PT (orig_point);
5839     }
5840
5841   if (replace)
5842     {
5843       signal_after_change (from, to - from, inserted);
5844       update_compositions (from, from + inserted, CHECK_BORDER);
5845     }
5846
5847   {
5848     coding->consumed = to_byte - from_byte;
5849     coding->consumed_char = to - from;
5850     coding->produced = inserted_byte;
5851     coding->produced_char = inserted;
5852   }
5853
5854   return 0;
5855 }
5856
5857 Lisp_Object
5858 run_pre_post_conversion_on_str (str, coding, encodep)
5859      Lisp_Object str;
5860      struct coding_system *coding;
5861      int encodep;
5862 {
5863   int count = SPECPDL_INDEX ();
5864   struct gcpro gcpro1, gcpro2;
5865   int multibyte = STRING_MULTIBYTE (str);
5866   Lisp_Object buffer;
5867   struct buffer *buf;
5868   Lisp_Object old_deactivate_mark;
5869
5870   record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
5871   record_unwind_protect (code_convert_region_unwind,
5872                          Vlast_coding_system_used);
5873   /* It is not crucial to specbind this.  */
5874   old_deactivate_mark = Vdeactivate_mark;
5875   GCPRO2 (str, old_deactivate_mark);
5876
5877   buffer = Fget_buffer_create (build_string (" *code-converting-work*"));
5878   buf = XBUFFER (buffer);
5879
5880   buf->directory = current_buffer->directory;
5881   buf->read_only = Qnil;
5882   buf->filename = Qnil;
5883   buf->undo_list = Qt;
5884   buf->overlays_before = Qnil;
5885   buf->overlays_after = Qnil;
5886
5887   set_buffer_internal (buf);
5888   /* We must insert the contents of STR as is without
5889      unibyte<->multibyte conversion.  For that, we adjust the
5890      multibyteness of the working buffer to that of STR.  */
5891   Ferase_buffer ();
5892   buf->enable_multibyte_characters = multibyte ? Qt : Qnil;
5893
5894   insert_from_string (str, 0, 0,
5895                       SCHARS (str), SBYTES (str), 0);
5896   UNGCPRO;
5897   inhibit_pre_post_conversion = 1;
5898   if (encodep)
5899     call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
5900   else
5901     {
5902       Vlast_coding_system_used = coding->symbol;
5903       TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
5904       call1 (coding->post_read_conversion, make_number (Z - BEG));
5905       coding->symbol = Vlast_coding_system_used;
5906     }
5907   inhibit_pre_post_conversion = 0;
5908   Vdeactivate_mark = old_deactivate_mark;
5909   str = make_buffer_string (BEG, Z, 1);
5910   return unbind_to (count, str);
5911 }
5912
5913 Lisp_Object
5914 decode_coding_string (str, coding, nocopy)
5915      Lisp_Object str;
5916      struct coding_system *coding;
5917      int nocopy;
5918 {
5919   int len;
5920   struct conversion_buffer buf;
5921   int from, to_byte;
5922   Lisp_Object saved_coding_symbol;
5923   int result;
5924   int require_decoding;
5925   int shrinked_bytes = 0;
5926   Lisp_Object newstr;
5927   int consumed, consumed_char, produced, produced_char;
5928
5929   from = 0;
5930   to_byte = SBYTES (str);
5931
5932   saved_coding_symbol = coding->symbol;
5933   coding->src_multibyte = STRING_MULTIBYTE (str);
5934   coding->dst_multibyte = 1;
5935   if (CODING_REQUIRE_DETECTION (coding))
5936     {
5937       /* See the comments in code_convert_region.  */
5938       if (coding->type == coding_type_undecided)
5939         {
5940           detect_coding (coding, SDATA (str), to_byte);
5941           if (coding->type == coding_type_undecided)
5942             {
5943               coding->type = coding_type_emacs_mule;
5944               coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
5945               /* As emacs-mule decoder will handle composition, we
5946                  need this setting to allocate coding->cmp_data
5947                  later.  */
5948               coding->composing = COMPOSITION_NO;
5949             }
5950         }
5951       if (coding->eol_type == CODING_EOL_UNDECIDED
5952           && coding->type != coding_type_ccl)
5953         {
5954           saved_coding_symbol = coding->symbol;
5955           detect_eol (coding, SDATA (str), to_byte);
5956           if (coding->eol_type == CODING_EOL_UNDECIDED)
5957             coding->eol_type = CODING_EOL_LF;
5958           /* We had better recover the original eol format if we
5959              encounter an inconsistent eol format while decoding.  */
5960           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
5961         }
5962     }
5963
5964   if (coding->type == coding_type_no_conversion
5965       || coding->type == coding_type_raw_text)
5966     coding->dst_multibyte = 0;
5967
5968   require_decoding = CODING_REQUIRE_DECODING (coding);
5969
5970   if (STRING_MULTIBYTE (str))
5971     {
5972       /* Decoding routines expect the source text to be unibyte.  */
5973       str = Fstring_as_unibyte (str);
5974       to_byte = SBYTES (str);
5975       nocopy = 1;
5976       coding->src_multibyte = 0;
5977     }
5978
5979   /* Try to skip the heading and tailing ASCIIs.  */
5980   if (require_decoding && coding->type != coding_type_ccl)
5981     {
5982       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
5983                                 0);
5984       if (from == to_byte)
5985         require_decoding = 0;
5986       shrinked_bytes = from + (SBYTES (str) - to_byte);
5987     }
5988
5989   if (!require_decoding
5990       && !(SYMBOLP (coding->post_read_conversion)
5991            && !NILP (Ffboundp (coding->post_read_conversion))))
5992     {
5993       coding->consumed = SBYTES (str);
5994       coding->consumed_char = SCHARS (str);
5995       if (coding->dst_multibyte)
5996         {
5997           str = Fstring_as_multibyte (str);
5998           nocopy = 1;
5999         }
6000       coding->produced = SBYTES (str);
6001       coding->produced_char = SCHARS (str);
6002       return (nocopy ? str : Fcopy_sequence (str));
6003     }
6004
6005   if (coding->composing != COMPOSITION_DISABLED)
6006     coding_allocate_composition_data (coding, from);
6007   len = decoding_buffer_size (coding, to_byte - from);
6008   allocate_conversion_buffer (buf, len);
6009
6010   consumed = consumed_char = produced = produced_char = 0;
6011   while (1)
6012     {
6013       result = decode_coding (coding, SDATA (str) + from + consumed,
6014                               buf.data + produced, to_byte - from - consumed,
6015                               buf.size - produced);
6016       consumed += coding->consumed;
6017       consumed_char += coding->consumed_char;
6018       produced += coding->produced;
6019       produced_char += coding->produced_char;
6020       if (result == CODING_FINISH_NORMAL
6021           || (result == CODING_FINISH_INSUFFICIENT_SRC
6022               && coding->consumed == 0))
6023         break;
6024       if (result == CODING_FINISH_INSUFFICIENT_CMP)
6025         coding_allocate_composition_data (coding, from + produced_char);
6026       else if (result == CODING_FINISH_INSUFFICIENT_DST)
6027         extend_conversion_buffer (&buf);
6028       else if (result == CODING_FINISH_INCONSISTENT_EOL)
6029         {
6030           Lisp_Object eol_type;
6031
6032           /* Recover the original EOL format.  */
6033           if (coding->eol_type == CODING_EOL_CR)
6034             {
6035               unsigned char *p;
6036               for (p = buf.data; p < buf.data + produced; p++)
6037                 if (*p == '\n') *p = '\r';
6038             }
6039           else if (coding->eol_type == CODING_EOL_CRLF)
6040             {
6041               int num_eol = 0;
6042               unsigned char *p0, *p1;
6043               for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++)
6044                 if (*p0 == '\n') num_eol++;
6045               if (produced + num_eol >= buf.size)
6046                 extend_conversion_buffer (&buf);
6047               for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;)
6048                 {
6049                   *--p1 = *--p0;
6050                   if (*p0 == '\n') *--p1 = '\r';
6051                 }
6052               produced += num_eol;
6053               produced_char += num_eol;
6054             }
6055           /* Suppress eol-format conversion in the further conversion.  */
6056           coding->eol_type = CODING_EOL_LF;
6057
6058           /* Set the coding system symbol to that for Unix-like EOL.  */
6059           eol_type = Fget (saved_coding_symbol, Qeol_type);
6060           if (VECTORP (eol_type)
6061               && XVECTOR (eol_type)->size == 3
6062               && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF]))
6063             coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF];
6064           else
6065             coding->symbol = saved_coding_symbol;
6066
6067
6068         }
6069     }
6070
6071   coding->consumed = consumed;
6072   coding->consumed_char = consumed_char;
6073   coding->produced = produced;
6074   coding->produced_char = produced_char;
6075
6076   if (coding->dst_multibyte)
6077     newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes,
6078                                            produced + shrinked_bytes);
6079   else
6080     newstr = make_uninit_string (produced + shrinked_bytes);
6081   if (from > 0)
6082     STRING_COPYIN (newstr, 0, SDATA (str), from);
6083   STRING_COPYIN (newstr, from, buf.data, produced);
6084   if (shrinked_bytes > from)
6085     STRING_COPYIN (newstr, from + produced,
6086                    SDATA (str) + to_byte,
6087                    shrinked_bytes - from);
6088   free_conversion_buffer (&buf);
6089
6090   if (coding->cmp_data && coding->cmp_data->used)
6091     coding_restore_composition (coding, newstr);
6092   coding_free_composition_data (coding);
6093
6094   if (SYMBOLP (coding->post_read_conversion)
6095       && !NILP (Ffboundp (coding->post_read_conversion)))
6096     newstr = run_pre_post_conversion_on_str (newstr, coding, 0);
6097
6098   return newstr;
6099 }
6100
6101 Lisp_Object
6102 encode_coding_string (str, coding, nocopy)
6103      Lisp_Object str;
6104      struct coding_system *coding;
6105      int nocopy;
6106 {
6107   int len;
6108   struct conversion_buffer buf;
6109   int from, to, to_byte;
6110   int result;
6111   int shrinked_bytes = 0;
6112   Lisp_Object newstr;
6113   int consumed, consumed_char, produced, produced_char;
6114
6115   if (SYMBOLP (coding->pre_write_conversion)
6116       && !NILP (Ffboundp (coding->pre_write_conversion)))
6117     str = run_pre_post_conversion_on_str (str, coding, 1);
6118
6119   from = 0;
6120   to = SCHARS (str);
6121   to_byte = SBYTES (str);
6122
6123   /* Encoding routines determine the multibyteness of the source text
6124      by coding->src_multibyte.  */
6125   coding->src_multibyte = STRING_MULTIBYTE (str);
6126   coding->dst_multibyte = 0;
6127   if (! CODING_REQUIRE_ENCODING (coding))
6128     {
6129       coding->consumed = SBYTES (str);
6130       coding->consumed_char = SCHARS (str);
6131       if (STRING_MULTIBYTE (str))
6132         {
6133           str = Fstring_as_unibyte (str);
6134           nocopy = 1;
6135         }
6136       coding->produced = SBYTES (str);
6137       coding->produced_char = SCHARS (str);
6138       return (nocopy ? str : Fcopy_sequence (str));
6139     }
6140
6141   if (coding->composing != COMPOSITION_DISABLED)
6142     coding_save_composition (coding, from, to, str);
6143
6144   /* Try to skip the heading and tailing ASCIIs.  */
6145   if (coding->type != coding_type_ccl)
6146     {
6147       SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str),
6148                                 1);
6149       if (from == to_byte)
6150         return (nocopy ? str : Fcopy_sequence (str));
6151       shrinked_bytes = from + (SBYTES (str) - to_byte);
6152     }
6153
6154   len = encoding_buffer_size (coding, to_byte - from);
6155   allocate_conversion_buffer (buf, len);
6156
6157   consumed = consumed_char = produced = produced_char = 0;
6158   while (1)
6159     {
6160       result = encode_coding (coding, SDATA (str) + from + consumed,
6161                               buf.data + produced, to_byte - from - consumed,
6162                               buf.size - produced);
6163       consumed += coding->consumed;
6164       consumed_char += coding->consumed_char;
6165       produced += coding->produced;
6166       produced_char += coding->produced_char;
6167       if (result == CODING_FINISH_NORMAL
6168           || (result == CODING_FINISH_INSUFFICIENT_SRC
6169               && coding->consumed == 0))
6170         break;
6171       /* Now result should be CODING_FINISH_INSUFFICIENT_DST.  */
6172       extend_conversion_buffer (&buf);
6173     }
6174
6175   coding->consumed = consumed;
6176   coding->consumed_char = consumed_char;
6177   coding->produced = produced;
6178   coding->produced_char = produced_char;
6179
6180   newstr = make_uninit_string (produced + shrinked_bytes);
6181   if (from > 0)
6182     STRING_COPYIN (newstr, 0, SDATA (str), from);
6183   STRING_COPYIN (newstr, from, buf.data, produced);
6184   if (shrinked_bytes > from)
6185     STRING_COPYIN (newstr, from + produced,
6186                    SDATA (str) + to_byte,
6187                    shrinked_bytes - from);
6188
6189   free_conversion_buffer (&buf);
6190   coding_free_composition_data (coding);
6191
6192   return newstr;
6193 }
6194
6195 \f
6196 #ifdef emacs
6197 /*** 8. Emacs Lisp library functions ***/
6198
6199 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6200        doc: /* Return t if OBJECT is nil or a coding-system.
6201 See the documentation of `make-coding-system' for information
6202 about coding-system objects.  */)
6203      (obj)
6204      Lisp_Object obj;
6205 {
6206   if (NILP (obj))
6207     return Qt;
6208   if (!SYMBOLP (obj))
6209     return Qnil;
6210   /* Get coding-spec vector for OBJ.  */
6211   obj = Fget (obj, Qcoding_system);
6212   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
6213           ? Qt : Qnil);
6214 }
6215
6216 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6217        Sread_non_nil_coding_system, 1, 1, 0,
6218        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
6219      (prompt)
6220      Lisp_Object prompt;
6221 {
6222   Lisp_Object val;
6223   do
6224     {
6225       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6226                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6227     }
6228   while (SCHARS (val) == 0);
6229   return (Fintern (val, Qnil));
6230 }
6231
6232 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6233        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6234 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.  */)
6235      (prompt, default_coding_system)
6236      Lisp_Object prompt, default_coding_system;
6237 {
6238   Lisp_Object val;
6239   if (SYMBOLP (default_coding_system))
6240     default_coding_system = SYMBOL_NAME (default_coding_system);
6241   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6242                           Qt, Qnil, Qcoding_system_history,
6243                           default_coding_system, Qnil);
6244   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
6245 }
6246
6247 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6248        1, 1, 0,
6249        doc: /* Check validity of CODING-SYSTEM.
6250 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
6251 It is valid if it is a symbol with a non-nil `coding-system' property.
6252 The value of property should be a vector of length 5.  */)
6253      (coding_system)
6254      Lisp_Object coding_system;
6255 {
6256   CHECK_SYMBOL (coding_system);
6257   if (!NILP (Fcoding_system_p (coding_system)))
6258     return coding_system;
6259   while (1)
6260     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6261 }
6262 \f
6263 Lisp_Object
6264 detect_coding_system (src, src_bytes, highest, multibytep)
6265      const unsigned char *src;
6266      int src_bytes, highest;
6267      int multibytep;
6268 {
6269   int coding_mask, eol_type;
6270   Lisp_Object val, tmp;
6271   int dummy;
6272
6273   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy, multibytep);
6274   eol_type  = detect_eol_type (src, src_bytes, &dummy);
6275   if (eol_type == CODING_EOL_INCONSISTENT)
6276     eol_type = CODING_EOL_UNDECIDED;
6277
6278   if (!coding_mask)
6279     {
6280       val = Qundecided;
6281       if (eol_type != CODING_EOL_UNDECIDED)
6282         {
6283           Lisp_Object val2;
6284           val2 = Fget (Qundecided, Qeol_type);
6285           if (VECTORP (val2))
6286             val = XVECTOR (val2)->contents[eol_type];
6287         }
6288       return (highest ? val : Fcons (val, Qnil));
6289     }
6290
6291   /* At first, gather possible coding systems in VAL.  */
6292   val = Qnil;
6293   for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp))
6294     {
6295       Lisp_Object category_val, category_index;
6296
6297       category_index = Fget (XCAR (tmp), Qcoding_category_index);
6298       category_val = Fsymbol_value (XCAR (tmp));
6299       if (!NILP (category_val)
6300           && NATNUMP (category_index)
6301           && (coding_mask & (1 << XFASTINT (category_index))))
6302         {
6303           val = Fcons (category_val, val);
6304           if (highest)
6305             break;
6306         }
6307     }
6308   if (!highest)
6309     val = Fnreverse (val);
6310
6311   /* Then, replace the elements with subsidiary coding systems.  */
6312   for (tmp = val; CONSP (tmp); tmp = XCDR (tmp))
6313     {
6314       if (eol_type != CODING_EOL_UNDECIDED
6315           && eol_type != CODING_EOL_INCONSISTENT)
6316         {
6317           Lisp_Object eol;
6318           eol = Fget (XCAR (tmp), Qeol_type);
6319           if (VECTORP (eol))
6320             XSETCAR (tmp, XVECTOR (eol)->contents[eol_type]);
6321         }
6322     }
6323   return (highest ? XCAR (val) : val);
6324 }
6325
6326 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6327        2, 3, 0,
6328        doc: /* Detect how the byte sequence in the region is encoded.
6329 Return a list of possible coding systems used on decoding a byte
6330 sequence containing the bytes in the region between START and END when
6331 the coding system `undecided' is specified.  The list is ordered by
6332 priority decided in the current language environment.
6333
6334 If only ASCII characters are found, it returns a list of single element
6335 `undecided' or its subsidiary coding system according to a detected
6336 end-of-line format.
6337
6338 If optional argument HIGHEST is non-nil, return the coding system of
6339 highest priority.  */)
6340      (start, end, highest)
6341      Lisp_Object start, end, highest;
6342 {
6343   int from, to;
6344   int from_byte, to_byte;
6345   int include_anchor_byte = 0;
6346
6347   CHECK_NUMBER_COERCE_MARKER (start);
6348   CHECK_NUMBER_COERCE_MARKER (end);
6349
6350   validate_region (&start, &end);
6351   from = XINT (start), to = XINT (end);
6352   from_byte = CHAR_TO_BYTE (from);
6353   to_byte = CHAR_TO_BYTE (to);
6354
6355   if (from < GPT && to >= GPT)
6356     move_gap_both (to, to_byte);
6357   /* If we an anchor byte `\0' follows the region, we include it in
6358      the detecting source.  Then code detectors can handle the tailing
6359      byte sequence more accurately.
6360
6361      Fix me: This is not a perfect solution.  It is better that we
6362      add one more argument, say LAST_BLOCK, to all detect_coding_XXX.
6363   */
6364   if (to == Z || (to == GPT && GAP_SIZE > 0))
6365     include_anchor_byte = 1;
6366   return detect_coding_system (BYTE_POS_ADDR (from_byte),
6367                                to_byte - from_byte + include_anchor_byte,
6368                                !NILP (highest),
6369                                !NILP (current_buffer
6370                                       ->enable_multibyte_characters));
6371 }
6372
6373 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6374        1, 2, 0,
6375        doc: /* Detect how the byte sequence in STRING is encoded.
6376 Return a list of possible coding systems used on decoding a byte
6377 sequence containing the bytes in STRING when the coding system
6378 `undecided' is specified.  The list is ordered by priority decided in
6379 the current language environment.
6380
6381 If only ASCII characters are found, it returns a list of single element
6382 `undecided' or its subsidiary coding system according to a detected
6383 end-of-line format.
6384
6385 If optional argument HIGHEST is non-nil, return the coding system of
6386 highest priority.  */)
6387      (string, highest)
6388      Lisp_Object string, highest;
6389 {
6390   CHECK_STRING (string);
6391
6392   return detect_coding_system (SDATA (string),
6393                                /* "+ 1" is to include the anchor byte
6394                                   `\0'.  With this, code detectors can
6395                                   handle the tailing bytes more
6396                                   accurately.  */
6397                                SBYTES (string) + 1,
6398                                !NILP (highest),
6399                                STRING_MULTIBYTE (string));
6400 }
6401
6402 /*  Subroutine for Fsafe_coding_systems_region_internal.
6403
6404     Return a list of coding systems that safely encode the multibyte
6405     text between P and PEND.  SAFE_CODINGS, if non-nil, is an alist of
6406     possible coding systems.  If it is nil, it means that we have not
6407     yet found any coding systems.
6408
6409     WORK_TABLE is a copy of the char-table Vchar_coding_system_table.  An
6410     element of WORK_TABLE is set to t once the element is looked up.
6411
6412     If a non-ASCII single byte char is found, set
6413     *single_byte_char_found to 1.  */
6414
6415 static Lisp_Object
6416 find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
6417      unsigned char *p, *pend;
6418      Lisp_Object safe_codings, work_table;
6419      int *single_byte_char_found;
6420 {
6421   int c, len, i;
6422   Lisp_Object val, ch;
6423   Lisp_Object prev, tail;
6424
6425   while (p < pend)
6426     {
6427       c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
6428       p += len;
6429       if (ASCII_BYTE_P (c))
6430         /* We can ignore ASCII characters here.  */
6431         continue;
6432       if (SINGLE_BYTE_CHAR_P (c))
6433         *single_byte_char_found = 1;
6434       if (NILP (safe_codings))
6435         /* Already all coding systems are excluded.  But, we can't
6436            terminate the loop here because non-ASCII single-byte char
6437            must be found.  */
6438         continue;
6439       /* Check the safe coding systems for C.  */
6440       ch = make_number (c);
6441       val = Faref (work_table, ch);
6442       if (EQ (val, Qt))
6443         /* This element was already checked.  Ignore it.  */
6444         continue;
6445       /* Remember that we checked this element.  */
6446       Faset (work_table, ch, Qt);
6447
6448       for (prev = tail = safe_codings; CONSP (tail); tail = XCDR (tail))
6449         {
6450           Lisp_Object elt, translation_table, hash_table, accept_latin_extra;
6451           int encodable;
6452
6453           elt = XCAR (tail);
6454           if (CONSP (XCDR (elt)))
6455             {
6456               /* This entry has this format now:
6457                  ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
6458                           ACCEPT-LATIN-EXTRA ) */
6459               val = XCDR (elt);
6460               encodable = ! NILP (Faref (XCAR (val), ch));
6461               if (! encodable)
6462                 {
6463                   val = XCDR (val);
6464                   translation_table = XCAR (val);
6465                   hash_table = XCAR (XCDR (val));
6466                   accept_latin_extra = XCAR (XCDR (XCDR (val)));
6467                 }
6468             }
6469           else
6470             {
6471               /* This entry has this format now: ( CODING . SAFE-CHARS) */
6472               encodable = ! NILP (Faref (XCDR (elt), ch));
6473               if (! encodable)
6474                 {
6475                   /* Transform the format to:
6476                      ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE
6477                        ACCEPT-LATIN-EXTRA )  */
6478                   val = Fget (XCAR (elt), Qcoding_system);
6479                   translation_table
6480                     = Fplist_get (AREF (val, 3),
6481                                   Qtranslation_table_for_encode);
6482                   if (SYMBOLP (translation_table))
6483                     translation_table = Fget (translation_table,
6484                                               Qtranslation_table);
6485                   hash_table
6486                     = (CHAR_TABLE_P (translation_table)
6487                        ? XCHAR_TABLE (translation_table)->extras[1]
6488                        : Qnil);
6489                   accept_latin_extra
6490                     = ((EQ (AREF (val, 0), make_number (2))
6491                         && VECTORP (AREF (val, 4)))
6492                        ? AREF (AREF (val, 4), CODING_FLAG_ISO_LATIN_EXTRA)
6493                        : Qnil);
6494                   XSETCAR (tail, list5 (XCAR (elt), XCDR (elt),
6495                                         translation_table, hash_table,
6496                                         accept_latin_extra));
6497                 }
6498             }
6499
6500           if (! encodable
6501               && ((CHAR_TABLE_P (translation_table)
6502                    && ! NILP (Faref (translation_table, ch)))
6503                   || (HASH_TABLE_P (hash_table)
6504                       && ! NILP (Fgethash (ch, hash_table, Qnil)))
6505                   || (SINGLE_BYTE_CHAR_P (c)
6506                       && ! NILP (accept_latin_extra)
6507                       && VECTORP (Vlatin_extra_code_table)
6508                       && ! NILP (AREF (Vlatin_extra_code_table, c)))))
6509             encodable = 1;
6510           if (encodable)
6511             prev = tail;
6512           else
6513             {
6514               /* Exclude this coding system from SAFE_CODINGS.  */
6515               if (EQ (tail, safe_codings))
6516                 safe_codings = XCDR (safe_codings);
6517               else
6518                 XSETCDR (prev, XCDR (tail));
6519             }
6520         }
6521     }
6522   return safe_codings;
6523 }
6524
6525 DEFUN ("find-coding-systems-region-internal",
6526        Ffind_coding_systems_region_internal,
6527        Sfind_coding_systems_region_internal, 2, 2, 0,
6528        doc: /* Internal use only.  */)
6529      (start, end)
6530      Lisp_Object start, end;
6531 {
6532   Lisp_Object work_table, safe_codings;
6533   int non_ascii_p = 0;
6534   int single_byte_char_found = 0;
6535   const unsigned char *p1, *p1end, *p2, *p2end, *p;
6536
6537   if (STRINGP (start))
6538     {
6539       if (!STRING_MULTIBYTE (start))
6540         return Qt;
6541       p1 = SDATA (start), p1end = p1 + SBYTES (start);
6542       p2 = p2end = p1end;
6543       if (SCHARS (start) != SBYTES (start))
6544         non_ascii_p = 1;
6545     }
6546   else
6547     {
6548       int from, to, stop;
6549
6550       CHECK_NUMBER_COERCE_MARKER (start);
6551       CHECK_NUMBER_COERCE_MARKER (end);
6552       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6553         args_out_of_range (start, end);
6554       if (NILP (current_buffer->enable_multibyte_characters))
6555         return Qt;
6556       from = CHAR_TO_BYTE (XINT (start));
6557       to = CHAR_TO_BYTE (XINT (end));
6558       stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
6559       p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
6560       if (stop == to)
6561         p2 = p2end = p1end;
6562       else
6563         p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
6564       if (XINT (end) - XINT (start) != to - from)
6565         non_ascii_p = 1;
6566     }
6567
6568   if (!non_ascii_p)
6569     {
6570       /* We are sure that the text contains no multibyte character.
6571          Check if it contains eight-bit-graphic.  */
6572       p = p1;
6573       for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
6574       if (p == p1end)
6575         {
6576           for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
6577           if (p == p2end)
6578             return Qt;
6579         }
6580     }
6581
6582   /* The text contains non-ASCII characters.  */
6583
6584   work_table = Fmake_char_table (Qchar_coding_system, Qnil);
6585   safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars));
6586
6587   safe_codings = find_safe_codings (p1, p1end, safe_codings, work_table,
6588                                     &single_byte_char_found);
6589   if (p2 < p2end)
6590     safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
6591                                       &single_byte_char_found);
6592   if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars)))
6593     safe_codings = Qt;
6594   else
6595     {
6596       /* Turn safe_codings to a list of coding systems... */
6597       Lisp_Object val;
6598
6599       if (single_byte_char_found)
6600         /* ... and append these for eight-bit chars.  */
6601         val = Fcons (Qraw_text,
6602                      Fcons (Qemacs_mule, Fcons (Qno_conversion, Qnil)));
6603       else
6604         /* ... and append generic coding systems.  */
6605         val = Fcopy_sequence (XCAR (Vcoding_system_safe_chars));
6606
6607       for (; CONSP (safe_codings); safe_codings = XCDR (safe_codings))
6608         val = Fcons (XCAR (XCAR (safe_codings)), val);
6609       safe_codings = val;
6610     }
6611
6612   return safe_codings;
6613 }
6614
6615
6616 /* Search from position POS for such characters that are unencodable
6617    accoding to SAFE_CHARS, and return a list of their positions.  P
6618    points where in the memory the character at POS exists.  Limit the
6619    search at PEND or when Nth unencodable characters are found.
6620
6621    If SAFE_CHARS is a char table, an element for an unencodable
6622    character is nil.
6623
6624    If SAFE_CHARS is nil, all non-ASCII characters are unencodable.
6625
6626    Otherwise, SAFE_CHARS is t, and only eight-bit-contrl and
6627    eight-bit-graphic characters are unencodable.  */
6628
6629 static Lisp_Object
6630 unencodable_char_position (safe_chars, pos, p, pend, n)
6631      Lisp_Object safe_chars;
6632      int pos;
6633      unsigned char *p, *pend;
6634      int n;
6635 {
6636   Lisp_Object pos_list;
6637
6638   pos_list = Qnil;
6639   while (p < pend)
6640     {
6641       int len;
6642       int c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, len);
6643
6644       if (c >= 128
6645           && (CHAR_TABLE_P (safe_chars)
6646               ? NILP (CHAR_TABLE_REF (safe_chars, c))
6647               : (NILP (safe_chars) || c < 256)))
6648         {
6649           pos_list = Fcons (make_number (pos), pos_list);
6650           if (--n <= 0)
6651             break;
6652         }
6653       pos++;
6654       p += len;
6655     }
6656   return Fnreverse (pos_list);
6657 }
6658
6659
6660 DEFUN ("unencodable-char-position", Funencodable_char_position,
6661        Sunencodable_char_position, 3, 5, 0,
6662        doc: /*
6663 Return position of first un-encodable character in a region.
6664 START and END specfiy the region and CODING-SYSTEM specifies the
6665 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
6666
6667 If optional 4th argument COUNT is non-nil, it specifies at most how
6668 many un-encodable characters to search.  In this case, the value is a
6669 list of positions.
6670
6671 If optional 5th argument STRING is non-nil, it is a string to search
6672 for un-encodable characters.  In that case, START and END are indexes
6673 to the string.  */)
6674      (start, end, coding_system, count, string)
6675      Lisp_Object start, end, coding_system, count, string;
6676 {
6677   int n;
6678   Lisp_Object safe_chars;
6679   struct coding_system coding;
6680   Lisp_Object positions;
6681   int from, to;
6682   unsigned char *p, *pend;
6683
6684   if (NILP (string))
6685     {
6686       validate_region (&start, &end);
6687       from = XINT (start);
6688       to = XINT (end);
6689       if (NILP (current_buffer->enable_multibyte_characters))
6690         return Qnil;
6691       p = CHAR_POS_ADDR (from);
6692       if (to == GPT)
6693         pend = GPT_ADDR;
6694       else
6695         pend = CHAR_POS_ADDR (to);
6696     }
6697   else
6698     {
6699       CHECK_STRING (string);
6700       CHECK_NATNUM (start);
6701       CHECK_NATNUM (end);
6702       from = XINT (start);
6703       to = XINT (end);
6704       if (from > to
6705           || to > SCHARS (string))
6706         args_out_of_range_3 (string, start, end);
6707       if (! STRING_MULTIBYTE (string))
6708         return Qnil;
6709       p = SDATA (string) + string_char_to_byte (string, from);
6710       pend = SDATA (string) + string_char_to_byte (string, to);
6711     }
6712
6713   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
6714
6715   if (NILP (count))
6716     n = 1;
6717   else
6718     {
6719       CHECK_NATNUM (count);
6720       n = XINT (count);
6721     }
6722
6723   if (coding.type == coding_type_no_conversion
6724       || coding.type == coding_type_raw_text)
6725     return Qnil;
6726
6727   if (coding.type == coding_type_undecided)
6728     safe_chars = Qnil;
6729   else
6730     safe_chars = coding_safe_chars (coding_system);
6731
6732   if (STRINGP (string)
6733       || from >= GPT || to <= GPT)
6734     positions = unencodable_char_position (safe_chars, from, p, pend, n);
6735   else
6736     {
6737       Lisp_Object args[2];
6738
6739       args[0] = unencodable_char_position (safe_chars, from, p, GPT_ADDR, n);
6740       n -= XINT (Flength (args[0]));
6741       if (n <= 0)
6742         positions = args[0];
6743       else
6744         {
6745           args[1] = unencodable_char_position (safe_chars, GPT, GAP_END_ADDR,
6746                                                pend, n);
6747           positions = Fappend (2, args);
6748         }
6749     }
6750
6751   return  (NILP (count) ? Fcar (positions) : positions);
6752 }
6753
6754
6755 Lisp_Object
6756 code_convert_region1 (start, end, coding_system, encodep)
6757      Lisp_Object start, end, coding_system;
6758      int encodep;
6759 {
6760   struct coding_system coding;
6761   int from, to;
6762
6763   CHECK_NUMBER_COERCE_MARKER (start);
6764   CHECK_NUMBER_COERCE_MARKER (end);
6765   CHECK_SYMBOL (coding_system);
6766
6767   validate_region (&start, &end);
6768   from = XFASTINT (start);
6769   to = XFASTINT (end);
6770
6771   if (NILP (coding_system))
6772     return make_number (to - from);
6773
6774   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6775     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
6776
6777   coding.mode |= CODING_MODE_LAST_BLOCK;
6778   coding.src_multibyte = coding.dst_multibyte
6779     = !NILP (current_buffer->enable_multibyte_characters);
6780   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
6781                        &coding, encodep, 1);
6782   Vlast_coding_system_used = coding.symbol;
6783   return make_number (coding.produced_char);
6784 }
6785
6786 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
6787        3, 3, "r\nzCoding system: ",
6788        doc: /* Decode the current region from the specified coding system.
6789 When called from a program, takes three arguments:
6790 START, END, and CODING-SYSTEM.  START and END are buffer positions.
6791 This function sets `last-coding-system-used' to the precise coding system
6792 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6793 not fully specified.)
6794 It returns the length of the decoded text.  */)
6795      (start, end, coding_system)
6796      Lisp_Object start, end, coding_system;
6797 {
6798   return code_convert_region1 (start, end, coding_system, 0);
6799 }
6800
6801 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
6802        3, 3, "r\nzCoding system: ",
6803        doc: /* Encode the current region into the specified coding system.
6804 When called from a program, takes three arguments:
6805 START, END, and CODING-SYSTEM.  START and END are buffer positions.
6806 This function sets `last-coding-system-used' to the precise coding system
6807 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6808 not fully specified.)
6809 It returns the length of the encoded text.  */)
6810      (start, end, coding_system)
6811      Lisp_Object start, end, coding_system;
6812 {
6813   return code_convert_region1 (start, end, coding_system, 1);
6814 }
6815
6816 Lisp_Object
6817 code_convert_string1 (string, coding_system, nocopy, encodep)
6818      Lisp_Object string, coding_system, nocopy;
6819      int encodep;
6820 {
6821   struct coding_system coding;
6822
6823   CHECK_STRING (string);
6824   CHECK_SYMBOL (coding_system);
6825
6826   if (NILP (coding_system))
6827     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
6828
6829   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6830     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
6831
6832   coding.mode |= CODING_MODE_LAST_BLOCK;
6833   string = (encodep
6834             ? encode_coding_string (string, &coding, !NILP (nocopy))
6835             : decode_coding_string (string, &coding, !NILP (nocopy)));
6836   Vlast_coding_system_used = coding.symbol;
6837
6838   return string;
6839 }
6840
6841 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
6842        2, 3, 0,
6843        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
6844 Optional arg NOCOPY non-nil means it is OK to return STRING itself
6845 if the decoding operation is trivial.
6846 This function sets `last-coding-system-used' to the precise coding system
6847 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6848 not fully specified.)  */)
6849      (string, coding_system, nocopy)
6850      Lisp_Object string, coding_system, nocopy;
6851 {
6852   return code_convert_string1 (string, coding_system, nocopy, 0);
6853 }
6854
6855 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
6856        2, 3, 0,
6857        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
6858 Optional arg NOCOPY non-nil means it is OK to return STRING itself
6859 if the encoding operation is trivial.
6860 This function sets `last-coding-system-used' to the precise coding system
6861 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6862 not fully specified.)  */)
6863      (string, coding_system, nocopy)
6864      Lisp_Object string, coding_system, nocopy;
6865 {
6866   return code_convert_string1 (string, coding_system, nocopy, 1);
6867 }
6868
6869 /* Encode or decode STRING according to CODING_SYSTEM.
6870    Do not set Vlast_coding_system_used.
6871
6872    This function is called only from macros DECODE_FILE and
6873    ENCODE_FILE, thus we ignore character composition.  */
6874
6875 Lisp_Object
6876 code_convert_string_norecord (string, coding_system, encodep)
6877      Lisp_Object string, coding_system;
6878      int encodep;
6879 {
6880   struct coding_system coding;
6881
6882   CHECK_STRING (string);
6883   CHECK_SYMBOL (coding_system);
6884
6885   if (NILP (coding_system))
6886     return string;
6887
6888   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
6889     error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system)));
6890
6891   coding.composing = COMPOSITION_DISABLED;
6892   coding.mode |= CODING_MODE_LAST_BLOCK;
6893   return (encodep
6894           ? encode_coding_string (string, &coding, 1)
6895           : decode_coding_string (string, &coding, 1));
6896 }
6897 \f
6898 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
6899        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
6900 Return the corresponding character.  */)
6901      (code)
6902      Lisp_Object code;
6903 {
6904   unsigned char c1, c2, s1, s2;
6905   Lisp_Object val;
6906
6907   CHECK_NUMBER (code);
6908   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
6909   if (s1 == 0)
6910     {
6911       if (s2 < 0x80)
6912         XSETFASTINT (val, s2);
6913       else if (s2 >= 0xA0 || s2 <= 0xDF)
6914         XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0));
6915       else
6916         error ("Invalid Shift JIS code: %x", XFASTINT (code));
6917     }
6918   else
6919     {
6920       if ((s1 < 0x80 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF)
6921           || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
6922         error ("Invalid Shift JIS code: %x", XFASTINT (code));
6923       DECODE_SJIS (s1, s2, c1, c2);
6924       XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2));
6925     }
6926   return val;
6927 }
6928
6929 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
6930        doc: /* Encode a Japanese character CHAR to shift_jis encoding.
6931 Return the corresponding code in SJIS.  */)
6932      (ch)
6933      Lisp_Object ch;
6934 {
6935   int charset, c1, c2, s1, s2;
6936   Lisp_Object val;
6937
6938   CHECK_NUMBER (ch);
6939   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
6940   if (charset == CHARSET_ASCII)
6941     {
6942       val = ch;
6943     }
6944   else if (charset == charset_jisx0208
6945            && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F)
6946     {
6947       ENCODE_SJIS (c1, c2, s1, s2);
6948       XSETFASTINT (val, (s1 << 8) | s2);
6949     }
6950   else if (charset == charset_katakana_jisx0201
6951            && c1 > 0x20 && c2 < 0xE0)
6952     {
6953       XSETFASTINT (val, c1 | 0x80);
6954     }
6955   else
6956     error ("Can't encode to shift_jis: %d", XFASTINT (ch));
6957   return val;
6958 }
6959
6960 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
6961        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
6962 Return the corresponding character.  */)
6963      (code)
6964      Lisp_Object code;
6965 {
6966   int charset;
6967   unsigned char b1, b2, c1, c2;
6968   Lisp_Object val;
6969
6970   CHECK_NUMBER (code);
6971   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
6972   if (b1 == 0)
6973     {
6974       if (b2 >= 0x80)
6975         error ("Invalid BIG5 code: %x", XFASTINT (code));
6976       val = code;
6977     }
6978   else
6979     {
6980       if ((b1 < 0xA1 || b1 > 0xFE)
6981           || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
6982         error ("Invalid BIG5 code: %x", XFASTINT (code));
6983       DECODE_BIG5 (b1, b2, charset, c1, c2);
6984       XSETFASTINT (val, MAKE_CHAR (charset, c1, c2));
6985     }
6986   return val;
6987 }
6988
6989 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
6990        doc: /* Encode the Big5 character CHAR to BIG5 coding system.
6991 Return the corresponding character code in Big5.  */)
6992      (ch)
6993      Lisp_Object ch;
6994 {
6995   int charset, c1, c2, b1, b2;
6996   Lisp_Object val;
6997
6998   CHECK_NUMBER (ch);
6999   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
7000   if (charset == CHARSET_ASCII)
7001     {
7002       val = ch;
7003     }
7004   else if ((charset == charset_big5_1
7005             && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec))
7006            || (charset == charset_big5_2
7007                && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2))
7008     {
7009       ENCODE_BIG5 (charset, c1, c2, b1, b2);
7010       XSETFASTINT (val, (b1 << 8) | b2);
7011     }
7012   else
7013     error ("Can't encode to Big5: %d", XFASTINT (ch));
7014   return val;
7015 }
7016 \f
7017 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
7018        Sset_terminal_coding_system_internal, 1, 1, 0,
7019        doc: /* Internal use only.  */)
7020      (coding_system)
7021      Lisp_Object coding_system;
7022 {
7023   CHECK_SYMBOL (coding_system);
7024   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
7025   /* We had better not send unsafe characters to terminal.  */
7026   terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
7027   /* Character composition should be disabled.  */
7028   terminal_coding.composing = COMPOSITION_DISABLED;
7029   /* Error notification should be suppressed.  */
7030   terminal_coding.suppress_error = 1;
7031   terminal_coding.src_multibyte = 1;
7032   terminal_coding.dst_multibyte = 0;
7033   return Qnil;
7034 }
7035
7036 DEFUN ("set-safe-terminal-coding-system-internal", Fset_safe_terminal_coding_system_internal,
7037        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
7038        doc: /* Internal use only.  */)
7039      (coding_system)
7040      Lisp_Object coding_system;
7041 {
7042   CHECK_SYMBOL (coding_system);
7043   setup_coding_system (Fcheck_coding_system (coding_system),
7044                        &safe_terminal_coding);
7045   /* Character composition should be disabled.  */
7046   safe_terminal_coding.composing = COMPOSITION_DISABLED;
7047   /* Error notification should be suppressed.  */
7048   terminal_coding.suppress_error = 1;
7049   safe_terminal_coding.src_multibyte = 1;
7050   safe_terminal_coding.dst_multibyte = 0;
7051   return Qnil;
7052 }
7053
7054 DEFUN ("terminal-coding-system", Fterminal_coding_system,
7055        Sterminal_coding_system, 0, 0, 0,
7056        doc: /* Return coding system specified for terminal output.  */)
7057      ()
7058 {
7059   return terminal_coding.symbol;
7060 }
7061
7062 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
7063        Sset_keyboard_coding_system_internal, 1, 1, 0,
7064        doc: /* Internal use only.  */)
7065      (coding_system)
7066      Lisp_Object coding_system;
7067 {
7068   CHECK_SYMBOL (coding_system);
7069   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
7070   /* Character composition should be disabled.  */
7071   keyboard_coding.composing = COMPOSITION_DISABLED;
7072   return Qnil;
7073 }
7074
7075 DEFUN ("keyboard-coding-system", Fkeyboard_coding_system,
7076        Skeyboard_coding_system, 0, 0, 0,
7077        doc: /* Return coding system specified for decoding keyboard input.  */)
7078      ()
7079 {
7080   return keyboard_coding.symbol;
7081 }
7082
7083 \f
7084 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7085        Sfind_operation_coding_system,  1, MANY, 0,
7086        doc: /* Choose a coding system for an operation based on the target name.
7087 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7088 DECODING-SYSTEM is the coding system to use for decoding
7089 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7090 for encoding (in case OPERATION does encoding).
7091
7092 The first argument OPERATION specifies an I/O primitive:
7093   For file I/O, `insert-file-contents' or `write-region'.
7094   For process I/O, `call-process', `call-process-region', or `start-process'.
7095   For network I/O, `open-network-stream'.
7096
7097 The remaining arguments should be the same arguments that were passed
7098 to the primitive.  Depending on which primitive, one of those arguments
7099 is selected as the TARGET.  For example, if OPERATION does file I/O,
7100 whichever argument specifies the file name is TARGET.
7101
7102 TARGET has a meaning which depends on OPERATION:
7103   For file I/O, TARGET is a file name.
7104   For process I/O, TARGET is a process name.
7105   For network I/O, TARGET is a service name or a port number
7106
7107 This function looks up what specified for TARGET in,
7108 `file-coding-system-alist', `process-coding-system-alist',
7109 or `network-coding-system-alist' depending on OPERATION.
7110 They may specify a coding system, a cons of coding systems,
7111 or a function symbol to call.
7112 In the last case, we call the function with one argument,
7113 which is a list of all the arguments given to this function.
7114
7115 usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
7116      (nargs, args)
7117      int nargs;
7118      Lisp_Object *args;
7119 {
7120   Lisp_Object operation, target_idx, target, val;
7121   register Lisp_Object chain;
7122
7123   if (nargs < 2)
7124     error ("Too few arguments");
7125   operation = args[0];
7126   if (!SYMBOLP (operation)
7127       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
7128     error ("Invalid first argument");
7129   if (nargs < 1 + XINT (target_idx))
7130     error ("Too few arguments for operation: %s",
7131            SDATA (SYMBOL_NAME (operation)));
7132   /* For write-region, if the 6th argument (i.e. VISIT, the 5th
7133      argument to write-region) is string, it must be treated as a
7134      target file name.  */
7135   if (EQ (operation, Qwrite_region)
7136       && nargs > 5
7137       && STRINGP (args[5]))
7138     target_idx = make_number (4);
7139   target = args[XINT (target_idx) + 1];
7140   if (!(STRINGP (target)
7141         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
7142     error ("Invalid argument %d", XINT (target_idx) + 1);
7143
7144   chain = ((EQ (operation, Qinsert_file_contents)
7145             || EQ (operation, Qwrite_region))
7146            ? Vfile_coding_system_alist
7147            : (EQ (operation, Qopen_network_stream)
7148               ? Vnetwork_coding_system_alist
7149               : Vprocess_coding_system_alist));
7150   if (NILP (chain))
7151     return Qnil;
7152
7153   for (; CONSP (chain); chain = XCDR (chain))
7154     {
7155       Lisp_Object elt;
7156       elt = XCAR (chain);
7157
7158       if (CONSP (elt)
7159           && ((STRINGP (target)
7160                && STRINGP (XCAR (elt))
7161                && fast_string_match (XCAR (elt), target) >= 0)
7162               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
7163         {
7164           val = XCDR (elt);
7165           /* Here, if VAL is both a valid coding system and a valid
7166              function symbol, we return VAL as a coding system.  */
7167           if (CONSP (val))
7168             return val;
7169           if (! SYMBOLP (val))
7170             return Qnil;
7171           if (! NILP (Fcoding_system_p (val)))
7172             return Fcons (val, val);
7173           if (! NILP (Ffboundp (val)))
7174             {
7175               val = call1 (val, Flist (nargs, args));
7176               if (CONSP (val))
7177                 return val;
7178               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7179                 return Fcons (val, val);
7180             }
7181           return Qnil;
7182         }
7183     }
7184   return Qnil;
7185 }
7186
7187 DEFUN ("update-coding-systems-internal",  Fupdate_coding_systems_internal,
7188        Supdate_coding_systems_internal, 0, 0, 0,
7189        doc: /* Update internal database for ISO2022 and CCL based coding systems.
7190 When values of any coding categories are changed, you must
7191 call this function.  */)
7192      ()
7193 {
7194   int i;
7195
7196   for (i = CODING_CATEGORY_IDX_EMACS_MULE; i < CODING_CATEGORY_IDX_MAX; i++)
7197     {
7198       Lisp_Object val;
7199
7200       val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[i]);
7201       if (!NILP (val))
7202         {
7203           if (! coding_system_table[i])
7204             coding_system_table[i] = ((struct coding_system *)
7205                                       xmalloc (sizeof (struct coding_system)));
7206           setup_coding_system (val, coding_system_table[i]);
7207         }
7208       else if (coding_system_table[i])
7209         {
7210           xfree (coding_system_table[i]);
7211           coding_system_table[i] = NULL;
7212         }
7213     }
7214
7215   return Qnil;
7216 }
7217
7218 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
7219        Sset_coding_priority_internal, 0, 0, 0,
7220        doc: /* Update internal database for the current value of `coding-category-list'.
7221 This function is internal use only.  */)
7222      ()
7223 {
7224   int i = 0, idx;
7225   Lisp_Object val;
7226
7227   val = Vcoding_category_list;
7228
7229   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
7230     {
7231       if (! SYMBOLP (XCAR (val)))
7232         break;
7233       idx = XFASTINT (Fget (XCAR (val), Qcoding_category_index));
7234       if (idx >= CODING_CATEGORY_IDX_MAX)
7235         break;
7236       coding_priorities[i++] = (1 << idx);
7237       val = XCDR (val);
7238     }
7239   /* If coding-category-list is valid and contains all coding
7240      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
7241      the following code saves Emacs from crashing.  */
7242   while (i < CODING_CATEGORY_IDX_MAX)
7243     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
7244
7245   return Qnil;
7246 }
7247
7248 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7249        Sdefine_coding_system_internal, 1, 1, 0,
7250        doc: /* Register CODING-SYSTEM as a base coding system.
7251 This function is internal use only.  */)
7252      (coding_system)
7253      Lisp_Object coding_system;
7254 {
7255   Lisp_Object safe_chars, slot;
7256
7257   if (NILP (Fcheck_coding_system (coding_system)))
7258     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
7259   safe_chars = coding_safe_chars (coding_system);
7260   if (! EQ (safe_chars, Qt) && ! CHAR_TABLE_P (safe_chars))
7261     error ("No valid safe-chars property for %s",
7262            SDATA (SYMBOL_NAME (coding_system)));
7263   if (EQ (safe_chars, Qt))
7264     {
7265       if (NILP (Fmemq (coding_system, XCAR (Vcoding_system_safe_chars))))
7266         XSETCAR (Vcoding_system_safe_chars,
7267                  Fcons (coding_system, XCAR (Vcoding_system_safe_chars)));
7268     }
7269   else
7270     {
7271       slot = Fassq (coding_system, XCDR (Vcoding_system_safe_chars));
7272       if (NILP (slot))
7273         XSETCDR (Vcoding_system_safe_chars,
7274                  nconc2 (XCDR (Vcoding_system_safe_chars),
7275                          Fcons (Fcons (coding_system, safe_chars), Qnil)));
7276       else
7277         XSETCDR (slot, safe_chars);
7278     }
7279   return Qnil;
7280 }
7281
7282 #endif /* emacs */
7283
7284 \f
7285 /*** 9. Post-amble ***/
7286
7287 void
7288 init_coding_once ()
7289 {
7290   int i;
7291
7292   /* Emacs' internal format specific initialize routine.  */
7293   for (i = 0; i <= 0x20; i++)
7294     emacs_code_class[i] = EMACS_control_code;
7295   emacs_code_class[0x0A] = EMACS_linefeed_code;
7296   emacs_code_class[0x0D] = EMACS_carriage_return_code;
7297   for (i = 0x21 ; i < 0x7F; i++)
7298     emacs_code_class[i] = EMACS_ascii_code;
7299   emacs_code_class[0x7F] = EMACS_control_code;
7300   for (i = 0x80; i < 0xFF; i++)
7301     emacs_code_class[i] = EMACS_invalid_code;
7302   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
7303   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
7304   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
7305   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
7306
7307   /* ISO2022 specific initialize routine.  */
7308   for (i = 0; i < 0x20; i++)
7309     iso_code_class[i] = ISO_control_0;
7310   for (i = 0x21; i < 0x7F; i++)
7311     iso_code_class[i] = ISO_graphic_plane_0;
7312   for (i = 0x80; i < 0xA0; i++)
7313     iso_code_class[i] = ISO_control_1;
7314   for (i = 0xA1; i < 0xFF; i++)
7315     iso_code_class[i] = ISO_graphic_plane_1;
7316   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
7317   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
7318   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
7319   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
7320   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
7321   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
7322   iso_code_class[ISO_CODE_ESC] = ISO_escape;
7323   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
7324   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
7325   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
7326
7327   setup_coding_system (Qnil, &keyboard_coding);
7328   setup_coding_system (Qnil, &terminal_coding);
7329   setup_coding_system (Qnil, &safe_terminal_coding);
7330   setup_coding_system (Qnil, &default_buffer_file_coding);
7331
7332   bzero (coding_system_table, sizeof coding_system_table);
7333
7334   bzero (ascii_skip_code, sizeof ascii_skip_code);
7335   for (i = 0; i < 128; i++)
7336     ascii_skip_code[i] = 1;
7337
7338 #if defined (MSDOS) || defined (WINDOWSNT)
7339   system_eol_type = CODING_EOL_CRLF;
7340 #else
7341   system_eol_type = CODING_EOL_LF;
7342 #endif
7343
7344   inhibit_pre_post_conversion = 0;
7345 }
7346
7347 #ifdef emacs
7348
7349 void
7350 syms_of_coding ()
7351 {
7352   Qtarget_idx = intern ("target-idx");
7353   staticpro (&Qtarget_idx);
7354
7355   Qcoding_system_history = intern ("coding-system-history");
7356   staticpro (&Qcoding_system_history);
7357   Fset (Qcoding_system_history, Qnil);
7358
7359   /* Target FILENAME is the first argument.  */
7360   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
7361   /* Target FILENAME is the third argument.  */
7362   Fput (Qwrite_region, Qtarget_idx, make_number (2));
7363
7364   Qcall_process = intern ("call-process");
7365   staticpro (&Qcall_process);
7366   /* Target PROGRAM is the first argument.  */
7367   Fput (Qcall_process, Qtarget_idx, make_number (0));
7368
7369   Qcall_process_region = intern ("call-process-region");
7370   staticpro (&Qcall_process_region);
7371   /* Target PROGRAM is the third argument.  */
7372   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
7373
7374   Qstart_process = intern ("start-process");
7375   staticpro (&Qstart_process);
7376   /* Target PROGRAM is the third argument.  */
7377   Fput (Qstart_process, Qtarget_idx, make_number (2));
7378
7379   Qopen_network_stream = intern ("open-network-stream");
7380   staticpro (&Qopen_network_stream);
7381   /* Target SERVICE is the fourth argument.  */
7382   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
7383
7384   Qcoding_system = intern ("coding-system");
7385   staticpro (&Qcoding_system);
7386
7387   Qeol_type = intern ("eol-type");
7388   staticpro (&Qeol_type);
7389
7390   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
7391   staticpro (&Qbuffer_file_coding_system);
7392
7393   Qpost_read_conversion = intern ("post-read-conversion");
7394   staticpro (&Qpost_read_conversion);
7395
7396   Qpre_write_conversion = intern ("pre-write-conversion");
7397   staticpro (&Qpre_write_conversion);
7398
7399   Qno_conversion = intern ("no-conversion");
7400   staticpro (&Qno_conversion);
7401
7402   Qundecided = intern ("undecided");
7403   staticpro (&Qundecided);
7404
7405   Qcoding_system_p = intern ("coding-system-p");
7406   staticpro (&Qcoding_system_p);
7407
7408   Qcoding_system_error = intern ("coding-system-error");
7409   staticpro (&Qcoding_system_error);
7410
7411   Fput (Qcoding_system_error, Qerror_conditions,
7412         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
7413   Fput (Qcoding_system_error, Qerror_message,
7414         build_string ("Invalid coding system"));
7415
7416   Qcoding_category = intern ("coding-category");
7417   staticpro (&Qcoding_category);
7418   Qcoding_category_index = intern ("coding-category-index");
7419   staticpro (&Qcoding_category_index);
7420
7421   Vcoding_category_table
7422     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
7423   staticpro (&Vcoding_category_table);
7424   {
7425     int i;
7426     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
7427       {
7428         XVECTOR (Vcoding_category_table)->contents[i]
7429           = intern (coding_category_name[i]);
7430         Fput (XVECTOR (Vcoding_category_table)->contents[i],
7431               Qcoding_category_index, make_number (i));
7432       }
7433   }
7434
7435   Vcoding_system_safe_chars = Fcons (Qnil, Qnil);
7436   staticpro (&Vcoding_system_safe_chars);
7437
7438   Qtranslation_table = intern ("translation-table");
7439   staticpro (&Qtranslation_table);
7440   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
7441
7442   Qtranslation_table_id = intern ("translation-table-id");
7443   staticpro (&Qtranslation_table_id);
7444
7445   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
7446   staticpro (&Qtranslation_table_for_decode);
7447
7448   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
7449   staticpro (&Qtranslation_table_for_encode);
7450
7451   Qsafe_chars = intern ("safe-chars");
7452   staticpro (&Qsafe_chars);
7453
7454   Qchar_coding_system = intern ("char-coding-system");
7455   staticpro (&Qchar_coding_system);
7456
7457   /* Intern this now in case it isn't already done.
7458      Setting this variable twice is harmless.
7459      But don't staticpro it here--that is done in alloc.c.  */
7460   Qchar_table_extra_slots = intern ("char-table-extra-slots");
7461   Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
7462   Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (0));
7463
7464   Qvalid_codes = intern ("valid-codes");
7465   staticpro (&Qvalid_codes);
7466
7467   Qemacs_mule = intern ("emacs-mule");
7468   staticpro (&Qemacs_mule);
7469
7470   Qraw_text = intern ("raw-text");
7471   staticpro (&Qraw_text);
7472
7473   defsubr (&Scoding_system_p);
7474   defsubr (&Sread_coding_system);
7475   defsubr (&Sread_non_nil_coding_system);
7476   defsubr (&Scheck_coding_system);
7477   defsubr (&Sdetect_coding_region);
7478   defsubr (&Sdetect_coding_string);
7479   defsubr (&Sfind_coding_systems_region_internal);
7480   defsubr (&Sunencodable_char_position);
7481   defsubr (&Sdecode_coding_region);
7482   defsubr (&Sencode_coding_region);
7483   defsubr (&Sdecode_coding_string);
7484   defsubr (&Sencode_coding_string);
7485   defsubr (&Sdecode_sjis_char);
7486   defsubr (&Sencode_sjis_char);
7487   defsubr (&Sdecode_big5_char);
7488   defsubr (&Sencode_big5_char);
7489   defsubr (&Sset_terminal_coding_system_internal);
7490   defsubr (&Sset_safe_terminal_coding_system_internal);
7491   defsubr (&Sterminal_coding_system);
7492   defsubr (&Sset_keyboard_coding_system_internal);
7493   defsubr (&Skeyboard_coding_system);
7494   defsubr (&Sfind_operation_coding_system);
7495   defsubr (&Supdate_coding_systems_internal);
7496   defsubr (&Sset_coding_priority_internal);
7497   defsubr (&Sdefine_coding_system_internal);
7498
7499   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
7500                doc: /* List of coding systems.
7501
7502 Do not alter the value of this variable manually.  This variable should be
7503 updated by the functions `make-coding-system' and
7504 `define-coding-system-alias'.  */);
7505   Vcoding_system_list = Qnil;
7506
7507   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
7508                doc: /* Alist of coding system names.
7509 Each element is one element list of coding system name.
7510 This variable is given to `completing-read' as TABLE argument.
7511
7512 Do not alter the value of this variable manually.  This variable should be
7513 updated by the functions `make-coding-system' and
7514 `define-coding-system-alias'.  */);
7515   Vcoding_system_alist = Qnil;
7516
7517   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
7518                doc: /* List of coding-categories (symbols) ordered by priority.
7519
7520 On detecting a coding system, Emacs tries code detection algorithms
7521 associated with each coding-category one by one in this order.  When
7522 one algorithm agrees with a byte sequence of source text, the coding
7523 system bound to the corresponding coding-category is selected.  */);
7524   {
7525     int i;
7526
7527     Vcoding_category_list = Qnil;
7528     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
7529       Vcoding_category_list
7530         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
7531                  Vcoding_category_list);
7532   }
7533
7534   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
7535                doc: /* Specify the coding system for read operations.
7536 It is useful to bind this variable with `let', but do not set it globally.
7537 If the value is a coding system, it is used for decoding on read operation.
7538 If not, an appropriate element is used from one of the coding system alists:
7539 There are three such tables, `file-coding-system-alist',
7540 `process-coding-system-alist', and `network-coding-system-alist'.  */);
7541   Vcoding_system_for_read = Qnil;
7542
7543   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
7544                doc: /* Specify the coding system for write operations.
7545 Programs bind this variable with `let', but you should not set it globally.
7546 If the value is a coding system, it is used for encoding of output,
7547 when writing it to a file and when sending it to a file or subprocess.
7548
7549 If this does not specify a coding system, an appropriate element
7550 is used from one of the coding system alists:
7551 There are three such tables, `file-coding-system-alist',
7552 `process-coding-system-alist', and `network-coding-system-alist'.
7553 For output to files, if the above procedure does not specify a coding system,
7554 the value of `buffer-file-coding-system' is used.  */);
7555   Vcoding_system_for_write = Qnil;
7556
7557   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
7558                doc: /* Coding system used in the latest file or process I/O.
7559 Also set by `encode-coding-region', `decode-coding-region',
7560 `encode-coding-string' and `decode-coding-string'.  */);
7561   Vlast_coding_system_used = Qnil;
7562
7563   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
7564                doc: /* *Non-nil means always inhibit code conversion of end-of-line format.
7565 See info node `Coding Systems' and info node `Text and Binary' concerning
7566 such conversion.  */);
7567   inhibit_eol_conversion = 0;
7568
7569   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
7570                doc: /* Non-nil means process buffer inherits coding system of process output.
7571 Bind it to t if the process output is to be treated as if it were a file
7572 read from some filesystem.  */);
7573   inherit_process_coding_system = 0;
7574
7575   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
7576                doc: /* Alist to decide a coding system to use for a file I/O operation.
7577 The format is ((PATTERN . VAL) ...),
7578 where PATTERN is a regular expression matching a file name,
7579 VAL is a coding system, a cons of coding systems, or a function symbol.
7580 If VAL is a coding system, it is used for both decoding and encoding
7581 the file contents.
7582 If VAL is a cons of coding systems, the car part is used for decoding,
7583 and the cdr part is used for encoding.
7584 If VAL is a function symbol, the function must return a coding system
7585 or a cons of coding systems which are used as above.  The function gets
7586 the arguments with which `find-operation-coding-system' was called.
7587
7588 See also the function `find-operation-coding-system'
7589 and the variable `auto-coding-alist'.  */);
7590   Vfile_coding_system_alist = Qnil;
7591
7592   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
7593     doc: /* Alist to decide a coding system to use for a process I/O operation.
7594 The format is ((PATTERN . VAL) ...),
7595 where PATTERN is a regular expression matching a program name,
7596 VAL is a coding system, a cons of coding systems, or a function symbol.
7597 If VAL is a coding system, it is used for both decoding what received
7598 from the program and encoding what sent to the program.
7599 If VAL is a cons of coding systems, the car part is used for decoding,
7600 and the cdr part is used for encoding.
7601 If VAL is a function symbol, the function must return a coding system
7602 or a cons of coding systems which are used as above.
7603
7604 See also the function `find-operation-coding-system'.  */);
7605   Vprocess_coding_system_alist = Qnil;
7606
7607   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
7608     doc: /* Alist to decide a coding system to use for a network I/O operation.
7609 The format is ((PATTERN . VAL) ...),
7610 where PATTERN is a regular expression matching a network service name
7611 or is a port number to connect to,
7612 VAL is a coding system, a cons of coding systems, or a function symbol.
7613 If VAL is a coding system, it is used for both decoding what received
7614 from the network stream and encoding what sent to the network stream.
7615 If VAL is a cons of coding systems, the car part is used for decoding,
7616 and the cdr part is used for encoding.
7617 If VAL is a function symbol, the function must return a coding system
7618 or a cons of coding systems which are used as above.
7619
7620 See also the function `find-operation-coding-system'.  */);
7621   Vnetwork_coding_system_alist = Qnil;
7622
7623   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
7624                doc: /* Coding system to use with system messages.
7625 Also used for decoding keyboard input on X Window system.  */);
7626   Vlocale_coding_system = Qnil;
7627
7628   /* The eol mnemonics are reset in startup.el system-dependently.  */
7629   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
7630                doc: /* *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
7631   eol_mnemonic_unix = build_string (":");
7632
7633   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
7634                doc: /* *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
7635   eol_mnemonic_dos = build_string ("\\");
7636
7637   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
7638                doc: /* *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
7639   eol_mnemonic_mac = build_string ("/");
7640
7641   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
7642                doc: /* *String displayed in mode line when end-of-line format is not yet determined.  */);
7643   eol_mnemonic_undecided = build_string (":");
7644
7645   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
7646                doc: /* *Non-nil enables character translation while encoding and decoding.  */);
7647   Venable_character_translation = Qt;
7648
7649   DEFVAR_LISP ("standard-translation-table-for-decode",
7650                &Vstandard_translation_table_for_decode,
7651                doc: /* Table for translating characters while decoding.  */);
7652   Vstandard_translation_table_for_decode = Qnil;
7653
7654   DEFVAR_LISP ("standard-translation-table-for-encode",
7655                &Vstandard_translation_table_for_encode,
7656                doc: /* Table for translating characters while encoding.  */);
7657   Vstandard_translation_table_for_encode = Qnil;
7658
7659   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
7660                doc: /* Alist of charsets vs revision numbers.
7661 While encoding, if a charset (car part of an element) is found,
7662 designate it with the escape sequence identifying revision (cdr part of the element).  */);
7663   Vcharset_revision_alist = Qnil;
7664
7665   DEFVAR_LISP ("default-process-coding-system",
7666                &Vdefault_process_coding_system,
7667                doc: /* Cons of coding systems used for process I/O by default.
7668 The car part is used for decoding a process output,
7669 the cdr part is used for encoding a text to be sent to a process.  */);
7670   Vdefault_process_coding_system = Qnil;
7671
7672   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
7673                doc: /* Table of extra Latin codes in the range 128..159 (inclusive).
7674 This is a vector of length 256.
7675 If Nth element is non-nil, the existence of code N in a file
7676 \(or output of subprocess) doesn't prevent it to be detected as
7677 a coding system of ISO 2022 variant which has a flag
7678 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
7679 or reading output of a subprocess.
7680 Only 128th through 159th elements has a meaning.  */);
7681   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
7682
7683   DEFVAR_LISP ("select-safe-coding-system-function",
7684                &Vselect_safe_coding_system_function,
7685                doc: /* Function to call to select safe coding system for encoding a text.
7686
7687 If set, this function is called to force a user to select a proper
7688 coding system which can encode the text in the case that a default
7689 coding system used in each operation can't encode the text.
7690
7691 The default value is `select-safe-coding-system' (which see).  */);
7692   Vselect_safe_coding_system_function = Qnil;
7693
7694   DEFVAR_BOOL ("coding-system-require-warning",
7695                &coding_system_require_warning,
7696                doc: /* Internal use only.
7697 If non-nil, on writing a file, `select-safe-coding-system-function' is
7698 called even if `coding-system-for-write' is non-nil.  The command
7699 `universal-coding-system-argument' binds this variable to t temporarily.  */);
7700   coding_system_require_warning = 0;
7701
7702
7703   DEFVAR_BOOL ("inhibit-iso-escape-detection",
7704                &inhibit_iso_escape_detection,
7705                doc: /* If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
7706
7707 By default, on reading a file, Emacs tries to detect how the text is
7708 encoded.  This code detection is sensitive to escape sequences.  If
7709 the sequence is valid as ISO2022, the code is determined as one of
7710 the ISO2022 encodings, and the file is decoded by the corresponding
7711 coding system (e.g. `iso-2022-7bit').
7712
7713 However, there may be a case that you want to read escape sequences in
7714 a file as is.  In such a case, you can set this variable to non-nil.
7715 Then, as the code detection ignores any escape sequences, no file is
7716 detected as encoded in some ISO2022 encoding.  The result is that all
7717 escape sequences become visible in a buffer.
7718
7719 The default value is nil, and it is strongly recommended not to change
7720 it.  That is because many Emacs Lisp source files that contain
7721 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
7722 in Emacs's distribution, and they won't be decoded correctly on
7723 reading if you suppress escape sequence detection.
7724
7725 The other way to read escape sequences in a file without decoding is
7726 to explicitly specify some coding system that doesn't use ISO2022's
7727 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
7728   inhibit_iso_escape_detection = 0;
7729
7730   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
7731                doc: /* Char table for translating self-inserting characters.
7732 This is applied to the result of input methods, not their input.  See also
7733 `keyboard-translate-table'.  */);
7734     Vtranslation_table_for_input = Qnil;
7735 }
7736
7737 char *
7738 emacs_strerror (error_number)
7739      int error_number;
7740 {
7741   char *str;
7742
7743   synchronize_system_messages_locale ();
7744   str = strerror (error_number);
7745
7746   if (! NILP (Vlocale_coding_system))
7747     {
7748       Lisp_Object dec = code_convert_string_norecord (build_string (str),
7749                                                       Vlocale_coding_system,
7750                                                       0);
7751       str = (char *) SDATA (dec);
7752     }
7753
7754   return str;
7755 }
7756
7757 #endif /* emacs */
7758