src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4    Copyright (C) 2001, 2002 Free Software Foundation, Inc.
   5    Copyright (C) 2001, 2002
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H13PRO009
   8
   9 This file is part of GNU Emacs.
  10
  11 GNU Emacs is free software; you can redistribute it and/or modify
  12 it under the terms of the GNU General Public License as published by
  13 the Free Software Foundation; either version 2, or (at your option)
  14 any later version.
  15
  16 GNU Emacs is distributed in the hope that it will be useful,
  17 but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 GNU General Public License for more details.
  20
  21 You should have received a copy of the GNU General Public License
  22 along with GNU Emacs; see the file COPYING.  If not, write to
  23 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  24 Boston, MA 02111-1307, USA.  */
  25
  26 /*** TABLE OF CONTENTS ***
  27
  28   0. General comments
  29   1. Preamble
  30   2. Emacs' internal format (emacs-utf-8) handlers
  31   3. UTF-8 handlers
  32   4. UTF-16 handlers
  33   5. Charset-base coding systems handlers
  34   6. emacs-mule (old Emacs' internal format) handlers
  35   7. ISO2022 handlers
  36   8. Shift-JIS and BIG5 handlers
  37   9. CCL handlers
  38   10. C library functions
  39   11. Emacs Lisp library functions
  40   12. Postamble
  41
  42 */
  43
  44 /*** 0. General comments ***
  45
  46
  47 CODING SYSTEM
  48
  49   A coding system is an object for an encoding mechanism that contains
  50   information about how to convert byte sequences to character
  51   sequences and vice versa.  When we say "decode", it means converting
  52   a byte sequence of a specific coding system into a character
  53   sequence that is represented by Emacs' internal coding system
  54   `emacs-utf-8', and when we say "encode", it means converting a
  55   character sequence of emacs-utf-8 to a byte sequence of a specific
  56   coding system.
  57
  58   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  59   C level, a coding system is represented by a vector of attributes
  60   stored in the hash table Vcharset_hash_table.  The conversion from
  61   coding system symbol to attributes vector is done by looking up
  62   Vcharset_hash_table by the symbol.
  63
  64   Coding systems are classified into the following types depending on
  65   the encoding mechanism.  Here's a brief description of the types.
  66
  67   o UTF-8
  68
  69   o UTF-16
  70
  71   o Charset-base coding system
  72
  73   A coding system defined by one or more (coded) character sets.
  74   Decoding and encoding are done by a code converter defined for each
  75   character set.
  76
  77   o Old Emacs internal format (emacs-mule)
  78
  79   The coding system adopted by old versions of Emacs (20 and 21).
  80
  81   o ISO2022-base coding system
  82
  83   The most famous coding system for multiple character sets.  X's
  84   Compound Text, various EUCs (Extended Unix Code), and coding systems
  85   used in the Internet communication such as ISO-2022-JP are all
  86   variants of ISO2022.
  87
  88   o SJIS (or Shift-JIS or MS-Kanji-Code)
  89
  90   A coding system to encode character sets: ASCII, JISX0201, and
  91   JISX0208.  Widely used for PC's in Japan.  Details are described in
  92   section 8.
  93
  94   o BIG5
  95
  96   A coding system to encode character sets: ASCII and Big5.  Widely
  97   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
  98   described in section 8.  In this file, when we write "big5" (all
  99   lowercase), we mean the coding system, and when we write "Big5"
 100   (capitalized), we mean the character set.
 101
 102   o CCL
 103
 104   If a user wants to decode/encode text encoded in a coding system
 105   not listed above, he can supply a decoder and an encoder for it in
 106   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 107   program while decoding/encoding.
 108
 109   o Raw-text
 110
 111   A coding system for text containing raw eight-bit data.  Emacs
 112   treats each byte of source text as a character (except for
 113   end-of-line conversion).
 114
 115   o No-conversion
 116
 117   Like raw text, but don't do end-of-line conversion.
 118
 119
 120 END-OF-LINE FORMAT
 121
 122   How text end-of-line is encoded depends on operating system.  For
 123   instance, Unix's format is just one byte of LF (line-feed) code,
 124   whereas DOS's format is two-byte sequence of `carriage-return' and
 125   `line-feed' codes.  MacOS's format is usually one byte of
 126   `carriage-return'.
 127
 128   Since text character encoding and end-of-line encoding are
 129   independent, any coding system described above can take any format
 130   of end-of-line (except for no-conversion).
 131
 132 STRUCT CODING_SYSTEM
 133
 134   Before using a coding system for code conversion (i.e. decoding and
 135   encoding), we setup a structure of type `struct coding_system'.
 136   This structure keeps various information about a specific code
 137   conversion (e.g. the location of source and destination data).
 138
 139 */
 140
 141 /* COMMON MACROS */
 142
 143
 144 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 145
 146   These functions check if a byte sequence specified as a source in
 147   CODING conforms to the format of XXX.  Return 1 if the data contains
 148   a byte sequence which can be decoded into non-ASCII characters by
 149   the coding system.  Otherwize (i.e. the data contains only ASCII
 150   characters or invalid sequence) return 0.
 151
 152   It also resets some bits of an integer pointed by MASK.  The macros
 153   CATEGORY_MASK_XXX specifies each bit of this integer.
 154
 155   Below is the template of these functions.  */
 156
 157 #if 0
 158 static int
 159 detect_coding_XXX (coding, mask)
 160      struct coding_system *coding;
 161      int *mask;
 162 {
 163   unsigned char *src = coding->source;
 164   unsigned char *src_end = coding->source + coding->src_bytes;
 165   int multibytep = coding->src_multibyte;
 166   int c;
 167   int found = 0;
 168   ...;
 169
 170   while (1)
 171     {
 172       /* Get one byte from the source.  If the souce is exausted, jump
 173          to no_more_source:.  */
 174       ONE_MORE_BYTE (c);
 175       /* Check if it conforms to XXX.  If not, break the loop.  */
 176     }
 177   /* As the data is invalid for XXX, reset a proper bits.  */
 178   *mask &= ~CODING_CATEGORY_XXX;
 179   return 0;
 180  no_more_source:
 181   /* The source exausted.  */
 182   if (!found)
 183     /* ASCII characters only. */
 184     return 0;
 185   /* Some data should be decoded into non-ASCII characters.  */
 186   *mask &= CODING_CATEGORY_XXX;
 187   return 1;
 188 }
 189 #endif
 190
 191 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 192
 193   These functions decode a byte sequence specified as a source by
 194   CODING.  The resulting multibyte text goes to a place pointed to by
 195   CODING->charbuf, the length of which should not exceed
 196   CODING->charbuf_size;
 197
 198   These functions set the information of original and decoded texts in
 199   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 200   They also set CODING->result to one of CODING_RESULT_XXX indicating
 201   how the decoding is finished.
 202
 203   Below is the template of these functions.  */
 204
 205 #if 0
 206 static void
 207 decode_coding_XXXX (coding)
 208      struct coding_system *coding;
 209 {
 210   unsigned char *src = coding->source + coding->consumed;
 211   unsigned char *src_end = coding->source + coding->src_bytes;
 212   /* SRC_BASE remembers the start position in source in each loop.
 213      The loop will be exited when there's not enough source code, or
 214      when there's no room in CHARBUF for a decoded character.  */
 215   unsigned char *src_base;
 216   /* A buffer to produce decoded characters.  */
 217   int *charbuf = coding->charbuf;
 218   int *charbuf_end = charbuf + coding->charbuf_size;
 219   int multibytep = coding->src_multibyte;
 220
 221   while (1)
 222     {
 223       src_base = src;
 224       if (charbuf < charbuf_end)
 225         /* No more room to produce a decoded character.  */
 226         break;
 227       ONE_MORE_BYTE (c);
 228       /* Decode it. */
 229     }
 230
 231  no_more_source:
 232   if (src_base < src_end
 233       && coding->mode & CODING_MODE_LAST_BLOCK)
 234     /* If the source ends by partial bytes to construct a character,
 235        treat them as eight-bit raw data.  */
 236     while (src_base < src_end && charbuf < charbuf_end)
 237       *charbuf++ = *src_base++;
 238   /* Remember how many bytes and characters we consumed.  If the
 239      source is multibyte, the bytes and chars are not identical.  */
 240   coding->consumed = coding->consumed_char = src_base - coding->source;
 241   /* Remember how many characters we produced.  */
 242   coding->charbuf_used = charbuf - coding->charbuf;
 243 }
 244 #endif
 245
 246 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 247
 248   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 249   internal multibyte format by CODING.  The resulting byte sequence
 250   goes to a place pointed to by DESTINATION, the length of which
 251   should not exceed DST_BYTES.
 252
 253   These functions set the information of original and encoded texts in
 254   the members produced, produced_char, consumed, and consumed_char of
 255   the structure *CODING.  They also set the member result to one of
 256   CODING_RESULT_XXX indicating how the encoding finished.
 257
 258   DST_BYTES zero means that source area and destination area are
 259   overlapped, which means that we can produce a encoded text until it
 260   reaches at the head of not-yet-encoded source text.
 261
 262   Below is a template of these functions.  */
 263 #if 0
 264 static void
 265 encode_coding_XXX (coding)
 266      struct coding_system *coding;
 267 {
 268   int multibytep = coding->dst_multibyte;
 269   int *charbuf = coding->charbuf;
 270   int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
 271   unsigned char *dst = coding->destination + coding->produced;
 272   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 273   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 274   int produced_chars = 0;
 275
 276   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 277     {
 278       int c = *charbuf;
 279       /* Encode C into DST, and increment DST.  */
 280     }
 281  label_no_more_destination:
 282   /* How many chars and bytes we produced.  */
 283   coding->produced_char += produced_chars;
 284   coding->produced = dst - coding->destination;
 285 }
 286 #endif
 287
 288 \f
 289 /*** 1. Preamble ***/
 290
 291 #include <config.h>
 292 #include <stdio.h>
 293
 294 #include "lisp.h"
 295 #include "buffer.h"
 296 #include "character.h"
 297 #include "charset.h"
 298 #include "ccl.h"
 299 #include "composite.h"
 300 #include "coding.h"
 301 #include "window.h"
 302
 303 Lisp_Object Vcoding_system_hash_table;
 304
 305 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 306 Lisp_Object Qunix, Qdos;
 307 extern Lisp_Object Qmac;        /* frame.c */
 308 Lisp_Object Qbuffer_file_coding_system;
 309 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 310 Lisp_Object Qdefault_char;
 311 Lisp_Object Qno_conversion, Qundecided;
 312 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 313 Lisp_Object Qutf_16_be_nosig, Qutf_16_be, Qutf_16_le_nosig, Qutf_16_le;
 314 Lisp_Object Qsignature, Qendian, Qbig, Qlittle;
 315 Lisp_Object Qcoding_system_history;
 316 Lisp_Object Qvalid_codes;
 317
 318 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 319 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 320 Lisp_Object Qstart_process, Qopen_network_stream;
 321 Lisp_Object Qtarget_idx;
 322
 323 Lisp_Object Vselect_safe_coding_system_function;
 324
 325 /* Mnemonic string for each format of end-of-line.  */
 326 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 327 /* Mnemonic string to indicate format of end-of-line is not yet
 328    decided.  */
 329 Lisp_Object eol_mnemonic_undecided;
 330
 331 #ifdef emacs
 332
 333 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 334
 335 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 336
 337 /* Coding system emacs-mule and raw-text are for converting only
 338    end-of-line format.  */
 339 Lisp_Object Qemacs_mule, Qraw_text;
 340
 341 /* Coding-systems are handed between Emacs Lisp programs and C internal
 342    routines by the following three variables.  */
 343 /* Coding-system for reading files and receiving data from process.  */
 344 Lisp_Object Vcoding_system_for_read;
 345 /* Coding-system for writing files and sending data to process.  */
 346 Lisp_Object Vcoding_system_for_write;
 347 /* Coding-system actually used in the latest I/O.  */
 348 Lisp_Object Vlast_coding_system_used;
 349
 350 /* A vector of length 256 which contains information about special
 351    Latin codes (especially for dealing with Microsoft codes).  */
 352 Lisp_Object Vlatin_extra_code_table;
 353
 354 /* Flag to inhibit code conversion of end-of-line format.  */
 355 int inhibit_eol_conversion;
 356
 357 /* Flag to inhibit ISO2022 escape sequence detection.  */
 358 int inhibit_iso_escape_detection;
 359
 360 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 361 int inherit_process_coding_system;
 362
 363 /* Coding system to be used to encode text for terminal display.  */
 364 struct coding_system terminal_coding;
 365
 366 /* Coding system to be used to encode text for terminal display when
 367    terminal coding system is nil.  */
 368 struct coding_system safe_terminal_coding;
 369
 370 /* Coding system of what is sent from terminal keyboard.  */
 371 struct coding_system keyboard_coding;
 372
 373 Lisp_Object Vfile_coding_system_alist;
 374 Lisp_Object Vprocess_coding_system_alist;
 375 Lisp_Object Vnetwork_coding_system_alist;
 376
 377 Lisp_Object Vlocale_coding_system;
 378
 379 #endif /* emacs */
 380
 381 /* Flag to tell if we look up translation table on character code
 382    conversion.  */
 383 Lisp_Object Venable_character_translation;
 384 /* Standard translation table to look up on decoding (reading).  */
 385 Lisp_Object Vstandard_translation_table_for_decode;
 386 /* Standard translation table to look up on encoding (writing).  */
 387 Lisp_Object Vstandard_translation_table_for_encode;
 388
 389 Lisp_Object Qtranslation_table;
 390 Lisp_Object Qtranslation_table_id;
 391 Lisp_Object Qtranslation_table_for_decode;
 392 Lisp_Object Qtranslation_table_for_encode;
 393
 394 /* Alist of charsets vs revision number.  */
 395 static Lisp_Object Vcharset_revision_table;
 396
 397 /* Default coding systems used for process I/O.  */
 398 Lisp_Object Vdefault_process_coding_system;
 399
 400 /* Global flag to tell that we can't call post-read-conversion and
 401    pre-write-conversion functions.  Usually the value is zero, but it
 402    is set to 1 temporarily while such functions are running.  This is
 403    to avoid infinite recursive call.  */
 404 static int inhibit_pre_post_conversion;
 405
 406 /* Two special coding systems.  */
 407 Lisp_Object Vsjis_coding_system;
 408 Lisp_Object Vbig5_coding_system;
 409
 410
 411 static int detect_coding_utf_8 P_ ((struct coding_system *, int *));
 412 static void decode_coding_utf_8 P_ ((struct coding_system *));
 413 static int encode_coding_utf_8 P_ ((struct coding_system *));
 414
 415 static int detect_coding_utf_16 P_ ((struct coding_system *, int *));
 416 static void decode_coding_utf_16 P_ ((struct coding_system *));
 417 static int encode_coding_utf_16 P_ ((struct coding_system *));
 418
 419 static int detect_coding_iso_2022 P_ ((struct coding_system *, int *));
 420 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 421 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 422
 423 static int detect_coding_emacs_mule P_ ((struct coding_system *, int *));
 424 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 425 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 426
 427 static int detect_coding_sjis P_ ((struct coding_system *, int *));
 428 static void decode_coding_sjis P_ ((struct coding_system *));
 429 static int encode_coding_sjis P_ ((struct coding_system *));
 430
 431 static int detect_coding_big5 P_ ((struct coding_system *, int *));
 432 static void decode_coding_big5 P_ ((struct coding_system *));
 433 static int encode_coding_big5 P_ ((struct coding_system *));
 434
 435 static int detect_coding_ccl P_ ((struct coding_system *, int *));
 436 static void decode_coding_ccl P_ ((struct coding_system *));
 437 static int encode_coding_ccl P_ ((struct coding_system *));
 438
 439 static void decode_coding_raw_text P_ ((struct coding_system *));
 440 static int encode_coding_raw_text P_ ((struct coding_system *));
 441
 442
 443 /* ISO2022 section */
 444
 445 #define CODING_ISO_INITIAL(coding, reg)                 \
 446   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 447                      coding_attr_iso_initial),          \
 448                reg)))
 449
 450
 451 #define CODING_ISO_REQUEST(coding, charset_id)  \
 452   ((charset_id <= (coding)->max_charset_id      \
 453     ? (coding)->safe_charsets[charset_id]       \
 454     : -1))
 455
 456
 457 #define CODING_ISO_FLAGS(coding)        \
 458   ((coding)->spec.iso_2022.flags)
 459 #define CODING_ISO_DESIGNATION(coding, reg)     \
 460   ((coding)->spec.iso_2022.current_designation[reg])
 461 #define CODING_ISO_INVOCATION(coding, plane)    \
 462   ((coding)->spec.iso_2022.current_invocation[plane])
 463 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 464   ((coding)->spec.iso_2022.single_shifting)
 465 #define CODING_ISO_BOL(coding)  \
 466   ((coding)->spec.iso_2022.bol)
 467 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 468   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 469
 470 /* Control characters of ISO2022.  */
 471                         /* code */      /* function */
 472 #define ISO_CODE_LF     0x0A            /* line-feed */
 473 #define ISO_CODE_CR     0x0D            /* carriage-return */
 474 #define ISO_CODE_SO     0x0E            /* shift-out */
 475 #define ISO_CODE_SI     0x0F            /* shift-in */
 476 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 477 #define ISO_CODE_ESC    0x1B            /* escape */
 478 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 479 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 480 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 481
 482 /* All code (1-byte) of ISO2022 is classified into one of the
 483    followings.  */
 484 enum iso_code_class_type
 485   {
 486     ISO_control_0,              /* Control codes in the range
 487                                    0x00..0x1F and 0x7F, except for the
 488                                    following 5 codes.  */
 489     ISO_carriage_return,        /* ISO_CODE_CR (0x0D) */
 490     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 491     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 492     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 493     ISO_escape,                 /* ISO_CODE_SO (0x1B) */
 494     ISO_control_1,              /* Control codes in the range
 495                                    0x80..0x9F, except for the
 496                                    following 3 codes.  */
 497     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 498     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 499     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 500     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 501     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 502     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 503     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 504   };
 505
 506 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
 507     `iso-flags' attribute of an iso2022 coding system.  */
 508
 509 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 510    instead of the correct short-form sequence (e.g. ESC $ A).  */
 511 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 512
 513 /* If set, reset graphic planes and registers at end-of-line to the
 514    initial state.  */
 515 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 516
 517 /* If set, reset graphic planes and registers before any control
 518    characters to the initial state.  */
 519 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 520
 521 /* If set, encode by 7-bit environment.  */
 522 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 523
 524 /* If set, use locking-shift function.  */
 525 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 526
 527 /* If set, use single-shift function.  Overwrite
 528    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 529 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 530
 531 /* If set, use designation escape sequence.  */
 532 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 533
 534 /* If set, produce revision number sequence.  */
 535 #define CODING_ISO_FLAG_REVISION        0x0080
 536
 537 /* If set, produce ISO6429's direction specifying sequence.  */
 538 #define CODING_ISO_FLAG_DIRECTION       0x0100
 539
 540 /* If set, assume designation states are reset at beginning of line on
 541    output.  */
 542 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 543
 544 /* If set, designation sequence should be placed at beginning of line
 545    on output.  */
 546 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 547
 548 /* If set, do not encode unsafe charactes on output.  */
 549 #define CODING_ISO_FLAG_SAFE            0x0800
 550
 551 /* If set, extra latin codes (128..159) are accepted as a valid code
 552    on input.  */
 553 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 554
 555 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 556
 557 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 558
 559 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 560
 561 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 562
 563 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 564
 565 /* A character to be produced on output if encoding of the original
 566    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 567 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 568
 569
 570 /* UTF-16 section */
 571 #define CODING_UTF_16_BOM(coding)       \
 572   ((coding)->spec.utf_16.bom)
 573
 574 #define CODING_UTF_16_ENDIAN(coding)    \
 575   ((coding)->spec.utf_16.endian)
 576
 577 #define CODING_UTF_16_SURROGATE(coding) \
 578   ((coding)->spec.utf_16.surrogate)
 579
 580
 581 /* CCL section */
 582 #define CODING_CCL_DECODER(coding)      \
 583   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 584 #define CODING_CCL_ENCODER(coding)      \
 585   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 586 #define CODING_CCL_VALIDS(coding)                                          \
 587   (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \
 588    ->data)
 589
 590 /* Index for each coding category in `coding_categories' */
 591
 592 enum coding_category
 593   {
 594     coding_category_iso_7,
 595     coding_category_iso_7_tight,
 596     coding_category_iso_8_1,
 597     coding_category_iso_8_2,
 598     coding_category_iso_7_else,
 599     coding_category_iso_8_else,
 600     coding_category_utf_8,
 601     coding_category_utf_16_auto,
 602     coding_category_utf_16_be,
 603     coding_category_utf_16_le,
 604     coding_category_utf_16_be_nosig,
 605     coding_category_utf_16_le_nosig,
 606     coding_category_charset,
 607     coding_category_sjis,
 608     coding_category_big5,
 609     coding_category_ccl,
 610     coding_category_emacs_mule,
 611     /* All above are targets of code detection.  */
 612     coding_category_raw_text,
 613     coding_category_undecided,
 614     coding_category_max
 615   };
 616
 617 /* Definitions of flag bits used in detect_coding_XXXX.  */
 618 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 619 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 620 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 621 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 622 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 623 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 624 #define CATEGORY_MASK_UTF_8             (1 << coding_category_utf_8)
 625 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 626 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 627 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 628 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 629 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 630 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 631 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 632 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 633 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 634
 635 /* This value is returned if detect_coding_mask () find nothing other
 636    than ASCII characters.  */
 637 #define CATEGORY_MASK_ANY               \
 638   (CATEGORY_MASK_ISO_7                  \
 639    | CATEGORY_MASK_ISO_7_TIGHT          \
 640    | CATEGORY_MASK_ISO_8_1              \
 641    | CATEGORY_MASK_ISO_8_2              \
 642    | CATEGORY_MASK_ISO_7_ELSE           \
 643    | CATEGORY_MASK_ISO_8_ELSE           \
 644    | CATEGORY_MASK_UTF_8                \
 645    | CATEGORY_MASK_UTF_16_BE            \
 646    | CATEGORY_MASK_UTF_16_LE            \
 647    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 648    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 649    | CATEGORY_MASK_CHARSET              \
 650    | CATEGORY_MASK_SJIS                 \
 651    | CATEGORY_MASK_BIG5                 \
 652    | CATEGORY_MASK_CCL                  \
 653    | CATEGORY_MASK_EMACS_MULE)
 654
 655
 656 #define CATEGORY_MASK_ISO_7BIT \
 657   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 658
 659 #define CATEGORY_MASK_ISO_8BIT \
 660   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 661
 662 #define CATEGORY_MASK_ISO_ELSE \
 663   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 664
 665 #define CATEGORY_MASK_ISO_ESCAPE        \
 666   (CATEGORY_MASK_ISO_7                  \
 667    | CATEGORY_MASK_ISO_7_TIGHT          \
 668    | CATEGORY_MASK_ISO_7_ELSE           \
 669    | CATEGORY_MASK_ISO_8_ELSE)
 670
 671 #define CATEGORY_MASK_ISO       \
 672   (  CATEGORY_MASK_ISO_7BIT     \
 673      | CATEGORY_MASK_ISO_8BIT   \
 674      | CATEGORY_MASK_ISO_ELSE)
 675
 676 #define CATEGORY_MASK_UTF_16            \
 677   (CATEGORY_MASK_UTF_16_BE              \
 678    | CATEGORY_MASK_UTF_16_LE            \
 679    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 680    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 681
 682
 683 /* List of symbols `coding-category-xxx' ordered by priority.  This
 684    variable is exposed to Emacs Lisp.  */
 685 static Lisp_Object Vcoding_category_list;
 686
 687 /* Table of coding categories (Lisp symbols).  This variable is for
 688    internal use oly.  */
 689 static Lisp_Object Vcoding_category_table;
 690
 691 /* Table of coding-categories ordered by priority.  */
 692 static enum coding_category coding_priorities[coding_category_max];
 693
 694 /* Nth element is a coding context for the coding system bound to the
 695    Nth coding category.  */
 696 static struct coding_system coding_categories[coding_category_max];
 697
 698 static int detected_mask[coding_category_raw_text] =
 699   { CATEGORY_MASK_ISO,
 700     CATEGORY_MASK_ISO,
 701     CATEGORY_MASK_ISO,
 702     CATEGORY_MASK_ISO,
 703     CATEGORY_MASK_ISO,
 704     CATEGORY_MASK_ISO,
 705     CATEGORY_MASK_UTF_8,
 706     CATEGORY_MASK_UTF_16,
 707     CATEGORY_MASK_UTF_16,
 708     CATEGORY_MASK_UTF_16,
 709     CATEGORY_MASK_UTF_16,
 710     CATEGORY_MASK_UTF_16,
 711     CATEGORY_MASK_CHARSET,
 712     CATEGORY_MASK_SJIS,
 713     CATEGORY_MASK_BIG5,
 714     CATEGORY_MASK_CCL,
 715     CATEGORY_MASK_EMACS_MULE
 716   };
 717
 718 /*** Commonly used macros and functions ***/
 719
 720 #ifndef min
 721 #define min(a, b) ((a) < (b) ? (a) : (b))
 722 #endif
 723 #ifndef max
 724 #define max(a, b) ((a) > (b) ? (a) : (b))
 725 #endif
 726
 727 #define CODING_GET_INFO(coding, attrs, eol_type, charset_list)  \
 728   do {                                                          \
 729     attrs = CODING_ID_ATTRS (coding->id);                       \
 730     eol_type = CODING_ID_EOL_TYPE (coding->id);                 \
 731     if (VECTORP (eol_type))                                     \
 732       eol_type = Qunix;                                         \
 733     charset_list = CODING_ATTR_CHARSET_LIST (attrs);            \
 734   } while (0)
 735
 736
 737 /* Safely get one byte from the source text pointed by SRC which ends
 738    at SRC_END, and set C to that byte.  If there are not enough bytes
 739    in the source, it jumps to `no_more_source'.  The caller
 740    should declare and set these variables appropriately in advance:
 741         src, src_end, multibytep
 742 */
 743
 744 #define ONE_MORE_BYTE(c)                                        \
 745   do {                                                          \
 746     if (src == src_end)                                         \
 747       {                                                         \
 748         if (src_base < src)                                     \
 749           coding->result = CODING_RESULT_INSUFFICIENT_SRC;      \
 750         goto no_more_source;                                    \
 751       }                                                         \
 752     c = *src++;                                                 \
 753     if (multibytep && (c & 0x80))                               \
 754       {                                                         \
 755         if ((c & 0xFE) != 0xC0)                                 \
 756           error ("Undecodable char found");                     \
 757         c = ((c & 1) << 6) | *src++;                            \
 758       }                                                         \
 759     consumed_chars++;                                           \
 760   } while (0)
 761
 762
 763 #define ONE_MORE_BYTE_NO_CHECK(c)               \
 764   do {                                          \
 765     c = *src++;                                 \
 766     if (multibytep && (c & 0x80))               \
 767       {                                         \
 768         if ((c & 0xFE) != 0xC0)                 \
 769           error ("Undecodable char found");     \
 770         c = ((c & 1) << 6) | *src++;            \
 771       }                                         \
 772     consumed_chars++;                           \
 773   } while (0)
 774
 775
 776 /* Store a byte C in the place pointed by DST and increment DST to the
 777    next free point, and increment PRODUCED_CHARS.  The caller should
 778    assure that C is 0..127, and declare and set the variable `dst'
 779    appropriately in advance.
 780 */
 781
 782
 783 #define EMIT_ONE_ASCII_BYTE(c)  \
 784   do {                          \
 785     produced_chars++;           \
 786     *dst++ = (c);               \
 787   } while (0)
 788
 789
 790 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 791
 792 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 793   do {                                  \
 794     produced_chars += 2;                \
 795     *dst++ = (c1), *dst++ = (c2);       \
 796   } while (0)
 797
 798
 799 /* Store a byte C in the place pointed by DST and increment DST to the
 800    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 801    nonzero, store in an appropriate multibyte from.  The caller should
 802    declare and set the variables `dst' and `multibytep' appropriately
 803    in advance.  */
 804
 805 #define EMIT_ONE_BYTE(c)                \
 806   do {                                  \
 807     produced_chars++;                   \
 808     if (multibytep)                     \
 809       {                                 \
 810         int ch = (c);                   \
 811         if (ch >= 0x80)                 \
 812           ch = BYTE8_TO_CHAR (ch);      \
 813         CHAR_STRING_ADVANCE (ch, dst);  \
 814       }                                 \
 815     else                                \
 816       *dst++ = (c);                     \
 817   } while (0)
 818
 819
 820 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 821
 822 #define EMIT_TWO_BYTES(c1, c2)          \
 823   do {                                  \
 824     produced_chars += 2;                \
 825     if (multibytep)                     \
 826       {                                 \
 827         int ch;                         \
 828                                         \
 829         ch = (c1);                      \
 830         if (ch >= 0x80)                 \
 831           ch = BYTE8_TO_CHAR (ch);      \
 832         CHAR_STRING_ADVANCE (ch, dst);  \
 833         ch = (c2);                      \
 834         if (ch >= 0x80)                 \
 835           ch = BYTE8_TO_CHAR (ch);      \
 836         CHAR_STRING_ADVANCE (ch, dst);  \
 837       }                                 \
 838     else                                \
 839       {                                 \
 840         *dst++ = (c1);                  \
 841         *dst++ = (c2);                  \
 842       }                                 \
 843   } while (0)
 844
 845
 846 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 847   do {                                  \
 848     EMIT_ONE_BYTE (c1);                 \
 849     EMIT_TWO_BYTES (c2, c3);            \
 850   } while (0)
 851
 852
 853 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 854   do {                                          \
 855     EMIT_TWO_BYTES (c1, c2);                    \
 856     EMIT_TWO_BYTES (c3, c4);                    \
 857   } while (0)
 858
 859
 860 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 861   do {                                                                       \
 862     charset_map_loaded = 0;                                                  \
 863     c = DECODE_CHAR (charset, code);                                         \
 864     if (charset_map_loaded)                                                  \
 865       {                                                                      \
 866         unsigned char *orig = coding->source;                                \
 867         EMACS_INT offset;                                                    \
 868                                                                              \
 869         coding_set_source (coding);                                          \
 870         offset = coding->source - orig;                                      \
 871         src += offset;                                                       \
 872         src_base += offset;                                                  \
 873         src_end += offset;                                                   \
 874       }                                                                      \
 875   } while (0)
 876
 877
 878 #define ASSURE_DESTINATION(bytes)                               \
 879   do {                                                          \
 880     if (dst + (bytes) >= dst_end)                               \
 881       {                                                         \
 882         int more_bytes = charbuf_end - charbuf + (bytes);       \
 883                                                                 \
 884         dst = alloc_destination (coding, more_bytes, dst);      \
 885         dst_end = coding->destination + coding->dst_bytes;      \
 886       }                                                         \
 887   } while (0)
 888
 889
 890
 891 static void
 892 coding_set_source (coding)
 893      struct coding_system *coding;
 894 {
 895   if (BUFFERP (coding->src_object))
 896     {
 897       if (coding->src_pos < 0)
 898         coding->source = GAP_END_ADDR + coding->src_pos_byte;
 899       else
 900         {
 901           struct buffer *buf = XBUFFER (coding->src_object);
 902           EMACS_INT gpt_byte = BUF_GPT_BYTE (buf);
 903           unsigned char *beg_addr = BUF_BEG_ADDR (buf);
 904
 905           coding->source = beg_addr + coding->src_pos_byte - 1;
 906           if (coding->src_pos_byte >= gpt_byte)
 907             coding->source += BUF_GAP_SIZE (buf);
 908         }
 909     }
 910   else if (STRINGP (coding->src_object))
 911     {
 912       coding->source = (XSTRING (coding->src_object)->data
 913                         + coding->src_pos_byte);
 914     }
 915   else
 916     /* Otherwise, the source is C string and is never relocated
 917        automatically.  Thus we don't have to update anything.  */
 918     ;
 919 }
 920
 921 static void
 922 coding_set_destination (coding)
 923      struct coding_system *coding;
 924 {
 925   if (BUFFERP (coding->dst_object))
 926     {
 927       /* We are sure that coding->dst_pos_byte is before the gap of the
 928          buffer. */
 929       coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
 930                              + coding->dst_pos_byte - 1);
 931       if (coding->src_pos < 0)
 932         coding->dst_bytes = (GAP_END_ADDR
 933                              - (coding->src_bytes - coding->consumed)
 934                              - coding->destination);
 935       else
 936         coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
 937                              - coding->destination);
 938     }
 939   else
 940     /* Otherwise, the destination is C string and is never relocated
 941        automatically.  Thus we don't have to update anything.  */
 942     ;
 943 }
 944
 945
 946 static void
 947 coding_alloc_by_realloc (coding, bytes)
 948      struct coding_system *coding;
 949      EMACS_INT bytes;
 950 {
 951   coding->destination = (unsigned char *) xrealloc (coding->destination,
 952                                                     coding->dst_bytes + bytes);
 953   coding->dst_bytes += bytes;
 954 }
 955
 956 static void
 957 coding_alloc_by_making_gap (coding, bytes)
 958      struct coding_system *coding;
 959      EMACS_INT bytes;
 960 {
 961   if (BUFFERP (coding->dst_object)
 962       && EQ (coding->src_object, coding->dst_object))
 963     {
 964       EMACS_INT add = coding->src_bytes - coding->consumed;
 965
 966       GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
 967       make_gap (bytes);
 968       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
 969     }
 970   else
 971     {
 972       Lisp_Object this_buffer;
 973
 974       this_buffer = Fcurrent_buffer ();
 975       set_buffer_internal (XBUFFER (coding->dst_object));
 976       make_gap (bytes);
 977       set_buffer_internal (XBUFFER (this_buffer));
 978     }
 979 }
 980
 981
 982 static unsigned char *
 983 alloc_destination (coding, nbytes, dst)
 984      struct coding_system *coding;
 985      int nbytes;
 986      unsigned char *dst;
 987 {
 988   EMACS_INT offset = dst - coding->destination;
 989
 990   if (BUFFERP (coding->dst_object))
 991     coding_alloc_by_making_gap (coding, nbytes);
 992   else
 993     coding_alloc_by_realloc (coding, nbytes);
 994   coding->result = CODING_RESULT_SUCCESS;
 995   coding_set_destination (coding);
 996   dst = coding->destination + offset;
 997   return dst;
 998 }
 999
1000 \f
1001 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1002
1003
1004
1005 \f
1006 /*** 3. UTF-8 ***/
1007
1008 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1009    Check if a text is encoded in UTF-8.  If it is, return
1010    CATEGORY_MASK_UTF_8, else return 0.  */
1011
1012 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
1013 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
1014 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1015 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1016 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1017 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1018
1019 static int
1020 detect_coding_utf_8 (coding, mask)
1021      struct coding_system *coding;
1022      int *mask;
1023 {
1024   unsigned char *src = coding->source, *src_base = src;
1025   unsigned char *src_end = coding->source + coding->src_bytes;
1026   int multibytep = coding->src_multibyte;
1027   int consumed_chars = 0;
1028   int found = 0;
1029
1030   /* A coding system of this category is always ASCII compatible.  */
1031   src += coding->head_ascii;
1032
1033   while (1)
1034     {
1035       int c, c1, c2, c3, c4;
1036
1037       ONE_MORE_BYTE (c);
1038       if (UTF_8_1_OCTET_P (c))
1039         continue;
1040       ONE_MORE_BYTE (c1);
1041       if (! UTF_8_EXTRA_OCTET_P (c1))
1042         break;
1043       if (UTF_8_2_OCTET_LEADING_P (c))
1044         {
1045           found++;
1046           continue;
1047         }
1048       ONE_MORE_BYTE (c2);
1049       if (! UTF_8_EXTRA_OCTET_P (c2))
1050         break;
1051       if (UTF_8_3_OCTET_LEADING_P (c))
1052         {
1053           found++;
1054           continue;
1055         }
1056       ONE_MORE_BYTE (c3);
1057       if (! UTF_8_EXTRA_OCTET_P (c3))
1058         break;
1059       if (UTF_8_4_OCTET_LEADING_P (c))
1060         {
1061           found++;
1062           continue;
1063         }
1064       ONE_MORE_BYTE (c4);
1065       if (! UTF_8_EXTRA_OCTET_P (c4))
1066         break;
1067       if (UTF_8_5_OCTET_LEADING_P (c))
1068         {
1069           found++;
1070           continue;
1071         }
1072       break;
1073     }
1074   *mask &= ~CATEGORY_MASK_UTF_8;
1075   return 0;
1076
1077  no_more_source:
1078   if (! found)
1079     return 0;
1080   *mask &= CATEGORY_MASK_UTF_8;
1081   return 1;
1082 }
1083
1084
1085 /* Fixme: deal with surrogates?  */
1086 static void
1087 decode_coding_utf_8 (coding)
1088      struct coding_system *coding;
1089 {
1090   unsigned char *src = coding->source + coding->consumed;
1091   unsigned char *src_end = coding->source + coding->src_bytes;
1092   unsigned char *src_base;
1093   int *charbuf = coding->charbuf;
1094   int *charbuf_end = charbuf + coding->charbuf_size;
1095   int consumed_chars = 0, consumed_chars_base;
1096   int multibytep = coding->src_multibyte;
1097   Lisp_Object attr, eol_type, charset_list;
1098
1099   CODING_GET_INFO (coding, attr, eol_type, charset_list);
1100
1101   while (1)
1102     {
1103       int c, c1, c2, c3, c4, c5;
1104
1105       src_base = src;
1106       consumed_chars_base = consumed_chars;
1107
1108       if (charbuf >= charbuf_end)
1109         break;
1110
1111       ONE_MORE_BYTE (c1);
1112       if (UTF_8_1_OCTET_P(c1))
1113         {
1114           c = c1;
1115           if (c == '\r')
1116             {
1117               if (EQ (eol_type, Qdos))
1118                 {
1119                   if (src == src_end)
1120                     goto no_more_source;
1121                   if (*src == '\n')
1122                     ONE_MORE_BYTE (c);
1123                 }
1124               else if (EQ (eol_type, Qmac))
1125                 c = '\n';
1126             }
1127         }
1128       else
1129         {
1130           ONE_MORE_BYTE (c2);
1131           if (! UTF_8_EXTRA_OCTET_P (c2))
1132             goto invalid_code;
1133           if (UTF_8_2_OCTET_LEADING_P (c1))
1134             {
1135               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1136               /* Reject overlong sequences here and below.  Encoders
1137                  producing them are incorrect, they can be misleading,
1138                  and they mess up read/write invariance.  */
1139               if (c < 128)
1140                 goto invalid_code;
1141             }
1142           else
1143             {
1144               ONE_MORE_BYTE (c3);
1145               if (! UTF_8_EXTRA_OCTET_P (c3))
1146                 goto invalid_code;
1147               if (UTF_8_3_OCTET_LEADING_P (c1))
1148                 {
1149                   c = (((c1 & 0xF) << 12)
1150                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1151                   if (c < 0x800)
1152                     goto invalid_code;
1153                 }
1154               else
1155                 {
1156                   ONE_MORE_BYTE (c4);
1157                   if (! UTF_8_EXTRA_OCTET_P (c4))
1158                     goto invalid_code;
1159                   if (UTF_8_4_OCTET_LEADING_P (c1))
1160                     {
1161                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1162                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1163                     if (c < 0x10000)
1164                       goto invalid_code;
1165                     }
1166                   else
1167                     {
1168                       ONE_MORE_BYTE (c5);
1169                       if (! UTF_8_EXTRA_OCTET_P (c5))
1170                         goto invalid_code;
1171                       if (UTF_8_5_OCTET_LEADING_P (c1))
1172                         {
1173                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1174                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1175                                | (c5 & 0x3F));
1176                           if ((c > MAX_CHAR) || (c < 0x200000))
1177                             goto invalid_code;
1178                         }
1179                       else
1180                         goto invalid_code;
1181                     }
1182                 }
1183             }
1184         }
1185
1186       *charbuf++ = c;
1187       continue;
1188
1189     invalid_code:
1190       src = src_base;
1191       consumed_chars = consumed_chars_base;
1192       ONE_MORE_BYTE (c);
1193       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1194       coding->errors++;
1195     }
1196
1197  no_more_source:
1198   coding->consumed_char += consumed_chars_base;
1199   coding->consumed = src_base - coding->source;
1200   coding->charbuf_used = charbuf - coding->charbuf;
1201 }
1202
1203
1204 static int
1205 encode_coding_utf_8 (coding)
1206      struct coding_system *coding;
1207 {
1208   int multibytep = coding->dst_multibyte;
1209   int *charbuf = coding->charbuf;
1210   int *charbuf_end = charbuf + coding->charbuf_used;
1211   unsigned char *dst = coding->destination + coding->produced;
1212   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1213   int produced_chars = 0;
1214   int c;
1215
1216   if (multibytep)
1217     {
1218       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1219
1220       while (charbuf < charbuf_end)
1221         {
1222           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1223
1224           ASSURE_DESTINATION (safe_room);
1225           c = *charbuf++;
1226           CHAR_STRING_ADVANCE (c, pend);
1227           for (p = str; p < pend; p++)
1228             EMIT_ONE_BYTE (*p);
1229         }
1230     }
1231   else
1232     {
1233       int safe_room = MAX_MULTIBYTE_LENGTH;
1234
1235       while (charbuf < charbuf_end)
1236         {
1237           ASSURE_DESTINATION (safe_room);
1238           c = *charbuf++;
1239           dst += CHAR_STRING (c, dst);
1240           produced_chars++;
1241         }
1242     }
1243   coding->result = CODING_RESULT_SUCCESS;
1244   coding->produced_char += produced_chars;
1245   coding->produced = dst - coding->destination;
1246   return 0;
1247 }
1248
1249
1250 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1251    Check if a text is encoded in UTF-16 Big Endian (endian == 1) or
1252    Little Endian (otherwise).  If it is, return
1253    CATEGORY_MASK_UTF_16_BE or CATEGORY_MASK_UTF_16_LE,
1254    else return 0.  */
1255
1256 #define UTF_16_HIGH_SURROGATE_P(val) \
1257   (((val) & 0xFC00) == 0xD800)
1258
1259 #define UTF_16_LOW_SURROGATE_P(val) \
1260   (((val) & 0xFC00) == 0xDC00)
1261
1262 #define UTF_16_INVALID_P(val)   \
1263   (((val) == 0xFFFE)            \
1264    || ((val) == 0xFFFF)         \
1265    || UTF_16_LOW_SURROGATE_P (val))
1266
1267
1268 static int
1269 detect_coding_utf_16 (coding, mask)
1270      struct coding_system *coding;
1271      int *mask;
1272 {
1273   unsigned char *src = coding->source, *src_base = src;
1274   unsigned char *src_end = coding->source + coding->src_bytes;
1275   int multibytep = coding->src_multibyte;
1276   int consumed_chars = 0;
1277   int c1, c2;
1278
1279   ONE_MORE_BYTE (c1);
1280   ONE_MORE_BYTE (c2);
1281
1282   if ((c1 == 0xFF) && (c2 == 0xFE))
1283     {
1284       *mask &= CATEGORY_MASK_UTF_16_LE;
1285       return 1;
1286     }
1287   else if ((c1 == 0xFE) && (c2 == 0xFF))
1288     {
1289       *mask &= CATEGORY_MASK_UTF_16_BE;
1290       return 1;
1291     }
1292  no_more_source:
1293   return 0;
1294 }
1295
1296 static void
1297 decode_coding_utf_16 (coding)
1298      struct coding_system *coding;
1299 {
1300   unsigned char *src = coding->source + coding->consumed;
1301   unsigned char *src_end = coding->source + coding->src_bytes;
1302   unsigned char *src_base;
1303   int *charbuf = coding->charbuf;
1304   int *charbuf_end = charbuf + coding->charbuf_size;
1305   int consumed_chars = 0, consumed_chars_base;
1306   int multibytep = coding->src_multibyte;
1307   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1308   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1309   int surrogate = CODING_UTF_16_SURROGATE (coding);
1310   Lisp_Object attr, eol_type, charset_list;
1311
1312   CODING_GET_INFO (coding, attr, eol_type, charset_list);
1313
1314   if (bom != utf_16_without_bom)
1315     {
1316       int c, c1, c2;
1317
1318       src_base = src;
1319       ONE_MORE_BYTE (c1);
1320       ONE_MORE_BYTE (c2);
1321       c = (c1 << 8) | c2;
1322       if (bom == utf_16_with_bom)
1323         {
1324           if (endian == utf_16_big_endian
1325               ? c != 0xFFFE : c != 0xFEFF)
1326             {
1327               /* We are sure that there's enouph room at CHARBUF.  */
1328               *charbuf++ = c1;
1329               *charbuf++ = c2;
1330               coding->errors++;
1331             }
1332         }
1333       else
1334         {
1335           if (c == 0xFFFE)
1336             CODING_UTF_16_ENDIAN (coding)
1337               = endian = utf_16_big_endian;
1338           else if (c == 0xFEFF)
1339             CODING_UTF_16_ENDIAN (coding)
1340               = endian = utf_16_little_endian;
1341           else
1342             {
1343               CODING_UTF_16_ENDIAN (coding)
1344                 = endian = utf_16_big_endian;
1345               src = src_base;
1346             }
1347         }
1348       CODING_UTF_16_BOM (coding) = utf_16_with_bom;
1349     }
1350
1351   while (1)
1352     {
1353       int c, c1, c2;
1354
1355       src_base = src;
1356       consumed_chars_base = consumed_chars;
1357
1358       if (charbuf + 2 >= charbuf_end)
1359         break;
1360
1361       ONE_MORE_BYTE (c1);
1362       ONE_MORE_BYTE (c2);
1363       c = (endian == utf_16_big_endian
1364            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1365       if (surrogate)
1366         {
1367           if (! UTF_16_LOW_SURROGATE_P (c))
1368             {
1369               if (endian == utf_16_big_endian)
1370                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1371               else
1372                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1373               *charbuf++ = c1;
1374               *charbuf++ = c2;
1375               coding->errors++;
1376               if (UTF_16_HIGH_SURROGATE_P (c))
1377                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1378               else
1379                 *charbuf++ = c;
1380             }
1381           else
1382             {
1383               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1384               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1385               *charbuf++ = c;
1386             }
1387         }
1388       else
1389         {
1390           if (UTF_16_HIGH_SURROGATE_P (c))
1391             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1392           else
1393             *charbuf++ = c;
1394         }
1395     }
1396
1397  no_more_source:
1398   coding->consumed_char += consumed_chars_base;
1399   coding->consumed = src_base - coding->source;
1400   coding->charbuf_used = charbuf - coding->charbuf;
1401 }
1402
1403 static int
1404 encode_coding_utf_16 (coding)
1405      struct coding_system *coding;
1406 {
1407   int multibytep = coding->dst_multibyte;
1408   int *charbuf = coding->charbuf;
1409   int *charbuf_end = charbuf + coding->charbuf_used;
1410   unsigned char *dst = coding->destination + coding->produced;
1411   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1412   int safe_room = 8;
1413   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1414   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1415   int produced_chars = 0;
1416   Lisp_Object attrs, eol_type, charset_list;
1417   int c;
1418
1419   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1420
1421   if (bom == utf_16_with_bom)
1422     {
1423       ASSURE_DESTINATION (safe_room);
1424       if (big_endian)
1425         EMIT_TWO_BYTES (0xFF, 0xFE);
1426       else
1427         EMIT_TWO_BYTES (0xFE, 0xFF);
1428       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1429     }
1430
1431   while (charbuf < charbuf_end)
1432     {
1433       ASSURE_DESTINATION (safe_room);
1434       c = *charbuf++;
1435       if (c >= MAX_UNICODE_CHAR)
1436         c = coding->default_char;
1437
1438       if (c < 0x10000)
1439         {
1440           if (big_endian)
1441             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1442           else
1443             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1444         }
1445       else
1446         {
1447           int c1, c2;
1448
1449           c -= 0x10000;
1450           c1 = (c >> 10) + 0xD800;
1451           c2 = (c & 0x3FF) + 0xDC00;
1452           if (big_endian)
1453             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1454           else
1455             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1456         }
1457     }
1458   coding->result = CODING_RESULT_SUCCESS;
1459   coding->produced = dst - coding->destination;
1460   coding->produced_char += produced_chars;
1461   return 0;
1462 }
1463
1464 \f
1465 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1466
1467 /* Emacs' internal format for representation of multiple character
1468    sets is a kind of multi-byte encoding, i.e. characters are
1469    represented by variable-length sequences of one-byte codes.
1470
1471    ASCII characters and control characters (e.g. `tab', `newline') are
1472    represented by one-byte sequences which are their ASCII codes, in
1473    the range 0x00 through 0x7F.
1474
1475    8-bit characters of the range 0x80..0x9F are represented by
1476    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1477    code + 0x20).
1478
1479    8-bit characters of the range 0xA0..0xFF are represented by
1480    one-byte sequences which are their 8-bit code.
1481
1482    The other characters are represented by a sequence of `base
1483    leading-code', optional `extended leading-code', and one or two
1484    `position-code's.  The length of the sequence is determined by the
1485    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1486    whereas extended leading-code and position-code take the range 0xA0
1487    through 0xFF.  See `charset.h' for more details about leading-code
1488    and position-code.
1489
1490    --- CODE RANGE of Emacs' internal format ---
1491    character set        range
1492    -------------        -----
1493    ascii                0x00..0x7F
1494    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1495    eight-bit-graphic    0xA0..0xBF
1496    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1497    ---------------------------------------------
1498
1499    As this is the internal character representation, the format is
1500    usually not used externally (i.e. in a file or in a data sent to a
1501    process).  But, it is possible to have a text externally in this
1502    format (i.e. by encoding by the coding system `emacs-mule').
1503
1504    In that case, a sequence of one-byte codes has a slightly different
1505    form.
1506
1507    At first, all characters in eight-bit-control are represented by
1508    one-byte sequences which are their 8-bit code.
1509
1510    Next, character composition data are represented by the byte
1511    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1512    where,
1513         METHOD is 0xF0 plus one of composition method (enum
1514         composition_method),
1515
1516         BYTES is 0xA0 plus a byte length of this composition data,
1517
1518         CHARS is 0x20 plus a number of characters composed by this
1519         data,
1520
1521         COMPONENTs are characters of multibye form or composition
1522         rules encoded by two-byte of ASCII codes.
1523
1524    In addition, for backward compatibility, the following formats are
1525    also recognized as composition data on decoding.
1526
1527    0x80 MSEQ ...
1528    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1529
1530    Here,
1531         MSEQ is a multibyte form but in these special format:
1532           ASCII: 0xA0 ASCII_CODE+0x80,
1533           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1534         RULE is a one byte code of the range 0xA0..0xF0 that
1535         represents a composition rule.
1536   */
1537
1538 char emacs_mule_bytes[256];
1539
1540 int
1541 emacs_mule_char (coding, src, nbytes, nchars)
1542      struct coding_system *coding;
1543      unsigned char *src;
1544      int *nbytes, *nchars;
1545 {
1546   unsigned char *src_end = coding->source + coding->src_bytes;
1547   int multibytep = coding->src_multibyte;
1548   unsigned char *src_base = src;
1549   struct charset *charset;
1550   unsigned code;
1551   int c;
1552   int consumed_chars = 0;
1553
1554   ONE_MORE_BYTE (c);
1555   switch (emacs_mule_bytes[c])
1556     {
1557     case 2:
1558       if (! (charset = emacs_mule_charset[c]))
1559         goto invalid_code;
1560       ONE_MORE_BYTE (c);
1561       code = c & 0x7F;
1562       break;
1563
1564     case 3:
1565       if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1566           || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1567         {
1568           ONE_MORE_BYTE (c);
1569           if (! (charset = emacs_mule_charset[c]))
1570             goto invalid_code;
1571           ONE_MORE_BYTE (c);
1572           code = c & 0x7F;
1573         }
1574       else
1575         {
1576           if (! (charset = emacs_mule_charset[c]))
1577             goto invalid_code;
1578           ONE_MORE_BYTE (c);
1579           code = (c & 0x7F) << 8;
1580           ONE_MORE_BYTE (c);
1581           code |= c & 0x7F;
1582         }
1583       break;
1584
1585     case 4:
1586       ONE_MORE_BYTE (c);
1587       if (! (charset = emacs_mule_charset[c]))
1588         goto invalid_code;
1589       ONE_MORE_BYTE (c);
1590       code = (c & 0x7F) << 8;
1591       ONE_MORE_BYTE (c);
1592       code |= c & 0x7F;
1593       break;
1594
1595     case 1:
1596       code = c;
1597       charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1598                                  ? charset_ascii : charset_eight_bit);
1599       break;
1600
1601     default:
1602       abort ();
1603     }
1604   c = DECODE_CHAR (charset, code);
1605   if (c < 0)
1606     goto invalid_code;
1607   *nbytes = src - src_base;
1608   *nchars = consumed_chars;
1609   return c;
1610
1611  no_more_source:
1612   return -2;
1613
1614  invalid_code:
1615   return -1;
1616 }
1617
1618
1619 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1620    Check if a text is encoded in `emacs-mule'.  */
1621
1622 static int
1623 detect_coding_emacs_mule (coding, mask)
1624      struct coding_system *coding;
1625      int *mask;
1626 {
1627   unsigned char *src = coding->source, *src_base = src;
1628   unsigned char *src_end = coding->source + coding->src_bytes;
1629   int multibytep = coding->src_multibyte;
1630   int consumed_chars = 0;
1631   int c;
1632   int found = 0;
1633
1634   /* A coding system of this category is always ASCII compatible.  */
1635   src += coding->head_ascii;
1636
1637   while (1)
1638     {
1639       ONE_MORE_BYTE (c);
1640
1641       if (c == 0x80)
1642         {
1643           /* Perhaps the start of composite character.  We simple skip
1644              it because analyzing it is too heavy for detecting.  But,
1645              at least, we check that the composite character
1646              constitues of more than 4 bytes.  */
1647           unsigned char *src_base;
1648
1649         repeat:
1650           src_base = src;
1651           do
1652             {
1653               ONE_MORE_BYTE (c);
1654             }
1655           while (c >= 0xA0);
1656
1657           if (src - src_base <= 4)
1658             break;
1659           found = 1;
1660           if (c == 0x80)
1661             goto repeat;
1662         }
1663
1664       if (c < 0x80)
1665         {
1666           if (c < 0x20
1667               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1668             break;
1669         }
1670       else
1671         {
1672           unsigned char *src_base = src - 1;
1673
1674           do
1675             {
1676               ONE_MORE_BYTE (c);
1677             }
1678           while (c >= 0xA0);
1679           if (src - src_base != emacs_mule_bytes[*src_base])
1680             break;
1681           found = 1;
1682         }
1683     }
1684   *mask &= ~CATEGORY_MASK_EMACS_MULE;
1685   return 0;
1686
1687  no_more_source:
1688   if (!found)
1689     return 0;
1690   *mask &= CATEGORY_MASK_EMACS_MULE;
1691   return 1;
1692 }
1693
1694
1695 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1696
1697 /* Decode a character represented as a component of composition
1698    sequence of Emacs 20/21 style at SRC.  Set C to that character and
1699    update SRC to the head of next character (or an encoded composition
1700    rule).  If SRC doesn't points a composition component, set C to -1.
1701    If SRC points an invalid byte sequence, global exit by a return
1702    value 0.  */
1703
1704 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                 \
1705   if (1)                                                        \
1706     {                                                           \
1707       int c;                                                    \
1708       int nbytes, nchars;                                       \
1709                                                                 \
1710       if (src == src_end)                                       \
1711         break;                                                  \
1712       c = emacs_mule_char (coding, src, &nbytes, &nchars);      \
1713       if (c < 0)                                                \
1714         {                                                       \
1715           if (c == -2)                                          \
1716             break;                                              \
1717           goto invalid_code;                                    \
1718         }                                                       \
1719       *buf++ = c;                                               \
1720       src += nbytes;                                            \
1721       consumed_chars += nchars;                                 \
1722     }                                                           \
1723   else
1724
1725
1726 /* Decode a composition rule represented as a component of composition
1727    sequence of Emacs 20 style at SRC.  Store the decoded rule in *BUF,
1728    and increment BUF.  If SRC points an invalid byte sequence, set C
1729    to -1.  */
1730
1731 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
1732   do {                                                  \
1733     int c, gref, nref;                                  \
1734                                                         \
1735     if (src >= src_end)                                 \
1736       goto invalid_code;                                \
1737     ONE_MORE_BYTE_NO_CHECK (c);                         \
1738     c -= 0x20;                                          \
1739     if (c < 0 || c >= 81)                               \
1740       goto invalid_code;                                \
1741                                                         \
1742     gref = c / 9, nref = c % 9;                         \
1743     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1744   } while (0)
1745
1746
1747 /* Decode a composition rule represented as a component of composition
1748    sequence of Emacs 21 style at SRC.  Store the decoded rule in *BUF,
1749    and increment BUF.  If SRC points an invalid byte sequence, set C
1750    to -1.  */
1751
1752 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
1753   do {                                                  \
1754     int gref, nref;                                     \
1755                                                         \
1756     if (src + 1>= src_end)                              \
1757       goto invalid_code;                                \
1758     ONE_MORE_BYTE_NO_CHECK (gref);                      \
1759     gref -= 0x20;                                       \
1760     ONE_MORE_BYTE_NO_CHECK (nref);                      \
1761     nref -= 0x20;                                       \
1762     if (gref < 0 || gref >= 81                          \
1763         || nref < 0 || nref >= 81)                      \
1764       goto invalid_code;                                \
1765     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1766   } while (0)
1767
1768
1769 #define ADD_COMPOSITION_DATA(buf, method, nchars)       \
1770   do {                                                  \
1771     *buf++ = -5;                                        \
1772     *buf++ = coding->produced_char + char_offset;       \
1773     *buf++ = CODING_ANNOTATE_COMPOSITION_MASK;          \
1774     *buf++ = method;                                    \
1775     *buf++ = nchars;                                    \
1776   } while (0)
1777
1778
1779 #define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
1780   do {                                                                  \
1781     /* Emacs 21 style format.  The first three bytes at SRC are         \
1782        (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is  \
1783        the byte length of this composition information, CHARS is the    \
1784        number of characters composed by this composition.  */           \
1785     enum composition_method method = c - 0xF2;                          \
1786     int *charbuf_base = charbuf;                                        \
1787     int consumed_chars_limit;                                           \
1788     int nbytes, nchars;                                                 \
1789                                                                         \
1790     ONE_MORE_BYTE (c);                                                  \
1791     nbytes = c - 0xA0;                                                  \
1792     if (nbytes < 3)                                                     \
1793       goto invalid_code;                                                \
1794     ONE_MORE_BYTE (c);                                                  \
1795     nchars = c - 0xA0;                                                  \
1796     ADD_COMPOSITION_DATA (charbuf, method, nchars);                     \
1797     consumed_chars_limit = consumed_chars_base + nbytes;                \
1798     if (method != COMPOSITION_RELATIVE)                                 \
1799       {                                                                 \
1800         int i = 0;                                                      \
1801         while (consumed_chars < consumed_chars_limit)                   \
1802           {                                                             \
1803             if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
1804               DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
1805             else                                                        \
1806               DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
1807             i++;                                                        \
1808           }                                                             \
1809         if (consumed_chars < consumed_chars_limit)                      \
1810           goto invalid_code;                                            \
1811         charbuf_base[0] -= i;                                           \
1812       }                                                                 \
1813   } while (0)
1814
1815
1816 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)            \
1817   do {                                                          \
1818     /* Emacs 20 style format for relative composition.  */      \
1819     /* Store multibyte form of characters to be composed.  */   \
1820     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
1821     int *buf = components;                                      \
1822     int i, j;                                                   \
1823                                                                 \
1824     src = src_base;                                             \
1825     ONE_MORE_BYTE (c);          /* skip 0x80 */                 \
1826     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
1827       DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                 \
1828     if (i < 2)                                                  \
1829       goto invalid_code;                                        \
1830     ADD_COMPOSITION_DATA (charbuf, COMPOSITION_RELATIVE, i);    \
1831     for (j = 0; j < i; j++)                                     \
1832       *charbuf++ = components[j];                               \
1833   } while (0)
1834
1835
1836 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
1837   do {                                                          \
1838     /* Emacs 20 style format for rule-base composition.  */     \
1839     /* Store multibyte form of characters to be composed.  */   \
1840     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
1841     int *buf = components;                                      \
1842     int i, j;                                                   \
1843                                                                 \
1844     DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
1845     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)            \
1846       {                                                         \
1847         DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
1848         DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
1849       }                                                         \
1850     if (i < 1 || (buf - components) % 2 == 0)                   \
1851       goto invalid_code;                                        \
1852     if (charbuf + i + (i / 2) + 1 < charbuf_end)                \
1853       goto no_more_source;                                      \
1854     ADD_COMPOSITION_DATA (buf, COMPOSITION_WITH_RULE, i);       \
1855     for (j = 0; j < i; j++)                                     \
1856       *charbuf++ = components[j];                               \
1857     for (j = 0; j < i; j += 2)                                  \
1858       *charbuf++ = components[j];                               \
1859   } while (0)
1860
1861
1862 static void
1863 decode_coding_emacs_mule (coding)
1864      struct coding_system *coding;
1865 {
1866   unsigned char *src = coding->source + coding->consumed;
1867   unsigned char *src_end = coding->source + coding->src_bytes;
1868   unsigned char *src_base;
1869   int *charbuf = coding->charbuf;
1870   int *charbuf_end = charbuf + coding->charbuf_size;
1871   int consumed_chars = 0, consumed_chars_base;
1872   int char_offset = 0;
1873   int multibytep = coding->src_multibyte;
1874   Lisp_Object attrs, eol_type, charset_list;
1875
1876   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1877
1878   while (1)
1879     {
1880       int c;
1881
1882       src_base = src;
1883       consumed_chars_base = consumed_chars;
1884
1885       if (charbuf >= charbuf_end)
1886         break;
1887
1888       ONE_MORE_BYTE (c);
1889
1890       if (c < 0x80)
1891         {
1892           if (c == '\r')
1893             {
1894               if (EQ (eol_type, Qdos))
1895                 {
1896                   if (src == src_end)
1897                     goto no_more_source;
1898                   if (*src == '\n')
1899                     ONE_MORE_BYTE (c);
1900                 }
1901               else if (EQ (eol_type, Qmac))
1902                 c = '\n';
1903             }
1904           *charbuf++ = c;
1905           char_offset++;
1906         }
1907       else if (c == 0x80)
1908         {
1909           if (charbuf + 5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 > charbuf_end)
1910             break;
1911           ONE_MORE_BYTE (c);
1912           if (c - 0xF2 >= COMPOSITION_RELATIVE
1913               && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
1914             DECODE_EMACS_MULE_21_COMPOSITION (c);
1915           else if (c < 0xC0)
1916             DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
1917           else if (c == 0xFF)
1918             DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
1919           else
1920             goto invalid_code;
1921           coding->annotated = 1;
1922         }
1923       else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
1924         {
1925           int nbytes, nchars;
1926           src = src_base;
1927           consumed_chars = consumed_chars_base;
1928           c = emacs_mule_char (coding, src, &nbytes, &nchars);
1929           if (c < 0)
1930             {
1931               if (c == -2)
1932                 break;
1933               goto invalid_code;
1934             }
1935           *charbuf++ = c;
1936           src += nbytes;
1937           consumed_chars += nchars;
1938           char_offset++;
1939         }
1940       continue;
1941
1942     invalid_code:
1943       src = src_base;
1944       consumed_chars = consumed_chars_base;
1945       ONE_MORE_BYTE (c);
1946       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1947       coding->errors++;
1948     }
1949
1950  no_more_source:
1951   coding->consumed_char += consumed_chars_base;
1952   coding->consumed = src_base - coding->source;
1953   coding->charbuf_used = charbuf - coding->charbuf;
1954 }
1955
1956
1957 #define EMACS_MULE_LEADING_CODES(id, codes)     \
1958   do {                                          \
1959     if (id < 0xA0)                              \
1960       codes[0] = id, codes[1] = 0;              \
1961     else if (id < 0xE0)                         \
1962       codes[0] = 0x9A, codes[1] = id;           \
1963     else if (id < 0xF0)                         \
1964       codes[0] = 0x9B, codes[1] = id;           \
1965     else if (id < 0xF5)                         \
1966       codes[0] = 0x9C, codes[1] = id;           \
1967     else                                        \
1968       codes[0] = 0x9D, codes[1] = id;           \
1969   } while (0);
1970
1971
1972 static int
1973 encode_coding_emacs_mule (coding)
1974      struct coding_system *coding;
1975 {
1976   int multibytep = coding->dst_multibyte;
1977   int *charbuf = coding->charbuf;
1978   int *charbuf_end = charbuf + coding->charbuf_used;
1979   unsigned char *dst = coding->destination + coding->produced;
1980   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1981   int safe_room = 8;
1982   int produced_chars = 0;
1983   Lisp_Object attrs, eol_type, charset_list;
1984   int c;
1985
1986   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
1987
1988   while (charbuf < charbuf_end)
1989     {
1990       ASSURE_DESTINATION (safe_room);
1991       c = *charbuf++;
1992       if (ASCII_CHAR_P (c))
1993         EMIT_ONE_ASCII_BYTE (c);
1994       else if (CHAR_BYTE8_P (c))
1995         {
1996           c = CHAR_TO_BYTE8 (c);
1997           EMIT_ONE_BYTE (c);
1998         }
1999       else
2000         {
2001           struct charset *charset;
2002           unsigned code;
2003           int dimension;
2004           int emacs_mule_id;
2005           unsigned char leading_codes[2];
2006
2007           charset = char_charset (c, charset_list, &code);
2008           if (! charset)
2009             {
2010               c = coding->default_char;
2011               if (ASCII_CHAR_P (c))
2012                 {
2013                   EMIT_ONE_ASCII_BYTE (c);
2014                   continue;
2015                 }
2016               charset = char_charset (c, charset_list, &code);
2017             }
2018           dimension = CHARSET_DIMENSION (charset);
2019           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2020           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2021           EMIT_ONE_BYTE (leading_codes[0]);
2022           if (leading_codes[1])
2023             EMIT_ONE_BYTE (leading_codes[1]);
2024           if (dimension == 1)
2025             EMIT_ONE_BYTE (code);
2026           else
2027             {
2028               EMIT_ONE_BYTE (code >> 8);
2029               EMIT_ONE_BYTE (code & 0xFF);
2030             }
2031         }
2032     }
2033   coding->result = CODING_RESULT_SUCCESS;
2034   coding->produced_char += produced_chars;
2035   coding->produced = dst - coding->destination;
2036   return 0;
2037 }
2038
2039 \f
2040 /*** 7. ISO2022 handlers ***/
2041
2042 /* The following note describes the coding system ISO2022 briefly.
2043    Since the intention of this note is to help understand the
2044    functions in this file, some parts are NOT ACCURATE or are OVERLY
2045    SIMPLIFIED.  For thorough understanding, please refer to the
2046    original document of ISO2022.  This is equivalent to the standard
2047    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2048
2049    ISO2022 provides many mechanisms to encode several character sets
2050    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2051    is encoded using bytes less than 128.  This may make the encoded
2052    text a little bit longer, but the text passes more easily through
2053    several types of gateway, some of which strip off the MSB (Most
2054    Significant Bit).
2055
2056    There are two kinds of character sets: control character sets and
2057    graphic character sets.  The former contain control characters such
2058    as `newline' and `escape' to provide control functions (control
2059    functions are also provided by escape sequences).  The latter
2060    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2061    two control character sets and many graphic character sets.
2062
2063    Graphic character sets are classified into one of the following
2064    four classes, according to the number of bytes (DIMENSION) and
2065    number of characters in one dimension (CHARS) of the set:
2066    - DIMENSION1_CHARS94
2067    - DIMENSION1_CHARS96
2068    - DIMENSION2_CHARS94
2069    - DIMENSION2_CHARS96
2070
2071    In addition, each character set is assigned an identification tag,
2072    unique for each set, called the "final character" (denoted as <F>
2073    hereafter).  The <F> of each character set is decided by ECMA(*)
2074    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2075    (0x30..0x3F are for private use only).
2076
2077    Note (*): ECMA = European Computer Manufacturers Association
2078
2079    Here are examples of graphic character sets [NAME(<F>)]:
2080         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2081         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2082         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2083         o DIMENSION2_CHARS96 -- none for the moment
2084
2085    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2086         C0 [0x00..0x1F] -- control character plane 0
2087         GL [0x20..0x7F] -- graphic character plane 0
2088         C1 [0x80..0x9F] -- control character plane 1
2089         GR [0xA0..0xFF] -- graphic character plane 1
2090
2091    A control character set is directly designated and invoked to C0 or
2092    C1 by an escape sequence.  The most common case is that:
2093    - ISO646's  control character set is designated/invoked to C0, and
2094    - ISO6429's control character set is designated/invoked to C1,
2095    and usually these designations/invocations are omitted in encoded
2096    text.  In a 7-bit environment, only C0 can be used, and a control
2097    character for C1 is encoded by an appropriate escape sequence to
2098    fit into the environment.  All control characters for C1 are
2099    defined to have corresponding escape sequences.
2100
2101    A graphic character set is at first designated to one of four
2102    graphic registers (G0 through G3), then these graphic registers are
2103    invoked to GL or GR.  These designations and invocations can be
2104    done independently.  The most common case is that G0 is invoked to
2105    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2106    these invocations and designations are omitted in encoded text.
2107    In a 7-bit environment, only GL can be used.
2108
2109    When a graphic character set of CHARS94 is invoked to GL, codes
2110    0x20 and 0x7F of the GL area work as control characters SPACE and
2111    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2112    be used.
2113
2114    There are two ways of invocation: locking-shift and single-shift.
2115    With locking-shift, the invocation lasts until the next different
2116    invocation, whereas with single-shift, the invocation affects the
2117    following character only and doesn't affect the locking-shift
2118    state.  Invocations are done by the following control characters or
2119    escape sequences:
2120
2121    ----------------------------------------------------------------------
2122    abbrev  function                  cntrl escape seq   description
2123    ----------------------------------------------------------------------
2124    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2125    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2126    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2127    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2128    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2129    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2130    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2131    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2132    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2133    ----------------------------------------------------------------------
2134    (*) These are not used by any known coding system.
2135
2136    Control characters for these functions are defined by macros
2137    ISO_CODE_XXX in `coding.h'.
2138
2139    Designations are done by the following escape sequences:
2140    ----------------------------------------------------------------------
2141    escape sequence      description
2142    ----------------------------------------------------------------------
2143    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2144    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2145    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2146    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2147    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2148    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2149    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2150    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2151    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2152    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2153    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2154    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2155    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2156    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2157    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2158    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2159    ----------------------------------------------------------------------
2160
2161    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2162    of dimension 1, chars 94, and final character <F>, etc...
2163
2164    Note (*): Although these designations are not allowed in ISO2022,
2165    Emacs accepts them on decoding, and produces them on encoding
2166    CHARS96 character sets in a coding system which is characterized as
2167    7-bit environment, non-locking-shift, and non-single-shift.
2168
2169    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2170    '(' must be omitted.  We refer to this as "short-form" hereafter.
2171
2172    Now you may notice that there are a lot of ways of encoding the
2173    same multilingual text in ISO2022.  Actually, there exist many
2174    coding systems such as Compound Text (used in X11's inter client
2175    communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2176    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2177    localized platforms), and all of these are variants of ISO2022.
2178
2179    In addition to the above, Emacs handles two more kinds of escape
2180    sequences: ISO6429's direction specification and Emacs' private
2181    sequence for specifying character composition.
2182
2183    ISO6429's direction specification takes the following form:
2184         o CSI ']'      -- end of the current direction
2185         o CSI '0' ']'  -- end of the current direction
2186         o CSI '1' ']'  -- start of left-to-right text
2187         o CSI '2' ']'  -- start of right-to-left text
2188    The control character CSI (0x9B: control sequence introducer) is
2189    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2190
2191    Character composition specification takes the following form:
2192         o ESC '0' -- start relative composition
2193         o ESC '1' -- end composition
2194         o ESC '2' -- start rule-base composition (*)
2195         o ESC '3' -- start relative composition with alternate chars  (**)
2196         o ESC '4' -- start rule-base composition with alternate chars  (**)
2197   Since these are not standard escape sequences of any ISO standard,
2198   the use of them with these meanings is restricted to Emacs only.
2199
2200   (*) This form is used only in Emacs 20.7 and older versions,
2201   but newer versions can safely decode it.
2202   (**) This form is used only in Emacs 21.1 and newer versions,
2203   and older versions can't decode it.
2204
2205   Here's a list of example usages of these composition escape
2206   sequences (categorized by `enum composition_method').
2207
2208   COMPOSITION_RELATIVE:
2209         ESC 0 CHAR [ CHAR ] ESC 1
2210   COMPOSITION_WITH_RULE:
2211         ESC 2 CHAR [ RULE CHAR ] ESC 1
2212   COMPOSITION_WITH_ALTCHARS:
2213         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2214   COMPOSITION_WITH_RULE_ALTCHARS:
2215         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2216
2217 enum iso_code_class_type iso_code_class[256];
2218
2219 #define SAFE_CHARSET_P(coding, id)      \
2220   ((id) <= (coding)->max_charset_id     \
2221    && (coding)->safe_charsets[id] >= 0)
2222
2223
2224 #define SHIFT_OUT_OK(category)  \
2225   (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2226
2227 static void
2228 setup_iso_safe_charsets (attrs)
2229      Lisp_Object attrs;
2230 {
2231   Lisp_Object charset_list, safe_charsets;
2232   Lisp_Object request;
2233   Lisp_Object reg_usage;
2234   Lisp_Object tail;
2235   int reg94, reg96;
2236   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2237   int max_charset_id;
2238
2239   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2240   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2241       && ! EQ (charset_list, Viso_2022_charset_list))
2242     {
2243       CODING_ATTR_CHARSET_LIST (attrs)
2244         = charset_list = Viso_2022_charset_list;
2245       ASET (attrs, coding_attr_safe_charsets, Qnil);
2246     }
2247
2248   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2249     return;
2250
2251   max_charset_id = 0;
2252   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2253     {
2254       int id = XINT (XCAR (tail));
2255       if (max_charset_id < id)
2256         max_charset_id = id;
2257     }
2258
2259   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2260                                 make_number (255));
2261   request = AREF (attrs, coding_attr_iso_request);
2262   reg_usage = AREF (attrs, coding_attr_iso_usage);
2263   reg94 = XINT (XCAR (reg_usage));
2264   reg96 = XINT (XCDR (reg_usage));
2265
2266   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2267     {
2268       Lisp_Object id;
2269       Lisp_Object reg;
2270       struct charset *charset;
2271
2272       id = XCAR (tail);
2273       charset = CHARSET_FROM_ID (XINT (id));
2274       reg = Fcdr (Fassq (id, request));
2275       if (! NILP (reg))
2276         XSTRING (safe_charsets)->data[XINT (id)] = XINT (reg);
2277       else if (charset->iso_chars_96)
2278         {
2279           if (reg96 < 4)
2280             XSTRING (safe_charsets)->data[XINT (id)] = reg96;
2281         }
2282       else
2283         {
2284           if (reg94 < 4)
2285             XSTRING (safe_charsets)->data[XINT (id)] = reg94;
2286         }
2287     }
2288   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2289 }
2290
2291
2292 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2293    Check if a text is encoded in ISO2022.  If it is, returns an
2294    integer in which appropriate flag bits any of:
2295         CATEGORY_MASK_ISO_7
2296         CATEGORY_MASK_ISO_7_TIGHT
2297         CATEGORY_MASK_ISO_8_1
2298         CATEGORY_MASK_ISO_8_2
2299         CATEGORY_MASK_ISO_7_ELSE
2300         CATEGORY_MASK_ISO_8_ELSE
2301    are set.  If a code which should never appear in ISO2022 is found,
2302    returns 0.  */
2303
2304 static int
2305 detect_coding_iso_2022 (coding, mask)
2306      struct coding_system *coding;
2307      int *mask;
2308 {
2309   unsigned char *src = coding->source, *src_base = src;
2310   unsigned char *src_end = coding->source + coding->src_bytes;
2311   int multibytep = coding->src_multibyte;
2312   int mask_iso = CATEGORY_MASK_ISO;
2313   int mask_found = 0, mask_8bit_found = 0;
2314   int reg[4], shift_out = 0, single_shifting = 0;
2315   int id;
2316   int c, c1;
2317   int consumed_chars = 0;
2318   int i;
2319
2320   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2321     {
2322       struct coding_system *this = &(coding_categories[i]);
2323       Lisp_Object attrs, val;
2324
2325       attrs = CODING_ID_ATTRS (this->id);
2326       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2327           && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2328         setup_iso_safe_charsets (attrs);
2329       val = CODING_ATTR_SAFE_CHARSETS (attrs);
2330       this->max_charset_id = XSTRING (val)->size - 1;
2331       this->safe_charsets = (char *) XSTRING (val)->data;
2332     }
2333
2334   /* A coding system of this category is always ASCII compatible.  */
2335   src += coding->head_ascii;
2336
2337   reg[0] = charset_ascii, reg[1] = reg[2] = reg[3] = -1;
2338   while (mask_iso && src < src_end)
2339     {
2340       ONE_MORE_BYTE (c);
2341       switch (c)
2342         {
2343         case ISO_CODE_ESC:
2344           if (inhibit_iso_escape_detection)
2345             break;
2346           single_shifting = 0;
2347           ONE_MORE_BYTE (c);
2348           if (c >= '(' && c <= '/')
2349             {
2350               /* Designation sequence for a charset of dimension 1.  */
2351               ONE_MORE_BYTE (c1);
2352               if (c1 < ' ' || c1 >= 0x80
2353                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2354                 /* Invalid designation sequence.  Just ignore.  */
2355                 break;
2356               reg[(c - '(') % 4] = id;
2357             }
2358           else if (c == '$')
2359             {
2360               /* Designation sequence for a charset of dimension 2.  */
2361               ONE_MORE_BYTE (c);
2362               if (c >= '@' && c <= 'B')
2363                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
2364                 reg[0] = id = iso_charset_table[1][0][c];
2365               else if (c >= '(' && c <= '/')
2366                 {
2367                   ONE_MORE_BYTE (c1);
2368                   if (c1 < ' ' || c1 >= 0x80
2369                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2370                     /* Invalid designation sequence.  Just ignore.  */
2371                     break;
2372                   reg[(c - '(') % 4] = id;
2373                 }
2374               else
2375                 /* Invalid designation sequence.  Just ignore.  */
2376                 break;
2377             }
2378           else if (c == 'N' || c == 'O')
2379             {
2380               /* ESC <Fe> for SS2 or SS3.  */
2381               mask_iso &= CATEGORY_MASK_ISO_7_ELSE;
2382               break;
2383             }
2384           else if (c >= '0' && c <= '4')
2385             {
2386               /* ESC <Fp> for start/end composition.  */
2387               mask_found |= CATEGORY_MASK_ISO;
2388               break;
2389             }
2390           else
2391             {
2392               /* Invalid escape sequence.  */
2393               mask_iso &= ~CATEGORY_MASK_ISO_ESCAPE;
2394               break;
2395             }
2396
2397           /* We found a valid designation sequence for CHARSET.  */
2398           mask_iso &= ~CATEGORY_MASK_ISO_8BIT;
2399           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2400                               id))
2401             mask_found |= CATEGORY_MASK_ISO_7;
2402           else
2403             mask_iso &= ~CATEGORY_MASK_ISO_7;
2404           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2405                               id))
2406             mask_found |= CATEGORY_MASK_ISO_7_TIGHT;
2407           else
2408             mask_iso &= ~CATEGORY_MASK_ISO_7_TIGHT;
2409           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2410                               id))
2411             mask_found |= CATEGORY_MASK_ISO_7_ELSE;
2412           else
2413             mask_iso &= ~CATEGORY_MASK_ISO_7_ELSE;
2414           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2415                               id))
2416             mask_found |= CATEGORY_MASK_ISO_8_ELSE;
2417           else
2418             mask_iso &= ~CATEGORY_MASK_ISO_8_ELSE;
2419           break;
2420
2421         case ISO_CODE_SO:
2422           if (inhibit_iso_escape_detection)
2423             break;
2424           single_shifting = 0;
2425           if (shift_out == 0
2426               && (reg[1] >= 0
2427                   || SHIFT_OUT_OK (coding_category_iso_7_else)
2428                   || SHIFT_OUT_OK (coding_category_iso_8_else)))
2429             {
2430               /* Locking shift out.  */
2431               mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
2432               mask_found |= CATEGORY_MASK_ISO_ELSE;
2433             }
2434           break;
2435
2436         case ISO_CODE_SI:
2437           if (inhibit_iso_escape_detection)
2438             break;
2439           single_shifting = 0;
2440           if (shift_out == 1)
2441             {
2442               /* Locking shift in.  */
2443               mask_iso &= ~CATEGORY_MASK_ISO_7BIT;
2444               mask_found |= CATEGORY_MASK_ISO_ELSE;
2445             }
2446           break;
2447
2448         case ISO_CODE_CSI:
2449           single_shifting = 0;
2450         case ISO_CODE_SS2:
2451         case ISO_CODE_SS3:
2452           {
2453             int newmask = CATEGORY_MASK_ISO_8_ELSE;
2454
2455             if (inhibit_iso_escape_detection)
2456               break;
2457             if (c != ISO_CODE_CSI)
2458               {
2459                 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2460                     & CODING_ISO_FLAG_SINGLE_SHIFT)
2461                   newmask |= CATEGORY_MASK_ISO_8_1;
2462                 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2463                     & CODING_ISO_FLAG_SINGLE_SHIFT)
2464                   newmask |= CATEGORY_MASK_ISO_8_2;
2465                 single_shifting = 1;
2466               }
2467             if (VECTORP (Vlatin_extra_code_table)
2468                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2469               {
2470                 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2471                     & CODING_ISO_FLAG_LATIN_EXTRA)
2472                   newmask |= CATEGORY_MASK_ISO_8_1;
2473                 if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2474                     & CODING_ISO_FLAG_LATIN_EXTRA)
2475                   newmask |= CATEGORY_MASK_ISO_8_2;
2476               }
2477             mask_iso &= newmask;
2478             mask_found |= newmask;
2479           }
2480           break;
2481
2482         default:
2483           if (c < 0x80)
2484             {
2485               single_shifting = 0;
2486               break;
2487             }
2488           else if (c < 0xA0)
2489             {
2490               single_shifting = 0;
2491               mask_8bit_found = 1;
2492               if (VECTORP (Vlatin_extra_code_table)
2493                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2494                 {
2495                   int newmask = 0;
2496
2497                   if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2498                       & CODING_ISO_FLAG_LATIN_EXTRA)
2499                     newmask |= CATEGORY_MASK_ISO_8_1;
2500                   if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2501                       & CODING_ISO_FLAG_LATIN_EXTRA)
2502                     newmask |= CATEGORY_MASK_ISO_8_2;
2503                   mask_iso &= newmask;
2504                   mask_found |= newmask;
2505                 }
2506               else
2507                 return 0;
2508             }
2509           else
2510             {
2511               mask_iso &= ~(CATEGORY_MASK_ISO_7BIT
2512                             | CATEGORY_MASK_ISO_7_ELSE);
2513               mask_found |= CATEGORY_MASK_ISO_8_1;
2514               mask_8bit_found = 1;
2515               /* Check the length of succeeding codes of the range
2516                  0xA0..0FF.  If the byte length is odd, we exclude
2517                  CATEGORY_MASK_ISO_8_2.  We can check this only
2518                  when we are not single shifting.  */
2519               if (!single_shifting
2520                   && mask_iso & CATEGORY_MASK_ISO_8_2)
2521                 {
2522                   int i = 1;
2523                   while (src < src_end)
2524                     {
2525                       ONE_MORE_BYTE (c);
2526                       if (c < 0xA0)
2527                         break;
2528                       i++;
2529                     }
2530
2531                   if (i & 1 && src < src_end)
2532                     mask_iso &= ~CATEGORY_MASK_ISO_8_2;
2533                   else
2534                     mask_found |= CATEGORY_MASK_ISO_8_2;
2535                 }
2536             }
2537           break;
2538         }
2539     }
2540  no_more_source:
2541   if (!mask_iso)
2542     {
2543       *mask &= ~CATEGORY_MASK_ISO;
2544       return 0;
2545     }
2546   if (!mask_found)
2547     return 0;
2548   *mask &= mask_iso & mask_found;
2549   if (! mask_8bit_found)
2550     *mask &= ~(CATEGORY_MASK_ISO_8BIT | CATEGORY_MASK_ISO_8_ELSE);
2551   return 1;
2552 }
2553
2554
2555 /* Set designation state into CODING.  */
2556 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
2557   do {                                                                  \
2558     int id, prev;                                                       \
2559                                                                         \
2560     if (final < '0' || final >= 128                                     \
2561         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
2562         || !SAFE_CHARSET_P (coding, id))                                \
2563       {                                                                 \
2564         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
2565         goto invalid_code;                                              \
2566       }                                                                 \
2567     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
2568     if (id == charset_jisx0201_roman)                                   \
2569       {                                                                 \
2570         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
2571           id = charset_ascii;                                           \
2572       }                                                                 \
2573     else if (id == charset_jisx0208_1978)                               \
2574       {                                                                 \
2575         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
2576           id = charset_jisx0208;                                        \
2577       }                                                                 \
2578     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
2579     /* If there was an invalid designation to REG previously, and this  \
2580        designation is ASCII to REG, we should keep this designation     \
2581        sequence.  */                                                    \
2582     if (prev == -2 && id == charset_ascii)                              \
2583       goto invalid_code;                                                \
2584   } while (0)
2585
2586
2587 #define MAYBE_FINISH_COMPOSITION()                              \
2588   do {                                                          \
2589     int i;                                                      \
2590     if (composition_state == COMPOSING_NO)                      \
2591       break;                                                    \
2592     /* It is assured that we have enough room for producing     \
2593        characters stored in the table `components'.  */         \
2594     if (charbuf + component_idx > charbuf_end)                  \
2595       goto no_more_source;                                      \
2596     composition_state = COMPOSING_NO;                           \
2597     if (method == COMPOSITION_RELATIVE                          \
2598         || method == COMPOSITION_WITH_ALTCHARS)                 \
2599       {                                                         \
2600         for (i = 0; i < component_idx; i++)                     \
2601           *charbuf++ = components[i];                           \
2602         char_offset += component_idx;                           \
2603       }                                                         \
2604     else                                                        \
2605       {                                                         \
2606         for (i = 0; i < component_idx; i += 2)                  \
2607           *charbuf++ = components[i];                           \
2608         char_offset += (component_idx / 2) + 1;                 \
2609       }                                                         \
2610   } while (0)
2611
2612
2613 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2614    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2615    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2616    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2617    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2618   */
2619
2620 #define DECODE_COMPOSITION_START(c1)                                    \
2621   do {                                                                  \
2622     if (c1 == '0'                                                       \
2623         && composition_state == COMPOSING_COMPONENT_RULE)               \
2624       {                                                                 \
2625         component_len = component_idx;                                  \
2626         composition_state = COMPOSING_CHAR;                             \
2627       }                                                                 \
2628     else                                                                \
2629       {                                                                 \
2630         unsigned char *p;                                               \
2631                                                                         \
2632         MAYBE_FINISH_COMPOSITION ();                                    \
2633         if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
2634           goto no_more_source;                                          \
2635         for (p = src; p < src_end - 1; p++)                             \
2636           if (*p == ISO_CODE_ESC && p[1] == '1')                        \
2637             break;                                                      \
2638         if (p == src_end - 1)                                           \
2639           {                                                             \
2640             if (coding->mode & CODING_MODE_LAST_BLOCK)                  \
2641               goto invalid_code;                                        \
2642             goto no_more_source;                                        \
2643           }                                                             \
2644                                                                         \
2645         /* This is surely the start of a composition.  */               \
2646         method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
2647                   : c1 == '2' ? COMPOSITION_WITH_RULE                   \
2648                   : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
2649                   : COMPOSITION_WITH_RULE_ALTCHARS);                    \
2650         composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
2651                              : COMPOSING_COMPONENT_CHAR);               \
2652         component_idx = component_len = 0;                              \
2653       }                                                                 \
2654   } while (0)
2655
2656
2657 /* Handle compositoin end sequence ESC 1.  */
2658
2659 #define DECODE_COMPOSITION_END()                                        \
2660   do {                                                                  \
2661     int nchars = (component_len > 0 ? component_idx - component_len     \
2662                   : method == COMPOSITION_RELATIVE ? component_idx      \
2663                   : (component_idx + 1) / 2);                           \
2664     int i;                                                              \
2665     int *saved_charbuf = charbuf;                                       \
2666                                                                         \
2667     ADD_COMPOSITION_DATA (charbuf, method, nchars);                     \
2668     if (method != COMPOSITION_RELATIVE)                                 \
2669       {                                                                 \
2670         if (component_len == 0)                                         \
2671           for (i = 0; i < component_idx; i++)                           \
2672             *charbuf++ = components[i];                                 \
2673         else                                                            \
2674           for (i = 0; i < component_len; i++)                           \
2675             *charbuf++ = components[i];                                 \
2676         *saved_charbuf = saved_charbuf - charbuf;                       \
2677       }                                                                 \
2678     if (method == COMPOSITION_WITH_RULE)                                \
2679       for (i = 0; i < component_idx; i += 2, char_offset++)             \
2680         *charbuf++ = components[i];                                     \
2681     else                                                                \
2682       for (i = component_len; i < component_idx; i++, char_offset++)    \
2683         *charbuf++ = components[i];                                     \
2684     coding->annotated = 1;                                              \
2685     composition_state = COMPOSING_NO;                                   \
2686   } while (0)
2687
2688
2689 /* Decode a composition rule from the byte C1 (and maybe one more byte
2690    from SRC) and store one encoded composition rule in
2691    coding->cmp_data.  */
2692
2693 #define DECODE_COMPOSITION_RULE(c1)                                     \
2694   do {                                                                  \
2695     (c1) -= 32;                                                         \
2696     if (c1 < 81)                /* old format (before ver.21) */        \
2697       {                                                                 \
2698         int gref = (c1) / 9;                                            \
2699         int nref = (c1) % 9;                                            \
2700         if (gref == 4) gref = 10;                                       \
2701         if (nref == 4) nref = 10;                                       \
2702         c1 = COMPOSITION_ENCODE_RULE (gref, nref);                      \
2703       }                                                                 \
2704     else if (c1 < 93)           /* new format (after ver.21) */         \
2705       {                                                                 \
2706         ONE_MORE_BYTE (c2);                                             \
2707         c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);                \
2708       }                                                                 \
2709     else                                                                \
2710       c1 = 0;                                                           \
2711   } while (0)
2712
2713
2714 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2715
2716 static void
2717 decode_coding_iso_2022 (coding)
2718      struct coding_system *coding;
2719 {
2720   unsigned char *src = coding->source + coding->consumed;
2721   unsigned char *src_end = coding->source + coding->src_bytes;
2722   unsigned char *src_base;
2723   int *charbuf = coding->charbuf;
2724   int *charbuf_end = charbuf + coding->charbuf_size - 4;
2725   int consumed_chars = 0, consumed_chars_base;
2726   int char_offset = 0;
2727   int multibytep = coding->src_multibyte;
2728   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
2729   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2730   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2731   struct charset *charset;
2732   int c;
2733   /* For handling composition sequence.  */
2734 #define COMPOSING_NO                    0
2735 #define COMPOSING_CHAR                  1
2736 #define COMPOSING_RULE                  2
2737 #define COMPOSING_COMPONENT_CHAR        3
2738 #define COMPOSING_COMPONENT_RULE        4
2739
2740   int composition_state = COMPOSING_NO;
2741   enum composition_method method;
2742   int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2743   int component_idx;
2744   int component_len;
2745   Lisp_Object attrs, eol_type, charset_list;
2746
2747   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
2748   setup_iso_safe_charsets (attrs);
2749
2750   while (1)
2751     {
2752       int c1, c2;
2753
2754       src_base = src;
2755       consumed_chars_base = consumed_chars;
2756
2757       if (charbuf >= charbuf_end)
2758         break;
2759
2760       ONE_MORE_BYTE (c1);
2761
2762       /* We produce no character or one character.  */
2763       switch (iso_code_class [c1])
2764         {
2765         case ISO_0x20_or_0x7F:
2766           if (composition_state != COMPOSING_NO)
2767             {
2768               if (composition_state == COMPOSING_RULE
2769                   || composition_state == COMPOSING_COMPONENT_RULE)
2770                 {
2771                   DECODE_COMPOSITION_RULE (c1);
2772                   components[component_idx++] = c1;
2773                   composition_state--;
2774                   continue;
2775                 }
2776             }
2777           if (charset_id_0 < 0
2778               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
2779             /* This is SPACE or DEL.  */
2780             charset = CHARSET_FROM_ID (charset_ascii);
2781           else
2782             charset = CHARSET_FROM_ID (charset_id_0);
2783           break;
2784
2785         case ISO_graphic_plane_0:
2786           if (composition_state != COMPOSING_NO)
2787             {
2788               if (composition_state == COMPOSING_RULE
2789                   || composition_state == COMPOSING_COMPONENT_RULE)
2790                 {
2791                   DECODE_COMPOSITION_RULE (c1);
2792                   components[component_idx++] = c1;
2793                   composition_state--;
2794                   continue;
2795                 }
2796             }
2797           charset = CHARSET_FROM_ID (charset_id_0);
2798           break;
2799
2800         case ISO_0xA0_or_0xFF:
2801           if (charset_id_1 < 0
2802               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
2803               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
2804             goto invalid_code;
2805           /* This is a graphic character, we fall down ... */
2806
2807         case ISO_graphic_plane_1:
2808           if (charset_id_1 < 0)
2809             goto invalid_code;
2810           charset = CHARSET_FROM_ID (charset_id_1);
2811           break;
2812
2813         case ISO_carriage_return:
2814           if (c1 == '\r')
2815             {
2816               if (EQ (eol_type, Qdos))
2817                 {
2818                   if (src == src_end)
2819                     goto no_more_source;
2820                   if (*src == '\n')
2821                     ONE_MORE_BYTE (c1);
2822                 }
2823               else if (EQ (eol_type, Qmac))
2824                 c1 = '\n';
2825             }
2826           /* fall through */
2827
2828         case ISO_control_0:
2829           MAYBE_FINISH_COMPOSITION ();
2830           charset = CHARSET_FROM_ID (charset_ascii);
2831           break;
2832
2833         case ISO_control_1:
2834           MAYBE_FINISH_COMPOSITION ();
2835           goto invalid_code;
2836
2837         case ISO_shift_out:
2838           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2839               || CODING_ISO_DESIGNATION (coding, 1) < 0)
2840             goto invalid_code;
2841           CODING_ISO_INVOCATION (coding, 0) = 1;
2842           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2843           continue;
2844
2845         case ISO_shift_in:
2846           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
2847             goto invalid_code;
2848           CODING_ISO_INVOCATION (coding, 0) = 0;
2849           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2850           continue;
2851
2852         case ISO_single_shift_2_7:
2853         case ISO_single_shift_2:
2854           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2855             goto invalid_code;
2856           /* SS2 is handled as an escape sequence of ESC 'N' */
2857           c1 = 'N';
2858           goto label_escape_sequence;
2859
2860         case ISO_single_shift_3:
2861           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
2862             goto invalid_code;
2863           /* SS2 is handled as an escape sequence of ESC 'O' */
2864           c1 = 'O';
2865           goto label_escape_sequence;
2866
2867         case ISO_control_sequence_introducer:
2868           /* CSI is handled as an escape sequence of ESC '[' ...  */
2869           c1 = '[';
2870           goto label_escape_sequence;
2871
2872         case ISO_escape:
2873           ONE_MORE_BYTE (c1);
2874         label_escape_sequence:
2875           /* Escape sequences handled here are invocation,
2876              designation, direction specification, and character
2877              composition specification.  */
2878           switch (c1)
2879             {
2880             case '&':           /* revision of following character set */
2881               ONE_MORE_BYTE (c1);
2882               if (!(c1 >= '@' && c1 <= '~'))
2883                 goto invalid_code;
2884               ONE_MORE_BYTE (c1);
2885               if (c1 != ISO_CODE_ESC)
2886                 goto invalid_code;
2887               ONE_MORE_BYTE (c1);
2888               goto label_escape_sequence;
2889
2890             case '$':           /* designation of 2-byte character set */
2891               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
2892                 goto invalid_code;
2893               ONE_MORE_BYTE (c1);
2894               if (c1 >= '@' && c1 <= 'B')
2895                 {       /* designation of JISX0208.1978, GB2312.1980,
2896                            or JISX0208.1980 */
2897                   DECODE_DESIGNATION (0, 2, 0, c1);
2898                 }
2899               else if (c1 >= 0x28 && c1 <= 0x2B)
2900                 {       /* designation of DIMENSION2_CHARS94 character set */
2901                   ONE_MORE_BYTE (c2);
2902                   DECODE_DESIGNATION (c1 - 0x28, 2, 0, c2);
2903                 }
2904               else if (c1 >= 0x2C && c1 <= 0x2F)
2905                 {       /* designation of DIMENSION2_CHARS96 character set */
2906                   ONE_MORE_BYTE (c2);
2907                   DECODE_DESIGNATION (c1 - 0x2C, 2, 1, c2);
2908                 }
2909               else
2910                 goto invalid_code;
2911               /* We must update these variables now.  */
2912               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2913               charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2914               continue;
2915
2916             case 'n':           /* invocation of locking-shift-2 */
2917               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2918                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
2919                 goto invalid_code;
2920               CODING_ISO_INVOCATION (coding, 0) = 2;
2921               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2922               continue;
2923
2924             case 'o':           /* invocation of locking-shift-3 */
2925               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
2926                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
2927                 goto invalid_code;
2928               CODING_ISO_INVOCATION (coding, 0) = 3;
2929               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2930               continue;
2931
2932             case 'N':           /* invocation of single-shift-2 */
2933               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2934                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
2935                 goto invalid_code;
2936               charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 2));
2937               ONE_MORE_BYTE (c1);
2938               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2939                 goto invalid_code;
2940               break;
2941
2942             case 'O':           /* invocation of single-shift-3 */
2943               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
2944                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
2945                 goto invalid_code;
2946               charset = CHARSET_FROM_ID (CODING_ISO_DESIGNATION (coding, 3));
2947               ONE_MORE_BYTE (c1);
2948               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
2949                 goto invalid_code;
2950               break;
2951
2952             case '0': case '2': case '3': case '4': /* start composition */
2953               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
2954                 goto invalid_code;
2955               DECODE_COMPOSITION_START (c1);
2956               continue;
2957
2958             case '1':           /* end composition */
2959               if (composition_state == COMPOSING_NO)
2960                 goto invalid_code;
2961               DECODE_COMPOSITION_END ();
2962               continue;
2963
2964             case '[':           /* specification of direction */
2965               if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
2966                 goto invalid_code;
2967               /* For the moment, nested direction is not supported.
2968                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
2969                  left-to-right, and nozero means right-to-left.  */
2970               ONE_MORE_BYTE (c1);
2971               switch (c1)
2972                 {
2973                 case ']':       /* end of the current direction */
2974                   coding->mode &= ~CODING_MODE_DIRECTION;
2975
2976                 case '0':       /* end of the current direction */
2977                 case '1':       /* start of left-to-right direction */
2978                   ONE_MORE_BYTE (c1);
2979                   if (c1 == ']')
2980                     coding->mode &= ~CODING_MODE_DIRECTION;
2981                   else
2982                     goto invalid_code;
2983                   break;
2984
2985                 case '2':       /* start of right-to-left direction */
2986                   ONE_MORE_BYTE (c1);
2987                   if (c1 == ']')
2988                     coding->mode |= CODING_MODE_DIRECTION;
2989                   else
2990                     goto invalid_code;
2991                   break;
2992
2993                 default:
2994                   goto invalid_code;
2995                 }
2996               continue;
2997
2998             default:
2999               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3000                 goto invalid_code;
3001               if (c1 >= 0x28 && c1 <= 0x2B)
3002                 {       /* designation of DIMENSION1_CHARS94 character set */
3003                   ONE_MORE_BYTE (c2);
3004                   DECODE_DESIGNATION (c1 - 0x28, 1, 0, c2);
3005                 }
3006               else if (c1 >= 0x2C && c1 <= 0x2F)
3007                 {       /* designation of DIMENSION1_CHARS96 character set */
3008                   ONE_MORE_BYTE (c2);
3009                   DECODE_DESIGNATION (c1 - 0x2C, 1, 1, c2);
3010                 }
3011               else
3012                 goto invalid_code;
3013               /* We must update these variables now.  */
3014               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3015               charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3016               continue;
3017             }
3018         }
3019
3020       /* Now we know CHARSET and 1st position code C1 of a character.
3021          Produce a decoded character while getting 2nd position code
3022          C2 if necessary.  */
3023       c1 &= 0x7F;
3024       if (CHARSET_DIMENSION (charset) > 1)
3025         {
3026           ONE_MORE_BYTE (c2);
3027           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3028             /* C2 is not in a valid range.  */
3029             goto invalid_code;
3030           c1 = (c1 << 8) | (c2 & 0x7F);
3031           if (CHARSET_DIMENSION (charset) > 2)
3032             {
3033               ONE_MORE_BYTE (c2);
3034               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3035                 /* C2 is not in a valid range.  */
3036                 goto invalid_code;
3037               c1 = (c1 << 8) | (c2 & 0x7F);
3038             }
3039         }
3040
3041       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3042       if (c < 0)
3043         {
3044           MAYBE_FINISH_COMPOSITION ();
3045           for (; src_base < src; src_base++, char_offset++)
3046             {
3047               if (ASCII_BYTE_P (*src_base))
3048                 *charbuf++ = *src_base;
3049               else
3050                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3051             }
3052         }
3053       else if (composition_state == COMPOSING_NO)
3054         {
3055           *charbuf++ = c;
3056           char_offset++;
3057         }
3058       else
3059         {
3060           components[component_idx++] = c;
3061           if (method == COMPOSITION_WITH_RULE
3062               || (method == COMPOSITION_WITH_RULE_ALTCHARS
3063                   && composition_state == COMPOSING_COMPONENT_CHAR))
3064             composition_state++;
3065         }
3066       continue;
3067
3068     invalid_code:
3069       MAYBE_FINISH_COMPOSITION ();
3070       src = src_base;
3071       consumed_chars = consumed_chars_base;
3072       ONE_MORE_BYTE (c);
3073       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3074       coding->errors++;
3075     }
3076
3077  no_more_source:
3078   coding->consumed_char += consumed_chars_base;
3079   coding->consumed = src_base - coding->source;
3080   coding->charbuf_used = charbuf - coding->charbuf;
3081 }
3082
3083
3084 /* ISO2022 encoding stuff.  */
3085
3086 /*
3087    It is not enough to say just "ISO2022" on encoding, we have to
3088    specify more details.  In Emacs, each coding system of ISO2022
3089    variant has the following specifications:
3090         1. Initial designation to G0 thru G3.
3091         2. Allows short-form designation?
3092         3. ASCII should be designated to G0 before control characters?
3093         4. ASCII should be designated to G0 at end of line?
3094         5. 7-bit environment or 8-bit environment?
3095         6. Use locking-shift?
3096         7. Use Single-shift?
3097    And the following two are only for Japanese:
3098         8. Use ASCII in place of JIS0201-1976-Roman?
3099         9. Use JISX0208-1983 in place of JISX0208-1978?
3100    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3101    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
3102    details.
3103 */
3104
3105 /* Produce codes (escape sequence) for designating CHARSET to graphic
3106    register REG at DST, and increment DST.  If <final-char> of CHARSET is
3107    '@', 'A', or 'B' and the coding system CODING allows, produce
3108    designation sequence of short-form.  */
3109
3110 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
3111   do {                                                                  \
3112     unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
3113     char *intermediate_char_94 = "()*+";                                \
3114     char *intermediate_char_96 = ",-./";                                \
3115     int revision = -1;                                                  \
3116     int c;                                                              \
3117                                                                         \
3118     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
3119       revision = CHARSET_ISO_REVISION (charset);                        \
3120                                                                         \
3121     if (revision >= 0)                                                  \
3122       {                                                                 \
3123         EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
3124         EMIT_ONE_BYTE ('@' + revision);                                 \
3125       }                                                                 \
3126     EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
3127     if (CHARSET_DIMENSION (charset) == 1)                               \
3128       {                                                                 \
3129         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3130           c = intermediate_char_94[reg];                                \
3131         else                                                            \
3132           c = intermediate_char_96[reg];                                \
3133         EMIT_ONE_ASCII_BYTE (c);                                        \
3134       }                                                                 \
3135     else                                                                \
3136       {                                                                 \
3137         EMIT_ONE_ASCII_BYTE ('$');                                      \
3138         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3139           {                                                             \
3140             if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM   \
3141                 || reg != 0                                             \
3142                 || final_char < '@' || final_char > 'B')                \
3143               EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);          \
3144           }                                                             \
3145         else                                                            \
3146           EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
3147       }                                                                 \
3148     EMIT_ONE_ASCII_BYTE (final_char);                                   \
3149                                                                         \
3150     CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
3151   } while (0)
3152
3153
3154 /* The following two macros produce codes (control character or escape
3155    sequence) for ISO2022 single-shift functions (single-shift-2 and
3156    single-shift-3).  */
3157
3158 #define ENCODE_SINGLE_SHIFT_2                                           \
3159   do {                                                                  \
3160     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3161       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
3162     else                                                                \
3163       EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
3164     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3165   } while (0)
3166
3167
3168 #define ENCODE_SINGLE_SHIFT_3                                           \
3169   do {                                                                  \
3170     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3171       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
3172     else                                                                \
3173       EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
3174     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3175   } while (0)
3176
3177
3178 /* The following four macros produce codes (control character or
3179    escape sequence) for ISO2022 locking-shift functions (shift-in,
3180    shift-out, locking-shift-2, and locking-shift-3).  */
3181
3182 #define ENCODE_SHIFT_IN                                 \
3183   do {                                                  \
3184     EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
3185     CODING_ISO_INVOCATION (coding, 0) = 0;              \
3186   } while (0)
3187
3188
3189 #define ENCODE_SHIFT_OUT                                \
3190   do {                                                  \
3191     EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
3192     CODING_ISO_INVOCATION (coding, 0) = 1;              \
3193   } while (0)
3194
3195
3196 #define ENCODE_LOCKING_SHIFT_2                          \
3197   do {                                                  \
3198     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3199     CODING_ISO_INVOCATION (coding, 0) = 2;              \
3200   } while (0)
3201
3202
3203 #define ENCODE_LOCKING_SHIFT_3                          \
3204   do {                                                  \
3205     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3206     CODING_ISO_INVOCATION (coding, 0) = 3;              \
3207   } while (0)
3208
3209
3210 /* Produce codes for a DIMENSION1 character whose character set is
3211    CHARSET and whose position-code is C1.  Designation and invocation
3212    sequences are also produced in advance if necessary.  */
3213
3214 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
3215   do {                                                                  \
3216     int id = CHARSET_ID (charset);                                      \
3217                                                                         \
3218     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
3219         && id == charset_ascii)                                         \
3220       {                                                                 \
3221         id = charset_jisx0201_roman;                                    \
3222         charset = CHARSET_FROM_ID (id);                                 \
3223       }                                                                 \
3224                                                                         \
3225     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3226       {                                                                 \
3227         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3228           EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                              \
3229         else                                                            \
3230           EMIT_ONE_BYTE (c1 | 0x80);                                    \
3231         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3232         break;                                                          \
3233       }                                                                 \
3234     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3235       {                                                                 \
3236         EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                                \
3237         break;                                                          \
3238       }                                                                 \
3239     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3240       {                                                                 \
3241         EMIT_ONE_BYTE (c1 | 0x80);                                      \
3242         break;                                                          \
3243       }                                                                 \
3244     else                                                                \
3245       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3246          must invoke it, or, at first, designate it to some graphic     \
3247          register.  Then repeat the loop to actually produce the        \
3248          character.  */                                                 \
3249       dst = encode_invocation_designation (charset, coding, dst,        \
3250                                            &produced_chars);            \
3251   } while (1)
3252
3253
3254 /* Produce codes for a DIMENSION2 character whose character set is
3255    CHARSET and whose position-codes are C1 and C2.  Designation and
3256    invocation codes are also produced in advance if necessary.  */
3257
3258 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
3259   do {                                                                  \
3260     int id = CHARSET_ID (charset);                                      \
3261                                                                         \
3262     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
3263         && id == charset_jisx0208)                                      \
3264       {                                                                 \
3265         id = charset_jisx0208_1978;                                     \
3266         charset = CHARSET_FROM_ID (id);                                 \
3267       }                                                                 \
3268                                                                         \
3269     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3270       {                                                                 \
3271         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3272           EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
3273         else                                                            \
3274           EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
3275         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3276         break;                                                          \
3277       }                                                                 \
3278     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3279       {                                                                 \
3280         EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
3281         break;                                                          \
3282       }                                                                 \
3283     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3284       {                                                                 \
3285         EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
3286         break;                                                          \
3287       }                                                                 \
3288     else                                                                \
3289       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3290          must invoke it, or, at first, designate it to some graphic     \
3291          register.  Then repeat the loop to actually produce the        \
3292          character.  */                                                 \
3293       dst = encode_invocation_designation (charset, coding, dst,        \
3294                                            &produced_chars);            \
3295   } while (1)
3296
3297
3298 #define ENCODE_ISO_CHARACTER(charset, c)                                   \
3299   do {                                                                     \
3300     int code = ENCODE_CHAR ((charset),(c));                                \
3301                                                                            \
3302     if (CHARSET_DIMENSION (charset) == 1)                                  \
3303       ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
3304     else                                                                   \
3305       ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3306   } while (0)
3307
3308
3309 /* Produce designation and invocation codes at a place pointed by DST
3310    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
3311    Return new DST.  */
3312
3313 unsigned char *
3314 encode_invocation_designation (charset, coding, dst, p_nchars)
3315      struct charset *charset;
3316      struct coding_system *coding;
3317      unsigned char *dst;
3318      int *p_nchars;
3319 {
3320   int multibytep = coding->dst_multibyte;
3321   int produced_chars = *p_nchars;
3322   int reg;                      /* graphic register number */
3323   int id = CHARSET_ID (charset);
3324
3325   /* At first, check designations.  */
3326   for (reg = 0; reg < 4; reg++)
3327     if (id == CODING_ISO_DESIGNATION (coding, reg))
3328       break;
3329
3330   if (reg >= 4)
3331     {
3332       /* CHARSET is not yet designated to any graphic registers.  */
3333       /* At first check the requested designation.  */
3334       reg = CODING_ISO_REQUEST (coding, id);
3335       if (reg < 0)
3336         /* Since CHARSET requests no special designation, designate it
3337            to graphic register 0.  */
3338         reg = 0;
3339
3340       ENCODE_DESIGNATION (charset, reg, coding);
3341     }
3342
3343   if (CODING_ISO_INVOCATION (coding, 0) != reg
3344       && CODING_ISO_INVOCATION (coding, 1) != reg)
3345     {
3346       /* Since the graphic register REG is not invoked to any graphic
3347          planes, invoke it to graphic plane 0.  */
3348       switch (reg)
3349         {
3350         case 0:                 /* graphic register 0 */
3351           ENCODE_SHIFT_IN;
3352           break;
3353
3354         case 1:                 /* graphic register 1 */
3355           ENCODE_SHIFT_OUT;
3356           break;
3357
3358         case 2:                 /* graphic register 2 */
3359           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3360             ENCODE_SINGLE_SHIFT_2;
3361           else
3362             ENCODE_LOCKING_SHIFT_2;
3363           break;
3364
3365         case 3:                 /* graphic register 3 */
3366           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3367             ENCODE_SINGLE_SHIFT_3;
3368           else
3369             ENCODE_LOCKING_SHIFT_3;
3370           break;
3371         }
3372     }
3373
3374   *p_nchars = produced_chars;
3375   return dst;
3376 }
3377
3378 /* The following three macros produce codes for indicating direction
3379    of text.  */
3380 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
3381   do {                                                                  \
3382     if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS)        \
3383       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
3384     else                                                                \
3385       EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
3386   } while (0)
3387
3388
3389 #define ENCODE_DIRECTION_R2L()                  \
3390   do {                                          \
3391     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3392     EMIT_TWO_ASCII_BYTES ('2', ']');            \
3393   } while (0)
3394
3395
3396 #define ENCODE_DIRECTION_L2R()                  \
3397   do {                                          \
3398     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3399     EMIT_TWO_ASCII_BYTES ('0', ']');            \
3400   } while (0)
3401
3402
3403 /* Produce codes for designation and invocation to reset the graphic
3404    planes and registers to initial state.  */
3405 #define ENCODE_RESET_PLANE_AND_REGISTER()                               \
3406   do {                                                                  \
3407     int reg;                                                            \
3408     struct charset *charset;                                            \
3409                                                                         \
3410     if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
3411       ENCODE_SHIFT_IN;                                                  \
3412     for (reg = 0; reg < 4; reg++)                                       \
3413       if (CODING_ISO_INITIAL (coding, reg) >= 0                         \
3414           && (CODING_ISO_DESIGNATION (coding, reg)                      \
3415               != CODING_ISO_INITIAL (coding, reg)))                     \
3416         {                                                               \
3417           charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3418           ENCODE_DESIGNATION (charset, reg, coding);                    \
3419         }                                                               \
3420   } while (0)
3421
3422
3423 /* Produce designation sequences of charsets in the line started from
3424    SRC to a place pointed by DST, and return updated DST.
3425
3426    If the current block ends before any end-of-line, we may fail to
3427    find all the necessary designations.  */
3428
3429 static unsigned char *
3430 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3431      struct coding_system *coding;
3432      int *charbuf, *charbuf_end;
3433      unsigned char *dst;
3434 {
3435   struct charset *charset;
3436   /* Table of charsets to be designated to each graphic register.  */
3437   int r[4];
3438   int c, found = 0, reg;
3439   int produced_chars = 0;
3440   int multibytep = coding->dst_multibyte;
3441   Lisp_Object attrs;
3442   Lisp_Object charset_list;
3443
3444   attrs = CODING_ID_ATTRS (coding->id);
3445   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3446   if (EQ (charset_list, Qiso_2022))
3447     charset_list = Viso_2022_charset_list;
3448
3449   for (reg = 0; reg < 4; reg++)
3450     r[reg] = -1;
3451
3452   while (found < 4)
3453     {
3454       int id;
3455
3456       c = *charbuf++;
3457       if (c == '\n')
3458         break;
3459       charset = char_charset (c, charset_list, NULL);
3460       id = CHARSET_ID (charset);
3461       reg = CODING_ISO_REQUEST (coding, id);
3462       if (reg >= 0 && r[reg] < 0)
3463         {
3464           found++;
3465           r[reg] = id;
3466         }
3467     }
3468
3469   if (found)
3470     {
3471       for (reg = 0; reg < 4; reg++)
3472         if (r[reg] >= 0
3473             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3474           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3475     }
3476
3477   return dst;
3478 }
3479
3480 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
3481
3482 static int
3483 encode_coding_iso_2022 (coding)
3484      struct coding_system *coding;
3485 {
3486   int multibytep = coding->dst_multibyte;
3487   int *charbuf = coding->charbuf;
3488   int *charbuf_end = charbuf + coding->charbuf_used;
3489   unsigned char *dst = coding->destination + coding->produced;
3490   unsigned char *dst_end = coding->destination + coding->dst_bytes;
3491   int safe_room = 16;
3492   int bol_designation
3493     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3494        && CODING_ISO_BOL (coding));
3495   int produced_chars = 0;
3496   Lisp_Object attrs, eol_type, charset_list;
3497   int ascii_compatible;
3498   int c;
3499
3500   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3501   setup_iso_safe_charsets (attrs);
3502   coding->safe_charsets
3503     = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs))->data;
3504
3505   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3506
3507   while (charbuf < charbuf_end)
3508     {
3509       ASSURE_DESTINATION (safe_room);
3510
3511       if (bol_designation)
3512         {
3513           unsigned char *dst_prev = dst;
3514
3515           /* We have to produce designation sequences if any now.  */
3516           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3517           bol_designation = 0;
3518           /* We are sure that designation sequences are all ASCII bytes.  */
3519           produced_chars += dst - dst_prev;
3520         }
3521
3522       c = *charbuf++;
3523
3524       /* Now encode the character C.  */
3525       if (c < 0x20 || c == 0x7F)
3526         {
3527           if (c == '\n'
3528               || (c == '\r' && EQ (eol_type, Qmac)))
3529             {
3530               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3531                 ENCODE_RESET_PLANE_AND_REGISTER ();
3532               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
3533                 {
3534                   int i;
3535
3536                   for (i = 0; i < 4; i++)
3537                     CODING_ISO_DESIGNATION (coding, i)
3538                       = CODING_ISO_INITIAL (coding, i);
3539                 }
3540               bol_designation
3541                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
3542             }
3543           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3544             ENCODE_RESET_PLANE_AND_REGISTER ();
3545           EMIT_ONE_ASCII_BYTE (c);
3546         }
3547       else if (ASCII_CHAR_P (c))
3548         {
3549           if (ascii_compatible)
3550             EMIT_ONE_ASCII_BYTE (c);
3551           else
3552             {
3553               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3554               ENCODE_ISO_CHARACTER (charset, c);
3555             }
3556         }
3557       else if (CHAR_BYTE8_P (c))
3558         {
3559           c = CHAR_TO_BYTE8 (c);
3560           EMIT_ONE_BYTE (c);
3561         }
3562       else
3563         {
3564           struct charset *charset = char_charset (c, charset_list, NULL);
3565
3566           if (!charset)
3567             {
3568               if (coding->mode & CODING_MODE_SAFE_ENCODING)
3569                 {
3570                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3571                   charset = CHARSET_FROM_ID (charset_ascii);
3572                 }
3573               else
3574                 {
3575                   c = coding->default_char;
3576                   charset = char_charset (c, charset_list, NULL);
3577                 }
3578             }
3579           ENCODE_ISO_CHARACTER (charset, c);
3580         }
3581     }
3582
3583   if (coding->mode & CODING_MODE_LAST_BLOCK
3584       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3585     {
3586       ASSURE_DESTINATION (safe_room);
3587       ENCODE_RESET_PLANE_AND_REGISTER ();
3588     }
3589   coding->result = CODING_RESULT_SUCCESS;
3590   CODING_ISO_BOL (coding) = bol_designation;
3591   coding->produced_char += produced_chars;
3592   coding->produced = dst - coding->destination;
3593   return 0;
3594 }
3595
3596 \f
3597 /*** 8,9. SJIS and BIG5 handlers ***/
3598
3599 /* Although SJIS and BIG5 are not ISO's coding system, they are used
3600    quite widely.  So, for the moment, Emacs supports them in the bare
3601    C code.  But, in the future, they may be supported only by CCL.  */
3602
3603 /* SJIS is a coding system encoding three character sets: ASCII, right
3604    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
3605    as is.  A character of charset katakana-jisx0201 is encoded by
3606    "position-code + 0x80".  A character of charset japanese-jisx0208
3607    is encoded in 2-byte but two position-codes are divided and shifted
3608    so that it fit in the range below.
3609
3610    --- CODE RANGE of SJIS ---
3611    (character set)      (range)
3612    ASCII                0x00 .. 0x7F
3613    KATAKANA-JISX0201    0xA0 .. 0xDF
3614    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
3615             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
3616    -------------------------------
3617
3618 */
3619
3620 /* BIG5 is a coding system encoding two character sets: ASCII and
3621    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
3622    character set and is encoded in two-byte.
3623
3624    --- CODE RANGE of BIG5 ---
3625    (character set)      (range)
3626    ASCII                0x00 .. 0x7F
3627    Big5 (1st byte)      0xA1 .. 0xFE
3628         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
3629    --------------------------
3630
3631   */
3632
3633 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3634    Check if a text is encoded in SJIS.  If it is, return
3635    CATEGORY_MASK_SJIS, else return 0.  */
3636
3637 static int
3638 detect_coding_sjis (coding, mask)
3639      struct coding_system *coding;
3640      int *mask;
3641 {
3642   unsigned char *src = coding->source, *src_base = src;
3643   unsigned char *src_end = coding->source + coding->src_bytes;
3644   int multibytep = coding->src_multibyte;
3645   int consumed_chars = 0;
3646   int found = 0;
3647   int c;
3648
3649   /* A coding system of this category is always ASCII compatible.  */
3650   src += coding->head_ascii;
3651
3652   while (1)
3653     {
3654       ONE_MORE_BYTE (c);
3655       if (c < 0x80)
3656         continue;
3657       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
3658         {
3659           ONE_MORE_BYTE (c);
3660           if (c < 0x40 || c == 0x7F || c > 0xFC)
3661             break;
3662           found = 1;
3663         }
3664       else if (c >= 0xA0 && c < 0xE0)
3665         found = 1;
3666       else
3667         break;
3668     }
3669   *mask &= ~CATEGORY_MASK_SJIS;
3670   return 0;
3671
3672  no_more_source:
3673   if (!found)
3674     return 0;
3675   *mask &= CATEGORY_MASK_SJIS;
3676   return 1;
3677 }
3678
3679 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
3680    Check if a text is encoded in BIG5.  If it is, return
3681    CATEGORY_MASK_BIG5, else return 0.  */
3682
3683 static int
3684 detect_coding_big5 (coding, mask)
3685      struct coding_system *coding;
3686      int *mask;
3687 {
3688   unsigned char *src = coding->source, *src_base = src;
3689   unsigned char *src_end = coding->source + coding->src_bytes;
3690   int multibytep = coding->src_multibyte;
3691   int consumed_chars = 0;
3692   int found = 0;
3693   int c;
3694
3695   /* A coding system of this category is always ASCII compatible.  */
3696   src += coding->head_ascii;
3697
3698   while (1)
3699     {
3700       ONE_MORE_BYTE (c);
3701       if (c < 0x80)
3702         continue;
3703       if (c >= 0xA1)
3704         {
3705           ONE_MORE_BYTE (c);
3706           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
3707             return 0;
3708           found = 1;
3709         }
3710       else
3711         break;
3712     }
3713   *mask &= ~CATEGORY_MASK_BIG5;
3714   return 0;
3715
3716  no_more_source:
3717   if (!found)
3718     return 0;
3719   *mask &= CATEGORY_MASK_BIG5;
3720   return 1;
3721 }
3722
3723 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
3724    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
3725
3726 static void
3727 decode_coding_sjis (coding)
3728      struct coding_system *coding;
3729 {
3730   unsigned char *src = coding->source + coding->consumed;
3731   unsigned char *src_end = coding->source + coding->src_bytes;
3732   unsigned char *src_base;
3733   int *charbuf = coding->charbuf;
3734   int *charbuf_end = charbuf + coding->charbuf_size;
3735   int consumed_chars = 0, consumed_chars_base;
3736   int multibytep = coding->src_multibyte;
3737   struct charset *charset_roman, *charset_kanji, *charset_kana;
3738   Lisp_Object attrs, eol_type, charset_list, val;
3739
3740   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3741
3742   val = charset_list;
3743   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3744   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3745   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val)));
3746
3747   while (1)
3748     {
3749       int c, c1;
3750
3751       src_base = src;
3752       consumed_chars_base = consumed_chars;
3753
3754       if (charbuf >= charbuf_end)
3755         break;
3756
3757       ONE_MORE_BYTE (c);
3758
3759       if (c == '\r')
3760         {
3761           if (EQ (eol_type, Qdos))
3762             {
3763               if (src == src_end)
3764                 goto no_more_source;
3765               if (*src == '\n')
3766                 ONE_MORE_BYTE (c);
3767             }
3768           else if (EQ (eol_type, Qmac))
3769             c = '\n';
3770         }
3771       else
3772         {
3773           struct charset *charset;
3774
3775           if (c < 0x80)
3776             charset = charset_roman;
3777           else
3778             {
3779               if (c >= 0xF0)
3780                 goto invalid_code;
3781               if (c < 0xA0 || c >= 0xE0)
3782                 {
3783                   /* SJIS -> JISX0208 */
3784                   ONE_MORE_BYTE (c1);
3785                   if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
3786                     goto invalid_code;
3787                   c = (c << 8) | c1;
3788                   SJIS_TO_JIS (c);
3789                   charset = charset_kanji;
3790                 }
3791               else
3792                 /* SJIS -> JISX0201-Kana */
3793                 charset = charset_kana;
3794             }
3795           CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3796         }
3797       *charbuf++ = c;
3798       continue;
3799
3800     invalid_code:
3801       src = src_base;
3802       consumed_chars = consumed_chars_base;
3803       ONE_MORE_BYTE (c);
3804       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3805       coding->errors++;
3806     }
3807
3808  no_more_source:
3809   coding->consumed_char += consumed_chars_base;
3810   coding->consumed = src_base - coding->source;
3811   coding->charbuf_used = charbuf - coding->charbuf;
3812 }
3813
3814 static void
3815 decode_coding_big5 (coding)
3816      struct coding_system *coding;
3817 {
3818   unsigned char *src = coding->source + coding->consumed;
3819   unsigned char *src_end = coding->source + coding->src_bytes;
3820   unsigned char *src_base;
3821   int *charbuf = coding->charbuf;
3822   int *charbuf_end = charbuf + coding->charbuf_size;
3823   int consumed_chars = 0, consumed_chars_base;
3824   int multibytep = coding->src_multibyte;
3825   struct charset *charset_roman, *charset_big5;
3826   Lisp_Object attrs, eol_type, charset_list, val;
3827
3828   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3829   val = charset_list;
3830   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3831   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
3832
3833   while (1)
3834     {
3835       int c, c1;
3836
3837       src_base = src;
3838       consumed_chars_base = consumed_chars;
3839
3840       if (charbuf >= charbuf_end)
3841         break;
3842
3843       ONE_MORE_BYTE (c);
3844
3845       if (c == '\r')
3846         {
3847           if (EQ (eol_type, Qdos))
3848             {
3849               if (src == src_end)
3850                 goto no_more_source;
3851               if (*src == '\n')
3852                 ONE_MORE_BYTE (c);
3853             }
3854           else if (EQ (eol_type, Qmac))
3855             c = '\n';
3856         }
3857       else
3858         {
3859           struct charset *charset;
3860           if (c < 0x80)
3861             charset = charset_roman;
3862           else
3863             {
3864               /* BIG5 -> Big5 */
3865               if (c < 0xA1 || c > 0xFE)
3866                 goto invalid_code;
3867               ONE_MORE_BYTE (c1);
3868               if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
3869                 goto invalid_code;
3870               c = c << 8 | c1;
3871               charset = charset_big5;
3872             }
3873           CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
3874         }
3875
3876       *charbuf++ = c;
3877       continue;
3878
3879     invalid_code:
3880       src = src_base;
3881       consumed_chars = consumed_chars_base;
3882       ONE_MORE_BYTE (c);
3883       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3884       coding->errors++;
3885     }
3886
3887  no_more_source:
3888   coding->consumed_char += consumed_chars_base;
3889   coding->consumed = src_base - coding->source;
3890   coding->charbuf_used = charbuf - coding->charbuf;
3891 }
3892
3893 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
3894    This function can encode charsets `ascii', `katakana-jisx0201',
3895    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
3896    are sure that all these charsets are registered as official charset
3897    (i.e. do not have extended leading-codes).  Characters of other
3898    charsets are produced without any encoding.  If SJIS_P is 1, encode
3899    SJIS text, else encode BIG5 text.  */
3900
3901 static int
3902 encode_coding_sjis (coding)
3903      struct coding_system *coding;
3904 {
3905   int multibytep = coding->dst_multibyte;
3906   int *charbuf = coding->charbuf;
3907   int *charbuf_end = charbuf + coding->charbuf_used;
3908   unsigned char *dst = coding->destination + coding->produced;
3909   unsigned char *dst_end = coding->destination + coding->dst_bytes;
3910   int safe_room = 4;
3911   int produced_chars = 0;
3912   Lisp_Object attrs, eol_type, charset_list, val;
3913   int ascii_compatible;
3914   struct charset *charset_roman, *charset_kanji, *charset_kana;
3915   int c;
3916
3917   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3918   val = charset_list;
3919   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3920   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3921   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
3922
3923   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3924
3925   while (charbuf < charbuf_end)
3926     {
3927       ASSURE_DESTINATION (safe_room);
3928       c = *charbuf++;
3929       /* Now encode the character C.  */
3930       if (ASCII_CHAR_P (c) && ascii_compatible)
3931         EMIT_ONE_ASCII_BYTE (c);
3932       else if (CHAR_BYTE8_P (c))
3933         {
3934           c = CHAR_TO_BYTE8 (c);
3935           EMIT_ONE_BYTE (c);
3936         }
3937       else
3938         {
3939           unsigned code;
3940           struct charset *charset = char_charset (c, charset_list, &code);
3941
3942           if (!charset)
3943             {
3944               if (coding->mode & CODING_MODE_SAFE_ENCODING)
3945                 {
3946                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3947                   charset = CHARSET_FROM_ID (charset_ascii);
3948                 }
3949               else
3950                 {
3951                   c = coding->default_char;
3952                   charset = char_charset (c, charset_list, &code);
3953                 }
3954             }
3955           if (code == CHARSET_INVALID_CODE (charset))
3956             abort ();
3957           if (charset == charset_kanji)
3958             {
3959               int c1, c2;
3960               JIS_TO_SJIS (code);
3961               c1 = code >> 8, c2 = code & 0xFF;
3962               EMIT_TWO_BYTES (c1, c2);
3963             }
3964           else if (charset == charset_kana)
3965             EMIT_ONE_BYTE (code | 0x80);
3966           else
3967             EMIT_ONE_ASCII_BYTE (code & 0x7F);
3968         }
3969     }
3970   coding->result = CODING_RESULT_SUCCESS;
3971   coding->produced_char += produced_chars;
3972   coding->produced = dst - coding->destination;
3973   return 0;
3974 }
3975
3976 static int
3977 encode_coding_big5 (coding)
3978      struct coding_system *coding;
3979 {
3980   int multibytep = coding->dst_multibyte;
3981   int *charbuf = coding->charbuf;
3982   int *charbuf_end = charbuf + coding->charbuf_used;
3983   unsigned char *dst = coding->destination + coding->produced;
3984   unsigned char *dst_end = coding->destination + coding->dst_bytes;
3985   int safe_room = 4;
3986   int produced_chars = 0;
3987   Lisp_Object attrs, eol_type, charset_list, val;
3988   int ascii_compatible;
3989   struct charset *charset_roman, *charset_big5;
3990   int c;
3991
3992   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
3993   val = charset_list;
3994   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
3995   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
3996   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3997
3998   while (charbuf < charbuf_end)
3999     {
4000       ASSURE_DESTINATION (safe_room);
4001       c = *charbuf++;
4002       /* Now encode the character C.  */
4003       if (ASCII_CHAR_P (c) && ascii_compatible)
4004         EMIT_ONE_ASCII_BYTE (c);
4005       else if (CHAR_BYTE8_P (c))
4006         {
4007           c = CHAR_TO_BYTE8 (c);
4008           EMIT_ONE_BYTE (c);
4009         }
4010       else
4011         {
4012           unsigned code;
4013           struct charset *charset = char_charset (c, charset_list, &code);
4014
4015           if (! charset)
4016             {
4017               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4018                 {
4019                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4020                   charset = CHARSET_FROM_ID (charset_ascii);
4021                 }
4022               else
4023                 {
4024                   c = coding->default_char;
4025                   charset = char_charset (c, charset_list, &code);
4026                 }
4027             }
4028           if (code == CHARSET_INVALID_CODE (charset))
4029             abort ();
4030           if (charset == charset_big5)
4031             {
4032               int c1, c2;
4033
4034               c1 = code >> 8, c2 = code & 0xFF;
4035               EMIT_TWO_BYTES (c1, c2);
4036             }
4037           else
4038             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4039         }
4040     }
4041   coding->result = CODING_RESULT_SUCCESS;
4042   coding->produced_char += produced_chars;
4043   coding->produced = dst - coding->destination;
4044   return 0;
4045 }
4046
4047 \f
4048 /*** 10. CCL handlers ***/
4049
4050 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4051    Check if a text is encoded in a coding system of which
4052    encoder/decoder are written in CCL program.  If it is, return
4053    CATEGORY_MASK_CCL, else return 0.  */
4054
4055 static int
4056 detect_coding_ccl (coding, mask)
4057      struct coding_system *coding;
4058      int *mask;
4059 {
4060   unsigned char *src = coding->source, *src_base = src;
4061   unsigned char *src_end = coding->source + coding->src_bytes;
4062   int multibytep = coding->src_multibyte;
4063   int consumed_chars = 0;
4064   int found = 0;
4065   unsigned char *valids = CODING_CCL_VALIDS (coding);
4066   int head_ascii = coding->head_ascii;
4067   Lisp_Object attrs;
4068
4069   coding = &coding_categories[coding_category_ccl];
4070   attrs = CODING_ID_ATTRS (coding->id);
4071   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4072     src += head_ascii;
4073
4074   while (1)
4075     {
4076       int c;
4077       ONE_MORE_BYTE (c);
4078       if (! valids[c])
4079         break;
4080       if (!found && valids[c] > 1)
4081         found = 1;
4082     }
4083   *mask &= ~CATEGORY_MASK_CCL;
4084   return 0;
4085
4086  no_more_source:
4087   if (!found)
4088     return 0;
4089   *mask &= CATEGORY_MASK_CCL;
4090   return 1;
4091 }
4092
4093 static void
4094 decode_coding_ccl (coding)
4095      struct coding_system *coding;
4096 {
4097   const unsigned char *src = coding->source + coding->consumed;
4098   unsigned char *src_end = coding->source + coding->src_bytes;
4099   int *charbuf = coding->charbuf;
4100   int *charbuf_end = charbuf + coding->charbuf_size;
4101   int consumed_chars = 0;
4102   int multibytep = coding->src_multibyte;
4103   struct ccl_program ccl;
4104   int source_charbuf[1024];
4105   int source_byteidx[1024];
4106
4107   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4108
4109   while (src < src_end)
4110     {
4111       const unsigned char *p = src;
4112       int *source, *source_end;
4113       int i = 0;
4114
4115       if (multibytep)
4116         while (i < 1024 && p < src_end)
4117           {
4118             source_byteidx[i] = p - src;
4119             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4120           }
4121       else
4122         while (i < 1024 && p < src_end)
4123           source_charbuf[i++] = *p++;
4124
4125       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4126         ccl.last_block = 1;
4127
4128       source = source_charbuf;
4129       source_end = source + i;
4130       while (source < source_end)
4131         {
4132           ccl_driver (&ccl, source, charbuf,
4133                       source_end - source, charbuf_end - charbuf);
4134           source += ccl.consumed;
4135           charbuf += ccl.produced;
4136           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4137             break;
4138         }
4139       if (source < source_end)
4140         src += source_byteidx[source - source_charbuf];
4141       else
4142         src = p;
4143       consumed_chars += source - source_charbuf;
4144
4145       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4146           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4147         break;
4148     }
4149
4150   switch (ccl.status)
4151     {
4152     case CCL_STAT_SUSPEND_BY_SRC:
4153       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4154       break;
4155     case CCL_STAT_SUSPEND_BY_DST:
4156       break;
4157     case CCL_STAT_QUIT:
4158     case CCL_STAT_INVALID_CMD:
4159       coding->result = CODING_RESULT_INTERRUPT;
4160       break;
4161     default:
4162       coding->result = CODING_RESULT_SUCCESS;
4163       break;
4164     }
4165   coding->consumed_char += consumed_chars;
4166   coding->consumed = src - coding->source;
4167   coding->charbuf_used = charbuf - coding->charbuf;
4168 }
4169
4170 static int
4171 encode_coding_ccl (coding)
4172      struct coding_system *coding;
4173 {
4174   struct ccl_program ccl;
4175   int multibytep = coding->dst_multibyte;
4176   int *charbuf = coding->charbuf;
4177   int *charbuf_end = charbuf + coding->charbuf_used;
4178   unsigned char *dst = coding->destination + coding->produced;
4179   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4180   unsigned char *adjusted_dst_end = dst_end - 1;
4181   int destination_charbuf[1024];
4182   int i, produced_chars = 0;
4183
4184   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4185
4186   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4187   ccl.dst_multibyte = coding->dst_multibyte;
4188
4189   while (charbuf < charbuf_end && dst < adjusted_dst_end)
4190     {
4191       int dst_bytes = dst_end - dst;
4192       if (dst_bytes > 1024)
4193         dst_bytes = 1024;
4194
4195       ccl_driver (&ccl, charbuf, destination_charbuf,
4196                   charbuf_end - charbuf, dst_bytes);
4197       charbuf += ccl.consumed;
4198       if (multibytep)
4199         for (i = 0; i < ccl.produced; i++)
4200           EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4201       else
4202         {
4203           for (i = 0; i < ccl.produced; i++)
4204             *dst++ = destination_charbuf[i] & 0xFF;
4205           produced_chars += ccl.produced;
4206         }
4207     }
4208
4209   switch (ccl.status)
4210     {
4211     case CCL_STAT_SUSPEND_BY_SRC:
4212       coding->result = CODING_RESULT_INSUFFICIENT_SRC;
4213       break;
4214     case CCL_STAT_SUSPEND_BY_DST:
4215       coding->result = CODING_RESULT_INSUFFICIENT_DST;
4216       break;
4217     case CCL_STAT_QUIT:
4218     case CCL_STAT_INVALID_CMD:
4219       coding->result = CODING_RESULT_INTERRUPT;
4220       break;
4221     default:
4222       coding->result = CODING_RESULT_SUCCESS;
4223       break;
4224     }
4225
4226   coding->produced_char += produced_chars;
4227   coding->produced = dst - coding->destination;
4228   return 0;
4229 }
4230
4231
4232 \f
4233 /*** 10, 11. no-conversion handlers ***/
4234
4235 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
4236
4237 static void
4238 decode_coding_raw_text (coding)
4239      struct coding_system *coding;
4240 {
4241   coding->chars_at_source = 1;
4242   coding->consumed_char = 0;
4243   coding->consumed = 0;
4244   coding->result = CODING_RESULT_SUCCESS;
4245 }
4246
4247 static int
4248 encode_coding_raw_text (coding)
4249      struct coding_system *coding;
4250 {
4251   int multibytep = coding->dst_multibyte;
4252   int *charbuf = coding->charbuf;
4253   int *charbuf_end = coding->charbuf + coding->charbuf_used;
4254   unsigned char *dst = coding->destination + coding->produced;
4255   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4256   int produced_chars = 0;
4257   int c;
4258
4259   if (multibytep)
4260     {
4261       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4262
4263       if (coding->src_multibyte)
4264         while (charbuf < charbuf_end)
4265           {
4266             ASSURE_DESTINATION (safe_room);
4267             c = *charbuf++;
4268             if (ASCII_CHAR_P (c))
4269               EMIT_ONE_ASCII_BYTE (c);
4270             else if (CHAR_BYTE8_P (c))
4271               {
4272                 c = CHAR_TO_BYTE8 (c);
4273                 EMIT_ONE_BYTE (c);
4274               }
4275             else
4276               {
4277                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4278
4279                 CHAR_STRING_ADVANCE (c, p1);
4280                 while (p0 < p1)
4281                   {
4282                     EMIT_ONE_BYTE (*p0);
4283                     p0++;
4284                   }
4285               }
4286           }
4287       else
4288         while (charbuf < charbuf_end)
4289           {
4290             ASSURE_DESTINATION (safe_room);
4291             c = *charbuf++;
4292             EMIT_ONE_BYTE (c);
4293           }
4294     }
4295   else
4296     {
4297       if (coding->src_multibyte)
4298         {
4299           int safe_room = MAX_MULTIBYTE_LENGTH;
4300
4301           while (charbuf < charbuf_end)
4302             {
4303               ASSURE_DESTINATION (safe_room);
4304               c = *charbuf++;
4305               if (ASCII_CHAR_P (c))
4306                 *dst++ = c;
4307               else if (CHAR_BYTE8_P (c))
4308                 *dst++ = CHAR_TO_BYTE8 (c);
4309               else
4310                 CHAR_STRING_ADVANCE (c, dst);
4311               produced_chars++;
4312             }
4313         }
4314       else
4315         {
4316           ASSURE_DESTINATION (charbuf_end - charbuf);
4317           while (charbuf < charbuf_end && dst < dst_end)
4318             *dst++ = *charbuf++;
4319           produced_chars = dst - (coding->destination + coding->dst_bytes);
4320         }
4321     }
4322   coding->result = CODING_RESULT_SUCCESS;
4323   coding->produced_char += produced_chars;
4324   coding->produced = dst - coding->destination;
4325   return 0;
4326 }
4327
4328 static int
4329 detect_coding_charset (coding, mask)
4330      struct coding_system *coding;
4331      int *mask;
4332 {
4333   unsigned char *src = coding->source, *src_base = src;
4334   unsigned char *src_end = coding->source + coding->src_bytes;
4335   int multibytep = coding->src_multibyte;
4336   int consumed_chars = 0;
4337   Lisp_Object attrs, valids;
4338
4339   coding = &coding_categories[coding_category_charset];
4340   attrs = CODING_ID_ATTRS (coding->id);
4341   valids = AREF (attrs, coding_attr_charset_valids);
4342
4343   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4344     src += coding->head_ascii;
4345
4346   while (1)
4347     {
4348       int c;
4349
4350       ONE_MORE_BYTE (c);
4351       if (NILP (AREF (valids, c)))
4352         break;
4353     }
4354   *mask &= ~CATEGORY_MASK_CHARSET;
4355   return 0;
4356
4357  no_more_source:
4358   *mask &= CATEGORY_MASK_CHARSET;
4359   return 1;
4360 }
4361
4362 static void
4363 decode_coding_charset (coding)
4364      struct coding_system *coding;
4365 {
4366   unsigned char *src = coding->source + coding->consumed;
4367   unsigned char *src_end = coding->source + coding->src_bytes;
4368   unsigned char *src_base;
4369   int *charbuf = coding->charbuf;
4370   int *charbuf_end = charbuf + coding->charbuf_size;
4371   int consumed_chars = 0, consumed_chars_base;
4372   int multibytep = coding->src_multibyte;
4373   Lisp_Object attrs, eol_type, charset_list, valids;
4374
4375   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4376   valids = AREF (attrs, coding_attr_charset_valids);
4377
4378   while (1)
4379     {
4380       int c;
4381
4382       src_base = src;
4383       consumed_chars_base = consumed_chars;
4384
4385       if (charbuf >= charbuf_end)
4386         break;
4387
4388       ONE_MORE_BYTE (c);
4389       if (c == '\r')
4390         {
4391           /* Here we assume that no charset maps '\r' to something
4392              else.  */
4393           if (EQ (eol_type, Qdos))
4394             {
4395               if (src < src_end
4396                   && *src == '\n')
4397                 ONE_MORE_BYTE (c);
4398             }
4399           else if (EQ (eol_type, Qmac))
4400             c = '\n';
4401         }
4402       else
4403         {
4404           Lisp_Object val;
4405           struct charset *charset;
4406           int dim;
4407           int len = 1;
4408           unsigned code = c;
4409
4410           val = AREF (valids, c);
4411           if (NILP (val))
4412             goto invalid_code;
4413           if (INTEGERP (val))
4414             {
4415               charset = CHARSET_FROM_ID (XFASTINT (val));
4416               dim = CHARSET_DIMENSION (charset);
4417               while (len < dim)
4418                 {
4419                   ONE_MORE_BYTE (c);
4420                   code = (code << 8) | c;
4421                   len++;
4422                 }
4423               CODING_DECODE_CHAR (coding, src, src_base, src_end,
4424                                   charset, code, c);
4425             }
4426           else
4427             {
4428               /* VAL is a list of charset IDs.  It is assured that the
4429                  list is sorted by charset dimensions (smaller one
4430                  comes first).  */
4431               while (CONSP (val))
4432                 {
4433                   charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4434                   dim = CHARSET_DIMENSION (charset);
4435                   while (len < dim)
4436                     {
4437                       ONE_MORE_BYTE (c);
4438                       code = (code << 8) | c;
4439                       len++;
4440                     }
4441                   CODING_DECODE_CHAR (coding, src, src_base,
4442                                       src_end, charset, code, c);
4443                   if (c >= 0)
4444                     break;
4445                   val = XCDR (val);
4446                 }
4447             }
4448           if (c < 0)
4449             goto invalid_code;
4450         }
4451       *charbuf++ = c;
4452       continue;
4453
4454     invalid_code:
4455       src = src_base;
4456       consumed_chars = consumed_chars_base;
4457       ONE_MORE_BYTE (c);
4458       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4459       coding->errors++;
4460     }
4461
4462  no_more_source:
4463   coding->consumed_char += consumed_chars_base;
4464   coding->consumed = src_base - coding->source;
4465   coding->charbuf_used = charbuf - coding->charbuf;
4466 }
4467
4468 static int
4469 encode_coding_charset (coding)
4470      struct coding_system *coding;
4471 {
4472   int multibytep = coding->dst_multibyte;
4473   int *charbuf = coding->charbuf;
4474   int *charbuf_end = charbuf + coding->charbuf_used;
4475   unsigned char *dst = coding->destination + coding->produced;
4476   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4477   int safe_room = MAX_MULTIBYTE_LENGTH;
4478   int produced_chars = 0;
4479   Lisp_Object attrs, eol_type, charset_list;
4480   int ascii_compatible;
4481   int c;
4482
4483   CODING_GET_INFO (coding, attrs, eol_type, charset_list);
4484   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4485
4486   while (charbuf < charbuf_end)
4487     {
4488       struct charset *charset;
4489       unsigned code;
4490
4491       ASSURE_DESTINATION (safe_room);
4492       c = *charbuf++;
4493       if (ascii_compatible && ASCII_CHAR_P (c))
4494         EMIT_ONE_ASCII_BYTE (c);
4495       else if (CHAR_BYTE8_P (c))
4496         {
4497           c = CHAR_TO_BYTE8 (c);
4498           EMIT_ONE_BYTE (c);
4499         }
4500       else
4501         {
4502           charset = char_charset (c, charset_list, &code);
4503           if (charset)
4504             {
4505               if (CHARSET_DIMENSION (charset) == 1)
4506                 EMIT_ONE_BYTE (code);
4507               else if (CHARSET_DIMENSION (charset) == 2)
4508                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
4509               else if (CHARSET_DIMENSION (charset) == 3)
4510                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
4511               else
4512                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
4513                                  (code >> 8) & 0xFF, code & 0xFF);
4514             }
4515           else
4516             {
4517               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4518                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4519               else
4520                 c = coding->default_char;
4521               EMIT_ONE_BYTE (c);
4522             }
4523         }
4524     }
4525
4526   coding->result = CODING_RESULT_SUCCESS;
4527   coding->produced_char += produced_chars;
4528   coding->produced = dst - coding->destination;
4529   return 0;
4530 }
4531
4532 \f
4533 /*** 7. C library functions ***/
4534
4535 /* Setup coding context CODING from information about CODING_SYSTEM.
4536    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
4537    CODING_SYSTEM is invalid, signal an error.  */
4538
4539 void
4540 setup_coding_system (coding_system, coding)
4541      Lisp_Object coding_system;
4542      struct coding_system *coding;
4543 {
4544   Lisp_Object attrs;
4545   Lisp_Object eol_type;
4546   Lisp_Object coding_type;
4547   Lisp_Object val;
4548
4549   if (NILP (coding_system))
4550     coding_system = Qno_conversion;
4551
4552   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
4553
4554   attrs = CODING_ID_ATTRS (coding->id);
4555   eol_type = CODING_ID_EOL_TYPE (coding->id);
4556
4557   coding->mode = 0;
4558   coding->head_ascii = -1;
4559   coding->common_flags
4560     = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0);
4561
4562   val = CODING_ATTR_SAFE_CHARSETS (attrs);
4563   coding->max_charset_id = XSTRING (val)->size - 1;
4564   coding->safe_charsets = (char *) XSTRING (val)->data;
4565   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
4566
4567   coding_type = CODING_ATTR_TYPE (attrs);
4568   if (EQ (coding_type, Qundecided))
4569     {
4570       coding->detector = NULL;
4571       coding->decoder = decode_coding_raw_text;
4572       coding->encoder = encode_coding_raw_text;
4573       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
4574     }
4575   else if (EQ (coding_type, Qiso_2022))
4576     {
4577       int i;
4578       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
4579
4580       /* Invoke graphic register 0 to plane 0.  */
4581       CODING_ISO_INVOCATION (coding, 0) = 0;
4582       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
4583       CODING_ISO_INVOCATION (coding, 1)
4584         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
4585       /* Setup the initial status of designation.  */
4586       for (i = 0; i < 4; i++)
4587         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
4588       /* Not single shifting initially.  */
4589       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
4590       /* Beginning of buffer should also be regarded as bol. */
4591       CODING_ISO_BOL (coding) = 1;
4592       coding->detector = detect_coding_iso_2022;
4593       coding->decoder = decode_coding_iso_2022;
4594       coding->encoder = encode_coding_iso_2022;
4595       if (flags & CODING_ISO_FLAG_SAFE)
4596         coding->mode |= CODING_MODE_SAFE_ENCODING;
4597       coding->common_flags
4598         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4599             | CODING_REQUIRE_FLUSHING_MASK);
4600       if (flags & CODING_ISO_FLAG_COMPOSITION)
4601         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
4602       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
4603         {
4604           setup_iso_safe_charsets (attrs);
4605           val = CODING_ATTR_SAFE_CHARSETS (attrs);
4606           coding->max_charset_id = XSTRING (val)->size - 1;
4607           coding->safe_charsets = (char *) XSTRING (val)->data;
4608         }
4609       CODING_ISO_FLAGS (coding) = flags;
4610     }
4611   else if (EQ (coding_type, Qcharset))
4612     {
4613       coding->detector = detect_coding_charset;
4614       coding->decoder = decode_coding_charset;
4615       coding->encoder = encode_coding_charset;
4616       coding->common_flags
4617         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4618     }
4619   else if (EQ (coding_type, Qutf_8))
4620     {
4621       coding->detector = detect_coding_utf_8;
4622       coding->decoder = decode_coding_utf_8;
4623       coding->encoder = encode_coding_utf_8;
4624       coding->common_flags
4625         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4626     }
4627   else if (EQ (coding_type, Qutf_16))
4628     {
4629       val = AREF (attrs, coding_attr_utf_16_bom);
4630       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
4631                                     : EQ (val, Qt) ? utf_16_with_bom
4632                                     : utf_16_without_bom);
4633       val = AREF (attrs, coding_attr_utf_16_endian);
4634       CODING_UTF_16_ENDIAN (coding) = (NILP (val) ? utf_16_big_endian
4635                                        : utf_16_little_endian);
4636       CODING_UTF_16_SURROGATE (coding) = 0;
4637       coding->detector = detect_coding_utf_16;
4638       coding->decoder = decode_coding_utf_16;
4639       coding->encoder = encode_coding_utf_16;
4640       coding->common_flags
4641         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4642     }
4643   else if (EQ (coding_type, Qccl))
4644     {
4645       coding->detector = detect_coding_ccl;
4646       coding->decoder = decode_coding_ccl;
4647       coding->encoder = encode_coding_ccl;
4648       coding->common_flags
4649         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
4650             | CODING_REQUIRE_FLUSHING_MASK);
4651     }
4652   else if (EQ (coding_type, Qemacs_mule))
4653     {
4654       coding->detector = detect_coding_emacs_mule;
4655       coding->decoder = decode_coding_emacs_mule;
4656       coding->encoder = encode_coding_emacs_mule;
4657       coding->common_flags
4658         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4659       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
4660           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
4661         {
4662           Lisp_Object tail, safe_charsets;
4663           int max_charset_id = 0;
4664
4665           for (tail = Vemacs_mule_charset_list; CONSP (tail);
4666                tail = XCDR (tail))
4667             if (max_charset_id < XFASTINT (XCAR (tail)))
4668               max_charset_id = XFASTINT (XCAR (tail));
4669           safe_charsets = Fmake_string (make_number (max_charset_id + 1),
4670                                         make_number (255));
4671           for (tail = Vemacs_mule_charset_list; CONSP (tail);
4672                tail = XCDR (tail))
4673             XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
4674           coding->max_charset_id = max_charset_id;
4675           coding->safe_charsets = (char *) XSTRING (safe_charsets)->data;
4676         }
4677     }
4678   else if (EQ (coding_type, Qshift_jis))
4679     {
4680       coding->detector = detect_coding_sjis;
4681       coding->decoder = decode_coding_sjis;
4682       coding->encoder = encode_coding_sjis;
4683       coding->common_flags
4684         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4685     }
4686   else if (EQ (coding_type, Qbig5))
4687     {
4688       coding->detector = detect_coding_big5;
4689       coding->decoder = decode_coding_big5;
4690       coding->encoder = encode_coding_big5;
4691       coding->common_flags
4692         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
4693     }
4694   else                          /* EQ (coding_type, Qraw_text) */
4695     {
4696       coding->detector = NULL;
4697       coding->decoder = decode_coding_raw_text;
4698       coding->encoder = encode_coding_raw_text;
4699       coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
4700     }
4701
4702   return;
4703 }
4704
4705 /* Return raw-text or one of its subsidiaries that has the same
4706    eol_type as CODING-SYSTEM.  */
4707
4708 Lisp_Object
4709 raw_text_coding_system (coding_system)
4710      Lisp_Object coding_system;
4711 {
4712   Lisp_Object spec, attrs;
4713   Lisp_Object eol_type, raw_text_eol_type;
4714
4715   spec = CODING_SYSTEM_SPEC (coding_system);
4716   attrs = AREF (spec, 0);
4717
4718   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
4719     return coding_system;
4720
4721   eol_type = AREF (spec, 2);
4722   if (VECTORP (eol_type))
4723     return Qraw_text;
4724   spec = CODING_SYSTEM_SPEC (Qraw_text);
4725   raw_text_eol_type = AREF (spec, 2);
4726   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
4727           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
4728           : AREF (raw_text_eol_type, 2));
4729 }
4730
4731
4732 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
4733    does, return one of the subsidiary that has the same eol-spec as
4734    PARENT.  Otherwise, return CODING_SYSTEM.  */
4735
4736 Lisp_Object
4737 coding_inherit_eol_type (coding_system, parent)
4738      Lisp_Object coding_system, parent;
4739 {
4740   Lisp_Object spec, attrs, eol_type;
4741
4742   spec = CODING_SYSTEM_SPEC (coding_system);
4743   attrs = AREF (spec, 0);
4744   eol_type = AREF (spec, 2);
4745   if (VECTORP (eol_type))
4746     {
4747       Lisp_Object parent_spec;
4748       Lisp_Object parent_eol_type;
4749
4750       parent_spec
4751         = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system);
4752       parent_eol_type = AREF (parent_spec, 2);
4753       if (EQ (parent_eol_type, Qunix))
4754         coding_system = AREF (eol_type, 0);
4755       else if (EQ (parent_eol_type, Qdos))
4756         coding_system = AREF (eol_type, 1);
4757       else if (EQ (parent_eol_type, Qmac))
4758         coding_system = AREF (eol_type, 2);
4759     }
4760   return coding_system;
4761 }
4762
4763 /* Emacs has a mechanism to automatically detect a coding system if it
4764    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
4765    it's impossible to distinguish some coding systems accurately
4766    because they use the same range of codes.  So, at first, coding
4767    systems are categorized into 7, those are:
4768
4769    o coding-category-emacs-mule
4770
4771         The category for a coding system which has the same code range
4772         as Emacs' internal format.  Assigned the coding-system (Lisp
4773         symbol) `emacs-mule' by default.
4774
4775    o coding-category-sjis
4776
4777         The category for a coding system which has the same code range
4778         as SJIS.  Assigned the coding-system (Lisp
4779         symbol) `japanese-shift-jis' by default.
4780
4781    o coding-category-iso-7
4782
4783         The category for a coding system which has the same code range
4784         as ISO2022 of 7-bit environment.  This doesn't use any locking
4785         shift and single shift functions.  This can encode/decode all
4786         charsets.  Assigned the coding-system (Lisp symbol)
4787         `iso-2022-7bit' by default.
4788
4789    o coding-category-iso-7-tight
4790
4791         Same as coding-category-iso-7 except that this can
4792         encode/decode only the specified charsets.
4793
4794    o coding-category-iso-8-1
4795
4796         The category for a coding system which has the same code range
4797         as ISO2022 of 8-bit environment and graphic plane 1 used only
4798         for DIMENSION1 charset.  This doesn't use any locking shift
4799         and single shift functions.  Assigned the coding-system (Lisp
4800         symbol) `iso-latin-1' by default.
4801
4802    o coding-category-iso-8-2
4803
4804         The category for a coding system which has the same code range
4805         as ISO2022 of 8-bit environment and graphic plane 1 used only
4806         for DIMENSION2 charset.  This doesn't use any locking shift
4807         and single shift functions.  Assigned the coding-system (Lisp
4808         symbol) `japanese-iso-8bit' by default.
4809
4810    o coding-category-iso-7-else
4811
4812         The category for a coding system which has the same code range
4813         as ISO2022 of 7-bit environemnt but uses locking shift or
4814         single shift functions.  Assigned the coding-system (Lisp
4815         symbol) `iso-2022-7bit-lock' by default.
4816
4817    o coding-category-iso-8-else
4818
4819         The category for a coding system which has the same code range
4820         as ISO2022 of 8-bit environemnt but uses locking shift or
4821         single shift functions.  Assigned the coding-system (Lisp
4822         symbol) `iso-2022-8bit-ss2' by default.
4823
4824    o coding-category-big5
4825
4826         The category for a coding system which has the same code range
4827         as BIG5.  Assigned the coding-system (Lisp symbol)
4828         `cn-big5' by default.
4829
4830    o coding-category-utf-8
4831
4832         The category for a coding system which has the same code range
4833         as UTF-8 (cf. RFC2279).  Assigned the coding-system (Lisp
4834         symbol) `utf-8' by default.
4835
4836    o coding-category-utf-16-be
4837
4838         The category for a coding system in which a text has an
4839         Unicode signature (cf. Unicode Standard) in the order of BIG
4840         endian at the head.  Assigned the coding-system (Lisp symbol)
4841         `utf-16-be' by default.
4842
4843    o coding-category-utf-16-le
4844
4845         The category for a coding system in which a text has an
4846         Unicode signature (cf. Unicode Standard) in the order of
4847         LITTLE endian at the head.  Assigned the coding-system (Lisp
4848         symbol) `utf-16-le' by default.
4849
4850    o coding-category-ccl
4851
4852         The category for a coding system of which encoder/decoder is
4853         written in CCL programs.  The default value is nil, i.e., no
4854         coding system is assigned.
4855
4856    o coding-category-binary
4857
4858         The category for a coding system not categorized in any of the
4859         above.  Assigned the coding-system (Lisp symbol)
4860         `no-conversion' by default.
4861
4862    Each of them is a Lisp symbol and the value is an actual
4863    `coding-system's (this is also a Lisp symbol) assigned by a user.
4864    What Emacs does actually is to detect a category of coding system.
4865    Then, it uses a `coding-system' assigned to it.  If Emacs can't
4866    decide only one possible category, it selects a category of the
4867    highest priority.  Priorities of categories are also specified by a
4868    user in a Lisp variable `coding-category-list'.
4869
4870 */
4871
4872 #define EOL_SEEN_NONE   0
4873 #define EOL_SEEN_LF     1
4874 #define EOL_SEEN_CR     2
4875 #define EOL_SEEN_CRLF   4
4876
4877 /* Detect how end-of-line of a text of length CODING->src_bytes
4878    pointed by CODING->source is encoded.  Return one of
4879    EOL_SEEN_XXX.  */
4880
4881 #define MAX_EOL_CHECK_COUNT 3
4882
4883 static int
4884 detect_eol (coding, source, src_bytes)
4885      struct coding_system *coding;
4886      unsigned char *source;
4887      EMACS_INT src_bytes;
4888 {
4889   Lisp_Object attrs, coding_type;
4890   unsigned char *src = source, *src_end = src + src_bytes;
4891   unsigned char c;
4892   int total  = 0;
4893   int eol_seen = EOL_SEEN_NONE;
4894
4895   attrs = CODING_ID_ATTRS (coding->id);
4896   coding_type = CODING_ATTR_TYPE (attrs);
4897
4898   if (EQ (coding_type, Qccl))
4899     {
4900       int msb, lsb;
4901
4902       msb = coding->spec.utf_16.endian == utf_16_little_endian;
4903       lsb = 1 - msb;
4904
4905       while (src + 1 < src_end)
4906         {
4907           c = src[lsb];
4908           if (src[msb] == 0 && (c == '\n' || c == '\r'))
4909             {
4910               int this_eol;
4911
4912               if (c == '\n')
4913                 this_eol = EOL_SEEN_LF;
4914               else if (src + 3 >= src_end
4915                        || src[msb + 2] != 0
4916                        || src[lsb + 2] != '\n')
4917                 this_eol = EOL_SEEN_CR;
4918               else
4919                 this_eol = EOL_SEEN_CRLF;
4920
4921               if (eol_seen == EOL_SEEN_NONE)
4922                 /* This is the first end-of-line.  */
4923                 eol_seen = this_eol;
4924               else if (eol_seen != this_eol)
4925                 {
4926                   /* The found type is different from what found before.  */
4927                   eol_seen = EOL_SEEN_LF;
4928                   break;
4929                 }
4930               if (++total == MAX_EOL_CHECK_COUNT)
4931                 break;
4932             }
4933           src += 2;
4934         }
4935     }
4936   else
4937     {
4938       while (src < src_end)
4939         {
4940           c = *src++;
4941           if (c == '\n' || c == '\r')
4942             {
4943               int this_eol;
4944
4945               if (c == '\n')
4946                 this_eol = EOL_SEEN_LF;
4947               else if (src >= src_end || *src != '\n')
4948                 this_eol = EOL_SEEN_CR;
4949               else
4950                 this_eol = EOL_SEEN_CRLF, src++;
4951
4952               if (eol_seen == EOL_SEEN_NONE)
4953                 /* This is the first end-of-line.  */
4954                 eol_seen = this_eol;
4955               else if (eol_seen != this_eol)
4956                 {
4957                   /* The found type is different from what found before.  */
4958                   eol_seen = EOL_SEEN_LF;
4959                   break;
4960                 }
4961               if (++total == MAX_EOL_CHECK_COUNT)
4962                 break;
4963             }
4964         }
4965     }
4966   return eol_seen;
4967 }
4968
4969
4970 static void
4971 adjust_coding_eol_type (coding, eol_seen)
4972      struct coding_system *coding;
4973      int eol_seen;
4974 {
4975   Lisp_Object eol_type;
4976
4977   eol_type = CODING_ID_EOL_TYPE (coding->id);
4978   if (eol_seen & EOL_SEEN_LF)
4979     coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
4980   else if (eol_seen & EOL_SEEN_CRLF)
4981     coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
4982   else if (eol_seen & EOL_SEEN_CR)
4983     coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
4984 }
4985
4986 /* Detect how a text specified in CODING is encoded.  If a coding
4987    system is detected, update fields of CODING by the detected coding
4988    system.  */
4989
4990 void
4991 detect_coding (coding)
4992      struct coding_system *coding;
4993 {
4994   unsigned char *src, *src_end;
4995   Lisp_Object attrs, coding_type;
4996
4997   coding->consumed = coding->consumed_char = 0;
4998   coding->produced = coding->produced_char = 0;
4999   coding_set_source (coding);
5000
5001   src_end = coding->source + coding->src_bytes;
5002
5003   /* If we have not yet decided the text encoding type, detect it
5004      now.  */
5005   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5006     {
5007       int mask = CATEGORY_MASK_ANY;
5008       int c, i;
5009
5010       for (src = coding->source; src < src_end; src++)
5011         {
5012           c = *src;
5013           if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
5014                                         || c == ISO_CODE_SI
5015                                         || c == ISO_CODE_SO)))
5016             break;
5017         }
5018       coding->head_ascii = src - (coding->source + coding->consumed);
5019
5020       if (coding->head_ascii < coding->src_bytes)
5021         {
5022           int detected = 0;
5023
5024           for (i = 0; i < coding_category_raw_text; i++)
5025             {
5026               enum coding_category category = coding_priorities[i];
5027               struct coding_system *this = coding_categories + category;
5028
5029               if (category >= coding_category_raw_text
5030                   || detected & (1 << category))
5031                 continue;
5032
5033               if (this->id < 0)
5034                 {
5035                   /* No coding system of this category is defined.  */
5036                   mask &= ~(1 << category);
5037                 }
5038               else
5039                 {
5040                   detected |= detected_mask[category];
5041                   if ((*(this->detector)) (coding, &mask))
5042                     break;
5043                 }
5044             }
5045           if (! mask)
5046             setup_coding_system (Qraw_text, coding);
5047           else if (mask != CATEGORY_MASK_ANY)
5048             for (i = 0; i < coding_category_raw_text; i++)
5049               {
5050                 enum coding_category category = coding_priorities[i];
5051                 struct coding_system *this = coding_categories + category;
5052
5053                 if (mask & (1 << category))
5054                   {
5055                     setup_coding_system (CODING_ID_NAME (this->id), coding);
5056                     break;
5057                   }
5058               }
5059         }
5060     }
5061
5062   attrs = CODING_ID_ATTRS (coding->id);
5063   coding_type = CODING_ATTR_TYPE (attrs);
5064
5065   /* If we have not yet decided the EOL type, detect it now.  But, the
5066      detection is impossible for a CCL based coding system, in which
5067      case, we detct the EOL type after decoding.  */
5068   if (VECTORP (CODING_ID_EOL_TYPE (coding->id))
5069       && ! EQ (coding_type, Qccl))
5070     {
5071       int eol_seen = detect_eol (coding, coding->source, coding->src_bytes);
5072
5073       if (eol_seen != EOL_SEEN_NONE)
5074         adjust_coding_eol_type (coding, eol_seen);
5075     }
5076 }
5077
5078
5079 static void
5080 decode_eol (coding)
5081      struct coding_system *coding;
5082 {
5083   if (VECTORP (CODING_ID_EOL_TYPE (coding->id)))
5084     {
5085       unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5086       unsigned char *pend = p + coding->produced;
5087       int eol_seen = EOL_SEEN_NONE;
5088
5089       for (; p < pend; p++)
5090         {
5091           if (*p == '\n')
5092             eol_seen |= EOL_SEEN_LF;
5093           else if (*p == '\r')
5094             {
5095               if (p + 1 < pend && *(p + 1) == '\n')
5096                 {
5097                   eol_seen |= EOL_SEEN_CRLF;
5098                   p++;
5099                 }
5100               else
5101                 eol_seen |= EOL_SEEN_CR;
5102             }
5103         }
5104       if (eol_seen != EOL_SEEN_NONE)
5105         adjust_coding_eol_type (coding, eol_seen);
5106     }
5107
5108   if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac))
5109     {
5110       unsigned char *p = CHAR_POS_ADDR (coding->dst_pos);
5111       unsigned char *pend = p + coding->produced;
5112
5113       for (; p < pend; p++)
5114         if (*p == '\r')
5115           *p = '\n';
5116     }
5117   else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos))
5118     {
5119       unsigned char *p, *pbeg, *pend;
5120       Lisp_Object undo_list;
5121
5122       move_gap_both (coding->dst_pos + coding->produced_char,
5123                      coding->dst_pos_byte + coding->produced);
5124       undo_list = current_buffer->undo_list;
5125       current_buffer->undo_list = Qt;
5126       del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, 0);
5127       current_buffer->undo_list = undo_list;
5128       pbeg = GPT_ADDR;
5129       pend = pbeg + coding->produced;
5130
5131       for (p = pend - 1; p >= pbeg; p--)
5132         if (*p == '\r')
5133           {
5134             safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1);
5135             pend--;
5136           }
5137       coding->produced_char -= coding->produced - (pend - pbeg);
5138       coding->produced = pend - pbeg;
5139       insert_from_gap (coding->produced_char, coding->produced);
5140     }
5141 }
5142
5143 static void
5144 translate_chars (coding, table)
5145      struct coding_system *coding;
5146      Lisp_Object table;
5147 {
5148   int *charbuf = coding->charbuf;
5149   int *charbuf_end = charbuf + coding->charbuf_used;
5150   int c;
5151
5152   if (coding->chars_at_source)
5153     return;
5154
5155   while (charbuf < charbuf_end)
5156     {
5157       c = *charbuf;
5158       if (c < 0)
5159         charbuf += c;
5160       else
5161         *charbuf++ = translate_char (table, c);
5162     }
5163 }
5164
5165 static int
5166 produce_chars (coding)
5167      struct coding_system *coding;
5168 {
5169   unsigned char *dst = coding->destination + coding->produced;
5170   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5171   int produced;
5172   int produced_chars = 0;
5173
5174   if (! coding->chars_at_source)
5175     {
5176       /* Characters are in coding->charbuf.  */
5177       int *buf = coding->charbuf;
5178       int *buf_end = buf + coding->charbuf_used;
5179       unsigned char *adjusted_dst_end;
5180
5181       if (BUFFERP (coding->src_object)
5182           && EQ (coding->src_object, coding->dst_object))
5183         dst_end = coding->source + coding->consumed;
5184       adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5185
5186       while (buf < buf_end)
5187         {
5188           int c = *buf++;
5189
5190           if (dst >= adjusted_dst_end)
5191             {
5192               dst = alloc_destination (coding,
5193                                        buf_end - buf + MAX_MULTIBYTE_LENGTH,
5194                                        dst);
5195               dst_end = coding->destination + coding->dst_bytes;
5196               adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
5197             }
5198           if (c >= 0)
5199             {
5200               if (coding->dst_multibyte
5201                   || ! CHAR_BYTE8_P (c))
5202                 CHAR_STRING_ADVANCE (c, dst);
5203               else
5204                 *dst++ = CHAR_TO_BYTE8 (c);
5205               produced_chars++;
5206             }
5207           else
5208             /* This is an annotation data.  */
5209             buf -= c + 1;
5210         }
5211     }
5212   else
5213     {
5214       unsigned char *src = coding->source;
5215       unsigned char *src_end = src + coding->src_bytes;
5216       Lisp_Object eol_type;
5217
5218       eol_type = CODING_ID_EOL_TYPE (coding->id);
5219
5220       if (coding->src_multibyte != coding->dst_multibyte)
5221         {
5222           if (coding->src_multibyte)
5223             {
5224               int multibytep = 1;
5225               int consumed_chars;
5226
5227               while (1)
5228                 {
5229                   unsigned char *src_base = src;
5230                   int c;
5231
5232                   ONE_MORE_BYTE (c);
5233                   if (c == '\r')
5234                     {
5235                       if (EQ (eol_type, Qdos))
5236                         {
5237                           if (src < src_end
5238                               && *src == '\n')
5239                             c = *src++;
5240                         }
5241                       else if (EQ (eol_type, Qmac))
5242                         c = '\n';
5243                     }
5244                   if (dst == dst_end)
5245                     {
5246                       coding->consumed = src - coding->source;
5247
5248                     if (EQ (coding->src_object, coding->dst_object))
5249                       dst_end = src;
5250                     if (dst == dst_end)
5251                       {
5252                         dst = alloc_destination (coding, src_end - src + 1,
5253                                                  dst);
5254                         dst_end = coding->destination + coding->dst_bytes;
5255                         coding_set_source (coding);
5256                         src = coding->source + coding->consumed;
5257                         src_end = coding->source + coding->src_bytes;
5258                       }
5259                     }
5260                   *dst++ = c;
5261                   produced_chars++;
5262                 }
5263             no_more_source:
5264               ;
5265             }
5266           else
5267             while (src < src_end)
5268               {
5269                 int multibytep = 1;
5270                 int c = *src++;
5271
5272                 if (c == '\r')
5273                   {
5274                     if (EQ (eol_type, Qdos))
5275                       {
5276                         if (src < src_end
5277                             && *src == '\n')
5278                           c = *src++;
5279                       }
5280                     else if (EQ (eol_type, Qmac))
5281                       c = '\n';
5282                   }
5283                 if (dst >= dst_end - 1)
5284                   {
5285                     coding->consumed = src - coding->source;
5286
5287                     if (EQ (coding->src_object, coding->dst_object))
5288                       dst_end = src;
5289                     if (dst >= dst_end - 1)
5290                       {
5291                         dst = alloc_destination (coding, src_end - src + 2,
5292                                                  dst);
5293                         dst_end = coding->destination + coding->dst_bytes;
5294                         coding_set_source (coding);
5295                         src = coding->source + coding->consumed;
5296                         src_end = coding->source + coding->src_bytes;
5297                       }
5298                   }
5299                 EMIT_ONE_BYTE (c);
5300               }
5301         }
5302       else
5303         {
5304           if (!EQ (coding->src_object, coding->dst_object))
5305             {
5306               int require = coding->src_bytes - coding->dst_bytes;
5307
5308               if (require > 0)
5309                 {
5310                   EMACS_INT offset = src - coding->source;
5311
5312                   dst = alloc_destination (coding, require, dst);
5313                   coding_set_source (coding);
5314                   src = coding->source + offset;
5315                   src_end = coding->source + coding->src_bytes;
5316                 }
5317             }
5318           produced_chars = coding->src_chars;
5319           while (src < src_end)
5320             {
5321               int c = *src++;
5322
5323               if (c == '\r')
5324                 {
5325                   if (EQ (eol_type, Qdos))
5326                     {
5327                       if (src < src_end
5328                           && *src == '\n')
5329                         c = *src++;
5330                       produced_chars--;
5331                     }
5332                   else if (EQ (eol_type, Qmac))
5333                     c = '\n';
5334                 }
5335               *dst++ = c;
5336             }
5337         }
5338       coding->consumed = coding->src_bytes;
5339       coding->consumed_char = coding->src_chars;
5340     }
5341
5342   produced = dst - (coding->destination + coding->produced);
5343   if (BUFFERP (coding->dst_object))
5344     insert_from_gap (produced_chars, produced);
5345   coding->produced += produced;
5346   coding->produced_char += produced_chars;
5347   return produced_chars;
5348 }
5349
5350 /* [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN ]
5351         or
5352    [ -LENGTH CHAR_POS_OFFSET MASK METHOD COMP_LEN COMPONENTS... ]
5353  */
5354
5355 static INLINE void
5356 produce_composition (coding, charbuf)
5357      struct coding_system *coding;
5358      int *charbuf;
5359 {
5360   Lisp_Object buffer;
5361   int len;
5362   EMACS_INT pos;
5363   enum composition_method method;
5364   int cmp_len;
5365   Lisp_Object components;
5366
5367   buffer = coding->dst_object;
5368   len = -charbuf[0];
5369   pos = coding->dst_pos + charbuf[1];
5370   method = (enum composition_method) (charbuf[3]);
5371   cmp_len = charbuf[4];
5372
5373   if (method == COMPOSITION_RELATIVE)
5374     components = Qnil;
5375   else
5376     {
5377       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
5378       int i;
5379
5380       len -= 5;
5381       charbuf += 5;
5382       for (i = 0; i < len; i++)
5383         args[i] = make_number (charbuf[i]);
5384       components = (method == COMPOSITION_WITH_ALTCHARS
5385                     ? Fstring (len, args) : Fvector (len, args));
5386     }
5387   compose_text (pos, pos + cmp_len, components, Qnil, Qnil);
5388 }
5389
5390 static int *
5391 save_composition_data (buf, buf_end, prop)
5392      int *buf, *buf_end;
5393      Lisp_Object prop;
5394 {
5395   enum composition_method method = COMPOSITION_METHOD (prop);
5396   int cmp_len = COMPOSITION_LENGTH (prop);
5397
5398   if (buf + 4 + (MAX_COMPOSITION_COMPONENTS * 2 - 1) > buf_end)
5399     return NULL;
5400
5401   buf[1] = CODING_ANNOTATE_COMPOSITION_MASK;
5402   buf[2] = method;
5403   buf[3] = cmp_len;
5404
5405   if (method == COMPOSITION_RELATIVE)
5406     buf[0] = 4;
5407   else
5408     {
5409       Lisp_Object components;
5410       int len, i;
5411
5412       components = COMPOSITION_COMPONENTS (prop);
5413       if (VECTORP (components))
5414         {
5415           len = XVECTOR (components)->size;
5416           for (i = 0; i < len; i++)
5417             buf[4 + i] = XINT (AREF (components, i));
5418         }
5419       else if (STRINGP (components))
5420         {
5421           int i_byte;
5422
5423           len = XSTRING (components)->size;
5424           i = i_byte = 0;
5425           while (i < len)
5426             FETCH_STRING_CHAR_ADVANCE (buf[4 + i], components, i, i_byte);
5427         }
5428       else if (INTEGERP (components))
5429         {
5430           len = 1;
5431           buf[4] = XINT (components);
5432         }
5433       else if (CONSP (components))
5434         {
5435           for (len = 0; CONSP (components);
5436                len++, components = XCDR (components))
5437             buf[4 + len] = XINT (XCAR (components));
5438         }
5439       else
5440         abort ();
5441       buf[0] = 4 + len;
5442     }
5443   return (buf + buf[0]);
5444 }
5445
5446 #define CHARBUF_SIZE 0x4000
5447
5448 #define ALLOC_CONVERSION_WORK_AREA(coding)                              \
5449   do {                                                                  \
5450     int size = CHARBUF_SIZE;;                                           \
5451                                                                         \
5452     coding->charbuf = NULL;                                             \
5453     while (size > 1024)                                                 \
5454       {                                                                 \
5455         coding->charbuf = (int *) alloca (sizeof (int) * size);         \
5456         if (coding->charbuf)                                            \
5457           break;                                                        \
5458         size >>= 1;                                                     \
5459       }                                                                 \
5460     if (! coding->charbuf)                                              \
5461       {                                                                 \
5462         coding->result = CODING_RESULT_INSUFFICIENT_MEM;                \
5463         return coding->result;                                          \
5464       }                                                                 \
5465     coding->charbuf_size = size;                                        \
5466   } while (0)
5467
5468
5469 static void
5470 produce_annotation (coding)
5471      struct coding_system *coding;
5472 {
5473   int *charbuf = coding->charbuf;
5474   int *charbuf_end = charbuf + coding->charbuf_used;
5475
5476   while (charbuf < charbuf_end)
5477     {
5478       if (*charbuf >= 0)
5479         charbuf++;
5480       else
5481         {
5482           int len = -*charbuf;
5483           switch (charbuf[2])
5484             {
5485             case CODING_ANNOTATE_COMPOSITION_MASK:
5486               produce_composition (coding, charbuf);
5487               break;
5488             default:
5489               abort ();
5490             }
5491           charbuf += len;
5492         }
5493     }
5494 }
5495
5496 /* Decode the data at CODING->src_object into CODING->dst_object.
5497    CODING->src_object is a buffer, a string, or nil.
5498    CODING->dst_object is a buffer.
5499
5500    If CODING->src_object is a buffer, it must be the current buffer.
5501    In this case, if CODING->src_pos is positive, it is a position of
5502    the source text in the buffer, otherwise, the source text is in the
5503    gap area of the buffer, and CODING->src_pos specifies the offset of
5504    the text from GPT (which must be the same as PT).  If this is the
5505    same buffer as CODING->dst_object, CODING->src_pos must be
5506    negative.
5507
5508    If CODING->src_object is a string, CODING->src_pos in an index to
5509    that string.
5510
5511    If CODING->src_object is nil, CODING->source must already point to
5512    the non-relocatable memory area.  In this case, CODING->src_pos is
5513    an offset from CODING->source.
5514
5515    The decoded data is inserted at the current point of the buffer
5516    CODING->dst_object.
5517 */
5518
5519 static int
5520 decode_coding (coding)
5521      struct coding_system *coding;
5522 {
5523   Lisp_Object attrs;
5524
5525   if (BUFFERP (coding->src_object)
5526       && coding->src_pos > 0
5527       && coding->src_pos < GPT
5528       && coding->src_pos + coding->src_chars > GPT)
5529     move_gap_both (coding->src_pos, coding->src_pos_byte);
5530
5531   if (BUFFERP (coding->dst_object))
5532     {
5533       if (current_buffer != XBUFFER (coding->dst_object))
5534         set_buffer_internal (XBUFFER (coding->dst_object));
5535       if (GPT != PT)
5536         move_gap_both (PT, PT_BYTE);
5537     }
5538
5539   coding->consumed = coding->consumed_char = 0;
5540   coding->produced = coding->produced_char = 0;
5541   coding->chars_at_source = 0;
5542   coding->result = CODING_RESULT_SUCCESS;
5543   coding->errors = 0;
5544
5545   ALLOC_CONVERSION_WORK_AREA (coding);
5546
5547   attrs = CODING_ID_ATTRS (coding->id);
5548
5549   do
5550     {
5551       coding_set_source (coding);
5552       coding->annotated = 0;
5553       (*(coding->decoder)) (coding);
5554       if (!NILP (CODING_ATTR_DECODE_TBL (attrs)))
5555         translate_chars (CODING_ATTR_DECODE_TBL (attrs), coding);
5556       coding_set_destination (coding);
5557       produce_chars (coding);
5558       if (coding->annotated)
5559         produce_annotation (coding);
5560     }
5561   while (coding->consumed < coding->src_bytes
5562          && ! coding->result);
5563
5564   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl)
5565       && SYMBOLP (CODING_ID_EOL_TYPE (coding->id))
5566       && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
5567     decode_eol (coding);
5568
5569   coding->carryover_bytes = 0;
5570   if (coding->consumed < coding->src_bytes)
5571     {
5572       int nbytes = coding->src_bytes - coding->consumed;
5573       unsigned char *src;
5574
5575       coding_set_source (coding);
5576       coding_set_destination (coding);
5577       src = coding->source + coding->consumed;
5578
5579       if (coding->mode & CODING_MODE_LAST_BLOCK)
5580         {
5581           /* Flush out unprocessed data as binary chars.  We are sure
5582              that the number of data is less than the size of
5583              coding->charbuf.  */
5584           int *charbuf = coding->charbuf;
5585
5586           while (nbytes-- > 0)
5587             {
5588               int c = *src++;
5589               *charbuf++ =  (c & 0x80 ? - c : c);
5590             }
5591           produce_chars (coding);
5592         }
5593       else
5594         {
5595           /* Record unprocessed bytes in coding->carryover.  We are
5596              sure that the number of data is less than the size of
5597              coding->carryover.  */
5598           unsigned char *p = coding->carryover;
5599
5600           coding->carryover_bytes = nbytes;
5601           while (nbytes-- > 0)
5602             *p++ = *src++;
5603         }
5604       coding->consumed = coding->src_bytes;
5605     }
5606
5607   return coding->result;
5608 }
5609
5610 static void
5611 consume_chars (coding)
5612      struct coding_system *coding;
5613 {
5614   int *buf = coding->charbuf;
5615   /* -1 is to compensate for CRLF.  */
5616   int *buf_end = coding->charbuf + coding->charbuf_size - 1;
5617   const unsigned char *src = coding->source + coding->consumed;
5618   int pos = coding->src_pos + coding->consumed_char;
5619   int end_pos = coding->src_pos + coding->src_chars;
5620   int multibytep = coding->src_multibyte;
5621   Lisp_Object eol_type;
5622   int c;
5623   int start, end, stop;
5624   Lisp_Object object, prop;
5625
5626   eol_type = CODING_ID_EOL_TYPE (coding->id);
5627   if (VECTORP (eol_type))
5628     eol_type = Qunix;
5629
5630   object = coding->src_object;
5631
5632   /* Note: composition handling is not yet implemented.  */
5633   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
5634
5635   if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK
5636       && find_composition (pos, end_pos, &start, &end, &prop, object)
5637       && end <= end_pos
5638       && (start >= pos
5639           || (find_composition (end, end_pos, &start, &end, &prop, object)
5640               && end <= end_pos)))
5641     stop = start;
5642   else
5643     stop = end_pos;
5644
5645   while (buf < buf_end)
5646     {
5647       if (pos == stop)
5648         {
5649           int *p;
5650
5651           if (pos == end_pos)
5652             break;
5653           p = save_composition_data (buf, buf_end, prop);
5654           if (p == NULL)
5655             break;
5656           buf = p;
5657           if (find_composition (end, end_pos, &start, &end, &prop, object)
5658               && end <= end_pos)
5659             stop = start;
5660           else
5661             stop = end_pos;
5662         }
5663
5664       if (! multibytep)
5665         c = *src++;
5666       else
5667         c = STRING_CHAR_ADVANCE (src);
5668       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
5669         c = '\n';
5670       if (! EQ (eol_type, Qunix))
5671         {
5672           if (c == '\n')
5673             {
5674               if (EQ (eol_type, Qdos))
5675                 *buf++ = '\r';
5676               else
5677                 c = '\r';
5678             }
5679         }
5680       *buf++ = c;
5681       pos++;
5682     }
5683
5684   coding->consumed = src - coding->source;
5685   coding->consumed_char = pos - coding->src_pos;
5686   coding->charbuf_used = buf - coding->charbuf;
5687   coding->chars_at_source = 0;
5688 }
5689
5690
5691 /* Encode the text at CODING->src_object into CODING->dst_object.
5692    CODING->src_object is a buffer or a string.
5693    CODING->dst_object is a buffer or nil.
5694
5695    If CODING->src_object is a buffer, it must be the current buffer.
5696    In this case, if CODING->src_pos is positive, it is a position of
5697    the source text in the buffer, otherwise. the source text is in the
5698    gap area of the buffer, and coding->src_pos specifies the offset of
5699    the text from GPT (which must be the same as PT).  If this is the
5700    same buffer as CODING->dst_object, CODING->src_pos must be
5701    negative and CODING should not have `pre-write-conversion'.
5702
5703    If CODING->src_object is a string, CODING should not have
5704    `pre-write-conversion'.
5705
5706    If CODING->dst_object is a buffer, the encoded data is inserted at
5707    the current point of that buffer.
5708
5709    If CODING->dst_object is nil, the encoded data is placed at the
5710    memory area specified by CODING->destination.  */
5711
5712 static int
5713 encode_coding (coding)
5714      struct coding_system *coding;
5715 {
5716   Lisp_Object attrs;
5717
5718   attrs = CODING_ID_ATTRS (coding->id);
5719
5720   if (BUFFERP (coding->dst_object))
5721     {
5722       set_buffer_internal (XBUFFER (coding->dst_object));
5723       coding->dst_multibyte
5724         = ! NILP (current_buffer->enable_multibyte_characters);
5725     }
5726
5727   coding->consumed = coding->consumed_char = 0;
5728   coding->produced = coding->produced_char = 0;
5729   coding->result = CODING_RESULT_SUCCESS;
5730   coding->errors = 0;
5731
5732   ALLOC_CONVERSION_WORK_AREA (coding);
5733
5734   do {
5735     coding_set_source (coding);
5736     consume_chars (coding);
5737
5738     if (!NILP (CODING_ATTR_ENCODE_TBL (attrs)))
5739       translate_chars (CODING_ATTR_ENCODE_TBL (attrs), coding);
5740
5741     coding_set_destination (coding);
5742     (*(coding->encoder)) (coding);
5743   } while (coding->consumed_char < coding->src_chars);
5744
5745   if (BUFFERP (coding->dst_object))
5746     insert_from_gap (coding->produced_char, coding->produced);
5747
5748   return (coding->result);
5749 }
5750
5751 /* Work buffer */
5752
5753 /* List of currently used working buffer.  */
5754 Lisp_Object Vcode_conversion_work_buf_list;
5755
5756 /* A working buffer used by the top level conversion.  */
5757 Lisp_Object Vcode_conversion_reused_work_buf;
5758
5759
5760 /* Return a working buffer that can be freely used by the following
5761    code conversion.  MULTIBYTEP specifies the multibyteness of the
5762    buffer.  */
5763
5764 Lisp_Object
5765 make_conversion_work_buffer (multibytep)
5766      int multibytep;
5767 {
5768   struct buffer *current = current_buffer;
5769   Lisp_Object buf;
5770
5771   if (NILP (Vcode_conversion_work_buf_list))
5772     {
5773       if (NILP (Vcode_conversion_reused_work_buf))
5774         Vcode_conversion_reused_work_buf
5775           = Fget_buffer_create (build_string (" *code-conversion-work*"));
5776       Vcode_conversion_work_buf_list
5777         = Fcons (Vcode_conversion_reused_work_buf, Qnil);
5778     }
5779   else
5780     {
5781       int depth = XINT (Flength (Vcode_conversion_work_buf_list));
5782       char str[128];
5783
5784       sprintf (str, " *code-conversion-work*<%d>", depth);
5785       Vcode_conversion_work_buf_list
5786         = Fcons (Fget_buffer_create (build_string (str)),
5787                  Vcode_conversion_work_buf_list);
5788     }
5789
5790   buf = XCAR (Vcode_conversion_work_buf_list);
5791   set_buffer_internal (XBUFFER (buf));
5792   current_buffer->undo_list = Qt;
5793   Ferase_buffer ();
5794   Fset_buffer_multibyte (multibytep ? Qt : Qnil, Qnil);
5795   set_buffer_internal (current);
5796   return buf;
5797 }
5798
5799 static struct coding_system *saved_coding;
5800
5801 Lisp_Object
5802 code_conversion_restore (info)
5803      Lisp_Object info;
5804 {
5805   int depth = XINT (Flength (Vcode_conversion_work_buf_list));
5806   Lisp_Object buf;
5807
5808   if (depth > 0)
5809     {
5810       buf = XCAR (Vcode_conversion_work_buf_list);
5811       Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list);
5812       if (depth > 1 && !NILP (Fbuffer_live_p (buf)))
5813         Fkill_buffer (buf);
5814     }
5815
5816   if (EQ (saved_coding->dst_object, Qt)
5817       && saved_coding->destination)
5818     xfree (saved_coding->destination);
5819
5820   return save_excursion_restore (info);
5821 }
5822
5823
5824 int
5825 decode_coding_gap (coding, chars, bytes)
5826      struct coding_system *coding;
5827      EMACS_INT chars, bytes;
5828 {
5829   int count = specpdl_ptr - specpdl;
5830
5831   saved_coding = coding;
5832   record_unwind_protect (code_conversion_restore, save_excursion_save ());
5833
5834   coding->src_object = Fcurrent_buffer ();
5835   coding->src_chars = chars;
5836   coding->src_bytes = bytes;
5837   coding->src_pos = -chars;
5838   coding->src_pos_byte = -bytes;
5839   coding->src_multibyte = chars < bytes;
5840   coding->dst_object = coding->src_object;
5841   coding->dst_pos = PT;
5842   coding->dst_pos_byte = PT_BYTE;
5843   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
5844
5845   if (CODING_REQUIRE_DETECTION (coding))
5846     detect_coding (coding);
5847
5848   decode_coding (coding);
5849
5850   unbind_to (count, Qnil);
5851   return coding->result;
5852 }
5853
5854 int
5855 encode_coding_gap (coding, chars, bytes)
5856      struct coding_system *coding;
5857      EMACS_INT chars, bytes;
5858 {
5859   int count = specpdl_ptr - specpdl;
5860   Lisp_Object buffer;
5861
5862   saved_coding = coding;
5863   record_unwind_protect (code_conversion_restore, save_excursion_save ());
5864
5865   buffer = Fcurrent_buffer ();
5866   coding->src_object = buffer;
5867   coding->src_chars = chars;
5868   coding->src_bytes = bytes;
5869   coding->src_pos = -chars;
5870   coding->src_pos_byte = -bytes;
5871   coding->src_multibyte = chars < bytes;
5872   coding->dst_object = coding->src_object;
5873   coding->dst_pos = PT;
5874   coding->dst_pos_byte = PT_BYTE;
5875
5876   encode_coding (coding);
5877
5878   unbind_to (count, Qnil);
5879   return coding->result;
5880 }
5881
5882
5883 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
5884    SRC_OBJECT into DST_OBJECT by coding context CODING.
5885
5886    SRC_OBJECT is a buffer, a string, or Qnil.
5887
5888    If it is a buffer, the text is at point of the buffer.  FROM and TO
5889    are positions in the buffer.
5890
5891    If it is a string, the text is at the beginning of the string.
5892    FROM and TO are indices to the string.
5893
5894    If it is nil, the text is at coding->source.  FROM and TO are
5895    indices to coding->source.
5896
5897    DST_OBJECT is a buffer, Qt, or Qnil.
5898
5899    If it is a buffer, the decoded text is inserted at point of the
5900    buffer.  If the buffer is the same as SRC_OBJECT, the source text
5901    is deleted.
5902
5903    If it is Qt, a string is made from the decoded text, and
5904    set in CODING->dst_object.
5905
5906    If it is Qnil, the decoded text is stored at CODING->destination.
5907    The called must allocate CODING->dst_bytes bytes at
5908    CODING->destination by xmalloc.  If the decoded text is longer than
5909    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
5910  */
5911
5912 void
5913 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
5914                       dst_object)
5915      struct coding_system *coding;
5916      Lisp_Object src_object;
5917      EMACS_INT from, from_byte, to, to_byte;
5918      Lisp_Object dst_object;
5919 {
5920   int count = specpdl_ptr - specpdl;
5921   unsigned char *destination;
5922   EMACS_INT dst_bytes;
5923   EMACS_INT chars = to - from;
5924   EMACS_INT bytes = to_byte - from_byte;
5925   Lisp_Object attrs;
5926
5927   saved_coding = coding;
5928   record_unwind_protect (code_conversion_restore, save_excursion_save ());
5929
5930   if (NILP (dst_object))
5931     {
5932       destination = coding->destination;
5933       dst_bytes = coding->dst_bytes;
5934     }
5935
5936   coding->src_object = src_object;
5937   coding->src_chars = chars;
5938   coding->src_bytes = bytes;
5939   coding->src_multibyte = chars < bytes;
5940
5941   if (STRINGP (src_object))
5942     {
5943       coding->src_pos = from;
5944       coding->src_pos_byte = from_byte;
5945     }
5946   else if (BUFFERP (src_object))
5947     {
5948       set_buffer_internal (XBUFFER (src_object));
5949       if (from != GPT)
5950         move_gap_both (from, from_byte);
5951       if (EQ (src_object, dst_object))
5952         {
5953           TEMP_SET_PT_BOTH (from, from_byte);
5954           del_range_both (from, from_byte, to, to_byte, 1);
5955           coding->src_pos = -chars;
5956           coding->src_pos_byte = -bytes;
5957         }
5958       else
5959         {
5960           coding->src_pos = from;
5961           coding->src_pos_byte = from_byte;
5962         }
5963     }
5964
5965   if (CODING_REQUIRE_DETECTION (coding))
5966     detect_coding (coding);
5967   attrs = CODING_ID_ATTRS (coding->id);
5968
5969   if (! NILP (CODING_ATTR_POST_READ (attrs))
5970       || EQ (dst_object, Qt))
5971     {
5972       coding->dst_object = make_conversion_work_buffer (1);
5973       coding->dst_pos = BEG;
5974       coding->dst_pos_byte = BEG_BYTE;
5975       coding->dst_multibyte = 1;
5976     }
5977   else if (BUFFERP (dst_object))
5978     {
5979       coding->dst_object = dst_object;
5980       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
5981       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
5982       coding->dst_multibyte
5983         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
5984     }
5985   else
5986     {
5987       coding->dst_object = Qnil;
5988       coding->dst_multibyte = 1;
5989     }
5990
5991   decode_coding (coding);
5992
5993   if (BUFFERP (coding->dst_object))
5994     set_buffer_internal (XBUFFER (coding->dst_object));
5995
5996   if (! NILP (CODING_ATTR_POST_READ (attrs)))
5997     {
5998       struct gcpro gcpro1, gcpro2;
5999       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6000       Lisp_Object val;
6001
6002       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6003       GCPRO2 (coding->src_object, coding->dst_object);
6004       val = call1 (CODING_ATTR_POST_READ (attrs),
6005                    make_number (coding->produced_char));
6006       UNGCPRO;
6007       CHECK_NATNUM (val);
6008       coding->produced_char += Z - prev_Z;
6009       coding->produced += Z_BYTE - prev_Z_BYTE;
6010     }
6011
6012   if (EQ (dst_object, Qt))
6013     {
6014       coding->dst_object = Fbuffer_string ();
6015     }
6016   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6017     {
6018       set_buffer_internal (XBUFFER (coding->dst_object));
6019       if (dst_bytes < coding->produced)
6020         {
6021           destination
6022             = (unsigned char *) xrealloc (destination, coding->produced);
6023           if (! destination)
6024             {
6025               coding->result = CODING_RESULT_INSUFFICIENT_DST;
6026               unbind_to (count, Qnil);
6027               return;
6028             }
6029           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
6030             move_gap_both (BEGV, BEGV_BYTE);
6031           bcopy (BEGV_ADDR, destination, coding->produced);
6032           coding->destination = destination;
6033         }
6034     }
6035
6036   unbind_to (count, Qnil);
6037 }
6038
6039
6040 void
6041 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6042                       dst_object)
6043      struct coding_system *coding;
6044      Lisp_Object src_object;
6045      EMACS_INT from, from_byte, to, to_byte;
6046      Lisp_Object dst_object;
6047 {
6048   int count = specpdl_ptr - specpdl;
6049   EMACS_INT chars = to - from;
6050   EMACS_INT bytes = to_byte - from_byte;
6051   Lisp_Object attrs;
6052
6053   saved_coding = coding;
6054   record_unwind_protect (code_conversion_restore, save_excursion_save ());
6055
6056   coding->src_object = src_object;
6057   coding->src_chars = chars;
6058   coding->src_bytes = bytes;
6059   coding->src_multibyte = chars < bytes;
6060
6061   attrs = CODING_ID_ATTRS (coding->id);
6062
6063   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
6064     {
6065       coding->src_object = make_conversion_work_buffer (coding->src_multibyte);
6066       set_buffer_internal (XBUFFER (coding->src_object));
6067       if (STRINGP (src_object))
6068         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
6069       else if (BUFFERP (src_object))
6070         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
6071       else
6072         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
6073
6074       if (EQ (src_object, dst_object))
6075         {
6076           set_buffer_internal (XBUFFER (src_object));
6077           del_range_both (from, from_byte, to, to_byte, 1);
6078           set_buffer_internal (XBUFFER (coding->src_object));
6079         }
6080
6081       call2 (CODING_ATTR_PRE_WRITE (attrs),
6082              make_number (BEG), make_number (Z));
6083       coding->src_object = Fcurrent_buffer ();
6084       if (BEG != GPT)
6085         move_gap_both (BEG, BEG_BYTE);
6086       coding->src_chars = Z - BEG;
6087       coding->src_bytes = Z_BYTE - BEG_BYTE;
6088       coding->src_pos = BEG;
6089       coding->src_pos_byte = BEG_BYTE;
6090       coding->src_multibyte = Z < Z_BYTE;
6091     }
6092   else if (STRINGP (src_object))
6093     {
6094       coding->src_pos = from;
6095       coding->src_pos_byte = from_byte;
6096     }
6097   else if (BUFFERP (src_object))
6098     {
6099       set_buffer_internal (XBUFFER (src_object));
6100       if (from != GPT)
6101         move_gap_both (from, from_byte);
6102       if (EQ (src_object, dst_object))
6103         {
6104           del_range_both (from, from_byte, to, to_byte, 1);
6105           coding->src_pos = -chars;
6106           coding->src_pos_byte = -bytes;
6107         }
6108       else
6109         {
6110           coding->src_pos = from;
6111           coding->src_pos_byte = from_byte;
6112         }
6113     }
6114
6115   if (BUFFERP (dst_object))
6116     {
6117       coding->dst_object = dst_object;
6118       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6119       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6120       coding->dst_multibyte
6121         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6122     }
6123   else if (EQ (dst_object, Qt))
6124     {
6125       coding->dst_object = Qnil;
6126       coding->dst_bytes = coding->src_chars;
6127       if (coding->dst_bytes == 0)
6128         coding->dst_bytes = 1;
6129       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
6130       coding->dst_multibyte = 0;
6131     }
6132   else
6133     {
6134       coding->dst_object = Qnil;
6135       coding->dst_multibyte = 0;
6136     }
6137
6138   encode_coding (coding);
6139
6140   if (EQ (dst_object, Qt))
6141     {
6142       if (BUFFERP (coding->dst_object))
6143         coding->dst_object = Fbuffer_string ();
6144       else
6145         {
6146           coding->dst_object
6147             = make_unibyte_string ((char *) coding->destination,
6148                                    coding->produced);
6149           xfree (coding->destination);
6150         }
6151     }
6152
6153   unbind_to (count, Qnil);
6154 }
6155
6156
6157 Lisp_Object
6158 preferred_coding_system ()
6159 {
6160   int id = coding_categories[coding_priorities[0]].id;
6161
6162   return CODING_ID_NAME (id);
6163 }
6164
6165 \f
6166 #ifdef emacs
6167 /*** 8. Emacs Lisp library functions ***/
6168
6169 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
6170        doc: /* Return t if OBJECT is nil or a coding-system.
6171 See the documentation of `define-coding-system' for information
6172 about coding-system objects.  */)
6173      (obj)
6174      Lisp_Object obj;
6175 {
6176   return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil);
6177 }
6178
6179 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
6180        Sread_non_nil_coding_system, 1, 1, 0,
6181        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
6182      (prompt)
6183      Lisp_Object prompt;
6184 {
6185   Lisp_Object val;
6186   do
6187     {
6188       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6189                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
6190     }
6191   while (XSTRING (val)->size == 0);
6192   return (Fintern (val, Qnil));
6193 }
6194
6195 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
6196        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
6197 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.  */)
6198      (prompt, default_coding_system)
6199      Lisp_Object prompt, default_coding_system;
6200 {
6201   Lisp_Object val;
6202   if (SYMBOLP (default_coding_system))
6203     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
6204   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
6205                           Qt, Qnil, Qcoding_system_history,
6206                           default_coding_system, Qnil);
6207   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
6208 }
6209
6210 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
6211        1, 1, 0,
6212        doc: /* Check validity of CODING-SYSTEM.
6213 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
6214 It is valid if it is a symbol with a non-nil `coding-system' property.
6215 The value of property should be a vector of length 5.  */)
6216   (coding_system)
6217      Lisp_Object coding_system;
6218 {
6219   CHECK_SYMBOL (coding_system);
6220   if (!NILP (Fcoding_system_p (coding_system)))
6221     return coding_system;
6222   while (1)
6223     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
6224 }
6225
6226 \f
6227 Lisp_Object
6228 detect_coding_system (src, src_bytes, highest, multibytep, coding_system)
6229      unsigned char *src;
6230      int src_bytes, highest;
6231      int multibytep;
6232      Lisp_Object coding_system;
6233 {
6234   unsigned char *src_end = src + src_bytes;
6235   int mask = CATEGORY_MASK_ANY;
6236   int detected = 0;
6237   int c, i;
6238   Lisp_Object attrs, eol_type;
6239   Lisp_Object val;
6240   struct coding_system coding;
6241
6242   if (NILP (coding_system))
6243     coding_system = Qundecided;
6244   setup_coding_system (coding_system, &coding);
6245   attrs = CODING_ID_ATTRS (coding.id);
6246   eol_type = CODING_ID_EOL_TYPE (coding.id);
6247
6248   coding.source = src;
6249   coding.src_bytes = src_bytes;
6250   coding.src_multibyte = multibytep;
6251   coding.consumed = 0;
6252
6253   if (XINT (CODING_ATTR_CATEGORY (attrs)) != coding_category_undecided)
6254     {
6255       mask = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
6256     }
6257   else
6258     {
6259       coding_system = Qnil;
6260       for (; src < src_end; src++)
6261         {
6262           c = *src;
6263           if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC
6264                                         || c == ISO_CODE_SI
6265                                         || c == ISO_CODE_SO)))
6266             break;
6267         }
6268       coding.head_ascii = src - coding.source;
6269
6270       if (src < src_end)
6271         for (i = 0; i < coding_category_raw_text; i++)
6272           {
6273             enum coding_category category = coding_priorities[i];
6274             struct coding_system *this = coding_categories + category;
6275
6276             if (category >= coding_category_raw_text
6277                 || detected & (1 << category))
6278               continue;
6279
6280             if (this->id < 0)
6281               {
6282                 /* No coding system of this category is defined.  */
6283                 mask &= ~(1 << category);
6284               }
6285             else
6286               {
6287                 detected |= detected_mask[category];
6288                 if ((*(coding_categories[category].detector)) (&coding, &mask)
6289                     && highest)
6290                   {
6291                     mask &= detected_mask[category];
6292                     break;
6293                   }
6294               }
6295           }
6296     }
6297
6298   if (!mask)
6299     val = Fcons (make_number (coding_category_raw_text), Qnil);
6300   else if (mask == CATEGORY_MASK_ANY)
6301     val = Fcons (make_number (coding_category_undecided), Qnil);
6302   else if (highest)
6303     {
6304       for (i = 0; i < coding_category_raw_text; i++)
6305         if (mask & (1 << coding_priorities[i]))
6306           {
6307             val = Fcons (make_number (coding_priorities[i]), Qnil);
6308             break;
6309           }
6310     }
6311   else
6312     {
6313       val = Qnil;
6314       for (i = coding_category_raw_text - 1; i >= 0; i--)
6315         if (mask & (1 << coding_priorities[i]))
6316           val = Fcons (make_number (coding_priorities[i]), val);
6317     }
6318
6319   {
6320     int one_byte_eol = -1, two_byte_eol = -1;
6321     Lisp_Object tail;
6322
6323     for (tail = val; CONSP (tail); tail = XCDR (tail))
6324       {
6325         struct coding_system *this
6326           = (NILP (coding_system) ? coding_categories + XINT (XCAR (tail))
6327              : &coding);
6328         int this_eol;
6329
6330         attrs = CODING_ID_ATTRS (this->id);
6331         eol_type = CODING_ID_EOL_TYPE (this->id);
6332         XSETCAR (tail, CODING_ID_NAME (this->id));
6333         if (VECTORP (eol_type))
6334           {
6335             if (EQ (CODING_ATTR_TYPE (attrs), Qutf_16))
6336               {
6337                 if (two_byte_eol < 0)
6338                   two_byte_eol = detect_eol (this, coding.source, src_bytes);
6339                 this_eol = two_byte_eol;
6340               }
6341             else
6342               {
6343                 if (one_byte_eol < 0)
6344                   one_byte_eol =detect_eol (this, coding.source, src_bytes);
6345                 this_eol = one_byte_eol;
6346               }
6347             if (this_eol == EOL_SEEN_LF)
6348               XSETCAR (tail, AREF (eol_type, 0));
6349             else if (this_eol == EOL_SEEN_CRLF)
6350               XSETCAR (tail, AREF (eol_type, 1));
6351             else if (this_eol == EOL_SEEN_CR)
6352               XSETCAR (tail, AREF (eol_type, 2));
6353           }
6354       }
6355   }
6356
6357   return (highest ? XCAR (val) : val);
6358 }
6359
6360
6361 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
6362        2, 3, 0,
6363        doc: /* Detect coding system of the text in the region between START and END.
6364 Return a list of possible coding systems ordered by priority.
6365
6366 If only ASCII characters are found, it returns a list of single element
6367 `undecided' or its subsidiary coding system according to a detected
6368 end-of-line format.
6369
6370 If optional argument HIGHEST is non-nil, return the coding system of
6371 highest priority.  */)
6372      (start, end, highest)
6373      Lisp_Object start, end, highest;
6374 {
6375   int from, to;
6376   int from_byte, to_byte;
6377
6378   CHECK_NUMBER_COERCE_MARKER (start);
6379   CHECK_NUMBER_COERCE_MARKER (end);
6380
6381   validate_region (&start, &end);
6382   from = XINT (start), to = XINT (end);
6383   from_byte = CHAR_TO_BYTE (from);
6384   to_byte = CHAR_TO_BYTE (to);
6385
6386   if (from < GPT && to >= GPT)
6387     move_gap_both (to, to_byte);
6388
6389   return detect_coding_system (BYTE_POS_ADDR (from_byte),
6390                                to_byte - from_byte,
6391                                !NILP (highest),
6392                                !NILP (current_buffer
6393                                       ->enable_multibyte_characters),
6394                                Qnil);
6395 }
6396
6397 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
6398        1, 2, 0,
6399        doc: /* Detect coding system of the text in STRING.
6400 Return a list of possible coding systems ordered by priority.
6401
6402 If only ASCII characters are found, it returns a list of single element
6403 `undecided' or its subsidiary coding system according to a detected
6404 end-of-line format.
6405
6406 If optional argument HIGHEST is non-nil, return the coding system of
6407 highest priority.  */)
6408      (string, highest)
6409      Lisp_Object string, highest;
6410 {
6411   CHECK_STRING (string);
6412
6413   return detect_coding_system (XSTRING (string)->data,
6414                                STRING_BYTES (XSTRING (string)),
6415                                !NILP (highest),
6416                                STRING_MULTIBYTE (string),
6417                                Qnil);
6418 }
6419
6420
6421 static INLINE int
6422 char_encodable_p (c, attrs)
6423      int c;
6424      Lisp_Object attrs;
6425 {
6426   Lisp_Object tail;
6427   struct charset *charset;
6428
6429   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
6430        CONSP (tail); tail = XCDR (tail))
6431     {
6432       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
6433       if (CHAR_CHARSET_P (c, charset))
6434         break;
6435     }
6436   return (! NILP (tail));
6437 }
6438
6439
6440 /* Return a list of coding systems that safely encode the text between
6441    START and END.  If EXCLUDE is non-nil, it is a list of coding
6442    systems not to check.  The returned list doesn't contain any such
6443    coding systems.  In any case, if the text contains only ASCII or is
6444    unibyte, return t.  */
6445
6446 DEFUN ("find-coding-systems-region-internal",
6447        Ffind_coding_systems_region_internal,
6448        Sfind_coding_systems_region_internal, 2, 3, 0,
6449        doc: /* Internal use only.  */)
6450      (start, end, exclude)
6451      Lisp_Object start, end, exclude;
6452 {
6453   Lisp_Object coding_attrs_list, safe_codings;
6454   EMACS_INT start_byte, end_byte;
6455   const unsigned char *p, *pbeg, *pend;
6456   int c;
6457   Lisp_Object tail, elt;
6458
6459   if (STRINGP (start))
6460     {
6461       if (!STRING_MULTIBYTE (start)
6462           || XSTRING (start)->size == STRING_BYTES (XSTRING (start)))
6463         return Qt;
6464       start_byte = 0;
6465       end_byte = STRING_BYTES (XSTRING (start));
6466     }
6467   else
6468     {
6469       CHECK_NUMBER_COERCE_MARKER (start);
6470       CHECK_NUMBER_COERCE_MARKER (end);
6471       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6472         args_out_of_range (start, end);
6473       if (NILP (current_buffer->enable_multibyte_characters))
6474         return Qt;
6475       start_byte = CHAR_TO_BYTE (XINT (start));
6476       end_byte = CHAR_TO_BYTE (XINT (end));
6477       if (XINT (end) - XINT (start) == end_byte - start_byte)
6478         return Qt;
6479
6480       if (start < GPT && end > GPT)
6481         {
6482           if ((GPT - start) < (end - GPT))
6483             move_gap_both (start, start_byte);
6484           else
6485             move_gap_both (end, end_byte);
6486         }
6487     }
6488
6489   coding_attrs_list = Qnil;
6490   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
6491     if (NILP (exclude)
6492         || NILP (Fmemq (XCAR (tail), exclude)))
6493       {
6494         Lisp_Object attrs;
6495
6496         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
6497         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
6498             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
6499           coding_attrs_list = Fcons (attrs, coding_attrs_list);
6500       }
6501
6502   if (STRINGP (start))
6503     p = pbeg = XSTRING (start)->data;
6504   else
6505     p = pbeg = BYTE_POS_ADDR (start_byte);
6506   pend = p + (end_byte - start_byte);
6507
6508   while (p < pend && ASCII_BYTE_P (*p)) p++;
6509   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
6510
6511   while (p < pend)
6512     {
6513       if (ASCII_BYTE_P (*p))
6514         p++;
6515       else
6516         {
6517           c = STRING_CHAR_ADVANCE (p);
6518
6519           charset_map_loaded = 0;
6520           for (tail = coding_attrs_list; CONSP (tail);)
6521             {
6522               elt = XCAR (tail);
6523               if (NILP (elt))
6524                 tail = XCDR (tail);
6525               else if (char_encodable_p (c, elt))
6526                 tail = XCDR (tail);
6527               else if (CONSP (XCDR (tail)))
6528                 {
6529                   XSETCAR (tail, XCAR (XCDR (tail)));
6530                   XSETCDR (tail, XCDR (XCDR (tail)));
6531                 }
6532               else
6533                 {
6534                   XSETCAR (tail, Qnil);
6535                   tail = XCDR (tail);
6536                 }
6537             }
6538           if (charset_map_loaded)
6539             {
6540               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
6541
6542               if (STRINGP (start))
6543                 pbeg = XSTRING (start)->data;
6544               else
6545                 pbeg = BYTE_POS_ADDR (start_byte);
6546               p = pbeg + p_offset;
6547               pend = pbeg + pend_offset;
6548             }
6549         }
6550     }
6551
6552   safe_codings = Qnil;
6553   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
6554     if (! NILP (XCAR (tail)))
6555       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
6556
6557   return safe_codings;
6558 }
6559
6560
6561 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
6562        Scheck_coding_systems_region, 3, 3, 0,
6563        doc: /* Check if the region is encodable by coding systems.
6564
6565 START and END are buffer positions specifying the region.
6566 CODING-SYSTEM-LIST is a list of coding systems to check.
6567
6568 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
6569 CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
6570 whole region, POS0, POS1, ... are buffer positions where non-encodable
6571 characters are found.
6572
6573 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
6574 value is nil.
6575
6576 START may be a string.  In that case, check if the string is
6577 encodable, and the value contains indices to the string instead of
6578 buffer positions.  END is ignored.  */)
6579      (start, end, coding_system_list)
6580      Lisp_Object start, end, coding_system_list;
6581 {
6582   Lisp_Object list;
6583   EMACS_INT start_byte, end_byte;
6584   int pos;
6585   const unsigned char *p, *pbeg, *pend;
6586   int c;
6587   Lisp_Object tail, elt;
6588
6589   if (STRINGP (start))
6590     {
6591       if (!STRING_MULTIBYTE (start)
6592           && XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
6593         return Qnil;
6594       start_byte = 0;
6595       end_byte = STRING_BYTES (XSTRING (start));
6596       pos = 0;
6597     }
6598   else
6599     {
6600       CHECK_NUMBER_COERCE_MARKER (start);
6601       CHECK_NUMBER_COERCE_MARKER (end);
6602       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
6603         args_out_of_range (start, end);
6604       if (NILP (current_buffer->enable_multibyte_characters))
6605         return Qnil;
6606       start_byte = CHAR_TO_BYTE (XINT (start));
6607       end_byte = CHAR_TO_BYTE (XINT (end));
6608       if (XINT (end) - XINT (start) == end_byte - start_byte)
6609         return Qt;
6610
6611       if (start < GPT && end > GPT)
6612         {
6613           if ((GPT - start) < (end - GPT))
6614             move_gap_both (start, start_byte);
6615           else
6616             move_gap_both (end, end_byte);
6617         }
6618       pos = start;
6619     }
6620
6621   list = Qnil;
6622   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
6623     {
6624       elt = XCAR (tail);
6625       list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0),
6626                                        Qnil)),
6627                     list);
6628     }
6629
6630   if (STRINGP (start))
6631     p = pbeg = XSTRING (start)->data;
6632   else
6633     p = pbeg = BYTE_POS_ADDR (start_byte);
6634   pend = p + (end_byte - start_byte);
6635
6636   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
6637   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
6638
6639   while (p < pend)
6640     {
6641       if (ASCII_BYTE_P (*p))
6642         p++;
6643       else
6644         {
6645           c = STRING_CHAR_ADVANCE (p);
6646
6647           charset_map_loaded = 0;
6648           for (tail = list; CONSP (tail); tail = XCDR (tail))
6649             {
6650               elt = XCDR (XCAR (tail));
6651               if (! char_encodable_p (c, XCAR (elt)))
6652                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
6653             }
6654           if (charset_map_loaded)
6655             {
6656               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
6657
6658               if (STRINGP (start))
6659                 pbeg = XSTRING (start)->data;
6660               else
6661                 pbeg = BYTE_POS_ADDR (start_byte);
6662               p = pbeg + p_offset;
6663               pend = pbeg + pend_offset;
6664             }
6665         }
6666       pos++;
6667     }
6668
6669   tail = list;
6670   list = Qnil;
6671   for (; CONSP (tail); tail = XCDR (tail))
6672     {
6673       elt = XCAR (tail);
6674       if (CONSP (XCDR (XCDR (elt))))
6675         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
6676                       list);
6677     }
6678
6679   return list;
6680 }
6681
6682
6683
6684 Lisp_Object
6685 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
6686      Lisp_Object start, end, coding_system, dst_object;
6687      int encodep, norecord;
6688 {
6689   struct coding_system coding;
6690   EMACS_INT from, from_byte, to, to_byte;
6691   Lisp_Object src_object;
6692
6693   CHECK_NUMBER_COERCE_MARKER (start);
6694   CHECK_NUMBER_COERCE_MARKER (end);
6695   if (NILP (coding_system))
6696     coding_system = Qno_conversion;
6697   else
6698     CHECK_CODING_SYSTEM (coding_system);
6699   src_object = Fcurrent_buffer ();
6700   if (NILP (dst_object))
6701     dst_object = src_object;
6702   else if (! EQ (dst_object, Qt))
6703     CHECK_BUFFER (dst_object);
6704
6705   validate_region (&start, &end);
6706   from = XFASTINT (start);
6707   from_byte = CHAR_TO_BYTE (from);
6708   to = XFASTINT (end);
6709   to_byte = CHAR_TO_BYTE (to);
6710
6711   setup_coding_system (coding_system, &coding);
6712   coding.mode |= CODING_MODE_LAST_BLOCK;
6713
6714   if (encodep)
6715     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
6716                           dst_object);
6717   else
6718     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
6719                           dst_object);
6720   if (! norecord)
6721     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
6722
6723   if (coding.result != CODING_RESULT_SUCCESS)
6724     error ("Code conversion error: %d", coding.result);
6725
6726   return (BUFFERP (dst_object)
6727           ? make_number (coding.produced_char)
6728           : coding.dst_object);
6729 }
6730
6731
6732 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
6733        3, 4, "r\nzCoding system: ",
6734        doc: /* Decode the current region from the specified coding system.
6735 When called from a program, takes four arguments:
6736         START, END, CODING-SYSTEM, and DESTINATION.
6737 START and END are buffer positions.
6738
6739 Optional 4th arguments DESTINATION specifies where the decoded text goes.
6740 If nil, the region between START and END is replace by the decoded text.
6741 If buffer, the decoded text is inserted in the buffer.
6742 If t, the decoded text is returned.
6743
6744 This function sets `last-coding-system-used' to the precise coding system
6745 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6746 not fully specified.)
6747 It returns the length of the decoded text.  */)
6748      (start, end, coding_system, destination)
6749      Lisp_Object start, end, coding_system, destination;
6750 {
6751   return code_convert_region (start, end, coding_system, destination, 0, 0);
6752 }
6753
6754 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
6755        3, 4, "r\nzCoding system: ",
6756        doc: /* Encode the current region by specified coding system.
6757 When called from a program, takes three arguments:
6758 START, END, and CODING-SYSTEM.  START and END are buffer positions.
6759
6760 Optional 4th arguments DESTINATION specifies where the encoded text goes.
6761 If nil, the region between START and END is replace by the encoded text.
6762 If buffer, the encoded text is inserted in the buffer.
6763 If t, the encoded text is returned.
6764
6765 This function sets `last-coding-system-used' to the precise coding system
6766 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6767 not fully specified.)
6768 It returns the length of the encoded text.  */)
6769   (start, end, coding_system, destination)
6770      Lisp_Object start, end, coding_system, destination;
6771 {
6772   return code_convert_region (start, end, coding_system, destination, 1, 0);
6773 }
6774
6775 Lisp_Object
6776 code_convert_string (string, coding_system, dst_object,
6777                      encodep, nocopy, norecord)
6778      Lisp_Object string, coding_system, dst_object;
6779      int encodep, nocopy, norecord;
6780 {
6781   struct coding_system coding;
6782   EMACS_INT chars, bytes;
6783
6784   CHECK_STRING (string);
6785   if (NILP (coding_system))
6786     {
6787       if (! norecord)
6788         Vlast_coding_system_used = Qno_conversion;
6789       if (NILP (dst_object))
6790         return (nocopy ? Fcopy_sequence (string) : string);
6791     }
6792
6793   if (NILP (coding_system))
6794     coding_system = Qno_conversion;
6795   else
6796     CHECK_CODING_SYSTEM (coding_system);
6797   if (NILP (dst_object))
6798     dst_object = Qt;
6799   else if (! EQ (dst_object, Qt))
6800     CHECK_BUFFER (dst_object);
6801
6802   setup_coding_system (coding_system, &coding);
6803   coding.mode |= CODING_MODE_LAST_BLOCK;
6804   chars = XSTRING (string)->size;
6805   bytes = STRING_BYTES (XSTRING (string));
6806   if (encodep)
6807     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
6808   else
6809     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
6810   if (! norecord)
6811     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
6812
6813   if (coding.result != CODING_RESULT_SUCCESS)
6814     error ("Code conversion error: %d", coding.result);
6815
6816   return (BUFFERP (dst_object)
6817           ? make_number (coding.produced_char)
6818           : coding.dst_object);
6819 }
6820
6821
6822 /* Encode or decode STRING according to CODING_SYSTEM.
6823    Do not set Vlast_coding_system_used.
6824
6825    This function is called only from macros DECODE_FILE and
6826    ENCODE_FILE, thus we ignore character composition.  */
6827
6828 Lisp_Object
6829 code_convert_string_norecord (string, coding_system, encodep)
6830      Lisp_Object string, coding_system;
6831      int encodep;
6832 {
6833   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
6834 }
6835
6836
6837 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
6838        2, 4, 0,
6839        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
6840
6841 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
6842 if the decoding operation is trivial.
6843
6844 Optional fourth arg BUFFER non-nil meant that the decoded text is
6845 inserted in BUFFER instead of returned as a string.  In this case,
6846 the return value is BUFFER.
6847
6848 This function sets `last-coding-system-used' to the precise coding system
6849 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6850 not fully specified.  */)
6851   (string, coding_system, nocopy, buffer)
6852      Lisp_Object string, coding_system, nocopy, buffer;
6853 {
6854   return code_convert_string (string, coding_system, buffer,
6855                               0, ! NILP (nocopy), 0);
6856 }
6857
6858 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
6859        2, 4, 0,
6860        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
6861
6862 Optional third arg NOCOPY non-nil means it is OK to return STRING
6863 itself if the encoding operation is trivial.
6864
6865 Optional fourth arg BUFFER non-nil meant that the encoded text is
6866 inserted in BUFFER instead of returned as a string.  In this case,
6867 the return value is BUFFER.
6868
6869 This function sets `last-coding-system-used' to the precise coding system
6870 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
6871 not fully specified.)  */)
6872      (string, coding_system, nocopy, buffer)
6873      Lisp_Object string, coding_system, nocopy, buffer;
6874 {
6875   return code_convert_string (string, coding_system, buffer,
6876                               1, ! NILP (nocopy), 1);
6877 }
6878
6879 \f
6880 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
6881        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
6882 Return the corresponding character.  */)
6883      (code)
6884      Lisp_Object code;
6885 {
6886   Lisp_Object spec, attrs, val;
6887   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
6888   int c;
6889
6890   CHECK_NATNUM (code);
6891   c = XFASTINT (code);
6892   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
6893   attrs = AREF (spec, 0);
6894
6895   if (ASCII_BYTE_P (c)
6896       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6897     return code;
6898
6899   val = CODING_ATTR_CHARSET_LIST (attrs);
6900   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6901   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6902   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
6903
6904   if (c <= 0x7F)
6905     charset = charset_roman;
6906   else if (c >= 0xA0 && c < 0xDF)
6907     {
6908       charset = charset_kana;
6909       c -= 0x80;
6910     }
6911   else
6912     {
6913       int s1 = c >> 8, s2 = c & 0xFF;
6914
6915       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
6916           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
6917         error ("Invalid code: %d", code);
6918       SJIS_TO_JIS (c);
6919       charset = charset_kanji;
6920     }
6921   c = DECODE_CHAR (charset, c);
6922   if (c < 0)
6923     error ("Invalid code: %d", code);
6924   return make_number (c);
6925 }
6926
6927
6928 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
6929        doc: /* Encode a Japanese character CHAR to shift_jis encoding.
6930 Return the corresponding code in SJIS.  */)
6931      (ch)
6932     Lisp_Object ch;
6933 {
6934   Lisp_Object spec, attrs, charset_list;
6935   int c;
6936   struct charset *charset;
6937   unsigned code;
6938
6939   CHECK_CHARACTER (ch);
6940   c = XFASTINT (ch);
6941   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
6942   attrs = AREF (spec, 0);
6943
6944   if (ASCII_CHAR_P (c)
6945       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6946     return ch;
6947
6948   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
6949   charset = char_charset (c, charset_list, &code);
6950   if (code == CHARSET_INVALID_CODE (charset))
6951     error ("Can't encode by shift_jis encoding: %d", c);
6952   JIS_TO_SJIS (code);
6953
6954   return make_number (code);
6955 }
6956
6957 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
6958        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
6959 Return the corresponding character.  */)
6960      (code)
6961      Lisp_Object code;
6962 {
6963   Lisp_Object spec, attrs, val;
6964   struct charset *charset_roman, *charset_big5, *charset;
6965   int c;
6966
6967   CHECK_NATNUM (code);
6968   c = XFASTINT (code);
6969   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
6970   attrs = AREF (spec, 0);
6971
6972   if (ASCII_BYTE_P (c)
6973       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
6974     return code;
6975
6976   val = CODING_ATTR_CHARSET_LIST (attrs);
6977   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
6978   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
6979
6980   if (c <= 0x7F)
6981     charset = charset_roman;
6982   else
6983     {
6984       int b1 = c >> 8, b2 = c & 0x7F;
6985       if (b1 < 0xA1 || b1 > 0xFE
6986           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
6987         error ("Invalid code: %d", code);
6988       charset = charset_big5;
6989     }
6990   c = DECODE_CHAR (charset, (unsigned )c);
6991   if (c < 0)
6992     error ("Invalid code: %d", code);
6993   return make_number (c);
6994 }
6995
6996 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
6997        doc: /* Encode the Big5 character CHAR to BIG5 coding system.
6998 Return the corresponding character code in Big5.  */)
6999      (ch)
7000      Lisp_Object ch;
7001 {
7002   Lisp_Object spec, attrs, charset_list;
7003   struct charset *charset;
7004   int c;
7005   unsigned code;
7006
7007   CHECK_CHARACTER (ch);
7008   c = XFASTINT (ch);
7009   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
7010   attrs = AREF (spec, 0);
7011   if (ASCII_CHAR_P (c)
7012       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
7013     return ch;
7014
7015   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7016   charset = char_charset (c, charset_list, &code);
7017   if (code == CHARSET_INVALID_CODE (charset))
7018     error ("Can't encode by Big5 encoding: %d", c);
7019
7020   return make_number (code);
7021 }
7022
7023 \f
7024 DEFUN ("set-terminal-coding-system-internal",
7025        Fset_terminal_coding_system_internal,
7026        Sset_terminal_coding_system_internal, 1, 1, 0,
7027        doc: /* Internal use only.  */)
7028      (coding_system)
7029      Lisp_Object coding_system;
7030 {
7031   CHECK_SYMBOL (coding_system);
7032   setup_coding_system (Fcheck_coding_system (coding_system),
7033                         &terminal_coding);
7034
7035   /* We had better not send unsafe characters to terminal.  */
7036   terminal_coding.mode |= CODING_MODE_SAFE_ENCODING;
7037   /* Characer composition should be disabled.  */
7038   terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7039   terminal_coding.src_multibyte = 1;
7040   terminal_coding.dst_multibyte = 0;
7041   return Qnil;
7042 }
7043
7044 DEFUN ("set-safe-terminal-coding-system-internal",
7045        Fset_safe_terminal_coding_system_internal,
7046        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
7047        doc: /* Internal use only.  */)
7048      (coding_system)
7049      Lisp_Object coding_system;
7050 {
7051   CHECK_SYMBOL (coding_system);
7052   setup_coding_system (Fcheck_coding_system (coding_system),
7053                        &safe_terminal_coding);
7054   /* Characer composition should be disabled.  */
7055   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7056   safe_terminal_coding.src_multibyte = 1;
7057   safe_terminal_coding.dst_multibyte = 0;
7058   return Qnil;
7059 }
7060
7061 DEFUN ("terminal-coding-system",
7062        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
7063        doc: /* Return coding system specified for terminal output.  */)
7064      ()
7065 {
7066   return CODING_ID_NAME (terminal_coding.id);
7067 }
7068
7069 DEFUN ("set-keyboard-coding-system-internal",
7070        Fset_keyboard_coding_system_internal,
7071        Sset_keyboard_coding_system_internal, 1, 1, 0,
7072        doc: /* Internal use only.  */)
7073      (coding_system)
7074      Lisp_Object coding_system;
7075 {
7076   CHECK_SYMBOL (coding_system);
7077   setup_coding_system (Fcheck_coding_system (coding_system),
7078                        &keyboard_coding);
7079   /* Characer composition should be disabled.  */
7080   keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
7081   return Qnil;
7082 }
7083
7084 DEFUN ("keyboard-coding-system",
7085        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
7086        doc: /* Return coding system specified for decoding keyboard input.  */)
7087      ()
7088 {
7089   return CODING_ID_NAME (keyboard_coding.id);
7090 }
7091
7092 \f
7093 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
7094        Sfind_operation_coding_system,  1, MANY, 0,
7095        doc: /* Choose a coding system for an operation based on the target name.
7096 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
7097 DECODING-SYSTEM is the coding system to use for decoding
7098 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
7099 for encoding (in case OPERATION does encoding).
7100
7101 The first argument OPERATION specifies an I/O primitive:
7102   For file I/O, `insert-file-contents' or `write-region'.
7103   For process I/O, `call-process', `call-process-region', or `start-process'.
7104   For network I/O, `open-network-stream'.
7105
7106 The remaining arguments should be the same arguments that were passed
7107 to the primitive.  Depending on which primitive, one of those arguments
7108 is selected as the TARGET.  For example, if OPERATION does file I/O,
7109 whichever argument specifies the file name is TARGET.
7110
7111 TARGET has a meaning which depends on OPERATION:
7112   For file I/O, TARGET is a file name.
7113   For process I/O, TARGET is a process name.
7114   For network I/O, TARGET is a service name or a port number
7115
7116 This function looks up what specified for TARGET in,
7117 `file-coding-system-alist', `process-coding-system-alist',
7118 or `network-coding-system-alist' depending on OPERATION.
7119 They may specify a coding system, a cons of coding systems,
7120 or a function symbol to call.
7121 In the last case, we call the function with one argument,
7122 which is a list of all the arguments given to this function.
7123
7124 usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
7125      (nargs, args)
7126      int nargs;
7127      Lisp_Object *args;
7128 {
7129   Lisp_Object operation, target_idx, target, val;
7130   register Lisp_Object chain;
7131
7132   if (nargs < 2)
7133     error ("Too few arguments");
7134   operation = args[0];
7135   if (!SYMBOLP (operation)
7136       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
7137     error ("Invalid first arguement");
7138   if (nargs < 1 + XINT (target_idx))
7139     error ("Too few arguments for operation: %s",
7140            XSYMBOL (operation)->name->data);
7141   target = args[XINT (target_idx) + 1];
7142   if (!(STRINGP (target)
7143         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
7144     error ("Invalid %dth argument", XINT (target_idx) + 1);
7145
7146   chain = ((EQ (operation, Qinsert_file_contents)
7147             || EQ (operation, Qwrite_region))
7148            ? Vfile_coding_system_alist
7149            : (EQ (operation, Qopen_network_stream)
7150               ? Vnetwork_coding_system_alist
7151               : Vprocess_coding_system_alist));
7152   if (NILP (chain))
7153     return Qnil;
7154
7155   for (; CONSP (chain); chain = XCDR (chain))
7156     {
7157       Lisp_Object elt;
7158
7159       elt = XCAR (chain);
7160       if (CONSP (elt)
7161           && ((STRINGP (target)
7162                && STRINGP (XCAR (elt))
7163                && fast_string_match (XCAR (elt), target) >= 0)
7164               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
7165         {
7166           val = XCDR (elt);
7167           /* Here, if VAL is both a valid coding system and a valid
7168              function symbol, we return VAL as a coding system.  */
7169           if (CONSP (val))
7170             return val;
7171           if (! SYMBOLP (val))
7172             return Qnil;
7173           if (! NILP (Fcoding_system_p (val)))
7174             return Fcons (val, val);
7175           if (! NILP (Ffboundp (val)))
7176             {
7177               val = call1 (val, Flist (nargs, args));
7178               if (CONSP (val))
7179                 return val;
7180               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
7181                 return Fcons (val, val);
7182             }
7183           return Qnil;
7184         }
7185     }
7186   return Qnil;
7187 }
7188
7189 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
7190        Sset_coding_system_priority, 0, MANY, 0,
7191        doc: /* Assign higher priority to the coding systems given as arguments.
7192 usage: (set-coding-system-priority CODING-SYSTEM ...)  */)
7193      (nargs, args)
7194      int nargs;
7195      Lisp_Object *args;
7196 {
7197   int i, j;
7198   int changed[coding_category_max];
7199   enum coding_category priorities[coding_category_max];
7200
7201   bzero (changed, sizeof changed);
7202
7203   for (i = j = 0; i < nargs; i++)
7204     {
7205       enum coding_category category;
7206       Lisp_Object spec, attrs;
7207
7208       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
7209       attrs = AREF (spec, 0);
7210       category = XINT (CODING_ATTR_CATEGORY (attrs));
7211       if (changed[category])
7212         /* Ignore this coding system because a coding system of the
7213            same category already had a higher priority.  */
7214         continue;
7215       changed[category] = 1;
7216       priorities[j++] = category;
7217       if (coding_categories[category].id >= 0
7218           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
7219         setup_coding_system (args[i], &coding_categories[category]);
7220     }
7221
7222   /* Now we have decided top J priorities.  Reflect the order of the
7223      original priorities to the remaining priorities.  */
7224
7225   for (i = j, j = 0; i < coding_category_max; i++, j++)
7226     {
7227       while (j < coding_category_max
7228              && changed[coding_priorities[j]])
7229         j++;
7230       if (j == coding_category_max)
7231         abort ();
7232       priorities[i] = coding_priorities[j];
7233     }
7234
7235   bcopy (priorities, coding_priorities, sizeof priorities);
7236   return Qnil;
7237 }
7238
7239 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
7240        Scoding_system_priority_list, 0, 1, 0,
7241        doc: /* Return a list of coding systems ordered by their priorities.
7242 HIGHESTP non-nil means just return the highest priority one.  */)
7243      (highestp)
7244      Lisp_Object highestp;
7245 {
7246   int i;
7247   Lisp_Object val;
7248
7249   for (i = 0, val = Qnil; i < coding_category_max; i++)
7250     {
7251       enum coding_category category = coding_priorities[i];
7252       int id = coding_categories[category].id;
7253       Lisp_Object attrs;
7254
7255       if (id < 0)
7256         continue;
7257       attrs = CODING_ID_ATTRS (id);
7258       if (! NILP (highestp))
7259         return CODING_ATTR_BASE_NAME (attrs);
7260       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
7261     }
7262   return Fnreverse (val);
7263 }
7264
7265 static char *suffixes[] = { "-unix", "-dos", "-mac" };
7266
7267 static Lisp_Object
7268 make_subsidiaries (base)
7269      Lisp_Object base;
7270 {
7271   Lisp_Object subsidiaries;
7272   int base_name_len = STRING_BYTES (XSYMBOL (base)->name);
7273   char *buf = (char *) alloca (base_name_len + 6);
7274   int i;
7275
7276   bcopy (XSYMBOL (base)->name->data, buf, base_name_len);
7277   subsidiaries = Fmake_vector (make_number (3), Qnil);
7278   for (i = 0; i < 3; i++)
7279     {
7280       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
7281       ASET (subsidiaries, i, intern (buf));
7282     }
7283   return subsidiaries;
7284 }
7285
7286
7287 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
7288        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
7289        doc: /* For internal use only.
7290 usage: (define-coding-system-internal ...)  */)
7291      (nargs, args)
7292      int nargs;
7293      Lisp_Object *args;
7294 {
7295   Lisp_Object name;
7296   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
7297   Lisp_Object attrs;            /* Vector of attributes.  */
7298   Lisp_Object eol_type;
7299   Lisp_Object aliases;
7300   Lisp_Object coding_type, charset_list, safe_charsets;
7301   enum coding_category category;
7302   Lisp_Object tail, val;
7303   int max_charset_id = 0;
7304   int i;
7305
7306   if (nargs < coding_arg_max)
7307     goto short_args;
7308
7309   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
7310
7311   name = args[coding_arg_name];
7312   CHECK_SYMBOL (name);
7313   CODING_ATTR_BASE_NAME (attrs) = name;
7314
7315   val = args[coding_arg_mnemonic];
7316   if (! STRINGP (val))
7317     CHECK_CHARACTER (val);
7318   CODING_ATTR_MNEMONIC (attrs) = val;
7319
7320   coding_type = args[coding_arg_coding_type];
7321   CHECK_SYMBOL (coding_type);
7322   CODING_ATTR_TYPE (attrs) = coding_type;
7323
7324   charset_list = args[coding_arg_charset_list];
7325   if (SYMBOLP (charset_list))
7326     {
7327       if (EQ (charset_list, Qiso_2022))
7328         {
7329           if (! EQ (coding_type, Qiso_2022))
7330             error ("Invalid charset-list");
7331           charset_list = Viso_2022_charset_list;
7332         }
7333       else if (EQ (charset_list, Qemacs_mule))
7334         {
7335           if (! EQ (coding_type, Qemacs_mule))
7336             error ("Invalid charset-list");
7337           charset_list = Vemacs_mule_charset_list;
7338         }
7339       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7340         if (max_charset_id < XFASTINT (XCAR (tail)))
7341           max_charset_id = XFASTINT (XCAR (tail));
7342     }
7343   else
7344     {
7345       charset_list = Fcopy_sequence (charset_list);
7346       for (tail = charset_list; !NILP (tail); tail = Fcdr (tail))
7347         {
7348           struct charset *charset;
7349
7350           val = Fcar (tail);
7351           CHECK_CHARSET_GET_CHARSET (val, charset);
7352           if (EQ (coding_type, Qiso_2022)
7353               ? CHARSET_ISO_FINAL (charset) < 0
7354               : EQ (coding_type, Qemacs_mule)
7355               ? CHARSET_EMACS_MULE_ID (charset) < 0
7356               : 0)
7357             error ("Can't handle charset `%s'",
7358                    XSYMBOL (CHARSET_NAME (charset))->name->data);
7359
7360           XCAR (tail) = make_number (charset->id);
7361           if (max_charset_id < charset->id)
7362             max_charset_id = charset->id;
7363         }
7364     }
7365   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
7366
7367   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
7368                                 make_number (255));
7369   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7370     XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
7371   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
7372
7373   val = args[coding_arg_decode_translation_table];
7374   if (! NILP (val))
7375     CHECK_CHAR_TABLE (val);
7376   CODING_ATTR_DECODE_TBL (attrs) = val;
7377
7378   val = args[coding_arg_encode_translation_table];
7379   if (! NILP (val))
7380     CHECK_CHAR_TABLE (val);
7381   CODING_ATTR_ENCODE_TBL (attrs) = val;
7382
7383   val = args[coding_arg_post_read_conversion];
7384   CHECK_SYMBOL (val);
7385   CODING_ATTR_POST_READ (attrs) = val;
7386
7387   val = args[coding_arg_pre_write_conversion];
7388   CHECK_SYMBOL (val);
7389   CODING_ATTR_PRE_WRITE (attrs) = val;
7390
7391   val = args[coding_arg_default_char];
7392   if (NILP (val))
7393     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
7394   else
7395     {
7396       CHECK_CHARACTER (val);
7397       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
7398     }
7399
7400   val = args[coding_arg_plist];
7401   CHECK_LIST (val);
7402   CODING_ATTR_PLIST (attrs) = val;
7403
7404   if (EQ (coding_type, Qcharset))
7405     {
7406       /* Generate a lisp vector of 256 elements.  Each element is nil,
7407          integer, or a list of charset IDs.
7408
7409          If Nth element is nil, the byte code N is invalid in this
7410          coding system.
7411
7412          If Nth element is a number NUM, N is the first byte of a
7413          charset whose ID is NUM.
7414
7415          If Nth element is a list of charset IDs, N is the first byte
7416          of one of them.  The list is sorted by dimensions of the
7417          charsets.  A charset of smaller dimension comes firtst.
7418       */
7419       val = Fmake_vector (make_number (256), Qnil);
7420
7421       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7422         {
7423           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
7424           int dim = CHARSET_DIMENSION (charset);
7425           int idx = (dim - 1) * 4;
7426
7427           for (i = charset->code_space[idx];
7428                i <= charset->code_space[idx + 1]; i++)
7429             {
7430               Lisp_Object tmp, tmp2;
7431               int dim2;
7432
7433               tmp = AREF (val, i);
7434               if (NILP (tmp))
7435                 tmp = XCAR (tail);
7436               else if (NUMBERP (tmp))
7437                 {
7438                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
7439                   if (dim < dim2)
7440                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
7441                   else
7442                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
7443                 }
7444               else
7445                 {
7446                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
7447                     {
7448                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
7449                       if (dim < dim2)
7450                         break;
7451                     }
7452                   if (NILP (tmp2))
7453                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
7454                   else
7455                     {
7456                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
7457                       XSETCAR (tmp2, XCAR (tail));
7458                     }
7459                 }
7460               ASET (val, i, tmp);
7461             }
7462         }
7463       ASET (attrs, coding_attr_charset_valids, val);
7464       category = coding_category_charset;
7465     }
7466   else if (EQ (coding_type, Qccl))
7467     {
7468       Lisp_Object valids;
7469
7470       if (nargs < coding_arg_ccl_max)
7471         goto short_args;
7472
7473       val = args[coding_arg_ccl_decoder];
7474       CHECK_CCL_PROGRAM (val);
7475       if (VECTORP (val))
7476         val = Fcopy_sequence (val);
7477       ASET (attrs, coding_attr_ccl_decoder, val);
7478
7479       val = args[coding_arg_ccl_encoder];
7480       CHECK_CCL_PROGRAM (val);
7481       if (VECTORP (val))
7482         val = Fcopy_sequence (val);
7483       ASET (attrs, coding_attr_ccl_encoder, val);
7484
7485       val = args[coding_arg_ccl_valids];
7486       valids = Fmake_string (make_number (256), make_number (0));
7487       for (tail = val; !NILP (tail); tail = Fcdr (tail))
7488         {
7489           val = Fcar (tail);
7490           if (INTEGERP (val))
7491             ASET (valids, XINT (val), make_number (1));
7492           else
7493             {
7494               int from, to;
7495
7496               CHECK_CONS (val);
7497               CHECK_NUMBER (XCAR (val));
7498               CHECK_NUMBER (XCDR (val));
7499               from = XINT (XCAR (val));
7500               to = XINT (XCDR (val));
7501               for (i = from; i <= to; i++)
7502                 ASET (valids, i, make_number (1));
7503             }
7504         }
7505       ASET (attrs, coding_attr_ccl_valids, valids);
7506
7507       category = coding_category_ccl;
7508     }
7509   else if (EQ (coding_type, Qutf_16))
7510     {
7511       Lisp_Object bom, endian;
7512
7513       if (nargs < coding_arg_utf16_max)
7514         goto short_args;
7515
7516       bom = args[coding_arg_utf16_bom];
7517       if (! NILP (bom) && ! EQ (bom, Qt))
7518         {
7519           CHECK_CONS (bom);
7520           CHECK_CODING_SYSTEM (XCAR (bom));
7521           CHECK_CODING_SYSTEM (XCDR (bom));
7522         }
7523       ASET (attrs, coding_attr_utf_16_bom, bom);
7524
7525       endian = args[coding_arg_utf16_endian];
7526       ASET (attrs, coding_attr_utf_16_endian, endian);
7527
7528       category = (CONSP (bom)
7529                   ? coding_category_utf_16_auto
7530                   : NILP (bom)
7531                   ? (NILP (endian)
7532                      ? coding_category_utf_16_be_nosig
7533                      : coding_category_utf_16_le_nosig)
7534                   : (NILP (endian)
7535                      ? coding_category_utf_16_be
7536                      : coding_category_utf_16_le));
7537     }
7538   else if (EQ (coding_type, Qiso_2022))
7539     {
7540       Lisp_Object initial, reg_usage, request, flags;
7541       int i, id;
7542
7543       if (nargs < coding_arg_iso2022_max)
7544         goto short_args;
7545
7546       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
7547       CHECK_VECTOR (initial);
7548       for (i = 0; i < 4; i++)
7549         {
7550           val = Faref (initial, make_number (i));
7551           if (! NILP (val))
7552             {
7553               CHECK_CHARSET_GET_ID (val, id);
7554               ASET (initial, i, make_number (id));
7555             }
7556           else
7557             ASET (initial, i, make_number (-1));
7558         }
7559
7560       reg_usage = args[coding_arg_iso2022_reg_usage];
7561       CHECK_CONS (reg_usage);
7562       CHECK_NATNUM (XCAR (reg_usage));
7563       CHECK_NATNUM (XCDR (reg_usage));
7564
7565       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
7566       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
7567         {
7568           int id;
7569
7570           val = Fcar (tail);
7571           CHECK_CONS (val);
7572           CHECK_CHARSET_GET_ID (XCAR (val), id);
7573           CHECK_NATNUM (XCDR (val));
7574           if (XINT (XCDR (val)) >= 4)
7575             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
7576           XCAR (val) = make_number (id);
7577         }
7578
7579       flags = args[coding_arg_iso2022_flags];
7580       CHECK_NATNUM (flags);
7581       i = XINT (flags);
7582       if (EQ (args[coding_arg_charset_list], Qiso_2022))
7583         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
7584
7585       ASET (attrs, coding_attr_iso_initial, initial);
7586       ASET (attrs, coding_attr_iso_usage, reg_usage);
7587       ASET (attrs, coding_attr_iso_request, request);
7588       ASET (attrs, coding_attr_iso_flags, flags);
7589       setup_iso_safe_charsets (attrs);
7590
7591       if (i & CODING_ISO_FLAG_SEVEN_BITS)
7592         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
7593                           | CODING_ISO_FLAG_SINGLE_SHIFT))
7594                     ? coding_category_iso_7_else
7595                     : EQ (args[coding_arg_charset_list], Qiso_2022)
7596                     ? coding_category_iso_7
7597                     : coding_category_iso_7_tight);
7598       else
7599         {
7600           int id = XINT (AREF (initial, 1));
7601
7602           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
7603                        || EQ (args[coding_arg_charset_list], Qiso_2022)
7604                        || id < 0)
7605                       ? coding_category_iso_8_else
7606                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
7607                       ? coding_category_iso_8_1
7608                       : coding_category_iso_8_2);
7609         }
7610     }
7611   else if (EQ (coding_type, Qemacs_mule))
7612     {
7613       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
7614         ASET (attrs, coding_attr_emacs_mule_full, Qt);
7615
7616       category = coding_category_emacs_mule;
7617     }
7618   else if (EQ (coding_type, Qshift_jis))
7619     {
7620
7621       struct charset *charset;
7622
7623       if (XINT (Flength (charset_list)) != 3)
7624         error ("There should be just three charsets");
7625
7626       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7627       if (CHARSET_DIMENSION (charset) != 1)
7628         error ("Dimension of charset %s is not one",
7629                XSYMBOL (CHARSET_NAME (charset))->name->data);
7630
7631       charset_list = XCDR (charset_list);
7632       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7633       if (CHARSET_DIMENSION (charset) != 1)
7634         error ("Dimension of charset %s is not one",
7635                XSYMBOL (CHARSET_NAME (charset))->name->data);
7636
7637       charset_list = XCDR (charset_list);
7638       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7639       if (CHARSET_DIMENSION (charset) != 2)
7640         error ("Dimension of charset %s is not two",
7641                XSYMBOL (CHARSET_NAME (charset))->name->data);
7642
7643       category = coding_category_sjis;
7644       Vsjis_coding_system = name;
7645     }
7646   else if (EQ (coding_type, Qbig5))
7647     {
7648       struct charset *charset;
7649
7650       if (XINT (Flength (charset_list)) != 2)
7651         error ("There should be just two charsets");
7652
7653       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7654       if (CHARSET_DIMENSION (charset) != 1)
7655         error ("Dimension of charset %s is not one",
7656                XSYMBOL (CHARSET_NAME (charset))->name->data);
7657
7658       charset_list = XCDR (charset_list);
7659       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7660       if (CHARSET_DIMENSION (charset) != 2)
7661         error ("Dimension of charset %s is not two",
7662                XSYMBOL (CHARSET_NAME (charset))->name->data);
7663
7664       category = coding_category_big5;
7665       Vbig5_coding_system = name;
7666     }
7667   else if (EQ (coding_type, Qraw_text))
7668     category = coding_category_raw_text;
7669   else if (EQ (coding_type, Qutf_8))
7670     category = coding_category_utf_8;
7671   else if (EQ (coding_type, Qundecided))
7672     category = coding_category_undecided;
7673   else
7674     error ("Invalid coding system type: %s",
7675            XSYMBOL (coding_type)->name->data);
7676
7677   CODING_ATTR_CATEGORY (attrs) = make_number (category);
7678
7679   eol_type = args[coding_arg_eol_type];
7680   if (! NILP (eol_type)
7681       && ! EQ (eol_type, Qunix)
7682       && ! EQ (eol_type, Qdos)
7683       && ! EQ (eol_type, Qmac))
7684     error ("Invalid eol-type");
7685
7686   aliases = Fcons (name, Qnil);
7687
7688   if (NILP (eol_type))
7689     {
7690       eol_type = make_subsidiaries (name);
7691       for (i = 0; i < 3; i++)
7692         {
7693           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
7694
7695           this_name = AREF (eol_type, i);
7696           this_aliases = Fcons (this_name, Qnil);
7697           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
7698           this_spec = Fmake_vector (make_number (3), attrs);
7699           ASET (this_spec, 1, this_aliases);
7700           ASET (this_spec, 2, this_eol_type);
7701           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
7702           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
7703           Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
7704                                         Vcoding_system_alist);
7705         }
7706     }
7707
7708   spec_vec = Fmake_vector (make_number (3), attrs);
7709   ASET (spec_vec, 1, aliases);
7710   ASET (spec_vec, 2, eol_type);
7711
7712   Fputhash (name, spec_vec, Vcoding_system_hash_table);
7713   Vcoding_system_list = Fcons (name, Vcoding_system_list);
7714   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
7715                                 Vcoding_system_alist);
7716
7717   {
7718     int id = coding_categories[category].id;
7719
7720     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
7721       setup_coding_system (name, &coding_categories[category]);
7722   }
7723
7724   return Qnil;
7725
7726  short_args:
7727   return Fsignal (Qwrong_number_of_arguments,
7728                   Fcons (intern ("define-coding-system-internal"),
7729                          make_number (nargs)));
7730 }
7731
7732 /* Fixme: should this record the alias relationships for
7733    diagnostics?  */
7734 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
7735        Sdefine_coding_system_alias, 2, 2, 0,
7736        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
7737      (alias, coding_system)
7738      Lisp_Object alias, coding_system;
7739 {
7740   Lisp_Object spec, aliases, eol_type;
7741
7742   CHECK_SYMBOL (alias);
7743   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7744   aliases = AREF (spec, 1);
7745   while (!NILP (XCDR (aliases)))
7746     aliases = XCDR (aliases);
7747   XCDR (aliases) = Fcons (alias, Qnil);
7748
7749   eol_type = AREF (spec, 2);
7750   if (VECTORP (eol_type))
7751     {
7752       Lisp_Object subsidiaries;
7753       int i;
7754
7755       subsidiaries = make_subsidiaries (alias);
7756       for (i = 0; i < 3; i++)
7757         Fdefine_coding_system_alias (AREF (subsidiaries, i),
7758                                      AREF (eol_type, i));
7759
7760       ASET (spec, 2, subsidiaries);
7761     }
7762
7763   Fputhash (alias, spec, Vcoding_system_hash_table);
7764   Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
7765                                 Vcoding_system_alist);
7766
7767   return Qnil;
7768 }
7769
7770 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
7771        1, 1, 0,
7772        doc: /* Return the base of CODING-SYSTEM.
7773 Any alias or subsidiary coding system is not a base coding system.  */)
7774   (coding_system)
7775      Lisp_Object coding_system;
7776 {
7777   Lisp_Object spec, attrs;
7778
7779   if (NILP (coding_system))
7780     return (Qno_conversion);
7781   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7782   attrs = AREF (spec, 0);
7783   return CODING_ATTR_BASE_NAME (attrs);
7784 }
7785
7786 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
7787        1, 1, 0,
7788        doc: "Return the property list of CODING-SYSTEM.")
7789      (coding_system)
7790      Lisp_Object coding_system;
7791 {
7792   Lisp_Object spec, attrs;
7793
7794   if (NILP (coding_system))
7795     coding_system = Qno_conversion;
7796   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7797   attrs = AREF (spec, 0);
7798   return CODING_ATTR_PLIST (attrs);
7799 }
7800
7801
7802 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
7803        1, 1, 0,
7804        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
7805      (coding_system)
7806      Lisp_Object coding_system;
7807 {
7808   Lisp_Object spec;
7809
7810   if (NILP (coding_system))
7811     coding_system = Qno_conversion;
7812   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
7813   return AREF (spec, 1);
7814 }
7815
7816 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
7817        Scoding_system_eol_type, 1, 1, 0,
7818        doc: /* Return eol-type of CODING-SYSTEM.
7819 An eol-type is integer 0, 1, 2, or a vector of coding systems.
7820
7821 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
7822 and CR respectively.
7823
7824 A vector value indicates that a format of end-of-line should be
7825 detected automatically.  Nth element of the vector is the subsidiary
7826 coding system whose eol-type is N.  */)
7827      (coding_system)
7828      Lisp_Object coding_system;
7829 {
7830   Lisp_Object spec, eol_type;
7831   int n;
7832
7833   if (NILP (coding_system))
7834     coding_system = Qno_conversion;
7835   if (! CODING_SYSTEM_P (coding_system))
7836     return Qnil;
7837   spec = CODING_SYSTEM_SPEC (coding_system);
7838   eol_type = AREF (spec, 2);
7839   if (VECTORP (eol_type))
7840     return Fcopy_sequence (eol_type);
7841   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
7842   return make_number (n);
7843 }
7844
7845 #endif /* emacs */
7846
7847 \f
7848 /*** 9. Post-amble ***/
7849
7850 void
7851 init_coding_once ()
7852 {
7853   int i;
7854
7855   for (i = 0; i < coding_category_max; i++)
7856     {
7857       coding_categories[i].id = -1;
7858       coding_priorities[i] = i;
7859     }
7860
7861   /* ISO2022 specific initialize routine.  */
7862   for (i = 0; i < 0x20; i++)
7863     iso_code_class[i] = ISO_control_0;
7864   for (i = 0x21; i < 0x7F; i++)
7865     iso_code_class[i] = ISO_graphic_plane_0;
7866   for (i = 0x80; i < 0xA0; i++)
7867     iso_code_class[i] = ISO_control_1;
7868   for (i = 0xA1; i < 0xFF; i++)
7869     iso_code_class[i] = ISO_graphic_plane_1;
7870   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
7871   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
7872   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
7873   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
7874   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
7875   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
7876   iso_code_class[ISO_CODE_ESC] = ISO_escape;
7877   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
7878   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
7879   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
7880
7881   inhibit_pre_post_conversion = 0;
7882
7883   for (i = 0; i < 256; i++)
7884     {
7885       emacs_mule_bytes[i] = 1;
7886     }
7887   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
7888   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
7889   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
7890   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
7891 }
7892
7893 #ifdef emacs
7894
7895 void
7896 syms_of_coding ()
7897 {
7898   staticpro (&Vcoding_system_hash_table);
7899   Vcoding_system_hash_table = Fmakehash (Qeq);
7900
7901   staticpro (&Vsjis_coding_system);
7902   Vsjis_coding_system = Qnil;
7903
7904   staticpro (&Vbig5_coding_system);
7905   Vbig5_coding_system = Qnil;
7906
7907   staticpro (&Vcode_conversion_work_buf_list);
7908   Vcode_conversion_work_buf_list = Qnil;
7909
7910   staticpro (&Vcode_conversion_reused_work_buf);
7911   Vcode_conversion_reused_work_buf = Qnil;
7912
7913   DEFSYM (Qcharset, "charset");
7914   DEFSYM (Qtarget_idx, "target-idx");
7915   DEFSYM (Qcoding_system_history, "coding-system-history");
7916   Fset (Qcoding_system_history, Qnil);
7917
7918   /* Target FILENAME is the first argument.  */
7919   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
7920   /* Target FILENAME is the third argument.  */
7921   Fput (Qwrite_region, Qtarget_idx, make_number (2));
7922
7923   DEFSYM (Qcall_process, "call-process");
7924   /* Target PROGRAM is the first argument.  */
7925   Fput (Qcall_process, Qtarget_idx, make_number (0));
7926
7927   DEFSYM (Qcall_process_region, "call-process-region");
7928   /* Target PROGRAM is the third argument.  */
7929   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
7930
7931   DEFSYM (Qstart_process, "start-process");
7932   /* Target PROGRAM is the third argument.  */
7933   Fput (Qstart_process, Qtarget_idx, make_number (2));
7934
7935   DEFSYM (Qopen_network_stream, "open-network-stream");
7936   /* Target SERVICE is the fourth argument.  */
7937   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
7938
7939   DEFSYM (Qcoding_system, "coding-system");
7940   DEFSYM (Qcoding_aliases, "coding-aliases");
7941
7942   DEFSYM (Qeol_type, "eol-type");
7943   DEFSYM (Qunix, "unix");
7944   DEFSYM (Qdos, "dos");
7945
7946   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
7947   DEFSYM (Qpost_read_conversion, "post-read-conversion");
7948   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
7949   DEFSYM (Qdefault_char, "default-char");
7950   DEFSYM (Qundecided, "undecided");
7951   DEFSYM (Qno_conversion, "no-conversion");
7952   DEFSYM (Qraw_text, "raw-text");
7953
7954   DEFSYM (Qiso_2022, "iso-2022");
7955
7956   DEFSYM (Qutf_8, "utf-8");
7957
7958   DEFSYM (Qutf_16, "utf-16");
7959   DEFSYM (Qutf_16_be, "utf-16-be");
7960   DEFSYM (Qutf_16_be_nosig, "utf-16-be-nosig");
7961   DEFSYM (Qutf_16_le, "utf-16-l3");
7962   DEFSYM (Qutf_16_le_nosig, "utf-16-le-nosig");
7963   DEFSYM (Qsignature, "signature");
7964   DEFSYM (Qendian, "endian");
7965   DEFSYM (Qbig, "big");
7966   DEFSYM (Qlittle, "little");
7967
7968   DEFSYM (Qshift_jis, "shift-jis");
7969   DEFSYM (Qbig5, "big5");
7970
7971   DEFSYM (Qcoding_system_p, "coding-system-p");
7972
7973   DEFSYM (Qcoding_system_error, "coding-system-error");
7974   Fput (Qcoding_system_error, Qerror_conditions,
7975         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
7976   Fput (Qcoding_system_error, Qerror_message,
7977         build_string ("Invalid coding system"));
7978
7979   /* Intern this now in case it isn't already done.
7980      Setting this variable twice is harmless.
7981      But don't staticpro it here--that is done in alloc.c.  */
7982   Qchar_table_extra_slots = intern ("char-table-extra-slots");
7983
7984   DEFSYM (Qtranslation_table, "translation-table");
7985   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1));
7986   DEFSYM (Qtranslation_table_id, "translation-table-id");
7987   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
7988   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
7989
7990   DEFSYM (Qvalid_codes, "valid-codes");
7991
7992   DEFSYM (Qemacs_mule, "emacs-mule");
7993
7994   Vcoding_category_table
7995     = Fmake_vector (make_number (coding_category_max), Qnil);
7996   staticpro (&Vcoding_category_table);
7997   /* Followings are target of code detection.  */
7998   ASET (Vcoding_category_table, coding_category_iso_7,
7999         intern ("coding-category-iso-7"));
8000   ASET (Vcoding_category_table, coding_category_iso_7_tight,
8001         intern ("coding-category-iso-7-tight"));
8002   ASET (Vcoding_category_table, coding_category_iso_8_1,
8003         intern ("coding-category-iso-8-1"));
8004   ASET (Vcoding_category_table, coding_category_iso_8_2,
8005         intern ("coding-category-iso-8-2"));
8006   ASET (Vcoding_category_table, coding_category_iso_7_else,
8007         intern ("coding-category-iso-7-else"));
8008   ASET (Vcoding_category_table, coding_category_iso_8_else,
8009         intern ("coding-category-iso-8-else"));
8010   ASET (Vcoding_category_table, coding_category_utf_8,
8011         intern ("coding-category-utf-8"));
8012   ASET (Vcoding_category_table, coding_category_utf_16_be,
8013         intern ("coding-category-utf-16-be"));
8014   ASET (Vcoding_category_table, coding_category_utf_16_le,
8015         intern ("coding-category-utf-16-le"));
8016   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
8017         intern ("coding-category-utf-16-be-nosig"));
8018   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
8019         intern ("coding-category-utf-16-le-nosig"));
8020   ASET (Vcoding_category_table, coding_category_charset,
8021         intern ("coding-category-charset"));
8022   ASET (Vcoding_category_table, coding_category_sjis,
8023         intern ("coding-category-sjis"));
8024   ASET (Vcoding_category_table, coding_category_big5,
8025         intern ("coding-category-big5"));
8026   ASET (Vcoding_category_table, coding_category_ccl,
8027         intern ("coding-category-ccl"));
8028   ASET (Vcoding_category_table, coding_category_emacs_mule,
8029         intern ("coding-category-emacs-mule"));
8030   /* Followings are NOT target of code detection.  */
8031   ASET (Vcoding_category_table, coding_category_raw_text,
8032         intern ("coding-category-raw-text"));
8033   ASET (Vcoding_category_table, coding_category_undecided,
8034         intern ("coding-category-undecided"));
8035
8036   defsubr (&Scoding_system_p);
8037   defsubr (&Sread_coding_system);
8038   defsubr (&Sread_non_nil_coding_system);
8039   defsubr (&Scheck_coding_system);
8040   defsubr (&Sdetect_coding_region);
8041   defsubr (&Sdetect_coding_string);
8042   defsubr (&Sfind_coding_systems_region_internal);
8043   defsubr (&Scheck_coding_systems_region);
8044   defsubr (&Sdecode_coding_region);
8045   defsubr (&Sencode_coding_region);
8046   defsubr (&Sdecode_coding_string);
8047   defsubr (&Sencode_coding_string);
8048   defsubr (&Sdecode_sjis_char);
8049   defsubr (&Sencode_sjis_char);
8050   defsubr (&Sdecode_big5_char);
8051   defsubr (&Sencode_big5_char);
8052   defsubr (&Sset_terminal_coding_system_internal);
8053   defsubr (&Sset_safe_terminal_coding_system_internal);
8054   defsubr (&Sterminal_coding_system);
8055   defsubr (&Sset_keyboard_coding_system_internal);
8056   defsubr (&Skeyboard_coding_system);
8057   defsubr (&Sfind_operation_coding_system);
8058   defsubr (&Sset_coding_system_priority);
8059   defsubr (&Sdefine_coding_system_internal);
8060   defsubr (&Sdefine_coding_system_alias);
8061   defsubr (&Scoding_system_base);
8062   defsubr (&Scoding_system_plist);
8063   defsubr (&Scoding_system_aliases);
8064   defsubr (&Scoding_system_eol_type);
8065   defsubr (&Scoding_system_priority_list);
8066
8067   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
8068                doc: /* List of coding systems.
8069
8070 Do not alter the value of this variable manually.  This variable should be
8071 updated by the functions `define-coding-system' and
8072 `define-coding-system-alias'.  */);
8073   Vcoding_system_list = Qnil;
8074
8075   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
8076                doc: /* Alist of coding system names.
8077 Each element is one element list of coding system name.
8078 This variable is given to `completing-read' as TABLE argument.
8079
8080 Do not alter the value of this variable manually.  This variable should be
8081 updated by the functions `make-coding-system' and
8082 `define-coding-system-alias'.  */);
8083   Vcoding_system_alist = Qnil;
8084
8085   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
8086                doc: /* List of coding-categories (symbols) ordered by priority.
8087
8088 On detecting a coding system, Emacs tries code detection algorithms
8089 associated with each coding-category one by one in this order.  When
8090 one algorithm agrees with a byte sequence of source text, the coding
8091 system bound to the corresponding coding-category is selected.  */);
8092   {
8093     int i;
8094
8095     Vcoding_category_list = Qnil;
8096     for (i = coding_category_max - 1; i >= 0; i--)
8097       Vcoding_category_list
8098         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
8099                  Vcoding_category_list);
8100   }
8101
8102   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
8103                doc: /* Specify the coding system for read operations.
8104 It is useful to bind this variable with `let', but do not set it globally.
8105 If the value is a coding system, it is used for decoding on read operation.
8106 If not, an appropriate element is used from one of the coding system alists:
8107 There are three such tables, `file-coding-system-alist',
8108 `process-coding-system-alist', and `network-coding-system-alist'.  */);
8109   Vcoding_system_for_read = Qnil;
8110
8111   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
8112                doc: /* Specify the coding system for write operations.
8113 Programs bind this variable with `let', but you should not set it globally.
8114 If the value is a coding system, it is used for encoding of output,
8115 when writing it to a file and when sending it to a file or subprocess.
8116
8117 If this does not specify a coding system, an appropriate element
8118 is used from one of the coding system alists:
8119 There are three such tables, `file-coding-system-alist',
8120 `process-coding-system-alist', and `network-coding-system-alist'.
8121 For output to files, if the above procedure does not specify a coding system,
8122 the value of `buffer-file-coding-system' is used.  */);
8123   Vcoding_system_for_write = Qnil;
8124
8125   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
8126                doc: /*
8127 Coding system used in the latest file or process I/O.  */);
8128   Vlast_coding_system_used = Qnil;
8129
8130   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
8131                doc: /*
8132 *Non-nil means always inhibit code conversion of end-of-line format.
8133 See info node `Coding Systems' and info node `Text and Binary' concerning
8134 such conversion.  */);
8135   inhibit_eol_conversion = 0;
8136
8137   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
8138                doc: /*
8139 Non-nil means process buffer inherits coding system of process output.
8140 Bind it to t if the process output is to be treated as if it were a file
8141 read from some filesystem.  */);
8142   inherit_process_coding_system = 0;
8143
8144   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
8145                doc: /*
8146 Alist to decide a coding system to use for a file I/O operation.
8147 The format is ((PATTERN . VAL) ...),
8148 where PATTERN is a regular expression matching a file name,
8149 VAL is a coding system, a cons of coding systems, or a function symbol.
8150 If VAL is a coding system, it is used for both decoding and encoding
8151 the file contents.
8152 If VAL is a cons of coding systems, the car part is used for decoding,
8153 and the cdr part is used for encoding.
8154 If VAL is a function symbol, the function must return a coding system
8155 or a cons of coding systems which are used as above.  The function gets
8156 the arguments with which `find-operation-coding-systems' was called.
8157
8158 See also the function `find-operation-coding-system'
8159 and the variable `auto-coding-alist'.  */);
8160   Vfile_coding_system_alist = Qnil;
8161
8162   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
8163                doc: /*
8164 Alist to decide a coding system to use for a process I/O operation.
8165 The format is ((PATTERN . VAL) ...),
8166 where PATTERN is a regular expression matching a program name,
8167 VAL is a coding system, a cons of coding systems, or a function symbol.
8168 If VAL is a coding system, it is used for both decoding what received
8169 from the program and encoding what sent to the program.
8170 If VAL is a cons of coding systems, the car part is used for decoding,
8171 and the cdr part is used for encoding.
8172 If VAL is a function symbol, the function must return a coding system
8173 or a cons of coding systems which are used as above.
8174
8175 See also the function `find-operation-coding-system'.  */);
8176   Vprocess_coding_system_alist = Qnil;
8177
8178   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
8179                doc: /*
8180 Alist to decide a coding system to use for a network I/O operation.
8181 The format is ((PATTERN . VAL) ...),
8182 where PATTERN is a regular expression matching a network service name
8183 or is a port number to connect to,
8184 VAL is a coding system, a cons of coding systems, or a function symbol.
8185 If VAL is a coding system, it is used for both decoding what received
8186 from the network stream and encoding what sent to the network stream.
8187 If VAL is a cons of coding systems, the car part is used for decoding,
8188 and the cdr part is used for encoding.
8189 If VAL is a function symbol, the function must return a coding system
8190 or a cons of coding systems which are used as above.
8191
8192 See also the function `find-operation-coding-system'.  */);
8193   Vnetwork_coding_system_alist = Qnil;
8194
8195   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
8196                doc: /* Coding system to use with system messages.
8197 Also used for decoding keyboard input on X Window system.  */);
8198   Vlocale_coding_system = Qnil;
8199
8200   /* The eol mnemonics are reset in startup.el system-dependently.  */
8201   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
8202                doc: /*
8203 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
8204   eol_mnemonic_unix = build_string (":");
8205
8206   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
8207                doc: /*
8208 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
8209   eol_mnemonic_dos = build_string ("\\");
8210
8211   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
8212                doc: /*
8213 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
8214   eol_mnemonic_mac = build_string ("/");
8215
8216   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
8217                doc: /*
8218 *String displayed in mode line when end-of-line format is not yet determined.  */);
8219   eol_mnemonic_undecided = build_string (":");
8220
8221   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
8222                doc: /*
8223 *Non-nil enables character translation while encoding and decoding.  */);
8224   Venable_character_translation = Qt;
8225
8226   DEFVAR_LISP ("standard-translation-table-for-decode",
8227                &Vstandard_translation_table_for_decode,
8228                doc: /* Table for translating characters while decoding.  */);
8229   Vstandard_translation_table_for_decode = Qnil;
8230
8231   DEFVAR_LISP ("standard-translation-table-for-encode",
8232                &Vstandard_translation_table_for_encode,
8233                doc: /* Table for translating characters while encoding.  */);
8234   Vstandard_translation_table_for_encode = Qnil;
8235
8236   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
8237                doc: /* Alist of charsets vs revision numbers.
8238 While encoding, if a charset (car part of an element) is found,
8239 designate it with the escape sequence identifying revision (cdr part
8240 of the element).  */);
8241   Vcharset_revision_table = Qnil;
8242
8243   DEFVAR_LISP ("default-process-coding-system",
8244                &Vdefault_process_coding_system,
8245                doc: /* Cons of coding systems used for process I/O by default.
8246 The car part is used for decoding a process output,
8247 the cdr part is used for encoding a text to be sent to a process.  */);
8248   Vdefault_process_coding_system = Qnil;
8249
8250   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
8251                doc: /*
8252 Table of extra Latin codes in the range 128..159 (inclusive).
8253 This is a vector of length 256.
8254 If Nth element is non-nil, the existence of code N in a file
8255 \(or output of subprocess) doesn't prevent it to be detected as
8256 a coding system of ISO 2022 variant which has a flag
8257 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
8258 or reading output of a subprocess.
8259 Only 128th through 159th elements has a meaning.  */);
8260   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
8261
8262   DEFVAR_LISP ("select-safe-coding-system-function",
8263                &Vselect_safe_coding_system_function,
8264                doc: /*
8265 Function to call to select safe coding system for encoding a text.
8266
8267 If set, this function is called to force a user to select a proper
8268 coding system which can encode the text in the case that a default
8269 coding system used in each operation can't encode the text.
8270
8271 The default value is `select-safe-coding-system' (which see).  */);
8272   Vselect_safe_coding_system_function = Qnil;
8273
8274   DEFVAR_BOOL ("inhibit-iso-escape-detection",
8275                &inhibit_iso_escape_detection,
8276                doc: /*
8277 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
8278
8279 By default, on reading a file, Emacs tries to detect how the text is
8280 encoded.  This code detection is sensitive to escape sequences.  If
8281 the sequence is valid as ISO2022, the code is determined as one of
8282 the ISO2022 encodings, and the file is decoded by the corresponding
8283 coding system (e.g. `iso-2022-7bit').
8284
8285 However, there may be a case that you want to read escape sequences in
8286 a file as is.  In such a case, you can set this variable to non-nil.
8287 Then, as the code detection ignores any escape sequences, no file is
8288 detected as encoded in some ISO2022 encoding.  The result is that all
8289 escape sequences become visible in a buffer.
8290
8291 The default value is nil, and it is strongly recommended not to change
8292 it.  That is because many Emacs Lisp source files that contain
8293 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
8294 in Emacs's distribution, and they won't be decoded correctly on
8295 reading if you suppress escape sequence detection.
8296
8297 The other way to read escape sequences in a file without decoding is
8298 to explicitly specify some coding system that doesn't use ISO2022's
8299 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
8300   inhibit_iso_escape_detection = 0;
8301
8302   {
8303     Lisp_Object args[coding_arg_max];
8304     Lisp_Object plist[14];
8305     int i;
8306
8307     for (i = 0; i < coding_arg_max; i++)
8308       args[i] = Qnil;
8309
8310     plist[0] = intern (":name");
8311     plist[1] = args[coding_arg_name] = Qno_conversion;
8312     plist[2] = intern (":mnemonic");
8313     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
8314     plist[4] = intern (":coding-type");
8315     plist[5] = args[coding_arg_coding_type] = Qraw_text;
8316     plist[6] = intern (":ascii-compatible-p");
8317     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
8318     plist[8] = intern (":default-char");
8319     plist[9] = args[coding_arg_default_char] = make_number (0);
8320     plist[10] = intern (":docstring");
8321     plist[11] = build_string ("Do no conversion.\n\
8322 \n\
8323 When you visit a file with this coding, the file is read into a\n\
8324 unibyte buffer as is, thus each byte of a file is treated as a\n\
8325 character.");
8326     plist[12] = intern (":eol-type");
8327     plist[13] = args[coding_arg_eol_type] = Qunix;
8328     args[coding_arg_plist] = Flist (14, plist);
8329     Fdefine_coding_system_internal (coding_arg_max, args);
8330   }
8331
8332   setup_coding_system (Qno_conversion, &keyboard_coding);
8333   setup_coding_system (Qno_conversion, &terminal_coding);
8334   setup_coding_system (Qno_conversion, &safe_terminal_coding);
8335 }
8336
8337 char *
8338 emacs_strerror (error_number)
8339      int error_number;
8340 {
8341   char *str;
8342
8343   synchronize_system_messages_locale ();
8344   str = strerror (error_number);
8345
8346   if (! NILP (Vlocale_coding_system))
8347     {
8348       Lisp_Object dec = code_convert_string_norecord (build_string (str),
8349                                                       Vlocale_coding_system,
8350                                                       0);
8351       str = (char *) XSTRING (dec)->data;
8352     }
8353
8354   return str;
8355 }
8356
8357 #endif /* emacs */