src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005,
   3                  2006, 2007, 2008 Free Software Foundation, Inc.
   4    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   5      2005, 2006, 2007, 2008
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H14PRO021
   8    Copyright (C) 2003
   9      National Institute of Advanced Industrial Science and Technology (AIST)
  10      Registration Number H13PRO009
  11
  12 This file is part of GNU Emacs.
  13
  14 GNU Emacs is free software; you can redistribute it and/or modify
  15 it under the terms of the GNU General Public License as published by
  16 the Free Software Foundation; either version 3, or (at your option)
  17 any later version.
  18
  19 GNU Emacs is distributed in the hope that it will be useful,
  20 but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 GNU General Public License for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with GNU Emacs; see the file COPYING.  If not, write to
  26 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  27 Boston, MA 02110-1301, USA.  */
  28
  29 /*** TABLE OF CONTENTS ***
  30
  31   0. General comments
  32   1. Preamble
  33   2. Emacs' internal format (emacs-utf-8) handlers
  34   3. UTF-8 handlers
  35   4. UTF-16 handlers
  36   5. Charset-base coding systems handlers
  37   6. emacs-mule (old Emacs' internal format) handlers
  38   7. ISO2022 handlers
  39   8. Shift-JIS and BIG5 handlers
  40   9. CCL handlers
  41   10. C library functions
  42   11. Emacs Lisp library functions
  43   12. Postamble
  44
  45 */
  46
  47 /*** 0. General comments ***
  48
  49
  50 CODING SYSTEM
  51
  52   A coding system is an object for an encoding mechanism that contains
  53   information about how to convert byte sequences to character
  54   sequences and vice versa.  When we say "decode", it means converting
  55   a byte sequence of a specific coding system into a character
  56   sequence that is represented by Emacs' internal coding system
  57   `emacs-utf-8', and when we say "encode", it means converting a
  58   character sequence of emacs-utf-8 to a byte sequence of a specific
  59   coding system.
  60
  61   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  62   C level, a coding system is represented by a vector of attributes
  63   stored in the hash table Vcharset_hash_table.  The conversion from
  64   coding system symbol to attributes vector is done by looking up
  65   Vcharset_hash_table by the symbol.
  66
  67   Coding systems are classified into the following types depending on
  68   the encoding mechanism.  Here's a brief description of the types.
  69
  70   o UTF-8
  71
  72   o UTF-16
  73
  74   o Charset-base coding system
  75
  76   A coding system defined by one or more (coded) character sets.
  77   Decoding and encoding are done by a code converter defined for each
  78   character set.
  79
  80   o Old Emacs internal format (emacs-mule)
  81
  82   The coding system adopted by old versions of Emacs (20 and 21).
  83
  84   o ISO2022-base coding system
  85
  86   The most famous coding system for multiple character sets.  X's
  87   Compound Text, various EUCs (Extended Unix Code), and coding systems
  88   used in the Internet communication such as ISO-2022-JP are all
  89   variants of ISO2022.
  90
  91   o SJIS (or Shift-JIS or MS-Kanji-Code)
  92
  93   A coding system to encode character sets: ASCII, JISX0201, and
  94   JISX0208.  Widely used for PC's in Japan.  Details are described in
  95   section 8.
  96
  97   o BIG5
  98
  99   A coding system to encode character sets: ASCII and Big5.  Widely
 100   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
 101   described in section 8.  In this file, when we write "big5" (all
 102   lowercase), we mean the coding system, and when we write "Big5"
 103   (capitalized), we mean the character set.
 104
 105   o CCL
 106
 107   If a user wants to decode/encode text encoded in a coding system
 108   not listed above, he can supply a decoder and an encoder for it in
 109   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 110   program while decoding/encoding.
 111
 112   o Raw-text
 113
 114   A coding system for text containing raw eight-bit data.  Emacs
 115   treats each byte of source text as a character (except for
 116   end-of-line conversion).
 117
 118   o No-conversion
 119
 120   Like raw text, but don't do end-of-line conversion.
 121
 122
 123 END-OF-LINE FORMAT
 124
 125   How text end-of-line is encoded depends on operating system.  For
 126   instance, Unix's format is just one byte of LF (line-feed) code,
 127   whereas DOS's format is two-byte sequence of `carriage-return' and
 128   `line-feed' codes.  MacOS's format is usually one byte of
 129   `carriage-return'.
 130
 131   Since text character encoding and end-of-line encoding are
 132   independent, any coding system described above can take any format
 133   of end-of-line (except for no-conversion).
 134
 135 STRUCT CODING_SYSTEM
 136
 137   Before using a coding system for code conversion (i.e. decoding and
 138   encoding), we setup a structure of type `struct coding_system'.
 139   This structure keeps various information about a specific code
 140   conversion (e.g. the location of source and destination data).
 141
 142 */
 143
 144 /* COMMON MACROS */
 145
 146
 147 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 148
 149   These functions check if a byte sequence specified as a source in
 150   CODING conforms to the format of XXX, and update the members of
 151   DETECT_INFO.
 152
 153   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 154
 155   Below is the template of these functions.  */
 156
 157 #if 0
 158 static int
 159 detect_coding_XXX (coding, detect_info)
 160      struct coding_system *coding;
 161      struct coding_detection_info *detect_info;
 162 {
 163   const unsigned char *src = coding->source;
 164   const unsigned char *src_end = coding->source + coding->src_bytes;
 165   int multibytep = coding->src_multibyte;
 166   int consumed_chars = 0;
 167   int found = 0;
 168   ...;
 169
 170   while (1)
 171     {
 172       /* Get one byte from the source.  If the souce is exausted, jump
 173          to no_more_source:.  */
 174       ONE_MORE_BYTE (c);
 175
 176       if (! __C_conforms_to_XXX___ (c))
 177         break;
 178       if (! __C_strongly_suggests_XXX__ (c))
 179         found = CATEGORY_MASK_XXX;
 180     }
 181   /* The byte sequence is invalid for XXX.  */
 182   detect_info->rejected |= CATEGORY_MASK_XXX;
 183   return 0;
 184
 185  no_more_source:
 186   /* The source exausted successfully.  */
 187   detect_info->found |= found;
 188   return 1;
 189 }
 190 #endif
 191
 192 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 193
 194   These functions decode a byte sequence specified as a source by
 195   CODING.  The resulting multibyte text goes to a place pointed to by
 196   CODING->charbuf, the length of which should not exceed
 197   CODING->charbuf_size;
 198
 199   These functions set the information of original and decoded texts in
 200   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 201   They also set CODING->result to one of CODING_RESULT_XXX indicating
 202   how the decoding is finished.
 203
 204   Below is the template of these functions.  */
 205
 206 #if 0
 207 static void
 208 decode_coding_XXXX (coding)
 209      struct coding_system *coding;
 210 {
 211   const unsigned char *src = coding->source + coding->consumed;
 212   const unsigned char *src_end = coding->source + coding->src_bytes;
 213   /* SRC_BASE remembers the start position in source in each loop.
 214      The loop will be exited when there's not enough source code, or
 215      when there's no room in CHARBUF for a decoded character.  */
 216   const unsigned char *src_base;
 217   /* A buffer to produce decoded characters.  */
 218   int *charbuf = coding->charbuf + coding->charbuf_used;
 219   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 220   int multibytep = coding->src_multibyte;
 221
 222   while (1)
 223     {
 224       src_base = src;
 225       if (charbuf < charbuf_end)
 226         /* No more room to produce a decoded character.  */
 227         break;
 228       ONE_MORE_BYTE (c);
 229       /* Decode it. */
 230     }
 231
 232  no_more_source:
 233   if (src_base < src_end
 234       && coding->mode & CODING_MODE_LAST_BLOCK)
 235     /* If the source ends by partial bytes to construct a character,
 236        treat them as eight-bit raw data.  */
 237     while (src_base < src_end && charbuf < charbuf_end)
 238       *charbuf++ = *src_base++;
 239   /* Remember how many bytes and characters we consumed.  If the
 240      source is multibyte, the bytes and chars are not identical.  */
 241   coding->consumed = coding->consumed_char = src_base - coding->source;
 242   /* Remember how many characters we produced.  */
 243   coding->charbuf_used = charbuf - coding->charbuf;
 244 }
 245 #endif
 246
 247 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 248
 249   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 250   internal multibyte format by CODING.  The resulting byte sequence
 251   goes to a place pointed to by DESTINATION, the length of which
 252   should not exceed DST_BYTES.
 253
 254   These functions set the information of original and encoded texts in
 255   the members produced, produced_char, consumed, and consumed_char of
 256   the structure *CODING.  They also set the member result to one of
 257   CODING_RESULT_XXX indicating how the encoding finished.
 258
 259   DST_BYTES zero means that source area and destination area are
 260   overlapped, which means that we can produce a encoded text until it
 261   reaches at the head of not-yet-encoded source text.
 262
 263   Below is a template of these functions.  */
 264 #if 0
 265 static void
 266 encode_coding_XXX (coding)
 267      struct coding_system *coding;
 268 {
 269   int multibytep = coding->dst_multibyte;
 270   int *charbuf = coding->charbuf;
 271   int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
 272   unsigned char *dst = coding->destination + coding->produced;
 273   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 274   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 275   int produced_chars = 0;
 276
 277   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 278     {
 279       int c = *charbuf;
 280       /* Encode C into DST, and increment DST.  */
 281     }
 282  label_no_more_destination:
 283   /* How many chars and bytes we produced.  */
 284   coding->produced_char += produced_chars;
 285   coding->produced = dst - coding->destination;
 286 }
 287 #endif
 288
 289 \f
 290 /*** 1. Preamble ***/
 291
 292 #include <config.h>
 293 #include <stdio.h>
 294
 295 #include "lisp.h"
 296 #include "buffer.h"
 297 #include "character.h"
 298 #include "charset.h"
 299 #include "ccl.h"
 300 #include "composite.h"
 301 #include "coding.h"
 302 #include "window.h"
 303 #include "frame.h"
 304 #include "termhooks.h"
 305
 306 Lisp_Object Vcoding_system_hash_table;
 307
 308 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 309 Lisp_Object Qunix, Qdos;
 310 extern Lisp_Object Qmac;        /* frame.c */
 311 Lisp_Object Qbuffer_file_coding_system;
 312 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 313 Lisp_Object Qdefault_char;
 314 Lisp_Object Qno_conversion, Qundecided;
 315 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 316 Lisp_Object Qbig, Qlittle;
 317 Lisp_Object Qcoding_system_history;
 318 Lisp_Object Qvalid_codes;
 319 Lisp_Object QCcategory, QCmnemonic, QCdefalut_char;
 320 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 321 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 322 Lisp_Object QCascii_compatible_p;
 323
 324 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 325 Lisp_Object Qcall_process, Qcall_process_region;
 326 Lisp_Object Qstart_process, Qopen_network_stream;
 327 Lisp_Object Qtarget_idx;
 328
 329 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 330 Lisp_Object Qinterrupted, Qinsufficient_memory;
 331
 332 extern Lisp_Object Qcompletion_ignore_case;
 333
 334 /* If a symbol has this property, evaluate the value to define the
 335    symbol as a coding system.  */
 336 static Lisp_Object Qcoding_system_define_form;
 337
 338 int coding_system_require_warning;
 339
 340 Lisp_Object Vselect_safe_coding_system_function;
 341
 342 /* Mnemonic string for each format of end-of-line.  */
 343 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 344 /* Mnemonic string to indicate format of end-of-line is not yet
 345    decided.  */
 346 Lisp_Object eol_mnemonic_undecided;
 347
 348 /* Format of end-of-line decided by system.  This is Qunix on
 349    Unix and Mac, Qdos on DOS/Windows.
 350    This has an effect only for external encoding (i.e. for output to
 351    file and process), not for in-buffer or Lisp string encoding.  */
 352 static Lisp_Object system_eol_type;
 353
 354 #ifdef emacs
 355
 356 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 357
 358 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 359
 360 /* Coding system emacs-mule and raw-text are for converting only
 361    end-of-line format.  */
 362 Lisp_Object Qemacs_mule, Qraw_text;
 363 Lisp_Object Qutf_8_emacs;
 364
 365 /* Coding-systems are handed between Emacs Lisp programs and C internal
 366    routines by the following three variables.  */
 367 /* Coding-system for reading files and receiving data from process.  */
 368 Lisp_Object Vcoding_system_for_read;
 369 /* Coding-system for writing files and sending data to process.  */
 370 Lisp_Object Vcoding_system_for_write;
 371 /* Coding-system actually used in the latest I/O.  */
 372 Lisp_Object Vlast_coding_system_used;
 373 /* Set to non-nil when an error is detected while code conversion.  */
 374 Lisp_Object Vlast_code_conversion_error;
 375 /* A vector of length 256 which contains information about special
 376    Latin codes (especially for dealing with Microsoft codes).  */
 377 Lisp_Object Vlatin_extra_code_table;
 378
 379 /* Flag to inhibit code conversion of end-of-line format.  */
 380 int inhibit_eol_conversion;
 381
 382 /* Flag to inhibit ISO2022 escape sequence detection.  */
 383 int inhibit_iso_escape_detection;
 384
 385 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 386 int inherit_process_coding_system;
 387
 388 /* Coding system to be used to encode text for terminal display when
 389    terminal coding system is nil.  */
 390 struct coding_system safe_terminal_coding;
 391
 392 Lisp_Object Vfile_coding_system_alist;
 393 Lisp_Object Vprocess_coding_system_alist;
 394 Lisp_Object Vnetwork_coding_system_alist;
 395
 396 Lisp_Object Vlocale_coding_system;
 397
 398 #endif /* emacs */
 399
 400 /* Flag to tell if we look up translation table on character code
 401    conversion.  */
 402 Lisp_Object Venable_character_translation;
 403 /* Standard translation table to look up on decoding (reading).  */
 404 Lisp_Object Vstandard_translation_table_for_decode;
 405 /* Standard translation table to look up on encoding (writing).  */
 406 Lisp_Object Vstandard_translation_table_for_encode;
 407
 408 Lisp_Object Qtranslation_table;
 409 Lisp_Object Qtranslation_table_id;
 410 Lisp_Object Qtranslation_table_for_decode;
 411 Lisp_Object Qtranslation_table_for_encode;
 412
 413 /* Alist of charsets vs revision number.  */
 414 static Lisp_Object Vcharset_revision_table;
 415
 416 /* Default coding systems used for process I/O.  */
 417 Lisp_Object Vdefault_process_coding_system;
 418
 419 /* Char table for translating Quail and self-inserting input.  */
 420 Lisp_Object Vtranslation_table_for_input;
 421
 422 /* Two special coding systems.  */
 423 Lisp_Object Vsjis_coding_system;
 424 Lisp_Object Vbig5_coding_system;
 425
 426 /* ISO2022 section */
 427
 428 #define CODING_ISO_INITIAL(coding, reg)                 \
 429   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 430                      coding_attr_iso_initial),          \
 431                reg)))
 432
 433
 434 #define CODING_ISO_REQUEST(coding, charset_id)  \
 435   ((charset_id <= (coding)->max_charset_id      \
 436     ? (coding)->safe_charsets[charset_id]       \
 437     : -1))
 438
 439
 440 #define CODING_ISO_FLAGS(coding)        \
 441   ((coding)->spec.iso_2022.flags)
 442 #define CODING_ISO_DESIGNATION(coding, reg)     \
 443   ((coding)->spec.iso_2022.current_designation[reg])
 444 #define CODING_ISO_INVOCATION(coding, plane)    \
 445   ((coding)->spec.iso_2022.current_invocation[plane])
 446 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 447   ((coding)->spec.iso_2022.single_shifting)
 448 #define CODING_ISO_BOL(coding)  \
 449   ((coding)->spec.iso_2022.bol)
 450 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 451   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 452
 453 /* Control characters of ISO2022.  */
 454                         /* code */      /* function */
 455 #define ISO_CODE_LF     0x0A            /* line-feed */
 456 #define ISO_CODE_CR     0x0D            /* carriage-return */
 457 #define ISO_CODE_SO     0x0E            /* shift-out */
 458 #define ISO_CODE_SI     0x0F            /* shift-in */
 459 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 460 #define ISO_CODE_ESC    0x1B            /* escape */
 461 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 462 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 463 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 464
 465 /* All code (1-byte) of ISO2022 is classified into one of the
 466    followings.  */
 467 enum iso_code_class_type
 468   {
 469     ISO_control_0,              /* Control codes in the range
 470                                    0x00..0x1F and 0x7F, except for the
 471                                    following 5 codes.  */
 472     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 473     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 474     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 475     ISO_escape,                 /* ISO_CODE_SO (0x1B) */
 476     ISO_control_1,              /* Control codes in the range
 477                                    0x80..0x9F, except for the
 478                                    following 3 codes.  */
 479     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 480     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 481     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 482     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 483     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 484     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 485     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 486   };
 487
 488 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
 489     `iso-flags' attribute of an iso2022 coding system.  */
 490
 491 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 492    instead of the correct short-form sequence (e.g. ESC $ A).  */
 493 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 494
 495 /* If set, reset graphic planes and registers at end-of-line to the
 496    initial state.  */
 497 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 498
 499 /* If set, reset graphic planes and registers before any control
 500    characters to the initial state.  */
 501 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 502
 503 /* If set, encode by 7-bit environment.  */
 504 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 505
 506 /* If set, use locking-shift function.  */
 507 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 508
 509 /* If set, use single-shift function.  Overwrite
 510    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 511 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 512
 513 /* If set, use designation escape sequence.  */
 514 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 515
 516 /* If set, produce revision number sequence.  */
 517 #define CODING_ISO_FLAG_REVISION        0x0080
 518
 519 /* If set, produce ISO6429's direction specifying sequence.  */
 520 #define CODING_ISO_FLAG_DIRECTION       0x0100
 521
 522 /* If set, assume designation states are reset at beginning of line on
 523    output.  */
 524 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 525
 526 /* If set, designation sequence should be placed at beginning of line
 527    on output.  */
 528 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 529
 530 /* If set, do not encode unsafe charactes on output.  */
 531 #define CODING_ISO_FLAG_SAFE            0x0800
 532
 533 /* If set, extra latin codes (128..159) are accepted as a valid code
 534    on input.  */
 535 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 536
 537 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 538
 539 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 540
 541 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 542
 543 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 544
 545 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 546
 547 /* A character to be produced on output if encoding of the original
 548    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 549 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 550
 551
 552 /* UTF-16 section */
 553 #define CODING_UTF_16_BOM(coding)       \
 554   ((coding)->spec.utf_16.bom)
 555
 556 #define CODING_UTF_16_ENDIAN(coding)    \
 557   ((coding)->spec.utf_16.endian)
 558
 559 #define CODING_UTF_16_SURROGATE(coding) \
 560   ((coding)->spec.utf_16.surrogate)
 561
 562
 563 /* CCL section */
 564 #define CODING_CCL_DECODER(coding)      \
 565   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 566 #define CODING_CCL_ENCODER(coding)      \
 567   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 568 #define CODING_CCL_VALIDS(coding)                                          \
 569   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 570
 571 /* Index for each coding category in `coding_categories' */
 572
 573 enum coding_category
 574   {
 575     coding_category_iso_7,
 576     coding_category_iso_7_tight,
 577     coding_category_iso_8_1,
 578     coding_category_iso_8_2,
 579     coding_category_iso_7_else,
 580     coding_category_iso_8_else,
 581     coding_category_utf_8,
 582     coding_category_utf_16_auto,
 583     coding_category_utf_16_be,
 584     coding_category_utf_16_le,
 585     coding_category_utf_16_be_nosig,
 586     coding_category_utf_16_le_nosig,
 587     coding_category_charset,
 588     coding_category_sjis,
 589     coding_category_big5,
 590     coding_category_ccl,
 591     coding_category_emacs_mule,
 592     /* All above are targets of code detection.  */
 593     coding_category_raw_text,
 594     coding_category_undecided,
 595     coding_category_max
 596   };
 597
 598 /* Definitions of flag bits used in detect_coding_XXXX.  */
 599 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 600 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 601 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 602 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 603 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 604 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 605 #define CATEGORY_MASK_UTF_8             (1 << coding_category_utf_8)
 606 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 607 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 608 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 609 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 610 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 611 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 612 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 613 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 614 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 615 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 616 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 617
 618 /* This value is returned if detect_coding_mask () find nothing other
 619    than ASCII characters.  */
 620 #define CATEGORY_MASK_ANY               \
 621   (CATEGORY_MASK_ISO_7                  \
 622    | CATEGORY_MASK_ISO_7_TIGHT          \
 623    | CATEGORY_MASK_ISO_8_1              \
 624    | CATEGORY_MASK_ISO_8_2              \
 625    | CATEGORY_MASK_ISO_7_ELSE           \
 626    | CATEGORY_MASK_ISO_8_ELSE           \
 627    | CATEGORY_MASK_UTF_8                \
 628    | CATEGORY_MASK_UTF_16_BE            \
 629    | CATEGORY_MASK_UTF_16_LE            \
 630    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 631    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 632    | CATEGORY_MASK_CHARSET              \
 633    | CATEGORY_MASK_SJIS                 \
 634    | CATEGORY_MASK_BIG5                 \
 635    | CATEGORY_MASK_CCL                  \
 636    | CATEGORY_MASK_EMACS_MULE)
 637
 638
 639 #define CATEGORY_MASK_ISO_7BIT \
 640   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 641
 642 #define CATEGORY_MASK_ISO_8BIT \
 643   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 644
 645 #define CATEGORY_MASK_ISO_ELSE \
 646   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 647
 648 #define CATEGORY_MASK_ISO_ESCAPE        \
 649   (CATEGORY_MASK_ISO_7                  \
 650    | CATEGORY_MASK_ISO_7_TIGHT          \
 651    | CATEGORY_MASK_ISO_7_ELSE           \
 652    | CATEGORY_MASK_ISO_8_ELSE)
 653
 654 #define CATEGORY_MASK_ISO       \
 655   (  CATEGORY_MASK_ISO_7BIT     \
 656      | CATEGORY_MASK_ISO_8BIT   \
 657      | CATEGORY_MASK_ISO_ELSE)
 658
 659 #define CATEGORY_MASK_UTF_16            \
 660   (CATEGORY_MASK_UTF_16_BE              \
 661    | CATEGORY_MASK_UTF_16_LE            \
 662    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 663    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 664
 665
 666 /* List of symbols `coding-category-xxx' ordered by priority.  This
 667    variable is exposed to Emacs Lisp.  */
 668 static Lisp_Object Vcoding_category_list;
 669
 670 /* Table of coding categories (Lisp symbols).  This variable is for
 671    internal use oly.  */
 672 static Lisp_Object Vcoding_category_table;
 673
 674 /* Table of coding-categories ordered by priority.  */
 675 static enum coding_category coding_priorities[coding_category_max];
 676
 677 /* Nth element is a coding context for the coding system bound to the
 678    Nth coding category.  */
 679 static struct coding_system coding_categories[coding_category_max];
 680
 681 /*** Commonly used macros and functions ***/
 682
 683 #ifndef min
 684 #define min(a, b) ((a) < (b) ? (a) : (b))
 685 #endif
 686 #ifndef max
 687 #define max(a, b) ((a) > (b) ? (a) : (b))
 688 #endif
 689
 690 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 691   do {                                                  \
 692     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 693     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 694   } while (0)
 695
 696
 697 /* Safely get one byte from the source text pointed by SRC which ends
 698    at SRC_END, and set C to that byte.  If there are not enough bytes
 699    in the source, it jumps to `no_more_source'.  If multibytep is
 700    nonzero, and a multibyte character is found at SRC, set C to the
 701    negative value of the character code.  The caller should declare
 702    and set these variables appropriately in advance:
 703         src, src_end, multibytep */
 704
 705 #define ONE_MORE_BYTE(c)                                \
 706   do {                                                  \
 707     if (src == src_end)                                 \
 708       {                                                 \
 709         if (src_base < src)                             \
 710           record_conversion_result                      \
 711             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 712         goto no_more_source;                            \
 713       }                                                 \
 714     c = *src++;                                         \
 715     if (multibytep && (c & 0x80))                       \
 716       {                                                 \
 717         if ((c & 0xFE) == 0xC0)                         \
 718           c = ((c & 1) << 6) | *src++;                  \
 719         else                                            \
 720           {                                             \
 721             src--;                                      \
 722             c = - string_char (src, &src, NULL);        \
 723             record_conversion_result                    \
 724               (coding, CODING_RESULT_INVALID_SRC);      \
 725           }                                             \
 726       }                                                 \
 727     consumed_chars++;                                   \
 728   } while (0)
 729
 730
 731 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 732   do {                                                  \
 733     c = *src++;                                         \
 734     if (multibytep && (c & 0x80))                       \
 735       {                                                 \
 736         if ((c & 0xFE) == 0xC0)                         \
 737           c = ((c & 1) << 6) | *src++;                  \
 738         else                                            \
 739           {                                             \
 740             src--;                                      \
 741             c = - string_char (src, &src, NULL);        \
 742             record_conversion_result                    \
 743               (coding, CODING_RESULT_INVALID_SRC);      \
 744           }                                             \
 745       }                                                 \
 746     consumed_chars++;                                   \
 747   } while (0)
 748
 749
 750 /* Store a byte C in the place pointed by DST and increment DST to the
 751    next free point, and increment PRODUCED_CHARS.  The caller should
 752    assure that C is 0..127, and declare and set the variable `dst'
 753    appropriately in advance.
 754 */
 755
 756
 757 #define EMIT_ONE_ASCII_BYTE(c)  \
 758   do {                          \
 759     produced_chars++;           \
 760     *dst++ = (c);               \
 761   } while (0)
 762
 763
 764 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 765
 766 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 767   do {                                  \
 768     produced_chars += 2;                \
 769     *dst++ = (c1), *dst++ = (c2);       \
 770   } while (0)
 771
 772
 773 /* Store a byte C in the place pointed by DST and increment DST to the
 774    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 775    nonzero, store in an appropriate multibyte from.  The caller should
 776    declare and set the variables `dst' and `multibytep' appropriately
 777    in advance.  */
 778
 779 #define EMIT_ONE_BYTE(c)                \
 780   do {                                  \
 781     produced_chars++;                   \
 782     if (multibytep)                     \
 783       {                                 \
 784         int ch = (c);                   \
 785         if (ch >= 0x80)                 \
 786           ch = BYTE8_TO_CHAR (ch);      \
 787         CHAR_STRING_ADVANCE (ch, dst);  \
 788       }                                 \
 789     else                                \
 790       *dst++ = (c);                     \
 791   } while (0)
 792
 793
 794 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 795
 796 #define EMIT_TWO_BYTES(c1, c2)          \
 797   do {                                  \
 798     produced_chars += 2;                \
 799     if (multibytep)                     \
 800       {                                 \
 801         int ch;                         \
 802                                         \
 803         ch = (c1);                      \
 804         if (ch >= 0x80)                 \
 805           ch = BYTE8_TO_CHAR (ch);      \
 806         CHAR_STRING_ADVANCE (ch, dst);  \
 807         ch = (c2);                      \
 808         if (ch >= 0x80)                 \
 809           ch = BYTE8_TO_CHAR (ch);      \
 810         CHAR_STRING_ADVANCE (ch, dst);  \
 811       }                                 \
 812     else                                \
 813       {                                 \
 814         *dst++ = (c1);                  \
 815         *dst++ = (c2);                  \
 816       }                                 \
 817   } while (0)
 818
 819
 820 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 821   do {                                  \
 822     EMIT_ONE_BYTE (c1);                 \
 823     EMIT_TWO_BYTES (c2, c3);            \
 824   } while (0)
 825
 826
 827 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 828   do {                                          \
 829     EMIT_TWO_BYTES (c1, c2);                    \
 830     EMIT_TWO_BYTES (c3, c4);                    \
 831   } while (0)
 832
 833
 834 /* Prototypes for static functions.  */
 835 static void record_conversion_result P_ ((struct coding_system *coding,
 836                                           enum coding_result_code result));
 837 static int detect_coding_utf_8 P_ ((struct coding_system *,
 838                                     struct coding_detection_info *info));
 839 static void decode_coding_utf_8 P_ ((struct coding_system *));
 840 static int encode_coding_utf_8 P_ ((struct coding_system *));
 841
 842 static int detect_coding_utf_16 P_ ((struct coding_system *,
 843                                      struct coding_detection_info *info));
 844 static void decode_coding_utf_16 P_ ((struct coding_system *));
 845 static int encode_coding_utf_16 P_ ((struct coding_system *));
 846
 847 static int detect_coding_iso_2022 P_ ((struct coding_system *,
 848                                        struct coding_detection_info *info));
 849 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 850 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 851
 852 static int detect_coding_emacs_mule P_ ((struct coding_system *,
 853                                          struct coding_detection_info *info));
 854 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 855 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 856
 857 static int detect_coding_sjis P_ ((struct coding_system *,
 858                                    struct coding_detection_info *info));
 859 static void decode_coding_sjis P_ ((struct coding_system *));
 860 static int encode_coding_sjis P_ ((struct coding_system *));
 861
 862 static int detect_coding_big5 P_ ((struct coding_system *,
 863                                    struct coding_detection_info *info));
 864 static void decode_coding_big5 P_ ((struct coding_system *));
 865 static int encode_coding_big5 P_ ((struct coding_system *));
 866
 867 static int detect_coding_ccl P_ ((struct coding_system *,
 868                                   struct coding_detection_info *info));
 869 static void decode_coding_ccl P_ ((struct coding_system *));
 870 static int encode_coding_ccl P_ ((struct coding_system *));
 871
 872 static void decode_coding_raw_text P_ ((struct coding_system *));
 873 static int encode_coding_raw_text P_ ((struct coding_system *));
 874
 875 static void coding_set_source P_ ((struct coding_system *));
 876 static void coding_set_destination P_ ((struct coding_system *));
 877 static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
 878 static void coding_alloc_by_making_gap P_ ((struct coding_system *,
 879                                             EMACS_INT, EMACS_INT));
 880 static unsigned char *alloc_destination P_ ((struct coding_system *,
 881                                              EMACS_INT, unsigned char *));
 882 static void setup_iso_safe_charsets P_ ((Lisp_Object));
 883 static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
 884                                                      int *, int *,
 885                                                      unsigned char *));
 886 static int detect_eol P_ ((const unsigned char *,
 887                            EMACS_INT, enum coding_category));
 888 static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
 889 static void decode_eol P_ ((struct coding_system *));
 890 static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
 891 static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *,
 892                                         int, int *, int *));
 893 static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
 894 static INLINE void produce_composition P_ ((struct coding_system *, int *,
 895                                             EMACS_INT));
 896 static INLINE void produce_charset P_ ((struct coding_system *, int *,
 897                                         EMACS_INT));
 898 static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
 899 static int decode_coding P_ ((struct coding_system *));
 900 static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
 901                                                       struct coding_system *,
 902                                                       int *, EMACS_INT *));
 903 static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
 904                                                   struct coding_system *,
 905                                                   int *, EMACS_INT *));
 906 static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
 907 static int encode_coding P_ ((struct coding_system *));
 908 static Lisp_Object make_conversion_work_buffer P_ ((int));
 909 static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
 910 static INLINE int char_encodable_p P_ ((int, Lisp_Object));
 911 static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
 912
 913 static void
 914 record_conversion_result (struct coding_system *coding,
 915                           enum coding_result_code result)
 916 {
 917   coding->result = result;
 918   switch (result)
 919     {
 920     case CODING_RESULT_INSUFFICIENT_SRC:
 921       Vlast_code_conversion_error = Qinsufficient_source;
 922       break;
 923     case CODING_RESULT_INCONSISTENT_EOL:
 924       Vlast_code_conversion_error = Qinconsistent_eol;
 925       break;
 926     case CODING_RESULT_INVALID_SRC:
 927       Vlast_code_conversion_error = Qinvalid_source;
 928       break;
 929     case CODING_RESULT_INTERRUPT:
 930       Vlast_code_conversion_error = Qinterrupted;
 931       break;
 932     case CODING_RESULT_INSUFFICIENT_MEM:
 933       Vlast_code_conversion_error = Qinsufficient_memory;
 934       break;
 935     default:
 936       Vlast_code_conversion_error = intern ("Unknown error");
 937     }
 938 }
 939
 940 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 941   do {                                                                       \
 942     charset_map_loaded = 0;                                                  \
 943     c = DECODE_CHAR (charset, code);                                         \
 944     if (charset_map_loaded)                                                  \
 945       {                                                                      \
 946         const unsigned char *orig = coding->source;                          \
 947         EMACS_INT offset;                                                    \
 948                                                                              \
 949         coding_set_source (coding);                                          \
 950         offset = coding->source - orig;                                      \
 951         src += offset;                                                       \
 952         src_base += offset;                                                  \
 953         src_end += offset;                                                   \
 954       }                                                                      \
 955   } while (0)
 956
 957
 958 /* If there are at least BYTES length of room at dst, allocate memory
 959    for coding->destination and update dst and dst_end.  We don't have
 960    to take care of coding->source which will be relocated.  It is
 961    handled by calling coding_set_source in encode_coding.  */
 962
 963 #define ASSURE_DESTINATION(bytes)                               \
 964   do {                                                          \
 965     if (dst + (bytes) >= dst_end)                               \
 966       {                                                         \
 967         int more_bytes = charbuf_end - charbuf + (bytes);       \
 968                                                                 \
 969         dst = alloc_destination (coding, more_bytes, dst);      \
 970         dst_end = coding->destination + coding->dst_bytes;      \
 971       }                                                         \
 972   } while (0)
 973
 974
 975 /* Store multibyte form of the character C in P, and advance P to the
 976    end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
 977    never calls MAYBE_UNIFY_CHAR.  */
 978
 979 #define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)      \
 980   do {                                          \
 981     if ((c) <= MAX_1_BYTE_CHAR)                 \
 982       *(p)++ = (c);                             \
 983     else if ((c) <= MAX_2_BYTE_CHAR)            \
 984       *(p)++ = (0xC0 | ((c) >> 6)),             \
 985         *(p)++ = (0x80 | ((c) & 0x3F));         \
 986     else if ((c) <= MAX_3_BYTE_CHAR)            \
 987       *(p)++ = (0xE0 | ((c) >> 12)),            \
 988         *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
 989         *(p)++ = (0x80 | ((c) & 0x3F));         \
 990     else if ((c) <= MAX_4_BYTE_CHAR)            \
 991       *(p)++ = (0xF0 | (c >> 18)),              \
 992         *(p)++ = (0x80 | ((c >> 12) & 0x3F)),   \
 993         *(p)++ = (0x80 | ((c >> 6) & 0x3F)),    \
 994         *(p)++ = (0x80 | (c & 0x3F));           \
 995     else if ((c) <= MAX_5_BYTE_CHAR)            \
 996       *(p)++ = 0xF8,                            \
 997         *(p)++ = (0x80 | ((c >> 18) & 0x0F)),   \
 998         *(p)++ = (0x80 | ((c >> 12) & 0x3F)),   \
 999         *(p)++ = (0x80 | ((c >> 6) & 0x3F)),    \
1000         *(p)++ = (0x80 | (c & 0x3F));           \
1001     else                                        \
1002       (p) += BYTE8_STRING ((c) - 0x3FFF80, p);  \
1003   } while (0)
1004
1005
1006 /* Return the character code of character whose multibyte form is at
1007    P, and advance P to the end of the multibyte form.  This is like
1008    STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.  */
1009
1010 #define STRING_CHAR_ADVANCE_NO_UNIFY(p)                         \
1011   (!((p)[0] & 0x80)                                             \
1012    ? *(p)++                                                     \
1013    : ! ((p)[0] & 0x20)                                          \
1014    ? ((p) += 2,                                                 \
1015       ((((p)[-2] & 0x1F) << 6)                                  \
1016        | ((p)[-1] & 0x3F)                                       \
1017        | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))    \
1018    : ! ((p)[0] & 0x10)                                          \
1019    ? ((p) += 3,                                                 \
1020       ((((p)[-3] & 0x0F) << 12)                                 \
1021        | (((p)[-2] & 0x3F) << 6)                                \
1022        | ((p)[-1] & 0x3F)))                                     \
1023    : ! ((p)[0] & 0x08)                                          \
1024    ? ((p) += 4,                                                 \
1025       ((((p)[-4] & 0xF) << 18)                                  \
1026        | (((p)[-3] & 0x3F) << 12)                               \
1027        | (((p)[-2] & 0x3F) << 6)                                \
1028        | ((p)[-1] & 0x3F)))                                     \
1029    : ((p) += 5,                                                 \
1030       ((((p)[-4] & 0x3F) << 18)                                 \
1031        | (((p)[-3] & 0x3F) << 12)                               \
1032        | (((p)[-2] & 0x3F) << 6)                                \
1033        | ((p)[-1] & 0x3F))))
1034
1035
1036 static void
1037 coding_set_source (coding)
1038      struct coding_system *coding;
1039 {
1040   if (BUFFERP (coding->src_object))
1041     {
1042       struct buffer *buf = XBUFFER (coding->src_object);
1043
1044       if (coding->src_pos < 0)
1045         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
1046       else
1047         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
1048     }
1049   else if (STRINGP (coding->src_object))
1050     {
1051       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
1052     }
1053   else
1054     /* Otherwise, the source is C string and is never relocated
1055        automatically.  Thus we don't have to update anything.  */
1056     ;
1057 }
1058
1059 static void
1060 coding_set_destination (coding)
1061      struct coding_system *coding;
1062 {
1063   if (BUFFERP (coding->dst_object))
1064     {
1065       if (coding->src_pos < 0)
1066         {
1067           coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
1068           coding->dst_bytes = (GAP_END_ADDR
1069                                - (coding->src_bytes - coding->consumed)
1070                                - coding->destination);
1071         }
1072       else
1073         {
1074           /* We are sure that coding->dst_pos_byte is before the gap
1075              of the buffer. */
1076           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1077                                  + coding->dst_pos_byte - BEG_BYTE);
1078           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1079                                - coding->destination);
1080         }
1081     }
1082   else
1083     /* Otherwise, the destination is C string and is never relocated
1084        automatically.  Thus we don't have to update anything.  */
1085     ;
1086 }
1087
1088
1089 static void
1090 coding_alloc_by_realloc (coding, bytes)
1091      struct coding_system *coding;
1092      EMACS_INT bytes;
1093 {
1094   coding->destination = (unsigned char *) xrealloc (coding->destination,
1095                                                     coding->dst_bytes + bytes);
1096   coding->dst_bytes += bytes;
1097 }
1098
1099 static void
1100 coding_alloc_by_making_gap (coding, gap_head_used, bytes)
1101      struct coding_system *coding;
1102      EMACS_INT gap_head_used, bytes;
1103 {
1104   if (EQ (coding->src_object, coding->dst_object))
1105     {
1106       /* The gap may contain the produced data at the head and not-yet
1107          consumed data at the tail.  To preserve those data, we at
1108          first make the gap size to zero, then increase the gap
1109          size.  */
1110       EMACS_INT add = GAP_SIZE;
1111
1112       GPT += gap_head_used, GPT_BYTE += gap_head_used;
1113       GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1114       make_gap (bytes);
1115       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1116       GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
1117     }
1118   else
1119     {
1120       Lisp_Object this_buffer;
1121
1122       this_buffer = Fcurrent_buffer ();
1123       set_buffer_internal (XBUFFER (coding->dst_object));
1124       make_gap (bytes);
1125       set_buffer_internal (XBUFFER (this_buffer));
1126     }
1127 }
1128
1129
1130 static unsigned char *
1131 alloc_destination (coding, nbytes, dst)
1132      struct coding_system *coding;
1133      EMACS_INT nbytes;
1134      unsigned char *dst;
1135 {
1136   EMACS_INT offset = dst - coding->destination;
1137
1138   if (BUFFERP (coding->dst_object))
1139     {
1140       struct buffer *buf = XBUFFER (coding->dst_object);
1141
1142       coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
1143     }
1144   else
1145     coding_alloc_by_realloc (coding, nbytes);
1146   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1147   coding_set_destination (coding);
1148   dst = coding->destination + offset;
1149   return dst;
1150 }
1151
1152 /** Macros for annotations.  */
1153
1154 /* Maximum length of annotation data (sum of annotations for
1155    composition and charset).  */
1156 #define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
1157
1158 /* An annotation data is stored in the array coding->charbuf in this
1159    format:
1160      [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1161    LENGTH is the number of elements in the annotation.
1162    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1163    NCHARS is the number of characters in the text annotated.
1164
1165    The format of the following elements depend on ANNOTATION_MASK.
1166
1167    In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1168    follows:
1169      ... METHOD [ COMPOSITION-COMPONENTS ... ]
1170    METHOD is one of enum composition_method.
1171    Optionnal COMPOSITION-COMPONENTS are characters and composition
1172    rules.
1173
1174    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1175    follows.  */
1176
1177 #define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
1178   do {                                                  \
1179     *(buf)++ = -(len);                                  \
1180     *(buf)++ = (mask);                                  \
1181     *(buf)++ = (nchars);                                \
1182     coding->annotated = 1;                              \
1183   } while (0);
1184
1185 #define ADD_COMPOSITION_DATA(buf, nchars, method)                           \
1186   do {                                                                      \
1187     ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
1188     *buf++ = method;                                                        \
1189   } while (0)
1190
1191
1192 #define ADD_CHARSET_DATA(buf, nchars, id)                               \
1193   do {                                                                  \
1194     ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
1195     *buf++ = id;                                                        \
1196   } while (0)
1197
1198 \f
1199 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1200
1201
1202
1203 \f
1204 /*** 3. UTF-8 ***/
1205
1206 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1207    Check if a text is encoded in UTF-8.  If it is, return 1, else
1208    return 0.  */
1209
1210 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
1211 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
1212 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1213 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1214 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1215 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1216
1217 static int
1218 detect_coding_utf_8 (coding, detect_info)
1219      struct coding_system *coding;
1220      struct coding_detection_info *detect_info;
1221 {
1222   const unsigned char *src = coding->source, *src_base;
1223   const unsigned char *src_end = coding->source + coding->src_bytes;
1224   int multibytep = coding->src_multibyte;
1225   int consumed_chars = 0;
1226   int found = 0;
1227
1228   detect_info->checked |= CATEGORY_MASK_UTF_8;
1229   /* A coding system of this category is always ASCII compatible.  */
1230   src += coding->head_ascii;
1231
1232   while (1)
1233     {
1234       int c, c1, c2, c3, c4;
1235
1236       src_base = src;
1237       ONE_MORE_BYTE (c);
1238       if (c < 0 || UTF_8_1_OCTET_P (c))
1239         continue;
1240       ONE_MORE_BYTE (c1);
1241       if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1242         break;
1243       if (UTF_8_2_OCTET_LEADING_P (c))
1244         {
1245           found = CATEGORY_MASK_UTF_8;
1246           continue;
1247         }
1248       ONE_MORE_BYTE (c2);
1249       if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1250         break;
1251       if (UTF_8_3_OCTET_LEADING_P (c))
1252         {
1253           found = CATEGORY_MASK_UTF_8;
1254           continue;
1255         }
1256       ONE_MORE_BYTE (c3);
1257       if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1258         break;
1259       if (UTF_8_4_OCTET_LEADING_P (c))
1260         {
1261           found = CATEGORY_MASK_UTF_8;
1262           continue;
1263         }
1264       ONE_MORE_BYTE (c4);
1265       if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1266         break;
1267       if (UTF_8_5_OCTET_LEADING_P (c))
1268         {
1269           found = CATEGORY_MASK_UTF_8;
1270           continue;
1271         }
1272       break;
1273     }
1274   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1275   return 0;
1276
1277  no_more_source:
1278   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1279     {
1280       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1281       return 0;
1282     }
1283   detect_info->found |= found;
1284   return 1;
1285 }
1286
1287
1288 static void
1289 decode_coding_utf_8 (coding)
1290      struct coding_system *coding;
1291 {
1292   const unsigned char *src = coding->source + coding->consumed;
1293   const unsigned char *src_end = coding->source + coding->src_bytes;
1294   const unsigned char *src_base;
1295   int *charbuf = coding->charbuf + coding->charbuf_used;
1296   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1297   int consumed_chars = 0, consumed_chars_base;
1298   int multibytep = coding->src_multibyte;
1299   Lisp_Object attr, charset_list;
1300   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1301   int byte_after_cr = -1;
1302
1303   CODING_GET_INFO (coding, attr, charset_list);
1304
1305   while (1)
1306     {
1307       int c, c1, c2, c3, c4, c5;
1308
1309       src_base = src;
1310       consumed_chars_base = consumed_chars;
1311
1312       if (charbuf >= charbuf_end)
1313         break;
1314
1315       if (byte_after_cr >= 0)
1316         c1 = byte_after_cr, byte_after_cr = -1;
1317       else
1318         ONE_MORE_BYTE (c1);
1319       if (c1 < 0)
1320         {
1321           c = - c1;
1322         }
1323       else if (UTF_8_1_OCTET_P(c1))
1324         {
1325           if (eol_crlf && c1 == '\r')
1326             ONE_MORE_BYTE (byte_after_cr);
1327           c = c1;
1328         }
1329       else
1330         {
1331           ONE_MORE_BYTE (c2);
1332           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1333             goto invalid_code;
1334           if (UTF_8_2_OCTET_LEADING_P (c1))
1335             {
1336               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1337               /* Reject overlong sequences here and below.  Encoders
1338                  producing them are incorrect, they can be misleading,
1339                  and they mess up read/write invariance.  */
1340               if (c < 128)
1341                 goto invalid_code;
1342             }
1343           else
1344             {
1345               ONE_MORE_BYTE (c3);
1346               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1347                 goto invalid_code;
1348               if (UTF_8_3_OCTET_LEADING_P (c1))
1349                 {
1350                   c = (((c1 & 0xF) << 12)
1351                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1352                   if (c < 0x800
1353                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1354                     goto invalid_code;
1355                 }
1356               else
1357                 {
1358                   ONE_MORE_BYTE (c4);
1359                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1360                     goto invalid_code;
1361                   if (UTF_8_4_OCTET_LEADING_P (c1))
1362                     {
1363                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1364                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1365                     if (c < 0x10000)
1366                       goto invalid_code;
1367                     }
1368                   else
1369                     {
1370                       ONE_MORE_BYTE (c5);
1371                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1372                         goto invalid_code;
1373                       if (UTF_8_5_OCTET_LEADING_P (c1))
1374                         {
1375                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1376                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1377                                | (c5 & 0x3F));
1378                           if ((c > MAX_CHAR) || (c < 0x200000))
1379                             goto invalid_code;
1380                         }
1381                       else
1382                         goto invalid_code;
1383                     }
1384                 }
1385             }
1386         }
1387
1388       *charbuf++ = c;
1389       continue;
1390
1391     invalid_code:
1392       src = src_base;
1393       consumed_chars = consumed_chars_base;
1394       ONE_MORE_BYTE (c);
1395       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1396       coding->errors++;
1397     }
1398
1399  no_more_source:
1400   coding->consumed_char += consumed_chars_base;
1401   coding->consumed = src_base - coding->source;
1402   coding->charbuf_used = charbuf - coding->charbuf;
1403 }
1404
1405
1406 static int
1407 encode_coding_utf_8 (coding)
1408      struct coding_system *coding;
1409 {
1410   int multibytep = coding->dst_multibyte;
1411   int *charbuf = coding->charbuf;
1412   int *charbuf_end = charbuf + coding->charbuf_used;
1413   unsigned char *dst = coding->destination + coding->produced;
1414   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1415   int produced_chars = 0;
1416   int c;
1417
1418   if (multibytep)
1419     {
1420       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1421
1422       while (charbuf < charbuf_end)
1423         {
1424           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1425
1426           ASSURE_DESTINATION (safe_room);
1427           c = *charbuf++;
1428           if (CHAR_BYTE8_P (c))
1429             {
1430               c = CHAR_TO_BYTE8 (c);
1431               EMIT_ONE_BYTE (c);
1432             }
1433           else
1434             {
1435               CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
1436               for (p = str; p < pend; p++)
1437                 EMIT_ONE_BYTE (*p);
1438             }
1439         }
1440     }
1441   else
1442     {
1443       int safe_room = MAX_MULTIBYTE_LENGTH;
1444
1445       while (charbuf < charbuf_end)
1446         {
1447           ASSURE_DESTINATION (safe_room);
1448           c = *charbuf++;
1449           if (CHAR_BYTE8_P (c))
1450             *dst++ = CHAR_TO_BYTE8 (c);
1451           else
1452             CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
1453           produced_chars++;
1454         }
1455     }
1456   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1457   coding->produced_char += produced_chars;
1458   coding->produced = dst - coding->destination;
1459   return 0;
1460 }
1461
1462
1463 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1464    Check if a text is encoded in one of UTF-16 based coding systems.
1465    If it is, return 1, else return 0.  */
1466
1467 #define UTF_16_HIGH_SURROGATE_P(val) \
1468   (((val) & 0xFC00) == 0xD800)
1469
1470 #define UTF_16_LOW_SURROGATE_P(val) \
1471   (((val) & 0xFC00) == 0xDC00)
1472
1473 #define UTF_16_INVALID_P(val)   \
1474   (((val) == 0xFFFE)            \
1475    || ((val) == 0xFFFF)         \
1476    || UTF_16_LOW_SURROGATE_P (val))
1477
1478
1479 static int
1480 detect_coding_utf_16 (coding, detect_info)
1481      struct coding_system *coding;
1482      struct coding_detection_info *detect_info;
1483 {
1484   const unsigned char *src = coding->source, *src_base = src;
1485   const unsigned char *src_end = coding->source + coding->src_bytes;
1486   int multibytep = coding->src_multibyte;
1487   int consumed_chars = 0;
1488   int c1, c2;
1489
1490   detect_info->checked |= CATEGORY_MASK_UTF_16;
1491   if (coding->mode & CODING_MODE_LAST_BLOCK
1492       && (coding->src_chars & 1))
1493     {
1494       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1495       return 0;
1496     }
1497
1498   ONE_MORE_BYTE (c1);
1499   ONE_MORE_BYTE (c2);
1500   if ((c1 == 0xFF) && (c2 == 0xFE))
1501     {
1502       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1503                              | CATEGORY_MASK_UTF_16_AUTO);
1504       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1505                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1506                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1507     }
1508   else if ((c1 == 0xFE) && (c2 == 0xFF))
1509     {
1510       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1511                              | CATEGORY_MASK_UTF_16_AUTO);
1512       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1513                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1514                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1515     }
1516   else if (c1 >= 0 && c2 >= 0)
1517     {
1518       detect_info->rejected
1519         |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
1520     }
1521  no_more_source:
1522   return 1;
1523 }
1524
1525 static void
1526 decode_coding_utf_16 (coding)
1527      struct coding_system *coding;
1528 {
1529   const unsigned char *src = coding->source + coding->consumed;
1530   const unsigned char *src_end = coding->source + coding->src_bytes;
1531   const unsigned char *src_base;
1532   int *charbuf = coding->charbuf + coding->charbuf_used;
1533   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1534   int consumed_chars = 0, consumed_chars_base;
1535   int multibytep = coding->src_multibyte;
1536   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1537   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1538   int surrogate = CODING_UTF_16_SURROGATE (coding);
1539   Lisp_Object attr, charset_list;
1540   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1541   int byte_after_cr1 = -1, byte_after_cr2 = -1;
1542
1543   CODING_GET_INFO (coding, attr, charset_list);
1544
1545   if (bom == utf_16_with_bom)
1546     {
1547       int c, c1, c2;
1548
1549       src_base = src;
1550       ONE_MORE_BYTE (c1);
1551       ONE_MORE_BYTE (c2);
1552       c = (c1 << 8) | c2;
1553
1554       if (endian == utf_16_big_endian
1555           ? c != 0xFEFF : c != 0xFFFE)
1556         {
1557           /* The first two bytes are not BOM.  Treat them as bytes
1558              for a normal character.  */
1559           src = src_base;
1560           coding->errors++;
1561         }
1562       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1563     }
1564   else if (bom == utf_16_detect_bom)
1565     {
1566       /* We have already tried to detect BOM and failed in
1567          detect_coding.  */
1568       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1569     }
1570
1571   while (1)
1572     {
1573       int c, c1, c2;
1574
1575       src_base = src;
1576       consumed_chars_base = consumed_chars;
1577
1578       if (charbuf + 2 >= charbuf_end)
1579         break;
1580
1581       if (byte_after_cr1 >= 0)
1582         c1 = byte_after_cr1, byte_after_cr1 = -1;
1583       else
1584         ONE_MORE_BYTE (c1);
1585       if (c1 < 0)
1586         {
1587           *charbuf++ = -c1;
1588           continue;
1589         }
1590       if (byte_after_cr2 >= 0)
1591         c2 = byte_after_cr2, byte_after_cr2 = -1;
1592       else
1593         ONE_MORE_BYTE (c2);
1594       if (c2 < 0)
1595         {
1596           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1597           *charbuf++ = -c2;
1598           continue;
1599         }
1600       c = (endian == utf_16_big_endian
1601            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1602
1603       if (surrogate)
1604         {
1605           if (! UTF_16_LOW_SURROGATE_P (c))
1606             {
1607               if (endian == utf_16_big_endian)
1608                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1609               else
1610                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1611               *charbuf++ = c1;
1612               *charbuf++ = c2;
1613               coding->errors++;
1614               if (UTF_16_HIGH_SURROGATE_P (c))
1615                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1616               else
1617                 *charbuf++ = c;
1618             }
1619           else
1620             {
1621               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1622               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1623               *charbuf++ = 0x10000 + c;
1624             }
1625         }
1626       else
1627         {
1628           if (UTF_16_HIGH_SURROGATE_P (c))
1629             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1630           else
1631             {
1632               if (eol_crlf && c == '\r')
1633                 {
1634                   ONE_MORE_BYTE (byte_after_cr1);
1635                   ONE_MORE_BYTE (byte_after_cr2);
1636                 }
1637               *charbuf++ = c;
1638             }
1639         }
1640     }
1641
1642  no_more_source:
1643   coding->consumed_char += consumed_chars_base;
1644   coding->consumed = src_base - coding->source;
1645   coding->charbuf_used = charbuf - coding->charbuf;
1646 }
1647
1648 static int
1649 encode_coding_utf_16 (coding)
1650      struct coding_system *coding;
1651 {
1652   int multibytep = coding->dst_multibyte;
1653   int *charbuf = coding->charbuf;
1654   int *charbuf_end = charbuf + coding->charbuf_used;
1655   unsigned char *dst = coding->destination + coding->produced;
1656   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1657   int safe_room = 8;
1658   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1659   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1660   int produced_chars = 0;
1661   Lisp_Object attrs, charset_list;
1662   int c;
1663
1664   CODING_GET_INFO (coding, attrs, charset_list);
1665
1666   if (bom != utf_16_without_bom)
1667     {
1668       ASSURE_DESTINATION (safe_room);
1669       if (big_endian)
1670         EMIT_TWO_BYTES (0xFE, 0xFF);
1671       else
1672         EMIT_TWO_BYTES (0xFF, 0xFE);
1673       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1674     }
1675
1676   while (charbuf < charbuf_end)
1677     {
1678       ASSURE_DESTINATION (safe_room);
1679       c = *charbuf++;
1680       if (c >= MAX_UNICODE_CHAR)
1681         c = coding->default_char;
1682
1683       if (c < 0x10000)
1684         {
1685           if (big_endian)
1686             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1687           else
1688             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1689         }
1690       else
1691         {
1692           int c1, c2;
1693
1694           c -= 0x10000;
1695           c1 = (c >> 10) + 0xD800;
1696           c2 = (c & 0x3FF) + 0xDC00;
1697           if (big_endian)
1698             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1699           else
1700             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1701         }
1702     }
1703   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1704   coding->produced = dst - coding->destination;
1705   coding->produced_char += produced_chars;
1706   return 0;
1707 }
1708
1709 \f
1710 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1711
1712 /* Emacs' internal format for representation of multiple character
1713    sets is a kind of multi-byte encoding, i.e. characters are
1714    represented by variable-length sequences of one-byte codes.
1715
1716    ASCII characters and control characters (e.g. `tab', `newline') are
1717    represented by one-byte sequences which are their ASCII codes, in
1718    the range 0x00 through 0x7F.
1719
1720    8-bit characters of the range 0x80..0x9F are represented by
1721    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1722    code + 0x20).
1723
1724    8-bit characters of the range 0xA0..0xFF are represented by
1725    one-byte sequences which are their 8-bit code.
1726
1727    The other characters are represented by a sequence of `base
1728    leading-code', optional `extended leading-code', and one or two
1729    `position-code's.  The length of the sequence is determined by the
1730    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1731    whereas extended leading-code and position-code take the range 0xA0
1732    through 0xFF.  See `charset.h' for more details about leading-code
1733    and position-code.
1734
1735    --- CODE RANGE of Emacs' internal format ---
1736    character set        range
1737    -------------        -----
1738    ascii                0x00..0x7F
1739    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1740    eight-bit-graphic    0xA0..0xBF
1741    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1742    ---------------------------------------------
1743
1744    As this is the internal character representation, the format is
1745    usually not used externally (i.e. in a file or in a data sent to a
1746    process).  But, it is possible to have a text externally in this
1747    format (i.e. by encoding by the coding system `emacs-mule').
1748
1749    In that case, a sequence of one-byte codes has a slightly different
1750    form.
1751
1752    At first, all characters in eight-bit-control are represented by
1753    one-byte sequences which are their 8-bit code.
1754
1755    Next, character composition data are represented by the byte
1756    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1757    where,
1758         METHOD is 0xF0 plus one of composition method (enum
1759         composition_method),
1760
1761         BYTES is 0xA0 plus a byte length of this composition data,
1762
1763         CHARS is 0x20 plus a number of characters composed by this
1764         data,
1765
1766         COMPONENTs are characters of multibye form or composition
1767         rules encoded by two-byte of ASCII codes.
1768
1769    In addition, for backward compatibility, the following formats are
1770    also recognized as composition data on decoding.
1771
1772    0x80 MSEQ ...
1773    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1774
1775    Here,
1776         MSEQ is a multibyte form but in these special format:
1777           ASCII: 0xA0 ASCII_CODE+0x80,
1778           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1779         RULE is a one byte code of the range 0xA0..0xF0 that
1780         represents a composition rule.
1781   */
1782
1783 char emacs_mule_bytes[256];
1784
1785 int
1786 emacs_mule_char (coding, src, nbytes, nchars, id)
1787      struct coding_system *coding;
1788      const unsigned char *src;
1789      int *nbytes, *nchars, *id;
1790 {
1791   const unsigned char *src_end = coding->source + coding->src_bytes;
1792   const unsigned char *src_base = src;
1793   int multibytep = coding->src_multibyte;
1794   struct charset *charset;
1795   unsigned code;
1796   int c;
1797   int consumed_chars = 0;
1798
1799   ONE_MORE_BYTE (c);
1800   if (c < 0)
1801     {
1802       c = -c;
1803       charset = emacs_mule_charset[0];
1804     }
1805   else
1806     {
1807       if (c >= 0xA0)
1808         {
1809           /* Old style component character of a composition.  */
1810           if (c == 0xA0)
1811             {
1812               ONE_MORE_BYTE (c);
1813               c -= 0x80;
1814             }
1815           else
1816             c -= 0x20;
1817         }
1818
1819       switch (emacs_mule_bytes[c])
1820         {
1821         case 2:
1822           if (! (charset = emacs_mule_charset[c]))
1823             goto invalid_code;
1824           ONE_MORE_BYTE (c);
1825           if (c < 0xA0)
1826             goto invalid_code;
1827           code = c & 0x7F;
1828           break;
1829
1830         case 3:
1831           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1832               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1833             {
1834               ONE_MORE_BYTE (c);
1835               if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
1836                 goto invalid_code;
1837               ONE_MORE_BYTE (c);
1838               if (c < 0xA0)
1839                 goto invalid_code;
1840               code = c & 0x7F;
1841             }
1842           else
1843             {
1844               if (! (charset = emacs_mule_charset[c]))
1845                 goto invalid_code;
1846               ONE_MORE_BYTE (c);
1847               if (c < 0xA0)
1848                 goto invalid_code;
1849               code = (c & 0x7F) << 8;
1850               ONE_MORE_BYTE (c);
1851               if (c < 0xA0)
1852                 goto invalid_code;
1853               code |= c & 0x7F;
1854             }
1855           break;
1856
1857         case 4:
1858           ONE_MORE_BYTE (c);
1859           if (c < 0 || ! (charset = emacs_mule_charset[c]))
1860             goto invalid_code;
1861           ONE_MORE_BYTE (c);
1862           if (c < 0xA0)
1863             goto invalid_code;
1864           code = (c & 0x7F) << 8;
1865           ONE_MORE_BYTE (c);
1866           if (c < 0xA0)
1867             goto invalid_code;
1868           code |= c & 0x7F;
1869           break;
1870
1871         case 1:
1872           code = c;
1873           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1874                                      ? charset_ascii : charset_eight_bit);
1875           break;
1876
1877         default:
1878           abort ();
1879         }
1880       c = DECODE_CHAR (charset, code);
1881       if (c < 0)
1882         goto invalid_code;
1883     }
1884   *nbytes = src - src_base;
1885   *nchars = consumed_chars;
1886   if (id)
1887     *id = charset->id;
1888   return c;
1889
1890  no_more_source:
1891   return -2;
1892
1893  invalid_code:
1894   return -1;
1895 }
1896
1897
1898 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1899    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1900    else return 0.  */
1901
1902 static int
1903 detect_coding_emacs_mule (coding, detect_info)
1904      struct coding_system *coding;
1905      struct coding_detection_info *detect_info;
1906 {
1907   const unsigned char *src = coding->source, *src_base;
1908   const unsigned char *src_end = coding->source + coding->src_bytes;
1909   int multibytep = coding->src_multibyte;
1910   int consumed_chars = 0;
1911   int c;
1912   int found = 0;
1913
1914   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1915   /* A coding system of this category is always ASCII compatible.  */
1916   src += coding->head_ascii;
1917
1918   while (1)
1919     {
1920       src_base = src;
1921       ONE_MORE_BYTE (c);
1922       if (c < 0)
1923         continue;
1924       if (c == 0x80)
1925         {
1926           /* Perhaps the start of composite character.  We simple skip
1927              it because analyzing it is too heavy for detecting.  But,
1928              at least, we check that the composite character
1929              constitues of more than 4 bytes.  */
1930           const unsigned char *src_base;
1931
1932         repeat:
1933           src_base = src;
1934           do
1935             {
1936               ONE_MORE_BYTE (c);
1937             }
1938           while (c >= 0xA0);
1939
1940           if (src - src_base <= 4)
1941             break;
1942           found = CATEGORY_MASK_EMACS_MULE;
1943           if (c == 0x80)
1944             goto repeat;
1945         }
1946
1947       if (c < 0x80)
1948         {
1949           if (c < 0x20
1950               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1951             break;
1952         }
1953       else
1954         {
1955           int more_bytes = emacs_mule_bytes[*src_base] - 1;
1956
1957           while (more_bytes > 0)
1958             {
1959               ONE_MORE_BYTE (c);
1960               if (c < 0xA0)
1961                 {
1962                   src--;        /* Unread the last byte.  */
1963                   break;
1964                 }
1965               more_bytes--;
1966             }
1967           if (more_bytes != 0)
1968             break;
1969           found = CATEGORY_MASK_EMACS_MULE;
1970         }
1971     }
1972   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1973   return 0;
1974
1975  no_more_source:
1976   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1977     {
1978       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1979       return 0;
1980     }
1981   detect_info->found |= found;
1982   return 1;
1983 }
1984
1985
1986 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1987
1988 /* Decode a character represented as a component of composition
1989    sequence of Emacs 20/21 style at SRC.  Set C to that character and
1990    update SRC to the head of next character (or an encoded composition
1991    rule).  If SRC doesn't points a composition component, set C to -1.
1992    If SRC points an invalid byte sequence, global exit by a return
1993    value 0.  */
1994
1995 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                 \
1996   do                                                            \
1997     {                                                           \
1998       int c;                                                    \
1999       int nbytes, nchars;                                       \
2000                                                                 \
2001       if (src == src_end)                                       \
2002         break;                                                  \
2003       c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
2004       if (c < 0)                                                \
2005         {                                                       \
2006           if (c == -2)                                          \
2007             break;                                              \
2008           goto invalid_code;                                    \
2009         }                                                       \
2010       *buf++ = c;                                               \
2011       src += nbytes;                                            \
2012       consumed_chars += nchars;                                 \
2013     }                                                           \
2014   while (0)
2015
2016
2017 /* Decode a composition rule represented as a component of composition
2018    sequence of Emacs 20 style at SRC.  Store the decoded rule in *BUF,
2019    and increment BUF.  If SRC points an invalid byte sequence, set C
2020    to -1.  */
2021
2022 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
2023   do {                                                  \
2024     int c, gref, nref;                                  \
2025                                                         \
2026     if (src >= src_end)                                 \
2027       goto invalid_code;                                \
2028     ONE_MORE_BYTE_NO_CHECK (c);                         \
2029     c -= 0xA0;                                          \
2030     if (c < 0 || c >= 81)                               \
2031       goto invalid_code;                                \
2032                                                         \
2033     gref = c / 9, nref = c % 9;                         \
2034     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
2035   } while (0)
2036
2037
2038 /* Decode a composition rule represented as a component of composition
2039    sequence of Emacs 21 style at SRC.  Store the decoded rule in *BUF,
2040    and increment BUF.  If SRC points an invalid byte sequence, set C
2041    to -1.  */
2042
2043 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
2044   do {                                                  \
2045     int gref, nref;                                     \
2046                                                         \
2047     if (src + 1>= src_end)                              \
2048       goto invalid_code;                                \
2049     ONE_MORE_BYTE_NO_CHECK (gref);                      \
2050     gref -= 0x20;                                       \
2051     ONE_MORE_BYTE_NO_CHECK (nref);                      \
2052     nref -= 0x20;                                       \
2053     if (gref < 0 || gref >= 81                          \
2054         || nref < 0 || nref >= 81)                      \
2055       goto invalid_code;                                \
2056     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
2057   } while (0)
2058
2059
2060 #define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
2061   do {                                                                  \
2062     /* Emacs 21 style format.  The first three bytes at SRC are         \
2063        (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is  \
2064        the byte length of this composition information, CHARS is the    \
2065        number of characters composed by this composition.  */           \
2066     enum composition_method method = c - 0xF2;                          \
2067     int *charbuf_base = charbuf;                                        \
2068     int consumed_chars_limit;                                           \
2069     int nbytes, nchars;                                                 \
2070                                                                         \
2071     ONE_MORE_BYTE (c);                                                  \
2072     if (c < 0)                                                          \
2073       goto invalid_code;                                                \
2074     nbytes = c - 0xA0;                                                  \
2075     if (nbytes < 3)                                                     \
2076       goto invalid_code;                                                \
2077     ONE_MORE_BYTE (c);                                                  \
2078     if (c < 0)                                                          \
2079       goto invalid_code;                                                \
2080     nchars = c - 0xA0;                                                  \
2081     ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
2082     consumed_chars_limit = consumed_chars_base + nbytes;                \
2083     if (method != COMPOSITION_RELATIVE)                                 \
2084       {                                                                 \
2085         int i = 0;                                                      \
2086         while (consumed_chars < consumed_chars_limit)                   \
2087           {                                                             \
2088             if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
2089               DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
2090             else                                                        \
2091               DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
2092             i++;                                                        \
2093           }                                                             \
2094         if (consumed_chars < consumed_chars_limit)                      \
2095           goto invalid_code;                                            \
2096         charbuf_base[0] -= i;                                           \
2097       }                                                                 \
2098   } while (0)
2099
2100
2101 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)                    \
2102   do {                                                                  \
2103     /* Emacs 20 style format for relative composition.  */              \
2104     /* Store multibyte form of characters to be composed.  */           \
2105     enum composition_method method = COMPOSITION_RELATIVE;              \
2106     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];                 \
2107     int *buf = components;                                              \
2108     int i, j;                                                           \
2109                                                                         \
2110     src = src_base;                                                     \
2111     ONE_MORE_BYTE (c);          /* skip 0x80 */                         \
2112     for (i = 0; *src >= 0xA0 && i < MAX_COMPOSITION_COMPONENTS; i++)    \
2113       DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                         \
2114     if (i < 2)                                                          \
2115       goto invalid_code;                                                \
2116     ADD_COMPOSITION_DATA (charbuf, i, method);                          \
2117     for (j = 0; j < i; j++)                                             \
2118       *charbuf++ = components[j];                                       \
2119   } while (0)
2120
2121
2122 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
2123   do {                                                          \
2124     /* Emacs 20 style format for rule-base composition.  */     \
2125     /* Store multibyte form of characters to be composed.  */   \
2126     enum composition_method method = COMPOSITION_WITH_RULE;     \
2127     int *charbuf_base = charbuf;                                \
2128     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
2129     int *buf = components;                                      \
2130     int i, j;                                                   \
2131                                                                 \
2132     DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
2133     for (i = 1; i < MAX_COMPOSITION_COMPONENTS; i++)            \
2134       {                                                         \
2135         if (*src < 0xA0)                                        \
2136           break;                                                \
2137         DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
2138         DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
2139       }                                                         \
2140     if (i <= 1 || (buf - components) % 2 == 0)                  \
2141       goto invalid_code;                                        \
2142     if (charbuf + i + (i / 2) + 1 >= charbuf_end)               \
2143       goto no_more_source;                                      \
2144     ADD_COMPOSITION_DATA (charbuf, i, method);                  \
2145     i = i * 2 - 1;                                              \
2146     for (j = 0; j < i; j++)                                     \
2147       *charbuf++ = components[j];                               \
2148     charbuf_base[0] -= i;                                       \
2149     for (j = 0; j < i; j += 2)                                  \
2150       *charbuf++ = components[j];                               \
2151   } while (0)
2152
2153
2154 static void
2155 decode_coding_emacs_mule (coding)
2156      struct coding_system *coding;
2157 {
2158   const unsigned char *src = coding->source + coding->consumed;
2159   const unsigned char *src_end = coding->source + coding->src_bytes;
2160   const unsigned char *src_base;
2161   int *charbuf = coding->charbuf + coding->charbuf_used;
2162   int *charbuf_end
2163     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
2164   int consumed_chars = 0, consumed_chars_base;
2165   int multibytep = coding->src_multibyte;
2166   Lisp_Object attrs, charset_list;
2167   int char_offset = coding->produced_char;
2168   int last_offset = char_offset;
2169   int last_id = charset_ascii;
2170   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2171   int byte_after_cr = -1;
2172
2173   CODING_GET_INFO (coding, attrs, charset_list);
2174
2175   while (1)
2176     {
2177       int c;
2178
2179       src_base = src;
2180       consumed_chars_base = consumed_chars;
2181
2182       if (charbuf >= charbuf_end)
2183         break;
2184
2185       if (byte_after_cr >= 0)
2186         c = byte_after_cr, byte_after_cr = -1;
2187       else
2188         ONE_MORE_BYTE (c);
2189       if (c < 0)
2190         {
2191           *charbuf++ = -c;
2192           char_offset++;
2193         }
2194       else if (c < 0x80)
2195         {
2196           if (eol_crlf && c == '\r')
2197             ONE_MORE_BYTE (byte_after_cr);
2198           *charbuf++ = c;
2199           char_offset++;
2200         }
2201       else if (c == 0x80)
2202         {
2203           ONE_MORE_BYTE (c);
2204           if (c < 0)
2205             goto invalid_code;
2206           if (c - 0xF2 >= COMPOSITION_RELATIVE
2207               && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
2208             DECODE_EMACS_MULE_21_COMPOSITION (c);
2209           else if (c < 0xC0)
2210             DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2211           else if (c == 0xFF)
2212             DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2213           else
2214             goto invalid_code;
2215         }
2216       else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2217         {
2218           int nbytes, nchars;
2219           int id;
2220
2221           src = src_base;
2222           consumed_chars = consumed_chars_base;
2223           c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
2224           if (c < 0)
2225             {
2226               if (c == -2)
2227                 break;
2228               goto invalid_code;
2229             }
2230           if (last_id != id)
2231             {
2232               if (last_id != charset_ascii)
2233                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2234               last_id = id;
2235               last_offset = char_offset;
2236             }
2237           *charbuf++ = c;
2238           src += nbytes;
2239           consumed_chars += nchars;
2240           char_offset++;
2241         }
2242       else
2243         goto invalid_code;
2244       continue;
2245
2246     invalid_code:
2247       src = src_base;
2248       consumed_chars = consumed_chars_base;
2249       ONE_MORE_BYTE (c);
2250       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2251       char_offset++;
2252       coding->errors++;
2253     }
2254
2255  no_more_source:
2256   if (last_id != charset_ascii)
2257     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2258   coding->consumed_char += consumed_chars_base;
2259   coding->consumed = src_base - coding->source;
2260   coding->charbuf_used = charbuf - coding->charbuf;
2261 }
2262
2263
2264 #define EMACS_MULE_LEADING_CODES(id, codes)     \
2265   do {                                          \
2266     if (id < 0xA0)                              \
2267       codes[0] = id, codes[1] = 0;              \
2268     else if (id < 0xE0)                         \
2269       codes[0] = 0x9A, codes[1] = id;           \
2270     else if (id < 0xF0)                         \
2271       codes[0] = 0x9B, codes[1] = id;           \
2272     else if (id < 0xF5)                         \
2273       codes[0] = 0x9C, codes[1] = id;           \
2274     else                                        \
2275       codes[0] = 0x9D, codes[1] = id;           \
2276   } while (0);
2277
2278
2279 static int
2280 encode_coding_emacs_mule (coding)
2281      struct coding_system *coding;
2282 {
2283   int multibytep = coding->dst_multibyte;
2284   int *charbuf = coding->charbuf;
2285   int *charbuf_end = charbuf + coding->charbuf_used;
2286   unsigned char *dst = coding->destination + coding->produced;
2287   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2288   int safe_room = 8;
2289   int produced_chars = 0;
2290   Lisp_Object attrs, charset_list;
2291   int c;
2292   int preferred_charset_id = -1;
2293
2294   CODING_GET_INFO (coding, attrs, charset_list);
2295   if (! EQ (charset_list, Vemacs_mule_charset_list))
2296     {
2297       CODING_ATTR_CHARSET_LIST (attrs)
2298         = charset_list = Vemacs_mule_charset_list;
2299     }
2300
2301   while (charbuf < charbuf_end)
2302     {
2303       ASSURE_DESTINATION (safe_room);
2304       c = *charbuf++;
2305
2306       if (c < 0)
2307         {
2308           /* Handle an annotation.  */
2309           switch (*charbuf)
2310             {
2311             case CODING_ANNOTATE_COMPOSITION_MASK:
2312               /* Not yet implemented.  */
2313               break;
2314             case CODING_ANNOTATE_CHARSET_MASK:
2315               preferred_charset_id = charbuf[3];
2316               if (preferred_charset_id >= 0
2317                   && NILP (Fmemq (make_number (preferred_charset_id),
2318                                   charset_list)))
2319                 preferred_charset_id = -1;
2320               break;
2321             default:
2322               abort ();
2323             }
2324           charbuf += -c - 1;
2325           continue;
2326         }
2327
2328       if (ASCII_CHAR_P (c))
2329         EMIT_ONE_ASCII_BYTE (c);
2330       else if (CHAR_BYTE8_P (c))
2331         {
2332           c = CHAR_TO_BYTE8 (c);
2333           EMIT_ONE_BYTE (c);
2334         }
2335       else
2336         {
2337           struct charset *charset;
2338           unsigned code;
2339           int dimension;
2340           int emacs_mule_id;
2341           unsigned char leading_codes[2];
2342
2343           if (preferred_charset_id >= 0)
2344             {
2345               charset = CHARSET_FROM_ID (preferred_charset_id);
2346               if (! CHAR_CHARSET_P (c, charset))
2347                 charset = char_charset (c, charset_list, NULL);
2348             }
2349           else
2350             charset = char_charset (c, charset_list, &code);
2351           if (! charset)
2352             {
2353               c = coding->default_char;
2354               if (ASCII_CHAR_P (c))
2355                 {
2356                   EMIT_ONE_ASCII_BYTE (c);
2357                   continue;
2358                 }
2359               charset = char_charset (c, charset_list, &code);
2360             }
2361           dimension = CHARSET_DIMENSION (charset);
2362           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2363           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2364           EMIT_ONE_BYTE (leading_codes[0]);
2365           if (leading_codes[1])
2366             EMIT_ONE_BYTE (leading_codes[1]);
2367           if (dimension == 1)
2368             EMIT_ONE_BYTE (code | 0x80);
2369           else
2370             {
2371               code |= 0x8080;
2372               EMIT_ONE_BYTE (code >> 8);
2373               EMIT_ONE_BYTE (code & 0xFF);
2374             }
2375         }
2376     }
2377   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2378   coding->produced_char += produced_chars;
2379   coding->produced = dst - coding->destination;
2380   return 0;
2381 }
2382
2383 \f
2384 /*** 7. ISO2022 handlers ***/
2385
2386 /* The following note describes the coding system ISO2022 briefly.
2387    Since the intention of this note is to help understand the
2388    functions in this file, some parts are NOT ACCURATE or are OVERLY
2389    SIMPLIFIED.  For thorough understanding, please refer to the
2390    original document of ISO2022.  This is equivalent to the standard
2391    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2392
2393    ISO2022 provides many mechanisms to encode several character sets
2394    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2395    is encoded using bytes less than 128.  This may make the encoded
2396    text a little bit longer, but the text passes more easily through
2397    several types of gateway, some of which strip off the MSB (Most
2398    Significant Bit).
2399
2400    There are two kinds of character sets: control character sets and
2401    graphic character sets.  The former contain control characters such
2402    as `newline' and `escape' to provide control functions (control
2403    functions are also provided by escape sequences).  The latter
2404    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2405    two control character sets and many graphic character sets.
2406
2407    Graphic character sets are classified into one of the following
2408    four classes, according to the number of bytes (DIMENSION) and
2409    number of characters in one dimension (CHARS) of the set:
2410    - DIMENSION1_CHARS94
2411    - DIMENSION1_CHARS96
2412    - DIMENSION2_CHARS94
2413    - DIMENSION2_CHARS96
2414
2415    In addition, each character set is assigned an identification tag,
2416    unique for each set, called the "final character" (denoted as <F>
2417    hereafter).  The <F> of each character set is decided by ECMA(*)
2418    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2419    (0x30..0x3F are for private use only).
2420
2421    Note (*): ECMA = European Computer Manufacturers Association
2422
2423    Here are examples of graphic character sets [NAME(<F>)]:
2424         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2425         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2426         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2427         o DIMENSION2_CHARS96 -- none for the moment
2428
2429    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2430         C0 [0x00..0x1F] -- control character plane 0
2431         GL [0x20..0x7F] -- graphic character plane 0
2432         C1 [0x80..0x9F] -- control character plane 1
2433         GR [0xA0..0xFF] -- graphic character plane 1
2434
2435    A control character set is directly designated and invoked to C0 or
2436    C1 by an escape sequence.  The most common case is that:
2437    - ISO646's  control character set is designated/invoked to C0, and
2438    - ISO6429's control character set is designated/invoked to C1,
2439    and usually these designations/invocations are omitted in encoded
2440    text.  In a 7-bit environment, only C0 can be used, and a control
2441    character for C1 is encoded by an appropriate escape sequence to
2442    fit into the environment.  All control characters for C1 are
2443    defined to have corresponding escape sequences.
2444
2445    A graphic character set is at first designated to one of four
2446    graphic registers (G0 through G3), then these graphic registers are
2447    invoked to GL or GR.  These designations and invocations can be
2448    done independently.  The most common case is that G0 is invoked to
2449    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2450    these invocations and designations are omitted in encoded text.
2451    In a 7-bit environment, only GL can be used.
2452
2453    When a graphic character set of CHARS94 is invoked to GL, codes
2454    0x20 and 0x7F of the GL area work as control characters SPACE and
2455    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2456    be used.
2457
2458    There are two ways of invocation: locking-shift and single-shift.
2459    With locking-shift, the invocation lasts until the next different
2460    invocation, whereas with single-shift, the invocation affects the
2461    following character only and doesn't affect the locking-shift
2462    state.  Invocations are done by the following control characters or
2463    escape sequences:
2464
2465    ----------------------------------------------------------------------
2466    abbrev  function                  cntrl escape seq   description
2467    ----------------------------------------------------------------------
2468    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2469    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2470    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2471    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2472    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2473    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2474    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2475    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2476    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2477    ----------------------------------------------------------------------
2478    (*) These are not used by any known coding system.
2479
2480    Control characters for these functions are defined by macros
2481    ISO_CODE_XXX in `coding.h'.
2482
2483    Designations are done by the following escape sequences:
2484    ----------------------------------------------------------------------
2485    escape sequence      description
2486    ----------------------------------------------------------------------
2487    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2488    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2489    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2490    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2491    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2492    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2493    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2494    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2495    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2496    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2497    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2498    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2499    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2500    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2501    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2502    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2503    ----------------------------------------------------------------------
2504
2505    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2506    of dimension 1, chars 94, and final character <F>, etc...
2507
2508    Note (*): Although these designations are not allowed in ISO2022,
2509    Emacs accepts them on decoding, and produces them on encoding
2510    CHARS96 character sets in a coding system which is characterized as
2511    7-bit environment, non-locking-shift, and non-single-shift.
2512
2513    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2514    '(' must be omitted.  We refer to this as "short-form" hereafter.
2515
2516    Now you may notice that there are a lot of ways of encoding the
2517    same multilingual text in ISO2022.  Actually, there exist many
2518    coding systems such as Compound Text (used in X11's inter client
2519    communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2520    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2521    localized platforms), and all of these are variants of ISO2022.
2522
2523    In addition to the above, Emacs handles two more kinds of escape
2524    sequences: ISO6429's direction specification and Emacs' private
2525    sequence for specifying character composition.
2526
2527    ISO6429's direction specification takes the following form:
2528         o CSI ']'      -- end of the current direction
2529         o CSI '0' ']'  -- end of the current direction
2530         o CSI '1' ']'  -- start of left-to-right text
2531         o CSI '2' ']'  -- start of right-to-left text
2532    The control character CSI (0x9B: control sequence introducer) is
2533    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2534
2535    Character composition specification takes the following form:
2536         o ESC '0' -- start relative composition
2537         o ESC '1' -- end composition
2538         o ESC '2' -- start rule-base composition (*)
2539         o ESC '3' -- start relative composition with alternate chars  (**)
2540         o ESC '4' -- start rule-base composition with alternate chars  (**)
2541   Since these are not standard escape sequences of any ISO standard,
2542   the use of them with these meanings is restricted to Emacs only.
2543
2544   (*) This form is used only in Emacs 20.7 and older versions,
2545   but newer versions can safely decode it.
2546   (**) This form is used only in Emacs 21.1 and newer versions,
2547   and older versions can't decode it.
2548
2549   Here's a list of example usages of these composition escape
2550   sequences (categorized by `enum composition_method').
2551
2552   COMPOSITION_RELATIVE:
2553         ESC 0 CHAR [ CHAR ] ESC 1
2554   COMPOSITION_WITH_RULE:
2555         ESC 2 CHAR [ RULE CHAR ] ESC 1
2556   COMPOSITION_WITH_ALTCHARS:
2557         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2558   COMPOSITION_WITH_RULE_ALTCHARS:
2559         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2560
2561 enum iso_code_class_type iso_code_class[256];
2562
2563 #define SAFE_CHARSET_P(coding, id)      \
2564   ((id) <= (coding)->max_charset_id     \
2565    && (coding)->safe_charsets[id] >= 0)
2566
2567
2568 #define SHIFT_OUT_OK(category)  \
2569   (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2570
2571 static void
2572 setup_iso_safe_charsets (attrs)
2573      Lisp_Object attrs;
2574 {
2575   Lisp_Object charset_list, safe_charsets;
2576   Lisp_Object request;
2577   Lisp_Object reg_usage;
2578   Lisp_Object tail;
2579   int reg94, reg96;
2580   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2581   int max_charset_id;
2582
2583   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2584   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2585       && ! EQ (charset_list, Viso_2022_charset_list))
2586     {
2587       CODING_ATTR_CHARSET_LIST (attrs)
2588         = charset_list = Viso_2022_charset_list;
2589       ASET (attrs, coding_attr_safe_charsets, Qnil);
2590     }
2591
2592   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2593     return;
2594
2595   max_charset_id = 0;
2596   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2597     {
2598       int id = XINT (XCAR (tail));
2599       if (max_charset_id < id)
2600         max_charset_id = id;
2601     }
2602
2603   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2604                                 make_number (255));
2605   request = AREF (attrs, coding_attr_iso_request);
2606   reg_usage = AREF (attrs, coding_attr_iso_usage);
2607   reg94 = XINT (XCAR (reg_usage));
2608   reg96 = XINT (XCDR (reg_usage));
2609
2610   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2611     {
2612       Lisp_Object id;
2613       Lisp_Object reg;
2614       struct charset *charset;
2615
2616       id = XCAR (tail);
2617       charset = CHARSET_FROM_ID (XINT (id));
2618       reg = Fcdr (Fassq (id, request));
2619       if (! NILP (reg))
2620         SSET (safe_charsets, XINT (id), XINT (reg));
2621       else if (charset->iso_chars_96)
2622         {
2623           if (reg96 < 4)
2624             SSET (safe_charsets, XINT (id), reg96);
2625         }
2626       else
2627         {
2628           if (reg94 < 4)
2629             SSET (safe_charsets, XINT (id), reg94);
2630         }
2631     }
2632   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2633 }
2634
2635
2636 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2637    Check if a text is encoded in one of ISO-2022 based codig systems.
2638    If it is, return 1, else return 0.  */
2639
2640 static int
2641 detect_coding_iso_2022 (coding, detect_info)
2642      struct coding_system *coding;
2643      struct coding_detection_info *detect_info;
2644 {
2645   const unsigned char *src = coding->source, *src_base = src;
2646   const unsigned char *src_end = coding->source + coding->src_bytes;
2647   int multibytep = coding->src_multibyte;
2648   int single_shifting = 0;
2649   int id;
2650   int c, c1;
2651   int consumed_chars = 0;
2652   int i;
2653   int rejected = 0;
2654   int found = 0;
2655
2656   detect_info->checked |= CATEGORY_MASK_ISO;
2657
2658   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2659     {
2660       struct coding_system *this = &(coding_categories[i]);
2661       Lisp_Object attrs, val;
2662
2663       attrs = CODING_ID_ATTRS (this->id);
2664       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2665           && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2666         setup_iso_safe_charsets (attrs);
2667       val = CODING_ATTR_SAFE_CHARSETS (attrs);
2668       this->max_charset_id = SCHARS (val) - 1;
2669       this->safe_charsets = (char *) SDATA (val);
2670     }
2671
2672   /* A coding system of this category is always ASCII compatible.  */
2673   src += coding->head_ascii;
2674
2675   while (rejected != CATEGORY_MASK_ISO)
2676     {
2677       src_base = src;
2678       ONE_MORE_BYTE (c);
2679       switch (c)
2680         {
2681         case ISO_CODE_ESC:
2682           if (inhibit_iso_escape_detection)
2683             break;
2684           single_shifting = 0;
2685           ONE_MORE_BYTE (c);
2686           if (c >= '(' && c <= '/')
2687             {
2688               /* Designation sequence for a charset of dimension 1.  */
2689               ONE_MORE_BYTE (c1);
2690               if (c1 < ' ' || c1 >= 0x80
2691                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2692                 /* Invalid designation sequence.  Just ignore.  */
2693                 break;
2694             }
2695           else if (c == '$')
2696             {
2697               /* Designation sequence for a charset of dimension 2.  */
2698               ONE_MORE_BYTE (c);
2699               if (c >= '@' && c <= 'B')
2700                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
2701                 id = iso_charset_table[1][0][c];
2702               else if (c >= '(' && c <= '/')
2703                 {
2704                   ONE_MORE_BYTE (c1);
2705                   if (c1 < ' ' || c1 >= 0x80
2706                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2707                     /* Invalid designation sequence.  Just ignore.  */
2708                     break;
2709                 }
2710               else
2711                 /* Invalid designation sequence.  Just ignore it.  */
2712                 break;
2713             }
2714           else if (c == 'N' || c == 'O')
2715             {
2716               /* ESC <Fe> for SS2 or SS3.  */
2717               single_shifting = 1;
2718               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2719               break;
2720             }
2721           else if (c >= '0' && c <= '4')
2722             {
2723               /* ESC <Fp> for start/end composition.  */
2724               found |= CATEGORY_MASK_ISO;
2725               break;
2726             }
2727           else
2728             {
2729               /* Invalid escape sequence.  Just ignore it.  */
2730               break;
2731             }
2732
2733           /* We found a valid designation sequence for CHARSET.  */
2734           rejected |= CATEGORY_MASK_ISO_8BIT;
2735           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2736                               id))
2737             found |= CATEGORY_MASK_ISO_7;
2738           else
2739             rejected |= CATEGORY_MASK_ISO_7;
2740           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2741                               id))
2742             found |= CATEGORY_MASK_ISO_7_TIGHT;
2743           else
2744             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
2745           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2746                               id))
2747             found |= CATEGORY_MASK_ISO_7_ELSE;
2748           else
2749             rejected |= CATEGORY_MASK_ISO_7_ELSE;
2750           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2751                               id))
2752             found |= CATEGORY_MASK_ISO_8_ELSE;
2753           else
2754             rejected |= CATEGORY_MASK_ISO_8_ELSE;
2755           break;
2756
2757         case ISO_CODE_SO:
2758         case ISO_CODE_SI:
2759           /* Locking shift out/in.  */
2760           if (inhibit_iso_escape_detection)
2761             break;
2762           single_shifting = 0;
2763           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2764           break;
2765
2766         case ISO_CODE_CSI:
2767           /* Control sequence introducer.  */
2768           single_shifting = 0;
2769           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2770           found |= CATEGORY_MASK_ISO_8_ELSE;
2771           goto check_extra_latin;
2772
2773         case ISO_CODE_SS2:
2774         case ISO_CODE_SS3:
2775           /* Single shift.   */
2776           if (inhibit_iso_escape_detection)
2777             break;
2778           single_shifting = 0;
2779           rejected |= CATEGORY_MASK_ISO_7BIT;
2780           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2781               & CODING_ISO_FLAG_SINGLE_SHIFT)
2782             found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
2783           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2784               & CODING_ISO_FLAG_SINGLE_SHIFT)
2785             found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
2786           if (single_shifting)
2787             break;
2788           goto check_extra_latin;
2789
2790         default:
2791           if (c < 0)
2792             continue;
2793           if (c < 0x80)
2794             {
2795               single_shifting = 0;
2796               break;
2797             }
2798           if (c >= 0xA0)
2799             {
2800               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2801               found |= CATEGORY_MASK_ISO_8_1;
2802               /* Check the length of succeeding codes of the range
2803                  0xA0..0FF.  If the byte length is even, we include
2804                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
2805                  only when we are not single shifting.  */
2806               if (! single_shifting
2807                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
2808                 {
2809                   int i = 1;
2810                   while (src < src_end)
2811                     {
2812                       ONE_MORE_BYTE (c);
2813                       if (c < 0xA0)
2814                         break;
2815                       i++;
2816                     }
2817
2818                   if (i & 1 && src < src_end)
2819                     rejected |= CATEGORY_MASK_ISO_8_2;
2820                   else
2821                     found |= CATEGORY_MASK_ISO_8_2;
2822                 }
2823               break;
2824             }
2825         check_extra_latin:
2826           single_shifting = 0;
2827           if (! VECTORP (Vlatin_extra_code_table)
2828               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2829             {
2830               rejected = CATEGORY_MASK_ISO;
2831               break;
2832             }
2833           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2834               & CODING_ISO_FLAG_LATIN_EXTRA)
2835             found |= CATEGORY_MASK_ISO_8_1;
2836           else
2837             rejected |= CATEGORY_MASK_ISO_8_1;
2838           rejected |= CATEGORY_MASK_ISO_8_2;
2839         }
2840     }
2841   detect_info->rejected |= CATEGORY_MASK_ISO;
2842   return 0;
2843
2844  no_more_source:
2845   detect_info->rejected |= rejected;
2846   detect_info->found |= (found & ~rejected);
2847   return 1;
2848 }
2849
2850
2851 /* Set designation state into CODING.  Set CHARS_96 to -1 if the
2852    escape sequence should be kept.  */
2853 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
2854   do {                                                                  \
2855     int id, prev;                                                       \
2856                                                                         \
2857     if (final < '0' || final >= 128                                     \
2858         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
2859         || !SAFE_CHARSET_P (coding, id))                                \
2860       {                                                                 \
2861         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
2862         chars_96 = -1;                                                  \
2863         break;                                                          \
2864       }                                                                 \
2865     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
2866     if (id == charset_jisx0201_roman)                                   \
2867       {                                                                 \
2868         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
2869           id = charset_ascii;                                           \
2870       }                                                                 \
2871     else if (id == charset_jisx0208_1978)                               \
2872       {                                                                 \
2873         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
2874           id = charset_jisx0208;                                        \
2875       }                                                                 \
2876     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
2877     /* If there was an invalid designation to REG previously, and this  \
2878        designation is ASCII to REG, we should keep this designation     \
2879        sequence.  */                                                    \
2880     if (prev == -2 && id == charset_ascii)                              \
2881       chars_96 = -1;                                                    \
2882   } while (0)
2883
2884
2885 #define MAYBE_FINISH_COMPOSITION()                              \
2886   do {                                                          \
2887     int i;                                                      \
2888     if (composition_state == COMPOSING_NO)                      \
2889       break;                                                    \
2890     /* It is assured that we have enough room for producing     \
2891        characters stored in the table `components'.  */         \
2892     if (charbuf + component_idx > charbuf_end)                  \
2893       goto no_more_source;                                      \
2894     composition_state = COMPOSING_NO;                           \
2895     if (method == COMPOSITION_RELATIVE                          \
2896         || method == COMPOSITION_WITH_ALTCHARS)                 \
2897       {                                                         \
2898         for (i = 0; i < component_idx; i++)                     \
2899           *charbuf++ = components[i];                           \
2900         char_offset += component_idx;                           \
2901       }                                                         \
2902     else                                                        \
2903       {                                                         \
2904         for (i = 0; i < component_idx; i += 2)                  \
2905           *charbuf++ = components[i];                           \
2906         char_offset += (component_idx / 2) + 1;                 \
2907       }                                                         \
2908   } while (0)
2909
2910
2911 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2912    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2913    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2914    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2915    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2916   */
2917
2918 #define DECODE_COMPOSITION_START(c1)                                    \
2919   do {                                                                  \
2920     if (c1 == '0'                                                       \
2921         && composition_state == COMPOSING_COMPONENT_RULE)               \
2922       {                                                                 \
2923         component_len = component_idx;                                  \
2924         composition_state = COMPOSING_CHAR;                             \
2925       }                                                                 \
2926     else                                                                \
2927       {                                                                 \
2928         const unsigned char *p;                                         \
2929                                                                         \
2930         MAYBE_FINISH_COMPOSITION ();                                    \
2931         if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
2932           goto no_more_source;                                          \
2933         for (p = src; p < src_end - 1; p++)                             \
2934           if (*p == ISO_CODE_ESC && p[1] == '1')                        \
2935             break;                                                      \
2936         if (p == src_end - 1)                                           \
2937           {                                                             \
2938             /* The current composition doesn't end in the current       \
2939                source.  */                                              \
2940             record_conversion_result                                    \
2941               (coding, CODING_RESULT_INSUFFICIENT_SRC);                 \
2942             goto no_more_source;                                        \
2943           }                                                             \
2944                                                                         \
2945         /* This is surely the start of a composition.  */               \
2946         method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
2947                   : c1 == '2' ? COMPOSITION_WITH_RULE                   \
2948                   : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
2949                   : COMPOSITION_WITH_RULE_ALTCHARS);                    \
2950         composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
2951                              : COMPOSING_COMPONENT_CHAR);               \
2952         component_idx = component_len = 0;                              \
2953       }                                                                 \
2954   } while (0)
2955
2956
2957 /* Handle compositoin end sequence ESC 1.  */
2958
2959 #define DECODE_COMPOSITION_END()                                        \
2960   do {                                                                  \
2961     int nchars = (component_len > 0 ? component_idx - component_len     \
2962                   : method == COMPOSITION_RELATIVE ? component_idx      \
2963                   : (component_idx + 1) / 2);                           \
2964     int i;                                                              \
2965     int *saved_charbuf = charbuf;                                       \
2966                                                                         \
2967     ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
2968     if (method != COMPOSITION_RELATIVE)                                 \
2969       {                                                                 \
2970         if (component_len == 0)                                         \
2971           for (i = 0; i < component_idx; i++)                           \
2972             *charbuf++ = components[i];                                 \
2973         else                                                            \
2974           for (i = 0; i < component_len; i++)                           \
2975             *charbuf++ = components[i];                                 \
2976         *saved_charbuf = saved_charbuf - charbuf;                       \
2977       }                                                                 \
2978     if (method == COMPOSITION_WITH_RULE)                                \
2979       for (i = 0; i < component_idx; i += 2, char_offset++)             \
2980         *charbuf++ = components[i];                                     \
2981     else                                                                \
2982       for (i = component_len; i < component_idx; i++, char_offset++)    \
2983         *charbuf++ = components[i];                                     \
2984     coding->annotated = 1;                                              \
2985     composition_state = COMPOSING_NO;                                   \
2986   } while (0)
2987
2988
2989 /* Decode a composition rule from the byte C1 (and maybe one more byte
2990    from SRC) and store one encoded composition rule in
2991    coding->cmp_data.  */
2992
2993 #define DECODE_COMPOSITION_RULE(c1)                                     \
2994   do {                                                                  \
2995     (c1) -= 32;                                                         \
2996     if (c1 < 81)                /* old format (before ver.21) */        \
2997       {                                                                 \
2998         int gref = (c1) / 9;                                            \
2999         int nref = (c1) % 9;                                            \
3000         if (gref == 4) gref = 10;                                       \
3001         if (nref == 4) nref = 10;                                       \
3002         c1 = COMPOSITION_ENCODE_RULE (gref, nref);                      \
3003       }                                                                 \
3004     else if (c1 < 93)           /* new format (after ver.21) */         \
3005       {                                                                 \
3006         ONE_MORE_BYTE (c2);                                             \
3007         c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);                \
3008       }                                                                 \
3009     else                                                                \
3010       c1 = 0;                                                           \
3011   } while (0)
3012
3013
3014 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
3015
3016 static void
3017 decode_coding_iso_2022 (coding)
3018      struct coding_system *coding;
3019 {
3020   const unsigned char *src = coding->source + coding->consumed;
3021   const unsigned char *src_end = coding->source + coding->src_bytes;
3022   const unsigned char *src_base;
3023   int *charbuf = coding->charbuf + coding->charbuf_used;
3024   int *charbuf_end
3025     = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
3026   int consumed_chars = 0, consumed_chars_base;
3027   int multibytep = coding->src_multibyte;
3028   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
3029   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3030   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3031   int charset_id_2, charset_id_3;
3032   struct charset *charset;
3033   int c;
3034   /* For handling composition sequence.  */
3035 #define COMPOSING_NO                    0
3036 #define COMPOSING_CHAR                  1
3037 #define COMPOSING_RULE                  2
3038 #define COMPOSING_COMPONENT_CHAR        3
3039 #define COMPOSING_COMPONENT_RULE        4
3040
3041   int composition_state = COMPOSING_NO;
3042   enum composition_method method;
3043   int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
3044   int component_idx;
3045   int component_len;
3046   Lisp_Object attrs, charset_list;
3047   int char_offset = coding->produced_char;
3048   int last_offset = char_offset;
3049   int last_id = charset_ascii;
3050   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3051   int byte_after_cr = -1;
3052
3053   CODING_GET_INFO (coding, attrs, charset_list);
3054   setup_iso_safe_charsets (attrs);
3055   /* Charset list may have been changed.  */
3056   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3057   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
3058
3059   while (1)
3060     {
3061       int c1, c2;
3062
3063       src_base = src;
3064       consumed_chars_base = consumed_chars;
3065
3066       if (charbuf >= charbuf_end)
3067         break;
3068
3069       if (byte_after_cr >= 0)
3070         c1 = byte_after_cr, byte_after_cr = -1;
3071       else
3072         ONE_MORE_BYTE (c1);
3073       if (c1 < 0)
3074         goto invalid_code;
3075
3076       /* We produce at most one character.  */
3077       switch (iso_code_class [c1])
3078         {
3079         case ISO_0x20_or_0x7F:
3080           if (composition_state != COMPOSING_NO)
3081             {
3082               if (composition_state == COMPOSING_RULE
3083                   || composition_state == COMPOSING_COMPONENT_RULE)
3084                 {
3085                   DECODE_COMPOSITION_RULE (c1);
3086                   components[component_idx++] = c1;
3087                   composition_state--;
3088                   continue;
3089                 }
3090             }
3091           if (charset_id_0 < 0
3092               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3093             /* This is SPACE or DEL.  */
3094             charset = CHARSET_FROM_ID (charset_ascii);
3095           else
3096             charset = CHARSET_FROM_ID (charset_id_0);
3097           break;
3098
3099         case ISO_graphic_plane_0:
3100           if (composition_state != COMPOSING_NO)
3101             {
3102               if (composition_state == COMPOSING_RULE
3103                   || composition_state == COMPOSING_COMPONENT_RULE)
3104                 {
3105                   DECODE_COMPOSITION_RULE (c1);
3106                   components[component_idx++] = c1;
3107                   composition_state--;
3108                   continue;
3109                 }
3110             }
3111           if (charset_id_0 < 0)
3112             charset = CHARSET_FROM_ID (charset_ascii);
3113           else
3114             charset = CHARSET_FROM_ID (charset_id_0);
3115           break;
3116
3117         case ISO_0xA0_or_0xFF:
3118           if (charset_id_1 < 0
3119               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3120               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3121             goto invalid_code;
3122           /* This is a graphic character, we fall down ... */
3123
3124         case ISO_graphic_plane_1:
3125           if (charset_id_1 < 0)
3126             goto invalid_code;
3127           charset = CHARSET_FROM_ID (charset_id_1);
3128           break;
3129
3130         case ISO_control_0:
3131           if (eol_crlf && c1 == '\r')
3132             ONE_MORE_BYTE (byte_after_cr);
3133           MAYBE_FINISH_COMPOSITION ();
3134           charset = CHARSET_FROM_ID (charset_ascii);
3135           break;
3136
3137         case ISO_control_1:
3138           MAYBE_FINISH_COMPOSITION ();
3139           goto invalid_code;
3140
3141         case ISO_shift_out:
3142           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3143               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3144             goto invalid_code;
3145           CODING_ISO_INVOCATION (coding, 0) = 1;
3146           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3147           continue;
3148
3149         case ISO_shift_in:
3150           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3151             goto invalid_code;
3152           CODING_ISO_INVOCATION (coding, 0) = 0;
3153           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3154           continue;
3155
3156         case ISO_single_shift_2_7:
3157         case ISO_single_shift_2:
3158           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3159             goto invalid_code;
3160           /* SS2 is handled as an escape sequence of ESC 'N' */
3161           c1 = 'N';
3162           goto label_escape_sequence;
3163
3164         case ISO_single_shift_3:
3165           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3166             goto invalid_code;
3167           /* SS2 is handled as an escape sequence of ESC 'O' */
3168           c1 = 'O';
3169           goto label_escape_sequence;
3170
3171         case ISO_control_sequence_introducer:
3172           /* CSI is handled as an escape sequence of ESC '[' ...  */
3173           c1 = '[';
3174           goto label_escape_sequence;
3175
3176         case ISO_escape:
3177           ONE_MORE_BYTE (c1);
3178         label_escape_sequence:
3179           /* Escape sequences handled here are invocation,
3180              designation, direction specification, and character
3181              composition specification.  */
3182           switch (c1)
3183             {
3184             case '&':           /* revision of following character set */
3185               ONE_MORE_BYTE (c1);
3186               if (!(c1 >= '@' && c1 <= '~'))
3187                 goto invalid_code;
3188               ONE_MORE_BYTE (c1);
3189               if (c1 != ISO_CODE_ESC)
3190                 goto invalid_code;
3191               ONE_MORE_BYTE (c1);
3192               goto label_escape_sequence;
3193
3194             case '$':           /* designation of 2-byte character set */
3195               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3196                 goto invalid_code;
3197               {
3198                 int reg, chars96;
3199
3200                 ONE_MORE_BYTE (c1);
3201                 if (c1 >= '@' && c1 <= 'B')
3202                   {     /* designation of JISX0208.1978, GB2312.1980,
3203                            or JISX0208.1980 */
3204                     reg = 0, chars96 = 0;
3205                   }
3206                 else if (c1 >= 0x28 && c1 <= 0x2B)
3207                   { /* designation of DIMENSION2_CHARS94 character set */
3208                     reg = c1 - 0x28, chars96 = 0;
3209                     ONE_MORE_BYTE (c1);
3210                   }
3211                 else if (c1 >= 0x2C && c1 <= 0x2F)
3212                   { /* designation of DIMENSION2_CHARS96 character set */
3213                     reg = c1 - 0x2C, chars96 = 1;
3214                     ONE_MORE_BYTE (c1);
3215                   }
3216                 else
3217                   goto invalid_code;
3218                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3219                 /* We must update these variables now.  */
3220                 if (reg == 0)
3221                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3222                 else if (reg == 1)
3223                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3224                 if (chars96 < 0)
3225                   goto invalid_code;
3226               }
3227               continue;
3228
3229             case 'n':           /* invocation of locking-shift-2 */
3230               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3231                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3232                 goto invalid_code;
3233               CODING_ISO_INVOCATION (coding, 0) = 2;
3234               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3235               continue;
3236
3237             case 'o':           /* invocation of locking-shift-3 */
3238               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3239                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3240                 goto invalid_code;
3241               CODING_ISO_INVOCATION (coding, 0) = 3;
3242               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3243               continue;
3244
3245             case 'N':           /* invocation of single-shift-2 */
3246               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3247                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3248                 goto invalid_code;
3249               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3250               if (charset_id_2 < 0)
3251                 charset = CHARSET_FROM_ID (charset_ascii);
3252               else
3253                 charset = CHARSET_FROM_ID (charset_id_2);
3254               ONE_MORE_BYTE (c1);
3255               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3256                 goto invalid_code;
3257               break;
3258
3259             case 'O':           /* invocation of single-shift-3 */
3260               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3261                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3262                 goto invalid_code;
3263               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3264               if (charset_id_3 < 0)
3265                 charset = CHARSET_FROM_ID (charset_ascii);
3266               else
3267                 charset = CHARSET_FROM_ID (charset_id_3);
3268               ONE_MORE_BYTE (c1);
3269               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3270                 goto invalid_code;
3271               break;
3272
3273             case '0': case '2': case '3': case '4': /* start composition */
3274               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3275                 goto invalid_code;
3276               DECODE_COMPOSITION_START (c1);
3277               continue;
3278
3279             case '1':           /* end composition */
3280               if (composition_state == COMPOSING_NO)
3281                 goto invalid_code;
3282               DECODE_COMPOSITION_END ();
3283               continue;
3284
3285             case '[':           /* specification of direction */
3286               if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3287                 goto invalid_code;
3288               /* For the moment, nested direction is not supported.
3289                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3290                  left-to-right, and nozero means right-to-left.  */
3291               ONE_MORE_BYTE (c1);
3292               switch (c1)
3293                 {
3294                 case ']':       /* end of the current direction */
3295                   coding->mode &= ~CODING_MODE_DIRECTION;
3296
3297                 case '0':       /* end of the current direction */
3298                 case '1':       /* start of left-to-right direction */
3299                   ONE_MORE_BYTE (c1);
3300                   if (c1 == ']')
3301                     coding->mode &= ~CODING_MODE_DIRECTION;
3302                   else
3303                     goto invalid_code;
3304                   break;
3305
3306                 case '2':       /* start of right-to-left direction */
3307                   ONE_MORE_BYTE (c1);
3308                   if (c1 == ']')
3309                     coding->mode |= CODING_MODE_DIRECTION;
3310                   else
3311                     goto invalid_code;
3312                   break;
3313
3314                 default:
3315                   goto invalid_code;
3316                 }
3317               continue;
3318
3319             case '%':
3320               ONE_MORE_BYTE (c1);
3321               if (c1 == '/')
3322                 {
3323                   /* CTEXT extended segment:
3324                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3325                      We keep these bytes as is for the moment.
3326                      They may be decoded by post-read-conversion.  */
3327                   int dim, M, L;
3328                   int size;
3329
3330                   ONE_MORE_BYTE (dim);
3331                   ONE_MORE_BYTE (M);
3332                   ONE_MORE_BYTE (L);
3333                   size = ((M - 128) * 128) + (L - 128);
3334                   if (charbuf + 8 + size > charbuf_end)
3335                     goto break_loop;
3336                   *charbuf++ = ISO_CODE_ESC;
3337                   *charbuf++ = '%';
3338                   *charbuf++ = '/';
3339                   *charbuf++ = dim;
3340                   *charbuf++ = BYTE8_TO_CHAR (M);
3341                   *charbuf++ = BYTE8_TO_CHAR (L);
3342                   while (size-- > 0)
3343                     {
3344                       ONE_MORE_BYTE (c1);
3345                       *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3346                     }
3347                 }
3348               else if (c1 == 'G')
3349                 {
3350                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3351                      ESC % G --UTF-8-BYTES-- ESC % @
3352                      We keep these bytes as is for the moment.
3353                      They may be decoded by post-read-conversion.  */
3354                   int *p = charbuf;
3355
3356                   if (p + 6 > charbuf_end)
3357                     goto break_loop;
3358                   *p++ = ISO_CODE_ESC;
3359                   *p++ = '%';
3360                   *p++ = 'G';
3361                   while (p < charbuf_end)
3362                     {
3363                       ONE_MORE_BYTE (c1);
3364                       if (c1 == ISO_CODE_ESC
3365                           && src + 1 < src_end
3366                           && src[0] == '%'
3367                           && src[1] == '@')
3368                         {
3369                           src += 2;
3370                           break;
3371                         }
3372                       *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3373                     }
3374                   if (p + 3 > charbuf_end)
3375                     goto break_loop;
3376                   *p++ = ISO_CODE_ESC;
3377                   *p++ = '%';
3378                   *p++ = '@';
3379                   charbuf = p;
3380                 }
3381               else
3382                 goto invalid_code;
3383               continue;
3384               break;
3385
3386             default:
3387               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3388                 goto invalid_code;
3389               {
3390                 int reg, chars96;
3391
3392                 if (c1 >= 0x28 && c1 <= 0x2B)
3393                   { /* designation of DIMENSION1_CHARS94 character set */
3394                     reg = c1 - 0x28, chars96 = 0;
3395                     ONE_MORE_BYTE (c1);
3396                   }
3397                 else if (c1 >= 0x2C && c1 <= 0x2F)
3398                   { /* designation of DIMENSION1_CHARS96 character set */
3399                     reg = c1 - 0x2C, chars96 = 1;
3400                     ONE_MORE_BYTE (c1);
3401                   }
3402                 else
3403                   goto invalid_code;
3404                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3405                 /* We must update these variables now.  */
3406                 if (reg == 0)
3407                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3408                 else if (reg == 1)
3409                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3410                 if (chars96 < 0)
3411                   goto invalid_code;
3412               }
3413               continue;
3414             }
3415         }
3416
3417       if (charset->id != charset_ascii
3418           && last_id != charset->id)
3419         {
3420           if (last_id != charset_ascii)
3421             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3422           last_id = charset->id;
3423           last_offset = char_offset;
3424         }
3425
3426       /* Now we know CHARSET and 1st position code C1 of a character.
3427          Produce a decoded character while getting 2nd position code
3428          C2 if necessary.  */
3429       c1 &= 0x7F;
3430       if (CHARSET_DIMENSION (charset) > 1)
3431         {
3432           ONE_MORE_BYTE (c2);
3433           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3434             /* C2 is not in a valid range.  */
3435             goto invalid_code;
3436           c1 = (c1 << 8) | (c2 & 0x7F);
3437           if (CHARSET_DIMENSION (charset) > 2)
3438             {
3439               ONE_MORE_BYTE (c2);
3440               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3441                 /* C2 is not in a valid range.  */
3442                 goto invalid_code;
3443               c1 = (c1 << 8) | (c2 & 0x7F);
3444             }
3445         }
3446
3447       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3448       if (c < 0)
3449         {
3450           MAYBE_FINISH_COMPOSITION ();
3451           for (; src_base < src; src_base++, char_offset++)
3452             {
3453               if (ASCII_BYTE_P (*src_base))
3454                 *charbuf++ = *src_base;
3455               else
3456                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3457             }
3458         }
3459       else if (composition_state == COMPOSING_NO)
3460         {
3461           *charbuf++ = c;
3462           char_offset++;
3463         }
3464       else
3465         {
3466           components[component_idx++] = c;
3467           if (method == COMPOSITION_WITH_RULE
3468               || (method == COMPOSITION_WITH_RULE_ALTCHARS
3469                   && composition_state == COMPOSING_COMPONENT_CHAR))
3470             composition_state++;
3471         }
3472       continue;
3473
3474     invalid_code:
3475       MAYBE_FINISH_COMPOSITION ();
3476       src = src_base;
3477       consumed_chars = consumed_chars_base;
3478       ONE_MORE_BYTE (c);
3479       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3480       char_offset++;
3481       coding->errors++;
3482       continue;
3483
3484     break_loop:
3485       break;
3486     }
3487
3488  no_more_source:
3489   if (last_id != charset_ascii)
3490     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3491   coding->consumed_char += consumed_chars_base;
3492   coding->consumed = src_base - coding->source;
3493   coding->charbuf_used = charbuf - coding->charbuf;
3494 }
3495
3496
3497 /* ISO2022 encoding stuff.  */
3498
3499 /*
3500    It is not enough to say just "ISO2022" on encoding, we have to
3501    specify more details.  In Emacs, each coding system of ISO2022
3502    variant has the following specifications:
3503         1. Initial designation to G0 thru G3.
3504         2. Allows short-form designation?
3505         3. ASCII should be designated to G0 before control characters?
3506         4. ASCII should be designated to G0 at end of line?
3507         5. 7-bit environment or 8-bit environment?
3508         6. Use locking-shift?
3509         7. Use Single-shift?
3510    And the following two are only for Japanese:
3511         8. Use ASCII in place of JIS0201-1976-Roman?
3512         9. Use JISX0208-1983 in place of JISX0208-1978?
3513    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3514    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
3515    details.
3516 */
3517
3518 /* Produce codes (escape sequence) for designating CHARSET to graphic
3519    register REG at DST, and increment DST.  If <final-char> of CHARSET is
3520    '@', 'A', or 'B' and the coding system CODING allows, produce
3521    designation sequence of short-form.  */
3522
3523 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
3524   do {                                                                  \
3525     unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
3526     char *intermediate_char_94 = "()*+";                                \
3527     char *intermediate_char_96 = ",-./";                                \
3528     int revision = -1;                                                  \
3529     int c;                                                              \
3530                                                                         \
3531     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
3532       revision = CHARSET_ISO_REVISION (charset);                        \
3533                                                                         \
3534     if (revision >= 0)                                                  \
3535       {                                                                 \
3536         EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
3537         EMIT_ONE_BYTE ('@' + revision);                                 \
3538       }                                                                 \
3539     EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
3540     if (CHARSET_DIMENSION (charset) == 1)                               \
3541       {                                                                 \
3542         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3543           c = intermediate_char_94[reg];                                \
3544         else                                                            \
3545           c = intermediate_char_96[reg];                                \
3546         EMIT_ONE_ASCII_BYTE (c);                                        \
3547       }                                                                 \
3548     else                                                                \
3549       {                                                                 \
3550         EMIT_ONE_ASCII_BYTE ('$');                                      \
3551         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3552           {                                                             \
3553             if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM   \
3554                 || reg != 0                                             \
3555                 || final_char < '@' || final_char > 'B')                \
3556               EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);          \
3557           }                                                             \
3558         else                                                            \
3559           EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
3560       }                                                                 \
3561     EMIT_ONE_ASCII_BYTE (final_char);                                   \
3562                                                                         \
3563     CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
3564   } while (0)
3565
3566
3567 /* The following two macros produce codes (control character or escape
3568    sequence) for ISO2022 single-shift functions (single-shift-2 and
3569    single-shift-3).  */
3570
3571 #define ENCODE_SINGLE_SHIFT_2                                           \
3572   do {                                                                  \
3573     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3574       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
3575     else                                                                \
3576       EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
3577     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3578   } while (0)
3579
3580
3581 #define ENCODE_SINGLE_SHIFT_3                                           \
3582   do {                                                                  \
3583     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3584       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
3585     else                                                                \
3586       EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
3587     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3588   } while (0)
3589
3590
3591 /* The following four macros produce codes (control character or
3592    escape sequence) for ISO2022 locking-shift functions (shift-in,
3593    shift-out, locking-shift-2, and locking-shift-3).  */
3594
3595 #define ENCODE_SHIFT_IN                                 \
3596   do {                                                  \
3597     EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
3598     CODING_ISO_INVOCATION (coding, 0) = 0;              \
3599   } while (0)
3600
3601
3602 #define ENCODE_SHIFT_OUT                                \
3603   do {                                                  \
3604     EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
3605     CODING_ISO_INVOCATION (coding, 0) = 1;              \
3606   } while (0)
3607
3608
3609 #define ENCODE_LOCKING_SHIFT_2                          \
3610   do {                                                  \
3611     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3612     CODING_ISO_INVOCATION (coding, 0) = 2;              \
3613   } while (0)
3614
3615
3616 #define ENCODE_LOCKING_SHIFT_3                          \
3617   do {                                                  \
3618     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3619     CODING_ISO_INVOCATION (coding, 0) = 3;              \
3620   } while (0)
3621
3622
3623 /* Produce codes for a DIMENSION1 character whose character set is
3624    CHARSET and whose position-code is C1.  Designation and invocation
3625    sequences are also produced in advance if necessary.  */
3626
3627 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
3628   do {                                                                  \
3629     int id = CHARSET_ID (charset);                                      \
3630                                                                         \
3631     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
3632         && id == charset_ascii)                                         \
3633       {                                                                 \
3634         id = charset_jisx0201_roman;                                    \
3635         charset = CHARSET_FROM_ID (id);                                 \
3636       }                                                                 \
3637                                                                         \
3638     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3639       {                                                                 \
3640         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3641           EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                              \
3642         else                                                            \
3643           EMIT_ONE_BYTE (c1 | 0x80);                                    \
3644         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3645         break;                                                          \
3646       }                                                                 \
3647     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3648       {                                                                 \
3649         EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                                \
3650         break;                                                          \
3651       }                                                                 \
3652     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3653       {                                                                 \
3654         EMIT_ONE_BYTE (c1 | 0x80);                                      \
3655         break;                                                          \
3656       }                                                                 \
3657     else                                                                \
3658       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3659          must invoke it, or, at first, designate it to some graphic     \
3660          register.  Then repeat the loop to actually produce the        \
3661          character.  */                                                 \
3662       dst = encode_invocation_designation (charset, coding, dst,        \
3663                                            &produced_chars);            \
3664   } while (1)
3665
3666
3667 /* Produce codes for a DIMENSION2 character whose character set is
3668    CHARSET and whose position-codes are C1 and C2.  Designation and
3669    invocation codes are also produced in advance if necessary.  */
3670
3671 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
3672   do {                                                                  \
3673     int id = CHARSET_ID (charset);                                      \
3674                                                                         \
3675     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
3676         && id == charset_jisx0208)                                      \
3677       {                                                                 \
3678         id = charset_jisx0208_1978;                                     \
3679         charset = CHARSET_FROM_ID (id);                                 \
3680       }                                                                 \
3681                                                                         \
3682     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3683       {                                                                 \
3684         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3685           EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
3686         else                                                            \
3687           EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
3688         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3689         break;                                                          \
3690       }                                                                 \
3691     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3692       {                                                                 \
3693         EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
3694         break;                                                          \
3695       }                                                                 \
3696     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3697       {                                                                 \
3698         EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
3699         break;                                                          \
3700       }                                                                 \
3701     else                                                                \
3702       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3703          must invoke it, or, at first, designate it to some graphic     \
3704          register.  Then repeat the loop to actually produce the        \
3705          character.  */                                                 \
3706       dst = encode_invocation_designation (charset, coding, dst,        \
3707                                            &produced_chars);            \
3708   } while (1)
3709
3710
3711 #define ENCODE_ISO_CHARACTER(charset, c)                                   \
3712   do {                                                                     \
3713     int code = ENCODE_CHAR ((charset),(c));                                \
3714                                                                            \
3715     if (CHARSET_DIMENSION (charset) == 1)                                  \
3716       ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
3717     else                                                                   \
3718       ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3719   } while (0)
3720
3721
3722 /* Produce designation and invocation codes at a place pointed by DST
3723    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
3724    Return new DST.  */
3725
3726 unsigned char *
3727 encode_invocation_designation (charset, coding, dst, p_nchars)
3728      struct charset *charset;
3729      struct coding_system *coding;
3730      unsigned char *dst;
3731      int *p_nchars;
3732 {
3733   int multibytep = coding->dst_multibyte;
3734   int produced_chars = *p_nchars;
3735   int reg;                      /* graphic register number */
3736   int id = CHARSET_ID (charset);
3737
3738   /* At first, check designations.  */
3739   for (reg = 0; reg < 4; reg++)
3740     if (id == CODING_ISO_DESIGNATION (coding, reg))
3741       break;
3742
3743   if (reg >= 4)
3744     {
3745       /* CHARSET is not yet designated to any graphic registers.  */
3746       /* At first check the requested designation.  */
3747       reg = CODING_ISO_REQUEST (coding, id);
3748       if (reg < 0)
3749         /* Since CHARSET requests no special designation, designate it
3750            to graphic register 0.  */
3751         reg = 0;
3752
3753       ENCODE_DESIGNATION (charset, reg, coding);
3754     }
3755
3756   if (CODING_ISO_INVOCATION (coding, 0) != reg
3757       && CODING_ISO_INVOCATION (coding, 1) != reg)
3758     {
3759       /* Since the graphic register REG is not invoked to any graphic
3760          planes, invoke it to graphic plane 0.  */
3761       switch (reg)
3762         {
3763         case 0:                 /* graphic register 0 */
3764           ENCODE_SHIFT_IN;
3765           break;
3766
3767         case 1:                 /* graphic register 1 */
3768           ENCODE_SHIFT_OUT;
3769           break;
3770
3771         case 2:                 /* graphic register 2 */
3772           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3773             ENCODE_SINGLE_SHIFT_2;
3774           else
3775             ENCODE_LOCKING_SHIFT_2;
3776           break;
3777
3778         case 3:                 /* graphic register 3 */
3779           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3780             ENCODE_SINGLE_SHIFT_3;
3781           else
3782             ENCODE_LOCKING_SHIFT_3;
3783           break;
3784         }
3785     }
3786
3787   *p_nchars = produced_chars;
3788   return dst;
3789 }
3790
3791 /* The following three macros produce codes for indicating direction
3792    of text.  */
3793 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
3794   do {                                                                  \
3795     if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS)        \
3796       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
3797     else                                                                \
3798       EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
3799   } while (0)
3800
3801
3802 #define ENCODE_DIRECTION_R2L()                  \
3803   do {                                          \
3804     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3805     EMIT_TWO_ASCII_BYTES ('2', ']');            \
3806   } while (0)
3807
3808
3809 #define ENCODE_DIRECTION_L2R()                  \
3810   do {                                          \
3811     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3812     EMIT_TWO_ASCII_BYTES ('0', ']');            \
3813   } while (0)
3814
3815
3816 /* Produce codes for designation and invocation to reset the graphic
3817    planes and registers to initial state.  */
3818 #define ENCODE_RESET_PLANE_AND_REGISTER()                               \
3819   do {                                                                  \
3820     int reg;                                                            \
3821     struct charset *charset;                                            \
3822                                                                         \
3823     if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
3824       ENCODE_SHIFT_IN;                                                  \
3825     for (reg = 0; reg < 4; reg++)                                       \
3826       if (CODING_ISO_INITIAL (coding, reg) >= 0                         \
3827           && (CODING_ISO_DESIGNATION (coding, reg)                      \
3828               != CODING_ISO_INITIAL (coding, reg)))                     \
3829         {                                                               \
3830           charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3831           ENCODE_DESIGNATION (charset, reg, coding);                    \
3832         }                                                               \
3833   } while (0)
3834
3835
3836 /* Produce designation sequences of charsets in the line started from
3837    SRC to a place pointed by DST, and return updated DST.
3838
3839    If the current block ends before any end-of-line, we may fail to
3840    find all the necessary designations.  */
3841
3842 static unsigned char *
3843 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3844      struct coding_system *coding;
3845      int *charbuf, *charbuf_end;
3846      unsigned char *dst;
3847 {
3848   struct charset *charset;
3849   /* Table of charsets to be designated to each graphic register.  */
3850   int r[4];
3851   int c, found = 0, reg;
3852   int produced_chars = 0;
3853   int multibytep = coding->dst_multibyte;
3854   Lisp_Object attrs;
3855   Lisp_Object charset_list;
3856
3857   attrs = CODING_ID_ATTRS (coding->id);
3858   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3859   if (EQ (charset_list, Qiso_2022))
3860     charset_list = Viso_2022_charset_list;
3861
3862   for (reg = 0; reg < 4; reg++)
3863     r[reg] = -1;
3864
3865   while (found < 4)
3866     {
3867       int id;
3868
3869       c = *charbuf++;
3870       if (c == '\n')
3871         break;
3872       charset = char_charset (c, charset_list, NULL);
3873       id = CHARSET_ID (charset);
3874       reg = CODING_ISO_REQUEST (coding, id);
3875       if (reg >= 0 && r[reg] < 0)
3876         {
3877           found++;
3878           r[reg] = id;
3879         }
3880     }
3881
3882   if (found)
3883     {
3884       for (reg = 0; reg < 4; reg++)
3885         if (r[reg] >= 0
3886             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3887           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3888     }
3889
3890   return dst;
3891 }
3892
3893 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
3894
3895 static int
3896 encode_coding_iso_2022 (coding)
3897      struct coding_system *coding;
3898 {
3899   int multibytep = coding->dst_multibyte;
3900   int *charbuf = coding->charbuf;
3901   int *charbuf_end = charbuf + coding->charbuf_used;
3902   unsigned char *dst = coding->destination + coding->produced;
3903   unsigned char *dst_end = coding->destination + coding->dst_bytes;
3904   int safe_room = 16;
3905   int bol_designation
3906     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3907        && CODING_ISO_BOL (coding));
3908   int produced_chars = 0;
3909   Lisp_Object attrs, eol_type, charset_list;
3910   int ascii_compatible;
3911   int c;
3912   int preferred_charset_id = -1;
3913
3914   CODING_GET_INFO (coding, attrs, charset_list);
3915   eol_type = CODING_ID_EOL_TYPE (coding->id);
3916   if (VECTORP (eol_type))
3917     eol_type = Qunix;
3918
3919   setup_iso_safe_charsets (attrs);
3920   /* Charset list may have been changed.  */
3921   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3922   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
3923
3924   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3925
3926   while (charbuf < charbuf_end)
3927     {
3928       ASSURE_DESTINATION (safe_room);
3929
3930       if (bol_designation)
3931         {
3932           unsigned char *dst_prev = dst;
3933
3934           /* We have to produce designation sequences if any now.  */
3935           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3936           bol_designation = 0;
3937           /* We are sure that designation sequences are all ASCII bytes.  */
3938           produced_chars += dst - dst_prev;
3939         }
3940
3941       c = *charbuf++;
3942
3943       if (c < 0)
3944         {
3945           /* Handle an annotation.  */
3946           switch (*charbuf)
3947             {
3948             case CODING_ANNOTATE_COMPOSITION_MASK:
3949               /* Not yet implemented.  */
3950               break;
3951             case CODING_ANNOTATE_CHARSET_MASK:
3952               preferred_charset_id = charbuf[2];
3953               if (preferred_charset_id >= 0
3954                   && NILP (Fmemq (make_number (preferred_charset_id),
3955                                   charset_list)))
3956                 preferred_charset_id = -1;
3957               break;
3958             default:
3959               abort ();
3960             }
3961           charbuf += -c - 1;
3962           continue;
3963         }
3964
3965       /* Now encode the character C.  */
3966       if (c < 0x20 || c == 0x7F)
3967         {
3968           if (c == '\n'
3969               || (c == '\r' && EQ (eol_type, Qmac)))
3970             {
3971               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3972                 ENCODE_RESET_PLANE_AND_REGISTER ();
3973               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
3974                 {
3975                   int i;
3976
3977                   for (i = 0; i < 4; i++)
3978                     CODING_ISO_DESIGNATION (coding, i)
3979                       = CODING_ISO_INITIAL (coding, i);
3980                 }
3981               bol_designation
3982                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
3983             }
3984           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3985             ENCODE_RESET_PLANE_AND_REGISTER ();
3986           EMIT_ONE_ASCII_BYTE (c);
3987         }
3988       else if (ASCII_CHAR_P (c))
3989         {
3990           if (ascii_compatible)
3991             EMIT_ONE_ASCII_BYTE (c);
3992           else
3993             {
3994               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3995               ENCODE_ISO_CHARACTER (charset, c);
3996             }
3997         }
3998       else if (CHAR_BYTE8_P (c))
3999         {
4000           c = CHAR_TO_BYTE8 (c);
4001           EMIT_ONE_BYTE (c);
4002         }
4003       else
4004         {
4005           struct charset *charset;
4006
4007           if (preferred_charset_id >= 0)
4008             {
4009               charset = CHARSET_FROM_ID (preferred_charset_id);
4010               if (! CHAR_CHARSET_P (c, charset))
4011                 charset = char_charset (c, charset_list, NULL);
4012             }
4013           else
4014             charset = char_charset (c, charset_list, NULL);
4015           if (!charset)
4016             {
4017               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4018                 {
4019                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4020                   charset = CHARSET_FROM_ID (charset_ascii);
4021                 }
4022               else
4023                 {
4024                   c = coding->default_char;
4025                   charset = char_charset (c, charset_list, NULL);
4026                 }
4027             }
4028           ENCODE_ISO_CHARACTER (charset, c);
4029         }
4030     }
4031
4032   if (coding->mode & CODING_MODE_LAST_BLOCK
4033       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
4034     {
4035       ASSURE_DESTINATION (safe_room);
4036       ENCODE_RESET_PLANE_AND_REGISTER ();
4037     }
4038   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4039   CODING_ISO_BOL (coding) = bol_designation;
4040   coding->produced_char += produced_chars;
4041   coding->produced = dst - coding->destination;
4042   return 0;
4043 }
4044
4045 \f
4046 /*** 8,9. SJIS and BIG5 handlers ***/
4047
4048 /* Although SJIS and BIG5 are not ISO's coding system, they are used
4049    quite widely.  So, for the moment, Emacs supports them in the bare
4050    C code.  But, in the future, they may be supported only by CCL.  */
4051
4052 /* SJIS is a coding system encoding three character sets: ASCII, right
4053    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
4054    as is.  A character of charset katakana-jisx0201 is encoded by
4055    "position-code + 0x80".  A character of charset japanese-jisx0208
4056    is encoded in 2-byte but two position-codes are divided and shifted
4057    so that it fit in the range below.
4058
4059    --- CODE RANGE of SJIS ---
4060    (character set)      (range)
4061    ASCII                0x00 .. 0x7F
4062    KATAKANA-JISX0201    0xA0 .. 0xDF
4063    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
4064             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
4065    -------------------------------
4066
4067 */
4068
4069 /* BIG5 is a coding system encoding two character sets: ASCII and
4070    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
4071    character set and is encoded in two-byte.
4072
4073    --- CODE RANGE of BIG5 ---
4074    (character set)      (range)
4075    ASCII                0x00 .. 0x7F
4076    Big5 (1st byte)      0xA1 .. 0xFE
4077         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
4078    --------------------------
4079
4080   */
4081
4082 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4083    Check if a text is encoded in SJIS.  If it is, return
4084    CATEGORY_MASK_SJIS, else return 0.  */
4085
4086 static int
4087 detect_coding_sjis (coding, detect_info)
4088      struct coding_system *coding;
4089      struct coding_detection_info *detect_info;
4090 {
4091   const unsigned char *src = coding->source, *src_base;
4092   const unsigned char *src_end = coding->source + coding->src_bytes;
4093   int multibytep = coding->src_multibyte;
4094   int consumed_chars = 0;
4095   int found = 0;
4096   int c;
4097
4098   detect_info->checked |= CATEGORY_MASK_SJIS;
4099   /* A coding system of this category is always ASCII compatible.  */
4100   src += coding->head_ascii;
4101
4102   while (1)
4103     {
4104       src_base = src;
4105       ONE_MORE_BYTE (c);
4106       if (c < 0x80)
4107         continue;
4108       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4109         {
4110           ONE_MORE_BYTE (c);
4111           if (c < 0x40 || c == 0x7F || c > 0xFC)
4112             break;
4113           found = CATEGORY_MASK_SJIS;
4114         }
4115       else if (c >= 0xA0 && c < 0xE0)
4116         found = CATEGORY_MASK_SJIS;
4117       else
4118         break;
4119     }
4120   detect_info->rejected |= CATEGORY_MASK_SJIS;
4121   return 0;
4122
4123  no_more_source:
4124   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4125     {
4126       detect_info->rejected |= CATEGORY_MASK_SJIS;
4127       return 0;
4128     }
4129   detect_info->found |= found;
4130   return 1;
4131 }
4132
4133 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4134    Check if a text is encoded in BIG5.  If it is, return
4135    CATEGORY_MASK_BIG5, else return 0.  */
4136
4137 static int
4138 detect_coding_big5 (coding, detect_info)
4139      struct coding_system *coding;
4140      struct coding_detection_info *detect_info;
4141 {
4142   const unsigned char *src = coding->source, *src_base;
4143   const unsigned char *src_end = coding->source + coding->src_bytes;
4144   int multibytep = coding->src_multibyte;
4145   int consumed_chars = 0;
4146   int found = 0;
4147   int c;
4148
4149   detect_info->checked |= CATEGORY_MASK_BIG5;
4150   /* A coding system of this category is always ASCII compatible.  */
4151   src += coding->head_ascii;
4152
4153   while (1)
4154     {
4155       src_base = src;
4156       ONE_MORE_BYTE (c);
4157       if (c < 0x80)
4158         continue;
4159       if (c >= 0xA1)
4160         {
4161           ONE_MORE_BYTE (c);
4162           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4163             return 0;
4164           found = CATEGORY_MASK_BIG5;
4165         }
4166       else
4167         break;
4168     }
4169   detect_info->rejected |= CATEGORY_MASK_BIG5;
4170   return 0;
4171
4172  no_more_source:
4173   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4174     {
4175       detect_info->rejected |= CATEGORY_MASK_BIG5;
4176       return 0;
4177     }
4178   detect_info->found |= found;
4179   return 1;
4180 }
4181
4182 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4183    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
4184
4185 static void
4186 decode_coding_sjis (coding)
4187      struct coding_system *coding;
4188 {
4189   const unsigned char *src = coding->source + coding->consumed;
4190   const unsigned char *src_end = coding->source + coding->src_bytes;
4191   const unsigned char *src_base;
4192   int *charbuf = coding->charbuf + coding->charbuf_used;
4193   int *charbuf_end
4194     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4195   int consumed_chars = 0, consumed_chars_base;
4196   int multibytep = coding->src_multibyte;
4197   struct charset *charset_roman, *charset_kanji, *charset_kana;
4198   struct charset *charset_kanji2;
4199   Lisp_Object attrs, charset_list, val;
4200   int char_offset = coding->produced_char;
4201   int last_offset = char_offset;
4202   int last_id = charset_ascii;
4203   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4204   int byte_after_cr = -1;
4205
4206   CODING_GET_INFO (coding, attrs, charset_list);
4207
4208   val = charset_list;
4209   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4210   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4211   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4212   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4213
4214   while (1)
4215     {
4216       int c, c1;
4217       struct charset *charset;
4218
4219       src_base = src;
4220       consumed_chars_base = consumed_chars;
4221
4222       if (charbuf >= charbuf_end)
4223         break;
4224
4225       if (byte_after_cr >= 0)
4226         c = byte_after_cr, byte_after_cr = -1;
4227       else
4228         ONE_MORE_BYTE (c);
4229       if (c < 0)
4230         goto invalid_code;
4231       if (c < 0x80)
4232         {
4233           if (eol_crlf && c == '\r')
4234             ONE_MORE_BYTE (byte_after_cr);
4235           charset = charset_roman;
4236         }
4237       else if (c == 0x80 || c == 0xA0)
4238         goto invalid_code;
4239       else if (c >= 0xA1 && c <= 0xDF)
4240         {
4241           /* SJIS -> JISX0201-Kana */
4242           c &= 0x7F;
4243           charset = charset_kana;
4244         }
4245       else if (c <= 0xEF)
4246         {
4247           /* SJIS -> JISX0208 */
4248           ONE_MORE_BYTE (c1);
4249           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4250             goto invalid_code;
4251           c = (c << 8) | c1;
4252           SJIS_TO_JIS (c);
4253           charset = charset_kanji;
4254         }
4255       else if (c <= 0xFC && charset_kanji2)
4256         {
4257           /* SJIS -> JISX0213-2 */
4258           ONE_MORE_BYTE (c1);
4259           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4260             goto invalid_code;
4261           c = (c << 8) | c1;
4262           SJIS_TO_JIS2 (c);
4263           charset = charset_kanji2;
4264         }
4265       else
4266         goto invalid_code;
4267       if (charset->id != charset_ascii
4268           && last_id != charset->id)
4269         {
4270           if (last_id != charset_ascii)
4271             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4272           last_id = charset->id;
4273           last_offset = char_offset;
4274         }
4275       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4276       *charbuf++ = c;
4277       char_offset++;
4278       continue;
4279
4280     invalid_code:
4281       src = src_base;
4282       consumed_chars = consumed_chars_base;
4283       ONE_MORE_BYTE (c);
4284       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4285       char_offset++;
4286       coding->errors++;
4287     }
4288
4289  no_more_source:
4290   if (last_id != charset_ascii)
4291     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4292   coding->consumed_char += consumed_chars_base;
4293   coding->consumed = src_base - coding->source;
4294   coding->charbuf_used = charbuf - coding->charbuf;
4295 }
4296
4297 static void
4298 decode_coding_big5 (coding)
4299      struct coding_system *coding;
4300 {
4301   const unsigned char *src = coding->source + coding->consumed;
4302   const unsigned char *src_end = coding->source + coding->src_bytes;
4303   const unsigned char *src_base;
4304   int *charbuf = coding->charbuf + coding->charbuf_used;
4305   int *charbuf_end
4306     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4307   int consumed_chars = 0, consumed_chars_base;
4308   int multibytep = coding->src_multibyte;
4309   struct charset *charset_roman, *charset_big5;
4310   Lisp_Object attrs, charset_list, val;
4311   int char_offset = coding->produced_char;
4312   int last_offset = char_offset;
4313   int last_id = charset_ascii;
4314   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4315   int byte_after_cr = -1;
4316
4317   CODING_GET_INFO (coding, attrs, charset_list);
4318   val = charset_list;
4319   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4320   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4321
4322   while (1)
4323     {
4324       int c, c1;
4325       struct charset *charset;
4326
4327       src_base = src;
4328       consumed_chars_base = consumed_chars;
4329
4330       if (charbuf >= charbuf_end)
4331         break;
4332
4333       if (byte_after_cr >= 0)
4334         c = byte_after_cr, byte_after_cr = -1;
4335       else
4336         ONE_MORE_BYTE (c);
4337
4338       if (c < 0)
4339         goto invalid_code;
4340       if (c < 0x80)
4341         {
4342           if (eol_crlf && c == '\r')
4343             ONE_MORE_BYTE (byte_after_cr);
4344           charset = charset_roman;
4345         }
4346       else
4347         {
4348           /* BIG5 -> Big5 */
4349           if (c < 0xA1 || c > 0xFE)
4350             goto invalid_code;
4351           ONE_MORE_BYTE (c1);
4352           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4353             goto invalid_code;
4354           c = c << 8 | c1;
4355           charset = charset_big5;
4356         }
4357       if (charset->id != charset_ascii
4358           && last_id != charset->id)
4359         {
4360           if (last_id != charset_ascii)
4361             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4362           last_id = charset->id;
4363           last_offset = char_offset;
4364         }
4365       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4366       *charbuf++ = c;
4367       char_offset++;
4368       continue;
4369
4370     invalid_code:
4371       src = src_base;
4372       consumed_chars = consumed_chars_base;
4373       ONE_MORE_BYTE (c);
4374       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4375       char_offset++;
4376       coding->errors++;
4377     }
4378
4379  no_more_source:
4380   if (last_id != charset_ascii)
4381     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4382   coding->consumed_char += consumed_chars_base;
4383   coding->consumed = src_base - coding->source;
4384   coding->charbuf_used = charbuf - coding->charbuf;
4385 }
4386
4387 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4388    This function can encode charsets `ascii', `katakana-jisx0201',
4389    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4390    are sure that all these charsets are registered as official charset
4391    (i.e. do not have extended leading-codes).  Characters of other
4392    charsets are produced without any encoding.  If SJIS_P is 1, encode
4393    SJIS text, else encode BIG5 text.  */
4394
4395 static int
4396 encode_coding_sjis (coding)
4397      struct coding_system *coding;
4398 {
4399   int multibytep = coding->dst_multibyte;
4400   int *charbuf = coding->charbuf;
4401   int *charbuf_end = charbuf + coding->charbuf_used;
4402   unsigned char *dst = coding->destination + coding->produced;
4403   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4404   int safe_room = 4;
4405   int produced_chars = 0;
4406   Lisp_Object attrs, charset_list, val;
4407   int ascii_compatible;
4408   struct charset *charset_roman, *charset_kanji, *charset_kana;
4409   struct charset *charset_kanji2;
4410   int c;
4411
4412   CODING_GET_INFO (coding, attrs, charset_list);
4413   val = charset_list;
4414   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4415   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4416   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4417   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4418
4419   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4420
4421   while (charbuf < charbuf_end)
4422     {
4423       ASSURE_DESTINATION (safe_room);
4424       c = *charbuf++;
4425       /* Now encode the character C.  */
4426       if (ASCII_CHAR_P (c) && ascii_compatible)
4427         EMIT_ONE_ASCII_BYTE (c);
4428       else if (CHAR_BYTE8_P (c))
4429         {
4430           c = CHAR_TO_BYTE8 (c);
4431           EMIT_ONE_BYTE (c);
4432         }
4433       else
4434         {
4435           unsigned code;
4436           struct charset *charset = char_charset (c, charset_list, &code);
4437
4438           if (!charset)
4439             {
4440               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4441                 {
4442                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4443                   charset = CHARSET_FROM_ID (charset_ascii);
4444                 }
4445               else
4446                 {
4447                   c = coding->default_char;
4448                   charset = char_charset (c, charset_list, &code);
4449                 }
4450             }
4451           if (code == CHARSET_INVALID_CODE (charset))
4452             abort ();
4453           if (charset == charset_kanji)
4454             {
4455               int c1, c2;
4456               JIS_TO_SJIS (code);
4457               c1 = code >> 8, c2 = code & 0xFF;
4458               EMIT_TWO_BYTES (c1, c2);
4459             }
4460           else if (charset == charset_kana)
4461             EMIT_ONE_BYTE (code | 0x80);
4462           else if (charset_kanji2 && charset == charset_kanji2)
4463             {
4464               int c1, c2;
4465
4466               c1 = code >> 8;
4467               if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
4468                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4469                 {
4470                   JIS_TO_SJIS2 (code);
4471                   c1 = code >> 8, c2 = code & 0xFF;
4472                   EMIT_TWO_BYTES (c1, c2);
4473                 }
4474               else
4475                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4476             }
4477           else
4478             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4479         }
4480     }
4481   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4482   coding->produced_char += produced_chars;
4483   coding->produced = dst - coding->destination;
4484   return 0;
4485 }
4486
4487 static int
4488 encode_coding_big5 (coding)
4489      struct coding_system *coding;
4490 {
4491   int multibytep = coding->dst_multibyte;
4492   int *charbuf = coding->charbuf;
4493   int *charbuf_end = charbuf + coding->charbuf_used;
4494   unsigned char *dst = coding->destination + coding->produced;
4495   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4496   int safe_room = 4;
4497   int produced_chars = 0;
4498   Lisp_Object attrs, charset_list, val;
4499   int ascii_compatible;
4500   struct charset *charset_roman, *charset_big5;
4501   int c;
4502
4503   CODING_GET_INFO (coding, attrs, charset_list);
4504   val = charset_list;
4505   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4506   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4507   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4508
4509   while (charbuf < charbuf_end)
4510     {
4511       ASSURE_DESTINATION (safe_room);
4512       c = *charbuf++;
4513       /* Now encode the character C.  */
4514       if (ASCII_CHAR_P (c) && ascii_compatible)
4515         EMIT_ONE_ASCII_BYTE (c);
4516       else if (CHAR_BYTE8_P (c))
4517         {
4518           c = CHAR_TO_BYTE8 (c);
4519           EMIT_ONE_BYTE (c);
4520         }
4521       else
4522         {
4523           unsigned code;
4524           struct charset *charset = char_charset (c, charset_list, &code);
4525
4526           if (! charset)
4527             {
4528               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4529                 {
4530                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4531                   charset = CHARSET_FROM_ID (charset_ascii);
4532                 }
4533               else
4534                 {
4535                   c = coding->default_char;
4536                   charset = char_charset (c, charset_list, &code);
4537                 }
4538             }
4539           if (code == CHARSET_INVALID_CODE (charset))
4540             abort ();
4541           if (charset == charset_big5)
4542             {
4543               int c1, c2;
4544
4545               c1 = code >> 8, c2 = code & 0xFF;
4546               EMIT_TWO_BYTES (c1, c2);
4547             }
4548           else
4549             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4550         }
4551     }
4552   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4553   coding->produced_char += produced_chars;
4554   coding->produced = dst - coding->destination;
4555   return 0;
4556 }
4557
4558 \f
4559 /*** 10. CCL handlers ***/
4560
4561 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4562    Check if a text is encoded in a coding system of which
4563    encoder/decoder are written in CCL program.  If it is, return
4564    CATEGORY_MASK_CCL, else return 0.  */
4565
4566 static int
4567 detect_coding_ccl (coding, detect_info)
4568      struct coding_system *coding;
4569      struct coding_detection_info *detect_info;
4570 {
4571   const unsigned char *src = coding->source, *src_base;
4572   const unsigned char *src_end = coding->source + coding->src_bytes;
4573   int multibytep = coding->src_multibyte;
4574   int consumed_chars = 0;
4575   int found = 0;
4576   unsigned char *valids;
4577   int head_ascii = coding->head_ascii;
4578   Lisp_Object attrs;
4579
4580   detect_info->checked |= CATEGORY_MASK_CCL;
4581
4582   coding = &coding_categories[coding_category_ccl];
4583   valids = CODING_CCL_VALIDS (coding);
4584   attrs = CODING_ID_ATTRS (coding->id);
4585   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4586     src += head_ascii;
4587
4588   while (1)
4589     {
4590       int c;
4591
4592       src_base = src;
4593       ONE_MORE_BYTE (c);
4594       if (c < 0 || ! valids[c])
4595         break;
4596       if ((valids[c] > 1))
4597         found = CATEGORY_MASK_CCL;
4598     }
4599   detect_info->rejected |= CATEGORY_MASK_CCL;
4600   return 0;
4601
4602  no_more_source:
4603   detect_info->found |= found;
4604   return 1;
4605 }
4606
4607 static void
4608 decode_coding_ccl (coding)
4609      struct coding_system *coding;
4610 {
4611   const unsigned char *src = coding->source + coding->consumed;
4612   const unsigned char *src_end = coding->source + coding->src_bytes;
4613   int *charbuf = coding->charbuf + coding->charbuf_used;
4614   int *charbuf_end = coding->charbuf + coding->charbuf_size;
4615   int consumed_chars = 0;
4616   int multibytep = coding->src_multibyte;
4617   struct ccl_program ccl;
4618   int source_charbuf[1024];
4619   int source_byteidx[1024];
4620   Lisp_Object attrs, charset_list;
4621
4622   CODING_GET_INFO (coding, attrs, charset_list);
4623   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4624
4625   while (src < src_end)
4626     {
4627       const unsigned char *p = src;
4628       int *source, *source_end;
4629       int i = 0;
4630
4631       if (multibytep)
4632         while (i < 1024 && p < src_end)
4633           {
4634             source_byteidx[i] = p - src;
4635             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4636           }
4637       else
4638         while (i < 1024 && p < src_end)
4639           source_charbuf[i++] = *p++;
4640
4641       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4642         ccl.last_block = 1;
4643
4644       source = source_charbuf;
4645       source_end = source + i;
4646       while (source < source_end)
4647         {
4648           ccl_driver (&ccl, source, charbuf,
4649                       source_end - source, charbuf_end - charbuf,
4650                       charset_list);
4651           source += ccl.consumed;
4652           charbuf += ccl.produced;
4653           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4654             break;
4655         }
4656       if (source < source_end)
4657         src += source_byteidx[source - source_charbuf];
4658       else
4659         src = p;
4660       consumed_chars += source - source_charbuf;
4661
4662       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4663           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4664         break;
4665     }
4666
4667   switch (ccl.status)
4668     {
4669     case CCL_STAT_SUSPEND_BY_SRC:
4670       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4671       break;
4672     case CCL_STAT_SUSPEND_BY_DST:
4673       break;
4674     case CCL_STAT_QUIT:
4675     case CCL_STAT_INVALID_CMD:
4676       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4677       break;
4678     default:
4679       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4680       break;
4681     }
4682   coding->consumed_char += consumed_chars;
4683   coding->consumed = src - coding->source;
4684   coding->charbuf_used = charbuf - coding->charbuf;
4685 }
4686
4687 static int
4688 encode_coding_ccl (coding)
4689      struct coding_system *coding;
4690 {
4691   struct ccl_program ccl;
4692   int multibytep = coding->dst_multibyte;
4693   int *charbuf = coding->charbuf;
4694   int *charbuf_end = charbuf + coding->charbuf_used;
4695   unsigned char *dst = coding->destination + coding->produced;
4696   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4697   int destination_charbuf[1024];
4698   int i, produced_chars = 0;
4699   Lisp_Object attrs, charset_list;
4700
4701   CODING_GET_INFO (coding, attrs, charset_list);
4702   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4703
4704   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4705   ccl.dst_multibyte = coding->dst_multibyte;
4706
4707   while (charbuf < charbuf_end)
4708     {
4709       ccl_driver (&ccl, charbuf, destination_charbuf,
4710                   charbuf_end - charbuf, 1024, charset_list);
4711       if (multibytep)
4712         {
4713           ASSURE_DESTINATION (ccl.produced * 2);
4714           for (i = 0; i < ccl.produced; i++)
4715             EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4716         }
4717       else
4718         {
4719           ASSURE_DESTINATION (ccl.produced);
4720           for (i = 0; i < ccl.produced; i++)
4721             *dst++ = destination_charbuf[i] & 0xFF;
4722           produced_chars += ccl.produced;
4723         }
4724       charbuf += ccl.consumed;
4725       if (ccl.status == CCL_STAT_QUIT
4726           || ccl.status == CCL_STAT_INVALID_CMD)
4727         break;
4728     }
4729
4730   switch (ccl.status)
4731     {
4732     case CCL_STAT_SUSPEND_BY_SRC:
4733       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4734       break;
4735     case CCL_STAT_SUSPEND_BY_DST:
4736       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
4737       break;
4738     case CCL_STAT_QUIT:
4739     case CCL_STAT_INVALID_CMD:
4740       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4741       break;
4742     default:
4743       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4744       break;
4745     }
4746
4747   coding->produced_char += produced_chars;
4748   coding->produced = dst - coding->destination;
4749   return 0;
4750 }
4751
4752
4753 \f
4754 /*** 10, 11. no-conversion handlers ***/
4755
4756 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
4757
4758 static void
4759 decode_coding_raw_text (coding)
4760      struct coding_system *coding;
4761 {
4762   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4763
4764   coding->chars_at_source = 1;
4765   coding->consumed_char = coding->src_chars;
4766   coding->consumed = coding->src_bytes;
4767   if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r')
4768     {
4769       coding->consumed_char--;
4770       coding->consumed--;
4771       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4772     }
4773   else
4774     record_conversion_result (coding, CODING_RESULT_SUCCESS);
4775 }
4776
4777 static int
4778 encode_coding_raw_text (coding)
4779      struct coding_system *coding;
4780 {
4781   int multibytep = coding->dst_multibyte;
4782   int *charbuf = coding->charbuf;
4783   int *charbuf_end = coding->charbuf + coding->charbuf_used;
4784   unsigned char *dst = coding->destination + coding->produced;
4785   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4786   int produced_chars = 0;
4787   int c;
4788
4789   if (multibytep)
4790     {
4791       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4792
4793       if (coding->src_multibyte)
4794         while (charbuf < charbuf_end)
4795           {
4796             ASSURE_DESTINATION (safe_room);
4797             c = *charbuf++;
4798             if (ASCII_CHAR_P (c))
4799               EMIT_ONE_ASCII_BYTE (c);
4800             else if (CHAR_BYTE8_P (c))
4801               {
4802                 c = CHAR_TO_BYTE8 (c);
4803                 EMIT_ONE_BYTE (c);
4804               }
4805             else
4806               {
4807                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4808
4809                 CHAR_STRING_ADVANCE (c, p1);
4810                 while (p0 < p1)
4811                   {
4812                     EMIT_ONE_BYTE (*p0);
4813                     p0++;
4814                   }
4815               }
4816           }
4817       else
4818         while (charbuf < charbuf_end)
4819           {
4820             ASSURE_DESTINATION (safe_room);
4821             c = *charbuf++;
4822             EMIT_ONE_BYTE (c);
4823           }
4824     }
4825   else
4826     {
4827       if (coding->src_multibyte)
4828         {
4829           int safe_room = MAX_MULTIBYTE_LENGTH;
4830
4831           while (charbuf < charbuf_end)
4832             {
4833               ASSURE_DESTINATION (safe_room);
4834               c = *charbuf++;
4835               if (ASCII_CHAR_P (c))
4836                 *dst++ = c;
4837               else if (CHAR_BYTE8_P (c))
4838                 *dst++ = CHAR_TO_BYTE8 (c);
4839               else
4840                 CHAR_STRING_ADVANCE (c, dst);
4841               produced_chars++;
4842             }
4843         }
4844       else
4845         {
4846           ASSURE_DESTINATION (charbuf_end - charbuf);
4847           while (charbuf < charbuf_end && dst < dst_end)
4848             *dst++ = *charbuf++;
4849           produced_chars = dst - (coding->destination + coding->dst_bytes);
4850         }
4851     }
4852   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4853   coding->produced_char += produced_chars;
4854   coding->produced = dst - coding->destination;
4855   return 0;
4856 }
4857
4858 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4859    Check if a text is encoded in a charset-based coding system.  If it
4860    is, return 1, else return 0.  */
4861
4862 static int
4863 detect_coding_charset (coding, detect_info)
4864      struct coding_system *coding;
4865      struct coding_detection_info *detect_info;
4866 {
4867   const unsigned char *src = coding->source, *src_base;
4868   const unsigned char *src_end = coding->source + coding->src_bytes;
4869   int multibytep = coding->src_multibyte;
4870   int consumed_chars = 0;
4871   Lisp_Object attrs, valids;
4872   int found = 0;
4873   int head_ascii = coding->head_ascii;
4874
4875   detect_info->checked |= CATEGORY_MASK_CHARSET;
4876
4877   coding = &coding_categories[coding_category_charset];
4878   attrs = CODING_ID_ATTRS (coding->id);
4879   valids = AREF (attrs, coding_attr_charset_valids);
4880
4881   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4882     src += head_ascii;
4883
4884   while (1)
4885     {
4886       int c;
4887       Lisp_Object val;
4888       struct charset *charset;
4889       int dim, idx;
4890
4891       src_base = src;
4892       ONE_MORE_BYTE (c);
4893       if (c < 0)
4894         continue;
4895       val = AREF (valids, c);
4896       if (NILP (val))
4897         break;
4898       if (c >= 0x80)
4899         found = CATEGORY_MASK_CHARSET;
4900       if (INTEGERP (val))
4901         {
4902           charset = CHARSET_FROM_ID (XFASTINT (val));
4903           dim = CHARSET_DIMENSION (charset);
4904           for (idx = 1; idx < dim; idx++)
4905             {
4906               if (src == src_end)
4907                 goto too_short;
4908               ONE_MORE_BYTE (c);
4909               if (c < charset->code_space[(dim - 1 - idx) * 2]
4910                   || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
4911                 break;
4912             }
4913           if (idx < dim)
4914             break;
4915         }
4916       else
4917         {
4918           idx = 1;
4919           for (; CONSP (val); val = XCDR (val))
4920             {
4921               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4922               dim = CHARSET_DIMENSION (charset);
4923               while (idx < dim)
4924                 {
4925                   if (src == src_end)
4926                     goto too_short;
4927                   ONE_MORE_BYTE (c);
4928                   if (c < charset->code_space[(dim - 1 - idx) * 4]
4929                       || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
4930                     break;
4931                   idx++;
4932                 }
4933               if (idx == dim)
4934                 {
4935                   val = Qnil;
4936                   break;
4937                 }
4938             }
4939           if (CONSP (val))
4940             break;
4941         }
4942     }
4943  too_short:
4944   detect_info->rejected |= CATEGORY_MASK_CHARSET;
4945   return 0;
4946
4947  no_more_source:
4948   detect_info->found |= found;
4949   return 1;
4950 }
4951
4952 static void
4953 decode_coding_charset (coding)
4954      struct coding_system *coding;
4955 {
4956   const unsigned char *src = coding->source + coding->consumed;
4957   const unsigned char *src_end = coding->source + coding->src_bytes;
4958   const unsigned char *src_base;
4959   int *charbuf = coding->charbuf + coding->charbuf_used;
4960   int *charbuf_end
4961     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4962   int consumed_chars = 0, consumed_chars_base;
4963   int multibytep = coding->src_multibyte;
4964   Lisp_Object attrs, charset_list, valids;
4965   int char_offset = coding->produced_char;
4966   int last_offset = char_offset;
4967   int last_id = charset_ascii;
4968   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4969   int byte_after_cr = -1;
4970
4971   CODING_GET_INFO (coding, attrs, charset_list);
4972   valids = AREF (attrs, coding_attr_charset_valids);
4973
4974   while (1)
4975     {
4976       int c;
4977       Lisp_Object val;
4978       struct charset *charset;
4979       int dim;
4980       int len = 1;
4981       unsigned code;
4982
4983       src_base = src;
4984       consumed_chars_base = consumed_chars;
4985
4986       if (charbuf >= charbuf_end)
4987         break;
4988
4989       if (byte_after_cr >= 0)
4990         {
4991           c = byte_after_cr;
4992           byte_after_cr = -1;
4993         }
4994       else
4995         {
4996           ONE_MORE_BYTE (c);
4997           if (eol_crlf && c == '\r')
4998             ONE_MORE_BYTE (byte_after_cr);
4999         }
5000       if (c < 0)
5001         goto invalid_code;
5002       code = c;
5003
5004       val = AREF (valids, c);
5005       if (NILP (val))
5006         goto invalid_code;
5007       if (INTEGERP (val))
5008         {
5009           charset = CHARSET_FROM_ID (XFASTINT (val));
5010           dim = CHARSET_DIMENSION (charset);
5011           while (len < dim)
5012             {
5013               ONE_MORE_BYTE (c);
5014               code = (code << 8) | c;
5015               len++;
5016             }
5017           CODING_DECODE_CHAR (coding, src, src_base, src_end,
5018                               charset, code, c);
5019         }
5020       else
5021         {
5022           /* VAL is a list of charset IDs.  It is assured that the
5023              list is sorted by charset dimensions (smaller one
5024              comes first).  */
5025           while (CONSP (val))
5026             {
5027               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
5028               dim = CHARSET_DIMENSION (charset);
5029               while (len < dim)
5030                 {
5031                   ONE_MORE_BYTE (c);
5032                   code = (code << 8) | c;
5033                   len++;
5034                 }
5035               CODING_DECODE_CHAR (coding, src, src_base,
5036                                   src_end, charset, code, c);
5037               if (c >= 0)
5038                 break;
5039               val = XCDR (val);
5040             }
5041         }
5042       if (c < 0)
5043         goto invalid_code;
5044       if (charset->id != charset_ascii
5045           && last_id != charset->id)
5046         {
5047           if (last_id != charset_ascii)
5048             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5049           last_id = charset->id;
5050           last_offset = char_offset;
5051         }
5052
5053       *charbuf++ = c;
5054       char_offset++;
5055       continue;
5056
5057     invalid_code:
5058       src = src_base;
5059       consumed_chars = consumed_chars_base;
5060       ONE_MORE_BYTE (c);
5061       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
5062       char_offset++;
5063       coding->errors++;
5064     }
5065
5066  no_more_source:
5067   if (last_id != charset_ascii)
5068     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5069   coding->consumed_char += consumed_chars_base;
5070   coding->consumed = src_base - coding->source;
5071   coding->charbuf_used = charbuf - coding->charbuf;
5072 }
5073
5074 static int
5075 encode_coding_charset (coding)
5076      struct coding_system *coding;
5077 {
5078   int multibytep = coding->dst_multibyte;
5079   int *charbuf = coding->charbuf;
5080   int *charbuf_end = charbuf + coding->charbuf_used;
5081   unsigned char *dst = coding->destination + coding->produced;
5082   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5083   int safe_room = MAX_MULTIBYTE_LENGTH;
5084   int produced_chars = 0;
5085   Lisp_Object attrs, charset_list;
5086   int ascii_compatible;
5087   int c;
5088
5089   CODING_GET_INFO (coding, attrs, charset_list);
5090   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5091
5092   while (charbuf < charbuf_end)
5093     {
5094       struct charset *charset;
5095       unsigned code;
5096
5097       ASSURE_DESTINATION (safe_room);
5098       c = *charbuf++;
5099       if (ascii_compatible && ASCII_CHAR_P (c))
5100         EMIT_ONE_ASCII_BYTE (c);
5101       else if (CHAR_BYTE8_P (c))
5102         {
5103           c = CHAR_TO_BYTE8 (c);
5104           EMIT_ONE_BYTE (c);
5105         }
5106       else
5107         {
5108           charset = char_charset (c, charset_list, &code);
5109           if (charset)
5110             {
5111               if (CHARSET_DIMENSION (charset) == 1)
5112                 EMIT_ONE_BYTE (code);
5113               else if (CHARSET_DIMENSION (charset) == 2)
5114                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5115               else if (CHARSET_DIMENSION (charset) == 3)
5116                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5117               else
5118                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5119                                  (code >> 8) & 0xFF, code & 0xFF);
5120             }
5121           else
5122             {
5123               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5124                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5125               else
5126                 c = coding->default_char;
5127               EMIT_ONE_BYTE (c);
5128             }
5129         }
5130     }
5131
5132   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5133   coding->produced_char += produced_chars;
5134   coding->produced = dst - coding->destination;
5135   return 0;
5136 }
5137
5138 \f
5139 /*** 7. C library functions ***/
5140
5141 /* Setup coding context CODING from information about CODING_SYSTEM.
5142    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
5143    CODING_SYSTEM is invalid, signal an error.  */
5144
5145 void
5146 setup_coding_system (coding_system, coding)
5147      Lisp_Object coding_system;
5148      struct coding_system *coding;
5149 {
5150   Lisp_Object attrs;
5151   Lisp_Object eol_type;
5152   Lisp_Object coding_type;
5153   Lisp_Object val;
5154
5155   if (NILP (coding_system))
5156     coding_system = Qundecided;
5157
5158   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5159
5160   attrs = CODING_ID_ATTRS (coding->id);
5161   eol_type = CODING_ID_EOL_TYPE (coding->id);
5162
5163   coding->mode = 0;
5164   coding->head_ascii = -1;
5165   if (VECTORP (eol_type))
5166     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5167                             | CODING_REQUIRE_DETECTION_MASK);
5168   else if (! EQ (eol_type, Qunix))
5169     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5170                             | CODING_REQUIRE_ENCODING_MASK);
5171   else
5172     coding->common_flags = 0;
5173   if (! NILP (CODING_ATTR_POST_READ (attrs)))
5174     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5175   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5176     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5177   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5178     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5179
5180   val = CODING_ATTR_SAFE_CHARSETS (attrs);
5181   coding->max_charset_id = SCHARS (val) - 1;
5182   coding->safe_charsets = (char *) SDATA (val);
5183   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5184
5185   coding_type = CODING_ATTR_TYPE (attrs);
5186   if (EQ (coding_type, Qundecided))
5187     {
5188       coding->detector = NULL;
5189       coding->decoder = decode_coding_raw_text;
5190       coding->encoder = encode_coding_raw_text;
5191       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5192     }
5193   else if (EQ (coding_type, Qiso_2022))
5194     {
5195       int i;
5196       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5197
5198       /* Invoke graphic register 0 to plane 0.  */
5199       CODING_ISO_INVOCATION (coding, 0) = 0;
5200       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
5201       CODING_ISO_INVOCATION (coding, 1)
5202         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5203       /* Setup the initial status of designation.  */
5204       for (i = 0; i < 4; i++)
5205         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5206       /* Not single shifting initially.  */
5207       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5208       /* Beginning of buffer should also be regarded as bol. */
5209       CODING_ISO_BOL (coding) = 1;
5210       coding->detector = detect_coding_iso_2022;
5211       coding->decoder = decode_coding_iso_2022;
5212       coding->encoder = encode_coding_iso_2022;
5213       if (flags & CODING_ISO_FLAG_SAFE)
5214         coding->mode |= CODING_MODE_SAFE_ENCODING;
5215       coding->common_flags
5216         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5217             | CODING_REQUIRE_FLUSHING_MASK);
5218       if (flags & CODING_ISO_FLAG_COMPOSITION)
5219         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5220       if (flags & CODING_ISO_FLAG_DESIGNATION)
5221         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5222       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5223         {
5224           setup_iso_safe_charsets (attrs);
5225           val = CODING_ATTR_SAFE_CHARSETS (attrs);
5226           coding->max_charset_id = SCHARS (val) - 1;
5227           coding->safe_charsets = (char *) SDATA (val);
5228         }
5229       CODING_ISO_FLAGS (coding) = flags;
5230     }
5231   else if (EQ (coding_type, Qcharset))
5232     {
5233       coding->detector = detect_coding_charset;
5234       coding->decoder = decode_coding_charset;
5235       coding->encoder = encode_coding_charset;
5236       coding->common_flags
5237         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5238     }
5239   else if (EQ (coding_type, Qutf_8))
5240     {
5241       coding->detector = detect_coding_utf_8;
5242       coding->decoder = decode_coding_utf_8;
5243       coding->encoder = encode_coding_utf_8;
5244       coding->common_flags
5245         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5246     }
5247   else if (EQ (coding_type, Qutf_16))
5248     {
5249       val = AREF (attrs, coding_attr_utf_16_bom);
5250       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
5251                                     : EQ (val, Qt) ? utf_16_with_bom
5252                                     : utf_16_without_bom);
5253       val = AREF (attrs, coding_attr_utf_16_endian);
5254       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5255                                        : utf_16_little_endian);
5256       CODING_UTF_16_SURROGATE (coding) = 0;
5257       coding->detector = detect_coding_utf_16;
5258       coding->decoder = decode_coding_utf_16;
5259       coding->encoder = encode_coding_utf_16;
5260       coding->common_flags
5261         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5262       if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
5263         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5264     }
5265   else if (EQ (coding_type, Qccl))
5266     {
5267       coding->detector = detect_coding_ccl;
5268       coding->decoder = decode_coding_ccl;
5269       coding->encoder = encode_coding_ccl;
5270       coding->common_flags
5271         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5272             | CODING_REQUIRE_FLUSHING_MASK);
5273     }
5274   else if (EQ (coding_type, Qemacs_mule))
5275     {
5276       coding->detector = detect_coding_emacs_mule;
5277       coding->decoder = decode_coding_emacs_mule;
5278       coding->encoder = encode_coding_emacs_mule;
5279       coding->common_flags
5280         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5281       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5282           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5283         {
5284           Lisp_Object tail, safe_charsets;
5285           int max_charset_id = 0;
5286
5287           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5288                tail = XCDR (tail))
5289             if (max_charset_id < XFASTINT (XCAR (tail)))
5290               max_charset_id = XFASTINT (XCAR (tail));
5291           safe_charsets = Fmake_string (make_number (max_charset_id + 1),
5292                                         make_number (255));
5293           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5294                tail = XCDR (tail))
5295             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5296           coding->max_charset_id = max_charset_id;
5297           coding->safe_charsets = (char *) SDATA (safe_charsets);
5298         }
5299     }
5300   else if (EQ (coding_type, Qshift_jis))
5301     {
5302       coding->detector = detect_coding_sjis;
5303       coding->decoder = decode_coding_sjis;
5304       coding->encoder = encode_coding_sjis;
5305       coding->common_flags
5306         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5307     }
5308   else if (EQ (coding_type, Qbig5))
5309     {
5310       coding->detector = detect_coding_big5;
5311       coding->decoder = decode_coding_big5;
5312       coding->encoder = encode_coding_big5;
5313       coding->common_flags
5314         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5315     }
5316   else                          /* EQ (coding_type, Qraw_text) */
5317     {
5318       coding->detector = NULL;
5319       coding->decoder = decode_coding_raw_text;
5320       coding->encoder = encode_coding_raw_text;
5321       if (! EQ (eol_type, Qunix))
5322         {
5323           coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5324           if (! VECTORP (eol_type))
5325             coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5326         }
5327
5328     }
5329
5330   return;
5331 }
5332
5333 /* Return a list of charsets supported by CODING.  */
5334
5335 Lisp_Object
5336 coding_charset_list (coding)
5337      struct coding_system *coding;
5338 {
5339   Lisp_Object attrs, charset_list;
5340
5341   CODING_GET_INFO (coding, attrs, charset_list);
5342   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5343     {
5344       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5345
5346       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5347         charset_list = Viso_2022_charset_list;
5348     }
5349   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5350     {
5351       charset_list = Vemacs_mule_charset_list;
5352     }
5353   return charset_list;
5354 }
5355
5356
5357 /* Return raw-text or one of its subsidiaries that has the same
5358    eol_type as CODING-SYSTEM.  */
5359
5360 Lisp_Object
5361 raw_text_coding_system (coding_system)
5362      Lisp_Object coding_system;
5363 {
5364   Lisp_Object spec, attrs;
5365   Lisp_Object eol_type, raw_text_eol_type;
5366
5367   if (NILP (coding_system))
5368     return Qraw_text;
5369   spec = CODING_SYSTEM_SPEC (coding_system);
5370   attrs = AREF (spec, 0);
5371
5372   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5373     return coding_system;
5374
5375   eol_type = AREF (spec, 2);
5376   if (VECTORP (eol_type))
5377     return Qraw_text;
5378   spec = CODING_SYSTEM_SPEC (Qraw_text);
5379   raw_text_eol_type = AREF (spec, 2);
5380   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5381           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5382           : AREF (raw_text_eol_type, 2));
5383 }
5384
5385
5386 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5387    does, return one of the subsidiary that has the same eol-spec as
5388    PARENT.  Otherwise, return CODING_SYSTEM.  If PARENT is nil,
5389    inherit end-of-line format from the system's setting
5390    (system_eol_type).  */
5391
5392 Lisp_Object
5393 coding_inherit_eol_type (coding_system, parent)
5394      Lisp_Object coding_system, parent;
5395 {
5396   Lisp_Object spec, eol_type;
5397
5398   if (NILP (coding_system))
5399     coding_system = Qraw_text;
5400   spec = CODING_SYSTEM_SPEC (coding_system);
5401   eol_type = AREF (spec, 2);
5402   if (VECTORP (eol_type))
5403     {
5404       Lisp_Object parent_eol_type;
5405
5406       if (! NILP (parent))
5407         {
5408           Lisp_Object parent_spec;
5409
5410           parent_spec = CODING_SYSTEM_SPEC (parent);
5411           parent_eol_type = AREF (parent_spec, 2);
5412         }
5413       else
5414         parent_eol_type = system_eol_type;
5415       if (EQ (parent_eol_type, Qunix))
5416         coding_system = AREF (eol_type, 0);
5417       else if (EQ (parent_eol_type, Qdos))
5418         coding_system = AREF (eol_type, 1);
5419       else if (EQ (parent_eol_type, Qmac))
5420         coding_system = AREF (eol_type, 2);
5421     }
5422   return coding_system;
5423 }
5424
5425 /* Emacs has a mechanism to automatically detect a coding system if it
5426    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
5427    it's impossible to distinguish some coding systems accurately
5428    because they use the same range of codes.  So, at first, coding
5429    systems are categorized into 7, those are:
5430
5431    o coding-category-emacs-mule
5432
5433         The category for a coding system which has the same code range
5434         as Emacs' internal format.  Assigned the coding-system (Lisp
5435         symbol) `emacs-mule' by default.
5436
5437    o coding-category-sjis
5438
5439         The category for a coding system which has the same code range
5440         as SJIS.  Assigned the coding-system (Lisp
5441         symbol) `japanese-shift-jis' by default.
5442
5443    o coding-category-iso-7
5444
5445         The category for a coding system which has the same code range
5446         as ISO2022 of 7-bit environment.  This doesn't use any locking
5447         shift and single shift functions.  This can encode/decode all
5448         charsets.  Assigned the coding-system (Lisp symbol)
5449         `iso-2022-7bit' by default.
5450
5451    o coding-category-iso-7-tight
5452
5453         Same as coding-category-iso-7 except that this can
5454         encode/decode only the specified charsets.
5455
5456    o coding-category-iso-8-1
5457
5458         The category for a coding system which has the same code range
5459         as ISO2022 of 8-bit environment and graphic plane 1 used only
5460         for DIMENSION1 charset.  This doesn't use any locking shift
5461         and single shift functions.  Assigned the coding-system (Lisp
5462         symbol) `iso-latin-1' by default.
5463
5464    o coding-category-iso-8-2
5465
5466         The category for a coding system which has the same code range
5467         as ISO2022 of 8-bit environment and graphic plane 1 used only
5468         for DIMENSION2 charset.  This doesn't use any locking shift
5469         and single shift functions.  Assigned the coding-system (Lisp
5470         symbol) `japanese-iso-8bit' by default.
5471
5472    o coding-category-iso-7-else
5473
5474         The category for a coding system which has the same code range
5475         as ISO2022 of 7-bit environemnt but uses locking shift or
5476         single shift functions.  Assigned the coding-system (Lisp
5477         symbol) `iso-2022-7bit-lock' by default.
5478
5479    o coding-category-iso-8-else
5480
5481         The category for a coding system which has the same code range
5482         as ISO2022 of 8-bit environemnt but uses locking shift or
5483         single shift functions.  Assigned the coding-system (Lisp
5484         symbol) `iso-2022-8bit-ss2' by default.
5485
5486    o coding-category-big5
5487
5488         The category for a coding system which has the same code range
5489         as BIG5.  Assigned the coding-system (Lisp symbol)
5490         `cn-big5' by default.
5491
5492    o coding-category-utf-8
5493
5494         The category for a coding system which has the same code range
5495         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
5496         symbol) `utf-8' by default.
5497
5498    o coding-category-utf-16-be
5499
5500         The category for a coding system in which a text has an
5501         Unicode signature (cf. Unicode Standard) in the order of BIG
5502         endian at the head.  Assigned the coding-system (Lisp symbol)
5503         `utf-16-be' by default.
5504
5505    o coding-category-utf-16-le
5506
5507         The category for a coding system in which a text has an
5508         Unicode signature (cf. Unicode Standard) in the order of
5509         LITTLE endian at the head.  Assigned the coding-system (Lisp
5510         symbol) `utf-16-le' by default.
5511
5512    o coding-category-ccl
5513
5514         The category for a coding system of which encoder/decoder is
5515         written in CCL programs.  The default value is nil, i.e., no
5516         coding system is assigned.
5517
5518    o coding-category-binary
5519
5520         The category for a coding system not categorized in any of the
5521         above.  Assigned the coding-system (Lisp symbol)
5522         `no-conversion' by default.
5523
5524    Each of them is a Lisp symbol and the value is an actual
5525    `coding-system's (this is also a Lisp symbol) assigned by a user.
5526    What Emacs does actually is to detect a category of coding system.
5527    Then, it uses a `coding-system' assigned to it.  If Emacs can't
5528    decide only one possible category, it selects a category of the
5529    highest priority.  Priorities of categories are also specified by a
5530    user in a Lisp variable `coding-category-list'.
5531
5532 */
5533
5534 #define EOL_SEEN_NONE   0
5535 #define EOL_SEEN_LF     1
5536 #define EOL_SEEN_CR     2
5537 #define EOL_SEEN_CRLF   4
5538
5539 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5540    SOURCE is encoded.  If CATEGORY is one of
5541    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5542    two-byte, else they are encoded by one-byte.
5543
5544    Return one of EOL_SEEN_XXX.  */
5545
5546 #define MAX_EOL_CHECK_COUNT 3
5547
5548 static int
5549 detect_eol (source, src_bytes, category)
5550      const unsigned char *source;
5551      EMACS_INT src_bytes;
5552      enum coding_category category;
5553 {
5554   const unsigned char *src = source, *src_end = src + src_bytes;
5555   unsigned char c;
5556   int total  = 0;
5557   int eol_seen = EOL_SEEN_NONE;
5558
5559   if ((1 << category) & CATEGORY_MASK_UTF_16)
5560     {
5561       int msb, lsb;
5562
5563       msb = category == (coding_category_utf_16_le
5564                          | coding_category_utf_16_le_nosig);
5565       lsb = 1 - msb;
5566
5567       while (src + 1 < src_end)
5568         {
5569           c = src[lsb];
5570           if (src[msb] == 0 && (c == '\n' || c == '\r'))
5571             {
5572               int this_eol;
5573
5574               if (c == '\n')
5575                 this_eol = EOL_SEEN_LF;
5576               else if (src + 3 >= src_end
5577                        || src[msb + 2] != 0
5578                        || src[lsb + 2] != '\n')
5579                 this_eol = EOL_SEEN_CR;
5580               else
5581                 this_eol = EOL_SEEN_CRLF;
5582
5583               if (eol_seen == EOL_SEEN_NONE)
5584                 /* This is the first end-of-line.  */
5585                 eol_seen = this_eol;
5586               else if (eol_seen != this_eol)
5587                 {
5588                   /* The found type is different from what found before.  */
5589                   eol_seen = EOL_SEEN_LF;
5590                   break;
5591                 }
5592               if (++total == MAX_EOL_CHECK_COUNT)
5593                 break;
5594             }
5595           src += 2;
5596         }
5597     }
5598   else
5599     {
5600       while (src < src_end)
5601         {
5602           c = *src++;
5603           if (c == '\n' || c == '\r')
5604             {
5605               int this_eol;
5606
5607               if (c == '\n')
5608                 this_eol = EOL_SEEN_LF;
5609               else if (src >= src_end || *src != '\n')
5610                 this_eol = EOL_SEEN_CR;
5611               else
5612                 this_eol = EOL_SEEN_CRLF, src++;
5613
5614               if (eol_seen == EOL_SEEN_NONE)
5615                 /* This is the first end-of-line.  */
5616                 eol_seen = this_eol;
5617               else if (eol_seen != this_eol)
5618                 {
5619                   /* The found type is different from what found before.  */
5620                   eol_seen = EOL_SEEN_LF;
5621                   break;
5622                 }
5623               if (++total == MAX_EOL_CHECK_COUNT)
5624                 break;
5625             }
5626         }
5627     }
5628   return eol_seen;
5629 }
5630
5631
5632 static Lisp_Object
5633 adjust_coding_eol_type (coding, eol_seen)
5634      struct coding_system *coding;
5635      int eol_seen;
5636 {
5637   Lisp_Object eol_type;
5638
5639   eol_type = CODING_ID_EOL_TYPE (coding->id);
5640   if (eol_seen & EOL_SEEN_LF)
5641     {
5642       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5643       eol_type = Qunix;
5644     }
5645   else if (eol_seen & EOL_SEEN_CRLF)
5646     {
5647       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5648       eol_type = Qdos;
5649     }
5650   else if (eol_seen & EOL_SEEN_CR)
5651     {
5652       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5653       eol_type = Qmac;
5654     }
5655   return eol_type;
5656 }
5657
5658 /* Detect how a text specified in CODING is encoded.  If a coding
5659    system is detected, update fields of CODING by the detected coding
5660    system.  */
5661
5662 void
5663 detect_coding (coding)
5664      struct coding_system *coding;
5665 {
5666   const unsigned char *src, *src_end;
5667
5668   coding->consumed = coding->consumed_char = 0;
5669   coding->produced = coding->produced_char = 0;
5670   coding_set_source (coding);
5671
5672   src_end = coding->source + coding->src_bytes;
5673
5674   /* If we have not yet decided the text encoding type, detect it
5675      now.  */
5676   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5677     {
5678       int c, i;
5679       struct coding_detection_info detect_info;
5680
5681       detect_info.checked = detect_info.found = detect_info.rejected = 0;
5682       for (i = 0, src = coding->source; src < src_end; i++, src++)
5683         {
5684           c = *src;
5685           if (c & 0x80)
5686             break;
5687           if (c < 0x20
5688               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
5689               && ! inhibit_iso_escape_detection
5690               && ! detect_info.checked)
5691             {
5692               coding->head_ascii = src - (coding->source + coding->consumed);
5693               if (detect_coding_iso_2022 (coding, &detect_info))
5694                 {
5695                   /* We have scanned the whole data.  */
5696                   if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
5697                     /* We didn't find an 8-bit code.  */
5698                     src = src_end;
5699                   break;
5700                 }
5701             }
5702         }
5703       coding->head_ascii = src - (coding->source + coding->consumed);
5704
5705       if (coding->head_ascii < coding->src_bytes
5706           || detect_info.found)
5707         {
5708           enum coding_category category;
5709           struct coding_system *this;
5710
5711           if (coding->head_ascii == coding->src_bytes)
5712             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
5713             for (i = 0; i < coding_category_raw_text; i++)
5714               {
5715                 category = coding_priorities[i];
5716                 this = coding_categories + category;
5717                 if (detect_info.found & (1 << category))
5718                   break;
5719               }
5720           else
5721             for (i = 0; i < coding_category_raw_text; i++)
5722               {
5723                 category = coding_priorities[i];
5724                 this = coding_categories + category;
5725                 if (this->id < 0)
5726                   {
5727                     /* No coding system of this category is defined.  */
5728                     detect_info.rejected |= (1 << category);
5729                   }
5730                 else if (category >= coding_category_raw_text)
5731                   continue;
5732                 else if (detect_info.checked & (1 << category))
5733                   {
5734                     if (detect_info.found & (1 << category))
5735                       break;
5736                   }
5737                 else if ((*(this->detector)) (coding, &detect_info)
5738                          && detect_info.found & (1 << category))
5739                   {
5740                     if (category == coding_category_utf_16_auto)
5741                       {
5742                         if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5743                           category = coding_category_utf_16_le;
5744                         else
5745                           category = coding_category_utf_16_be;
5746                       }
5747                     break;
5748                   }
5749               }
5750
5751           if (i < coding_category_raw_text)
5752             setup_coding_system (CODING_ID_NAME (this->id), coding);
5753           else if (detect_info.rejected == CATEGORY_MASK_ANY)
5754             setup_coding_system (Qraw_text, coding);
5755           else if (detect_info.rejected)
5756             for (i = 0; i < coding_category_raw_text; i++)
5757               if (! (detect_info.rejected & (1 << coding_priorities[i])))
5758                 {
5759                   this = coding_categories + coding_priorities[i];
5760                   setup_coding_system (CODING_ID_NAME (this->id), coding);
5761                   break;
5762                 }
5763         }
5764     }
5765   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5766            == coding_category_utf_16_auto)
5767     {
5768       Lisp_Object coding_systems;
5769       struct coding_detection_info detect_info;
5770
5771       coding_systems
5772         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5773       detect_info.found = detect_info.rejected = 0;
5774       if (CONSP (coding_systems)
5775           && detect_coding_utf_16 (coding, &detect_info))
5776         {
5777           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5778             setup_coding_system (XCAR (coding_systems), coding);
5779           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
5780             setup_coding_system (XCDR (coding_systems), coding);
5781         }
5782     }
5783 }
5784
5785
5786 static void
5787 decode_eol (coding)
5788      struct coding_system *coding;
5789 {
5790   Lisp_Object eol_type;
5791   unsigned char *p, *pbeg, *pend;
5792
5793   eol_type = CODING_ID_EOL_TYPE (coding->id);
5794   if (EQ (eol_type, Qunix))
5795     return;
5796
5797   if (NILP (coding->dst_object))
5798     pbeg = coding->destination;
5799   else
5800     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
5801   pend = pbeg + coding->produced;
5802
5803   if (VECTORP (eol_type))
5804     {
5805       int eol_seen = EOL_SEEN_NONE;
5806
5807       for (p = pbeg; p < pend; p++)
5808         {
5809           if (*p == '\n')
5810             eol_seen |= EOL_SEEN_LF;
5811           else if (*p == '\r')
5812             {
5813               if (p + 1 < pend && *(p + 1) == '\n')
5814                 {
5815                   eol_seen |= EOL_SEEN_CRLF;
5816                   p++;
5817                 }
5818               else
5819                 eol_seen |= EOL_SEEN_CR;
5820             }
5821         }
5822       if (eol_seen != EOL_SEEN_NONE
5823           && eol_seen != EOL_SEEN_LF
5824           && eol_seen != EOL_SEEN_CRLF
5825           && eol_seen != EOL_SEEN_CR)
5826         eol_seen = EOL_SEEN_LF;
5827       if (eol_seen != EOL_SEEN_NONE)
5828         eol_type = adjust_coding_eol_type (coding, eol_seen);
5829     }
5830
5831   if (EQ (eol_type, Qmac))
5832     {
5833       for (p = pbeg; p < pend; p++)
5834         if (*p == '\r')
5835           *p = '\n';
5836     }
5837   else if (EQ (eol_type, Qdos))
5838     {
5839       int n = 0;
5840
5841       if (NILP (coding->dst_object))
5842         {
5843           /* Start deleting '\r' from the tail to minimize the memory
5844              movement.  */
5845           for (p = pend - 2; p >= pbeg; p--)
5846             if (*p == '\r')
5847               {
5848                 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
5849                 n++;
5850               }
5851         }
5852       else
5853         {
5854           int pos_byte = coding->dst_pos_byte;
5855           int pos = coding->dst_pos;
5856           int pos_end = pos + coding->produced_char - 1;
5857
5858           while (pos < pos_end)
5859             {
5860               p = BYTE_POS_ADDR (pos_byte);
5861               if (*p == '\r' && p[1] == '\n')
5862                 {
5863                   del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
5864                   n++;
5865                   pos_end--;
5866                 }
5867               pos++;
5868               if (coding->dst_multibyte)
5869                 pos_byte += BYTES_BY_CHAR_HEAD (*p);
5870               else
5871                 pos_byte++;
5872             }
5873         }
5874       coding->produced -= n;
5875       coding->produced_char -= n;
5876     }
5877 }
5878
5879
5880 /* Return a translation table (or list of them) from coding system
5881    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
5882    decoding (ENCODEP is zero). */
5883
5884 static Lisp_Object
5885 get_translation_table (attrs, encodep, max_lookup)
5886      Lisp_Object attrs;
5887      int encodep, *max_lookup;
5888 {
5889   Lisp_Object standard, translation_table;
5890   Lisp_Object val;
5891
5892   if (encodep)
5893     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
5894       standard = Vstandard_translation_table_for_encode;
5895   else
5896     translation_table = CODING_ATTR_DECODE_TBL (attrs),
5897       standard = Vstandard_translation_table_for_decode;
5898   if (NILP (translation_table))
5899     translation_table = standard;
5900   else
5901     {
5902       if (SYMBOLP (translation_table))
5903         translation_table = Fget (translation_table, Qtranslation_table);
5904       else if (CONSP (translation_table))
5905         {
5906           translation_table = Fcopy_sequence (translation_table);
5907           for (val = translation_table; CONSP (val); val = XCDR (val))
5908             if (SYMBOLP (XCAR (val)))
5909               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
5910         }
5911       if (CHAR_TABLE_P (standard))
5912         {
5913           if (CONSP (translation_table))
5914             translation_table = nconc2 (translation_table,
5915                                         Fcons (standard, Qnil));
5916           else
5917             translation_table = Fcons (translation_table,
5918                                        Fcons (standard, Qnil));
5919         }
5920     }
5921
5922   if (max_lookup)
5923     {
5924       *max_lookup = 1;
5925       if (CHAR_TABLE_P (translation_table)
5926           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
5927         {
5928           val = XCHAR_TABLE (translation_table)->extras[1];
5929           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5930             *max_lookup = XFASTINT (val);
5931         }
5932       else if (CONSP (translation_table))
5933         {
5934           Lisp_Object tail, val;
5935
5936           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
5937             if (CHAR_TABLE_P (XCAR (tail))
5938                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
5939               {
5940                 val = XCHAR_TABLE (XCAR (tail))->extras[1];
5941                 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5942                   *max_lookup = XFASTINT (val);
5943               }
5944         }
5945     }
5946   return translation_table;
5947 }
5948
5949 #define LOOKUP_TRANSLATION_TABLE(table, c, trans)               \
5950   do {                                                          \
5951     trans = Qnil;                                               \
5952     if (CHAR_TABLE_P (table))                                   \
5953       {                                                         \
5954         trans = CHAR_TABLE_REF (table, c);                      \
5955         if (CHARACTERP (trans))                                 \
5956           c = XFASTINT (trans), trans = Qnil;                   \
5957       }                                                         \
5958     else if (CONSP (table))                                     \
5959       {                                                         \
5960         Lisp_Object tail;                                       \
5961                                                                 \
5962         for (tail = table; CONSP (tail); tail = XCDR (tail))    \
5963           if (CHAR_TABLE_P (XCAR (tail)))                       \
5964             {                                                   \
5965               trans = CHAR_TABLE_REF (XCAR (tail), c);          \
5966               if (CHARACTERP (trans))                           \
5967                 c = XFASTINT (trans), trans = Qnil;             \
5968               else if (! NILP (trans))                          \
5969                 break;                                          \
5970             }                                                   \
5971       }                                                         \
5972   } while (0)
5973
5974
5975 static Lisp_Object
5976 get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
5977      Lisp_Object val;
5978      int *buf, *buf_end;
5979      int last_block;
5980      int *from_nchars, *to_nchars;
5981 {
5982   /* VAL is TO or (([FROM-CHAR ...] .  TO) ...) where TO is TO-CHAR or
5983      [TO-CHAR ...].  */
5984   if (CONSP (val))
5985     {
5986       Lisp_Object from, tail;
5987       int i, len;
5988
5989       for (tail = val; CONSP (tail); tail = XCDR (tail))
5990         {
5991           val = XCAR (tail);
5992           from = XCAR (val);
5993           len = ASIZE (from);
5994           for (i = 0; i < len; i++)
5995             {
5996               if (buf + i == buf_end)
5997                 {
5998                   if (! last_block)
5999                     return Qt;
6000                   break;
6001                 }
6002               if (XINT (AREF (from, i)) != buf[i])
6003                 break;
6004             }
6005           if (i == len)
6006             {
6007               val = XCDR (val);
6008               *from_nchars = len;
6009               break;
6010             }
6011         }
6012       if (! CONSP (tail))
6013         return Qnil;
6014     }
6015   if (VECTORP (val))
6016     *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
6017   else
6018     *buf = XINT (val);
6019   return val;
6020 }
6021
6022
6023 static int
6024 produce_chars (coding, translation_table, last_block)
6025      struct coding_system *coding;
6026      Lisp_Object translation_table;
6027      int last_block;
6028 {
6029   unsigned char *dst = coding->destination + coding->produced;
6030   unsigned char *dst_end = coding->destination + coding->dst_bytes;
6031   EMACS_INT produced;
6032   EMACS_INT produced_chars = 0;
6033   int carryover = 0;
6034
6035   if (! coding->chars_at_source)
6036     {
6037       /* Source characters are in coding->charbuf.  */
6038       int *buf = coding->charbuf;
6039       int *buf_end = buf + coding->charbuf_used;
6040
6041       if (EQ (coding->src_object, coding->dst_object))
6042         {
6043           coding_set_source (coding);
6044           dst_end = ((unsigned char *) coding->source) + coding->consumed;
6045         }
6046
6047       while (buf < buf_end)
6048         {
6049           int c = *buf, i;
6050
6051           if (c >= 0)
6052             {
6053               int from_nchars = 1, to_nchars = 1;
6054               Lisp_Object trans = Qnil;
6055
6056               LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6057               if (! NILP (trans))
6058                 {
6059                   trans = get_translation (trans, buf, buf_end, last_block,
6060                                            &from_nchars, &to_nchars);
6061                   if (EQ (trans, Qt))
6062                     break;
6063                   c = *buf;
6064                 }
6065
6066               if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6067                 {
6068                   dst = alloc_destination (coding,
6069                                            buf_end - buf
6070                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
6071                                            dst);
6072                   if (EQ (coding->src_object, coding->dst_object))
6073                     {
6074                       coding_set_source (coding);
6075                       dst_end = ((unsigned char *) coding->source) + coding->consumed;
6076                     }
6077                   else
6078                     dst_end = coding->destination + coding->dst_bytes;
6079                 }
6080
6081               for (i = 0; i < to_nchars; i++)
6082                 {
6083                   if (i > 0)
6084                     c = XINT (AREF (trans, i));
6085                   if (coding->dst_multibyte
6086                       || ! CHAR_BYTE8_P (c))
6087                     CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
6088                   else
6089                     *dst++ = CHAR_TO_BYTE8 (c);
6090                 }
6091               produced_chars += to_nchars;
6092               *buf++ = to_nchars;
6093               while (--from_nchars > 0)
6094                 *buf++ = 0;
6095             }
6096           else
6097             /* This is an annotation datum.  (-C) is the length.  */
6098             buf += -c;
6099         }
6100       carryover = buf_end - buf;
6101     }
6102   else
6103     {
6104       /* Source characters are at coding->source.  */
6105       const unsigned char *src = coding->source;
6106       const unsigned char *src_end = src + coding->consumed;
6107
6108       if (EQ (coding->dst_object, coding->src_object))
6109         dst_end = (unsigned char *) src;
6110       if (coding->src_multibyte != coding->dst_multibyte)
6111         {
6112           if (coding->src_multibyte)
6113             {
6114               int multibytep = 1;
6115               EMACS_INT consumed_chars;
6116
6117               while (1)
6118                 {
6119                   const unsigned char *src_base = src;
6120                   int c;
6121
6122                   ONE_MORE_BYTE (c);
6123                   if (dst == dst_end)
6124                     {
6125                       if (EQ (coding->src_object, coding->dst_object))
6126                         dst_end = (unsigned char *) src;
6127                       if (dst == dst_end)
6128                         {
6129                           EMACS_INT offset = src - coding->source;
6130
6131                           dst = alloc_destination (coding, src_end - src + 1,
6132                                                    dst);
6133                           dst_end = coding->destination + coding->dst_bytes;
6134                           coding_set_source (coding);
6135                           src = coding->source + offset;
6136                           src_end = coding->source + coding->src_bytes;
6137                           if (EQ (coding->src_object, coding->dst_object))
6138                             dst_end = (unsigned char *) src;
6139                         }
6140                     }
6141                   *dst++ = c;
6142                   produced_chars++;
6143                 }
6144             no_more_source:
6145               ;
6146             }
6147           else
6148             while (src < src_end)
6149               {
6150                 int multibytep = 1;
6151                 int c = *src++;
6152
6153                 if (dst >= dst_end - 1)
6154                   {
6155                     if (EQ (coding->src_object, coding->dst_object))
6156                       dst_end = (unsigned char *) src;
6157                     if (dst >= dst_end - 1)
6158                       {
6159                         EMACS_INT offset = src - coding->source;
6160                         EMACS_INT more_bytes;
6161
6162                         if (EQ (coding->src_object, coding->dst_object))
6163                           more_bytes = ((src_end - src) / 2) + 2;
6164                         else
6165                           more_bytes = src_end - src + 2;
6166                         dst = alloc_destination (coding, more_bytes, dst);
6167                         dst_end = coding->destination + coding->dst_bytes;
6168                         coding_set_source (coding);
6169                         src = coding->source + offset;
6170                         src_end = coding->source + coding->src_bytes;
6171                         if (EQ (coding->src_object, coding->dst_object))
6172                           dst_end = (unsigned char *) src;
6173                       }
6174                   }
6175                 EMIT_ONE_BYTE (c);
6176               }
6177         }
6178       else
6179         {
6180           if (!EQ (coding->src_object, coding->dst_object))
6181             {
6182               EMACS_INT require = coding->src_bytes - coding->dst_bytes;
6183
6184               if (require > 0)
6185                 {
6186                   EMACS_INT offset = src - coding->source;
6187
6188                   dst = alloc_destination (coding, require, dst);
6189                   coding_set_source (coding);
6190                   src = coding->source + offset;
6191                   src_end = coding->source + coding->src_bytes;
6192                 }
6193             }
6194           produced_chars = coding->consumed_char;
6195           while (src < src_end)
6196             *dst++ = *src++;
6197         }
6198     }
6199
6200   produced = dst - (coding->destination + coding->produced);
6201   if (BUFFERP (coding->dst_object) && produced_chars > 0)
6202     insert_from_gap (produced_chars, produced);
6203   coding->produced += produced;
6204   coding->produced_char += produced_chars;
6205   return carryover;
6206 }
6207
6208 /* Compose text in CODING->object according to the annotation data at
6209    CHARBUF.  CHARBUF is an array:
6210      [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
6211  */
6212
6213 static INLINE void
6214 produce_composition (coding, charbuf, pos)
6215      struct coding_system *coding;
6216      int *charbuf;
6217      EMACS_INT pos;
6218 {
6219   int len;
6220   EMACS_INT to;
6221   enum composition_method method;
6222   Lisp_Object components;
6223
6224   len = -charbuf[0];
6225   to = pos + charbuf[2];
6226   if (to <= pos)
6227     return;
6228   method = (enum composition_method) (charbuf[3]);
6229
6230   if (method == COMPOSITION_RELATIVE)
6231     components = Qnil;
6232   else if (method >= COMPOSITION_WITH_RULE
6233            && method <= COMPOSITION_WITH_RULE_ALTCHARS)
6234     {
6235       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6236       int i;
6237
6238       len -= 4;
6239       charbuf += 4;
6240       for (i = 0; i < len; i++)
6241         {
6242           args[i] = make_number (charbuf[i]);
6243           if (charbuf[i] < 0)
6244             return;
6245         }
6246       components = (method == COMPOSITION_WITH_ALTCHARS
6247                     ? Fstring (len, args) : Fvector (len, args));
6248     }
6249   else
6250     return;
6251   compose_text (pos, to, components, Qnil, coding->dst_object);
6252 }
6253
6254
6255 /* Put `charset' property on text in CODING->object according to
6256    the annotation data at CHARBUF.  CHARBUF is an array:
6257      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
6258  */
6259
6260 static INLINE void
6261 produce_charset (coding, charbuf, pos)
6262      struct coding_system *coding;
6263      int *charbuf;
6264      EMACS_INT pos;
6265 {
6266   EMACS_INT from = pos - charbuf[2];
6267   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
6268
6269   Fput_text_property (make_number (from), make_number (pos),
6270                       Qcharset, CHARSET_NAME (charset),
6271                       coding->dst_object);
6272 }
6273
6274
6275 #define CHARBUF_SIZE 0x4000
6276
6277 #define ALLOC_CONVERSION_WORK_AREA(coding)                              \
6278   do {                                                                  \
6279     int size = CHARBUF_SIZE;;                                           \
6280                                                                         \
6281     coding->charbuf = NULL;                                             \
6282     while (size > 1024)                                                 \
6283       {                                                                 \
6284         coding->charbuf = (int *) alloca (sizeof (int) * size);         \
6285         if (coding->charbuf)                                            \
6286           break;                                                        \
6287         size >>= 1;                                                     \
6288       }                                                                 \
6289     if (! coding->charbuf)                                              \
6290       {                                                                 \
6291         record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
6292         return coding->result;                                          \
6293       }                                                                 \
6294     coding->charbuf_size = size;                                        \
6295   } while (0)
6296
6297
6298 static void
6299 produce_annotation (coding, pos)
6300      struct coding_system *coding;
6301      EMACS_INT pos;
6302 {
6303   int *charbuf = coding->charbuf;
6304   int *charbuf_end = charbuf + coding->charbuf_used;
6305
6306   if (NILP (coding->dst_object))
6307     return;
6308
6309   while (charbuf < charbuf_end)
6310     {
6311       if (*charbuf >= 0)
6312         pos += *charbuf++;
6313       else
6314         {
6315           int len = -*charbuf;
6316           switch (charbuf[1])
6317             {
6318             case CODING_ANNOTATE_COMPOSITION_MASK:
6319               produce_composition (coding, charbuf, pos);
6320               break;
6321             case CODING_ANNOTATE_CHARSET_MASK:
6322               produce_charset (coding, charbuf, pos);
6323               break;
6324             default:
6325               abort ();
6326             }
6327           charbuf += len;
6328         }
6329     }
6330 }
6331
6332 /* Decode the data at CODING->src_object into CODING->dst_object.
6333    CODING->src_object is a buffer, a string, or nil.
6334    CODING->dst_object is a buffer.
6335
6336    If CODING->src_object is a buffer, it must be the current buffer.
6337    In this case, if CODING->src_pos is positive, it is a position of
6338    the source text in the buffer, otherwise, the source text is in the
6339    gap area of the buffer, and CODING->src_pos specifies the offset of
6340    the text from GPT (which must be the same as PT).  If this is the
6341    same buffer as CODING->dst_object, CODING->src_pos must be
6342    negative.
6343
6344    If CODING->src_object is a string, CODING->src_pos is an index to
6345    that string.
6346
6347    If CODING->src_object is nil, CODING->source must already point to
6348    the non-relocatable memory area.  In this case, CODING->src_pos is
6349    an offset from CODING->source.
6350
6351    The decoded data is inserted at the current point of the buffer
6352    CODING->dst_object.
6353 */
6354
6355 static int
6356 decode_coding (coding)
6357      struct coding_system *coding;
6358 {
6359   Lisp_Object attrs;
6360   Lisp_Object undo_list;
6361   Lisp_Object translation_table;
6362   int carryover;
6363   int i;
6364
6365   if (BUFFERP (coding->src_object)
6366       && coding->src_pos > 0
6367       && coding->src_pos < GPT
6368       && coding->src_pos + coding->src_chars > GPT)
6369     move_gap_both (coding->src_pos, coding->src_pos_byte);
6370
6371   undo_list = Qt;
6372   if (BUFFERP (coding->dst_object))
6373     {
6374       if (current_buffer != XBUFFER (coding->dst_object))
6375         set_buffer_internal (XBUFFER (coding->dst_object));
6376       if (GPT != PT)
6377         move_gap_both (PT, PT_BYTE);
6378       undo_list = current_buffer->undo_list;
6379       current_buffer->undo_list = Qt;
6380     }
6381
6382   coding->consumed = coding->consumed_char = 0;
6383   coding->produced = coding->produced_char = 0;
6384   coding->chars_at_source = 0;
6385   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6386   coding->errors = 0;
6387
6388   ALLOC_CONVERSION_WORK_AREA (coding);
6389
6390   attrs = CODING_ID_ATTRS (coding->id);
6391   translation_table = get_translation_table (attrs, 0, NULL);
6392
6393   carryover = 0;
6394   do
6395     {
6396       EMACS_INT pos = coding->dst_pos + coding->produced_char;
6397
6398       coding_set_source (coding);
6399       coding->annotated = 0;
6400       coding->charbuf_used = carryover;
6401       (*(coding->decoder)) (coding);
6402       coding_set_destination (coding);
6403       carryover = produce_chars (coding, translation_table, 0);
6404       if (coding->annotated)
6405         produce_annotation (coding, pos);
6406       for (i = 0; i < carryover; i++)
6407         coding->charbuf[i]
6408           = coding->charbuf[coding->charbuf_used - carryover + i];
6409     }
6410   while (coding->consumed < coding->src_bytes
6411          && (coding->result == CODING_RESULT_SUCCESS
6412              || coding->result == CODING_RESULT_INVALID_SRC));
6413
6414   if (carryover > 0)
6415     {
6416       coding_set_destination (coding);
6417       coding->charbuf_used = carryover;
6418       produce_chars (coding, translation_table, 1);
6419     }
6420
6421   coding->carryover_bytes = 0;
6422   if (coding->consumed < coding->src_bytes)
6423     {
6424       int nbytes = coding->src_bytes - coding->consumed;
6425       const unsigned char *src;
6426
6427       coding_set_source (coding);
6428       coding_set_destination (coding);
6429       src = coding->source + coding->consumed;
6430
6431       if (coding->mode & CODING_MODE_LAST_BLOCK)
6432         {
6433           /* Flush out unprocessed data as binary chars.  We are sure
6434              that the number of data is less than the size of
6435              coding->charbuf.  */
6436           coding->charbuf_used = 0;
6437           while (nbytes-- > 0)
6438             {
6439               int c = *src++;
6440
6441               if (c & 0x80)
6442                 c = BYTE8_TO_CHAR (c);
6443               coding->charbuf[coding->charbuf_used++] = c;
6444             }
6445           produce_chars (coding, Qnil, 1);
6446         }
6447       else
6448         {
6449           /* Record unprocessed bytes in coding->carryover.  We are
6450              sure that the number of data is less than the size of
6451              coding->carryover.  */
6452           unsigned char *p = coding->carryover;
6453
6454           coding->carryover_bytes = nbytes;
6455           while (nbytes-- > 0)
6456             *p++ = *src++;
6457         }
6458       coding->consumed = coding->src_bytes;
6459     }
6460
6461   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
6462     decode_eol (coding);
6463   if (BUFFERP (coding->dst_object))
6464     {
6465       current_buffer->undo_list = undo_list;
6466       record_insert (coding->dst_pos, coding->produced_char);
6467     }
6468   return coding->result;
6469 }
6470
6471
6472 /* Extract an annotation datum from a composition starting at POS and
6473    ending before LIMIT of CODING->src_object (buffer or string), store
6474    the data in BUF, set *STOP to a starting position of the next
6475    composition (if any) or to LIMIT, and return the address of the
6476    next element of BUF.
6477
6478    If such an annotation is not found, set *STOP to a starting
6479    position of a composition after POS (if any) or to LIMIT, and
6480    return BUF.  */
6481
6482 static INLINE int *
6483 handle_composition_annotation (pos, limit, coding, buf, stop)
6484      EMACS_INT pos, limit;
6485      struct coding_system *coding;
6486      int *buf;
6487      EMACS_INT *stop;
6488 {
6489   EMACS_INT start, end;
6490   Lisp_Object prop;
6491
6492   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
6493       || end > limit)
6494     *stop = limit;
6495   else if (start > pos)
6496     *stop = start;
6497   else
6498     {
6499       if (start == pos)
6500         {
6501           /* We found a composition.  Store the corresponding
6502              annotation data in BUF.  */
6503           int *head = buf;
6504           enum composition_method method = COMPOSITION_METHOD (prop);
6505           int nchars = COMPOSITION_LENGTH (prop);
6506
6507           ADD_COMPOSITION_DATA (buf, nchars, method);
6508           if (method != COMPOSITION_RELATIVE)
6509             {
6510               Lisp_Object components;
6511               int len, i, i_byte;
6512
6513               components = COMPOSITION_COMPONENTS (prop);
6514               if (VECTORP (components))
6515                 {
6516                   len = XVECTOR (components)->size;
6517                   for (i = 0; i < len; i++)
6518                     *buf++ = XINT (AREF (components, i));
6519                 }
6520               else if (STRINGP (components))
6521                 {
6522                   len = SCHARS (components);
6523                   i = i_byte = 0;
6524                   while (i < len)
6525                     {
6526                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
6527                       buf++;
6528                     }
6529                 }
6530               else if (INTEGERP (components))
6531                 {
6532                   len = 1;
6533                   *buf++ = XINT (components);
6534                 }
6535               else if (CONSP (components))
6536                 {
6537                   for (len = 0; CONSP (components);
6538                        len++, components = XCDR (components))
6539                     *buf++ = XINT (XCAR (components));
6540                 }
6541               else
6542                 abort ();
6543               *head -= len;
6544             }
6545         }
6546
6547       if (find_composition (end, limit, &start, &end, &prop,
6548                             coding->src_object)
6549           && end <= limit)
6550         *stop = start;
6551       else
6552         *stop = limit;
6553     }
6554   return buf;
6555 }
6556
6557
6558 /* Extract an annotation datum from a text property `charset' at POS of
6559    CODING->src_object (buffer of string), store the data in BUF, set
6560    *STOP to the position where the value of `charset' property changes
6561    (limiting by LIMIT), and return the address of the next element of
6562    BUF.
6563
6564    If the property value is nil, set *STOP to the position where the
6565    property value is non-nil (limiting by LIMIT), and return BUF.  */
6566
6567 static INLINE int *
6568 handle_charset_annotation (pos, limit, coding, buf, stop)
6569      EMACS_INT pos, limit;
6570      struct coding_system *coding;
6571      int *buf;
6572      EMACS_INT *stop;
6573 {
6574   Lisp_Object val, next;
6575   int id;
6576
6577   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
6578   if (! NILP (val) && CHARSETP (val))
6579     id = XINT (CHARSET_SYMBOL_ID (val));
6580   else
6581     id = -1;
6582   ADD_CHARSET_DATA (buf, 0, id);
6583   next = Fnext_single_property_change (make_number (pos), Qcharset,
6584                                        coding->src_object,
6585                                        make_number (limit));
6586   *stop = XINT (next);
6587   return buf;
6588 }
6589
6590
6591 static void
6592 consume_chars (coding, translation_table, max_lookup)
6593      struct coding_system *coding;
6594      Lisp_Object translation_table;
6595      int max_lookup;
6596 {
6597   int *buf = coding->charbuf;
6598   int *buf_end = coding->charbuf + coding->charbuf_size;
6599   const unsigned char *src = coding->source + coding->consumed;
6600   const unsigned char *src_end = coding->source + coding->src_bytes;
6601   EMACS_INT pos = coding->src_pos + coding->consumed_char;
6602   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
6603   int multibytep = coding->src_multibyte;
6604   Lisp_Object eol_type;
6605   int c;
6606   EMACS_INT stop, stop_composition, stop_charset;
6607   int *lookup_buf = NULL;
6608
6609   if (! NILP (translation_table))
6610     lookup_buf = alloca (sizeof (int) * max_lookup);
6611
6612   eol_type = CODING_ID_EOL_TYPE (coding->id);
6613   if (VECTORP (eol_type))
6614     eol_type = Qunix;
6615
6616   /* Note: composition handling is not yet implemented.  */
6617   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6618
6619   if (NILP (coding->src_object))
6620     stop = stop_composition = stop_charset = end_pos;
6621   else
6622     {
6623       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6624         stop = stop_composition = pos;
6625       else
6626         stop = stop_composition = end_pos;
6627       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6628         stop = stop_charset = pos;
6629       else
6630         stop_charset = end_pos;
6631     }
6632
6633   /* Compensate for CRLF and conversion.  */
6634   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
6635   while (buf < buf_end)
6636     {
6637       Lisp_Object trans;
6638
6639       if (pos == stop)
6640         {
6641           if (pos == end_pos)
6642             break;
6643           if (pos == stop_composition)
6644             buf = handle_composition_annotation (pos, end_pos, coding,
6645                                                  buf, &stop_composition);
6646           if (pos == stop_charset)
6647             buf = handle_charset_annotation (pos, end_pos, coding,
6648                                              buf, &stop_charset);
6649           stop = (stop_composition < stop_charset
6650                   ? stop_composition : stop_charset);
6651         }
6652
6653       if (! multibytep)
6654         {
6655           EMACS_INT bytes;
6656
6657           if (coding->encoder == encode_coding_raw_text)
6658             c = *src++, pos++;
6659           else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
6660             c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
6661           else
6662             c = BYTE8_TO_CHAR (*src), src++, pos++;
6663         }
6664       else
6665         c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
6666       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6667         c = '\n';
6668       if (! EQ (eol_type, Qunix))
6669         {
6670           if (c == '\n')
6671             {
6672               if (EQ (eol_type, Qdos))
6673                 *buf++ = '\r';
6674               else
6675                 c = '\r';
6676             }
6677         }
6678
6679       trans = Qnil;
6680       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6681       if (NILP (trans))
6682         *buf++ = c;
6683       else
6684         {
6685           int from_nchars = 1, to_nchars = 1;
6686           int *lookup_buf_end;
6687           const unsigned char *p = src;
6688           int i;
6689
6690           lookup_buf[0] = c;
6691           for (i = 1; i < max_lookup && p < src_end; i++)
6692             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
6693           lookup_buf_end = lookup_buf + i;
6694           trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
6695                                    &from_nchars, &to_nchars);
6696           if (EQ (trans, Qt)
6697               || buf + to_nchars > buf_end)
6698             break;
6699           *buf++ = *lookup_buf;
6700           for (i = 1; i < to_nchars; i++)
6701             *buf++ = XINT (AREF (trans, i));
6702           for (i = 1; i < from_nchars; i++, pos++)
6703             src += MULTIBYTE_LENGTH_NO_CHECK (src);
6704         }
6705     }
6706
6707   coding->consumed = src - coding->source;
6708   coding->consumed_char = pos - coding->src_pos;
6709   coding->charbuf_used = buf - coding->charbuf;
6710   coding->chars_at_source = 0;
6711 }
6712
6713
6714 /* Encode the text at CODING->src_object into CODING->dst_object.
6715    CODING->src_object is a buffer or a string.
6716    CODING->dst_object is a buffer or nil.
6717
6718    If CODING->src_object is a buffer, it must be the current buffer.
6719    In this case, if CODING->src_pos is positive, it is a position of
6720    the source text in the buffer, otherwise. the source text is in the
6721    gap area of the buffer, and coding->src_pos specifies the offset of
6722    the text from GPT (which must be the same as PT).  If this is the
6723    same buffer as CODING->dst_object, CODING->src_pos must be
6724    negative and CODING should not have `pre-write-conversion'.
6725
6726    If CODING->src_object is a string, CODING should not have
6727    `pre-write-conversion'.
6728
6729    If CODING->dst_object is a buffer, the encoded data is inserted at
6730    the current point of that buffer.
6731
6732    If CODING->dst_object is nil, the encoded data is placed at the
6733    memory area specified by CODING->destination.  */
6734
6735 static int
6736 encode_coding (coding)
6737      struct coding_system *coding;
6738 {
6739   Lisp_Object attrs;
6740   Lisp_Object translation_table;
6741   int max_lookup;
6742
6743   attrs = CODING_ID_ATTRS (coding->id);
6744   if (coding->encoder == encode_coding_raw_text)
6745     translation_table = Qnil, max_lookup = 0;
6746   else
6747     translation_table = get_translation_table (attrs, 1, &max_lookup);
6748
6749   if (BUFFERP (coding->dst_object))
6750     {
6751       set_buffer_internal (XBUFFER (coding->dst_object));
6752       coding->dst_multibyte
6753         = ! NILP (current_buffer->enable_multibyte_characters);
6754     }
6755
6756   coding->consumed = coding->consumed_char = 0;
6757   coding->produced = coding->produced_char = 0;
6758   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6759   coding->errors = 0;
6760
6761   ALLOC_CONVERSION_WORK_AREA (coding);
6762
6763   do {
6764     coding_set_source (coding);
6765     consume_chars (coding, translation_table, max_lookup);
6766     coding_set_destination (coding);
6767     (*(coding->encoder)) (coding);
6768   } while (coding->consumed_char < coding->src_chars);
6769
6770   if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
6771     insert_from_gap (coding->produced_char, coding->produced);
6772
6773   return (coding->result);
6774 }
6775
6776
6777 /* Name (or base name) of work buffer for code conversion.  */
6778 static Lisp_Object Vcode_conversion_workbuf_name;
6779
6780 /* A working buffer used by the top level conversion.  Once it is
6781    created, it is never destroyed.  It has the name
6782    Vcode_conversion_workbuf_name.  The other working buffers are
6783    destroyed after the use is finished, and their names are modified
6784    versions of Vcode_conversion_workbuf_name.  */
6785 static Lisp_Object Vcode_conversion_reused_workbuf;
6786
6787 /* 1 iff Vcode_conversion_reused_workbuf is already in use.  */
6788 static int reused_workbuf_in_use;
6789
6790
6791 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
6792    multibyteness of returning buffer.  */
6793
6794 static Lisp_Object
6795 make_conversion_work_buffer (multibyte)
6796      int multibyte;
6797 {
6798   Lisp_Object name, workbuf;
6799   struct buffer *current;
6800
6801   if (reused_workbuf_in_use++)
6802     {
6803       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
6804       workbuf = Fget_buffer_create (name);
6805     }
6806   else
6807     {
6808       name = Vcode_conversion_workbuf_name;
6809       workbuf = Fget_buffer_create (name);
6810       if (NILP (Vcode_conversion_reused_workbuf))
6811         Vcode_conversion_reused_workbuf = workbuf;
6812     }
6813   current = current_buffer;
6814   set_buffer_internal (XBUFFER (workbuf));
6815   Ferase_buffer ();
6816   current_buffer->undo_list = Qt;
6817   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
6818   set_buffer_internal (current);
6819   return workbuf;
6820 }
6821
6822
6823 static Lisp_Object
6824 code_conversion_restore (arg)
6825      Lisp_Object arg;
6826 {
6827   Lisp_Object current, workbuf;
6828   struct gcpro gcpro1;
6829
6830   GCPRO1 (arg);
6831   current = XCAR (arg);
6832   workbuf = XCDR (arg);
6833   if (! NILP (workbuf))
6834     {
6835       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
6836         reused_workbuf_in_use = 0;
6837       else if (! NILP (Fbuffer_live_p (workbuf)))
6838         Fkill_buffer (workbuf);
6839     }
6840   set_buffer_internal (XBUFFER (current));
6841   UNGCPRO;
6842   return Qnil;
6843 }
6844
6845 Lisp_Object
6846 code_conversion_save (with_work_buf, multibyte)
6847      int with_work_buf, multibyte;
6848 {
6849   Lisp_Object workbuf = Qnil;
6850
6851   if (with_work_buf)
6852     workbuf = make_conversion_work_buffer (multibyte);
6853   record_unwind_protect (code_conversion_restore,
6854                          Fcons (Fcurrent_buffer (), workbuf));
6855   return workbuf;
6856 }
6857
6858 int
6859 decode_coding_gap (coding, chars, bytes)
6860      struct coding_system *coding;
6861      EMACS_INT chars, bytes;
6862 {
6863   int count = specpdl_ptr - specpdl;
6864   Lisp_Object attrs;
6865
6866   code_conversion_save (0, 0);
6867
6868   coding->src_object = Fcurrent_buffer ();
6869   coding->src_chars = chars;
6870   coding->src_bytes = bytes;
6871   coding->src_pos = -chars;
6872   coding->src_pos_byte = -bytes;
6873   coding->src_multibyte = chars < bytes;
6874   coding->dst_object = coding->src_object;
6875   coding->dst_pos = PT;
6876   coding->dst_pos_byte = PT_BYTE;
6877   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
6878
6879   if (CODING_REQUIRE_DETECTION (coding))
6880     detect_coding (coding);
6881
6882   coding->mode |= CODING_MODE_LAST_BLOCK;
6883   current_buffer->text->inhibit_shrinking = 1;
6884   decode_coding (coding);
6885   current_buffer->text->inhibit_shrinking = 0;
6886
6887   attrs = CODING_ID_ATTRS (coding->id);
6888   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6889     {
6890       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6891       Lisp_Object val;
6892
6893       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6894       val = call1 (CODING_ATTR_POST_READ (attrs),
6895                    make_number (coding->produced_char));
6896       CHECK_NATNUM (val);
6897       coding->produced_char += Z - prev_Z;
6898       coding->produced += Z_BYTE - prev_Z_BYTE;
6899     }
6900
6901   unbind_to (count, Qnil);
6902   return coding->result;
6903 }
6904
6905 int
6906 encode_coding_gap (coding, chars, bytes)
6907      struct coding_system *coding;
6908      EMACS_INT chars, bytes;
6909 {
6910   int count = specpdl_ptr - specpdl;
6911
6912   code_conversion_save (0, 0);
6913
6914   coding->src_object = Fcurrent_buffer ();
6915   coding->src_chars = chars;
6916   coding->src_bytes = bytes;
6917   coding->src_pos = -chars;
6918   coding->src_pos_byte = -bytes;
6919   coding->src_multibyte = chars < bytes;
6920   coding->dst_object = coding->src_object;
6921   coding->dst_pos = PT;
6922   coding->dst_pos_byte = PT_BYTE;
6923
6924   encode_coding (coding);
6925
6926   unbind_to (count, Qnil);
6927   return coding->result;
6928 }
6929
6930
6931 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6932    SRC_OBJECT into DST_OBJECT by coding context CODING.
6933
6934    SRC_OBJECT is a buffer, a string, or Qnil.
6935
6936    If it is a buffer, the text is at point of the buffer.  FROM and TO
6937    are positions in the buffer.
6938
6939    If it is a string, the text is at the beginning of the string.
6940    FROM and TO are indices to the string.
6941
6942    If it is nil, the text is at coding->source.  FROM and TO are
6943    indices to coding->source.
6944
6945    DST_OBJECT is a buffer, Qt, or Qnil.
6946
6947    If it is a buffer, the decoded text is inserted at point of the
6948    buffer.  If the buffer is the same as SRC_OBJECT, the source text
6949    is deleted.
6950
6951    If it is Qt, a string is made from the decoded text, and
6952    set in CODING->dst_object.
6953
6954    If it is Qnil, the decoded text is stored at CODING->destination.
6955    The caller must allocate CODING->dst_bytes bytes at
6956    CODING->destination by xmalloc.  If the decoded text is longer than
6957    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6958  */
6959
6960 void
6961 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6962                       dst_object)
6963      struct coding_system *coding;
6964      Lisp_Object src_object;
6965      EMACS_INT from, from_byte, to, to_byte;
6966      Lisp_Object dst_object;
6967 {
6968   int count = specpdl_ptr - specpdl;
6969   unsigned char *destination;
6970   EMACS_INT dst_bytes;
6971   EMACS_INT chars = to - from;
6972   EMACS_INT bytes = to_byte - from_byte;
6973   Lisp_Object attrs;
6974   int saved_pt = -1, saved_pt_byte;
6975   int need_marker_adjustment = 0;
6976   Lisp_Object old_deactivate_mark;
6977
6978   old_deactivate_mark = Vdeactivate_mark;
6979
6980   if (NILP (dst_object))
6981     {
6982       destination = coding->destination;
6983       dst_bytes = coding->dst_bytes;
6984     }
6985
6986   coding->src_object = src_object;
6987   coding->src_chars = chars;
6988   coding->src_bytes = bytes;
6989   coding->src_multibyte = chars < bytes;
6990
6991   if (STRINGP (src_object))
6992     {
6993       coding->src_pos = from;
6994       coding->src_pos_byte = from_byte;
6995     }
6996   else if (BUFFERP (src_object))
6997     {
6998       set_buffer_internal (XBUFFER (src_object));
6999       if (from != GPT)
7000         move_gap_both (from, from_byte);
7001       if (EQ (src_object, dst_object))
7002         {
7003           struct Lisp_Marker *tail;
7004
7005           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7006             {
7007               tail->need_adjustment
7008                 = tail->charpos == (tail->insertion_type ? from : to);
7009               need_marker_adjustment |= tail->need_adjustment;
7010             }
7011           saved_pt = PT, saved_pt_byte = PT_BYTE;
7012           TEMP_SET_PT_BOTH (from, from_byte);
7013           current_buffer->text->inhibit_shrinking = 1;
7014           del_range_both (from, from_byte, to, to_byte, 1);
7015           coding->src_pos = -chars;
7016           coding->src_pos_byte = -bytes;
7017         }
7018       else
7019         {
7020           coding->src_pos = from;
7021           coding->src_pos_byte = from_byte;
7022         }
7023     }
7024
7025   if (CODING_REQUIRE_DETECTION (coding))
7026     detect_coding (coding);
7027   attrs = CODING_ID_ATTRS (coding->id);
7028
7029   if (EQ (dst_object, Qt)
7030       || (! NILP (CODING_ATTR_POST_READ (attrs))
7031           && NILP (dst_object)))
7032     {
7033       coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
7034       coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
7035       coding->dst_pos = BEG;
7036       coding->dst_pos_byte = BEG_BYTE;
7037     }
7038   else if (BUFFERP (dst_object))
7039     {
7040       code_conversion_save (0, 0);
7041       coding->dst_object = dst_object;
7042       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7043       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7044       coding->dst_multibyte
7045         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7046     }
7047   else
7048     {
7049       code_conversion_save (0, 0);
7050       coding->dst_object = Qnil;
7051       /* Most callers presume this will return a multibyte result, and they
7052          won't use `binary' or `raw-text' anyway, so let's not worry about
7053          CODING_FOR_UNIBYTE.  */
7054       coding->dst_multibyte = Qt;
7055     }
7056
7057   decode_coding (coding);
7058
7059   if (BUFFERP (coding->dst_object))
7060     set_buffer_internal (XBUFFER (coding->dst_object));
7061
7062   if (! NILP (CODING_ATTR_POST_READ (attrs)))
7063     {
7064       struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7065       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
7066       Lisp_Object val;
7067
7068       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
7069       GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7070               old_deactivate_mark);
7071       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
7072                         make_number (coding->produced_char));
7073       UNGCPRO;
7074       CHECK_NATNUM (val);
7075       coding->produced_char += Z - prev_Z;
7076       coding->produced += Z_BYTE - prev_Z_BYTE;
7077     }
7078
7079   if (EQ (dst_object, Qt))
7080     {
7081       coding->dst_object = Fbuffer_string ();
7082     }
7083   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
7084     {
7085       set_buffer_internal (XBUFFER (coding->dst_object));
7086       if (dst_bytes < coding->produced)
7087         {
7088           destination = xrealloc (destination, coding->produced);
7089           if (! destination)
7090             {
7091               record_conversion_result (coding,
7092                                         CODING_RESULT_INSUFFICIENT_DST);
7093               unbind_to (count, Qnil);
7094               return;
7095             }
7096           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7097             move_gap_both (BEGV, BEGV_BYTE);
7098           bcopy (BEGV_ADDR, destination, coding->produced);
7099           coding->destination = destination;
7100         }
7101     }
7102
7103   if (saved_pt >= 0)
7104     {
7105       /* This is the case of:
7106          (BUFFERP (src_object) && EQ (src_object, dst_object))
7107          As we have moved PT while replacing the original buffer
7108          contents, we must recover it now.  */
7109       set_buffer_internal (XBUFFER (src_object));
7110       current_buffer->text->inhibit_shrinking = 0;
7111       if (saved_pt < from)
7112         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7113       else if (saved_pt < from + chars)
7114         TEMP_SET_PT_BOTH (from, from_byte);
7115       else if (! NILP (current_buffer->enable_multibyte_characters))
7116         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7117                           saved_pt_byte + (coding->produced - bytes));
7118       else
7119         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7120                           saved_pt_byte + (coding->produced - bytes));
7121
7122       if (need_marker_adjustment)
7123         {
7124           struct Lisp_Marker *tail;
7125
7126           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7127             if (tail->need_adjustment)
7128               {
7129                 tail->need_adjustment = 0;
7130                 if (tail->insertion_type)
7131                   {
7132                     tail->bytepos = from_byte;
7133                     tail->charpos = from;
7134                   }
7135                 else
7136                   {
7137                     tail->bytepos = from_byte + coding->produced;
7138                     tail->charpos
7139                       = (NILP (current_buffer->enable_multibyte_characters)
7140                          ? tail->bytepos : from + coding->produced_char);
7141                   }
7142               }
7143         }
7144     }
7145
7146   Vdeactivate_mark = old_deactivate_mark;
7147   unbind_to (count, coding->dst_object);
7148 }
7149
7150
7151 void
7152 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7153                       dst_object)
7154      struct coding_system *coding;
7155      Lisp_Object src_object;
7156      EMACS_INT from, from_byte, to, to_byte;
7157      Lisp_Object dst_object;
7158 {
7159   int count = specpdl_ptr - specpdl;
7160   EMACS_INT chars = to - from;
7161   EMACS_INT bytes = to_byte - from_byte;
7162   Lisp_Object attrs;
7163   int saved_pt = -1, saved_pt_byte;
7164   int need_marker_adjustment = 0;
7165   int kill_src_buffer = 0;
7166   Lisp_Object old_deactivate_mark;
7167
7168   old_deactivate_mark = Vdeactivate_mark;
7169
7170   coding->src_object = src_object;
7171   coding->src_chars = chars;
7172   coding->src_bytes = bytes;
7173   coding->src_multibyte = chars < bytes;
7174
7175   attrs = CODING_ID_ATTRS (coding->id);
7176
7177   if (EQ (src_object, dst_object))
7178     {
7179       struct Lisp_Marker *tail;
7180
7181       for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7182         {
7183           tail->need_adjustment
7184             = tail->charpos == (tail->insertion_type ? from : to);
7185           need_marker_adjustment |= tail->need_adjustment;
7186         }
7187     }
7188
7189   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
7190     {
7191       coding->src_object = code_conversion_save (1, coding->src_multibyte);
7192       set_buffer_internal (XBUFFER (coding->src_object));
7193       if (STRINGP (src_object))
7194         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7195       else if (BUFFERP (src_object))
7196         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7197       else
7198         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
7199
7200       if (EQ (src_object, dst_object))
7201         {
7202           set_buffer_internal (XBUFFER (src_object));
7203           saved_pt = PT, saved_pt_byte = PT_BYTE;
7204           del_range_both (from, from_byte, to, to_byte, 1);
7205           set_buffer_internal (XBUFFER (coding->src_object));
7206         }
7207
7208       {
7209         Lisp_Object args[3];
7210         struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7211
7212         GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7213                 old_deactivate_mark);
7214         args[0] = CODING_ATTR_PRE_WRITE (attrs);
7215         args[1] = make_number (BEG);
7216         args[2] = make_number (Z);
7217         safe_call (3, args);
7218         UNGCPRO;
7219       }
7220       if (XBUFFER (coding->src_object) != current_buffer)
7221         kill_src_buffer = 1;
7222       coding->src_object = Fcurrent_buffer ();
7223       if (BEG != GPT)
7224         move_gap_both (BEG, BEG_BYTE);
7225       coding->src_chars = Z - BEG;
7226       coding->src_bytes = Z_BYTE - BEG_BYTE;
7227       coding->src_pos = BEG;
7228       coding->src_pos_byte = BEG_BYTE;
7229       coding->src_multibyte = Z < Z_BYTE;
7230     }
7231   else if (STRINGP (src_object))
7232     {
7233       code_conversion_save (0, 0);
7234       coding->src_pos = from;
7235       coding->src_pos_byte = from_byte;
7236     }
7237   else if (BUFFERP (src_object))
7238     {
7239       code_conversion_save (0, 0);
7240       set_buffer_internal (XBUFFER (src_object));
7241       if (EQ (src_object, dst_object))
7242         {
7243           saved_pt = PT, saved_pt_byte = PT_BYTE;
7244           coding->src_object = del_range_1 (from, to, 1, 1);
7245           coding->src_pos = 0;
7246           coding->src_pos_byte = 0;
7247         }
7248       else
7249         {
7250           if (from < GPT && to >= GPT)
7251             move_gap_both (from, from_byte);
7252           coding->src_pos = from;
7253           coding->src_pos_byte = from_byte;
7254         }
7255     }
7256   else
7257     code_conversion_save (0, 0);
7258
7259   if (BUFFERP (dst_object))
7260     {
7261       coding->dst_object = dst_object;
7262       if (EQ (src_object, dst_object))
7263         {
7264           coding->dst_pos = from;
7265           coding->dst_pos_byte = from_byte;
7266         }
7267       else
7268         {
7269           coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7270           coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7271         }
7272       coding->dst_multibyte
7273         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7274     }
7275   else if (EQ (dst_object, Qt))
7276     {
7277       coding->dst_object = Qnil;
7278       coding->dst_bytes = coding->src_chars;
7279       if (coding->dst_bytes == 0)
7280         coding->dst_bytes = 1;
7281       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
7282       coding->dst_multibyte = 0;
7283     }
7284   else
7285     {
7286       coding->dst_object = Qnil;
7287       coding->dst_multibyte = 0;
7288     }
7289
7290   encode_coding (coding);
7291
7292   if (EQ (dst_object, Qt))
7293     {
7294       if (BUFFERP (coding->dst_object))
7295         coding->dst_object = Fbuffer_string ();
7296       else
7297         {
7298           coding->dst_object
7299             = make_unibyte_string ((char *) coding->destination,
7300                                    coding->produced);
7301           xfree (coding->destination);
7302         }
7303     }
7304
7305   if (saved_pt >= 0)
7306     {
7307       /* This is the case of:
7308          (BUFFERP (src_object) && EQ (src_object, dst_object))
7309          As we have moved PT while replacing the original buffer
7310          contents, we must recover it now.  */
7311       set_buffer_internal (XBUFFER (src_object));
7312       if (saved_pt < from)
7313         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7314       else if (saved_pt < from + chars)
7315         TEMP_SET_PT_BOTH (from, from_byte);
7316       else if (! NILP (current_buffer->enable_multibyte_characters))
7317         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7318                           saved_pt_byte + (coding->produced - bytes));
7319       else
7320         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7321                           saved_pt_byte + (coding->produced - bytes));
7322
7323       if (need_marker_adjustment)
7324         {
7325           struct Lisp_Marker *tail;
7326
7327           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7328             if (tail->need_adjustment)
7329               {
7330                 tail->need_adjustment = 0;
7331                 if (tail->insertion_type)
7332                   {
7333                     tail->bytepos = from_byte;
7334                     tail->charpos = from;
7335                   }
7336                 else
7337                   {
7338                     tail->bytepos = from_byte + coding->produced;
7339                     tail->charpos
7340                       = (NILP (current_buffer->enable_multibyte_characters)
7341                          ? tail->bytepos : from + coding->produced_char);
7342                   }
7343               }
7344         }
7345     }
7346
7347   if (kill_src_buffer)
7348     Fkill_buffer (coding->src_object);
7349
7350   Vdeactivate_mark = old_deactivate_mark;
7351   unbind_to (count, Qnil);
7352 }
7353
7354
7355 Lisp_Object
7356 preferred_coding_system ()
7357 {
7358   int id = coding_categories[coding_priorities[0]].id;
7359
7360   return CODING_ID_NAME (id);
7361 }
7362
7363 \f
7364 #ifdef emacs
7365 /*** 8. Emacs Lisp library functions ***/
7366
7367 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
7368        doc: /* Return t if OBJECT is nil or a coding-system.
7369 See the documentation of `define-coding-system' for information
7370 about coding-system objects.  */)
7371      (obj)
7372      Lisp_Object obj;
7373 {
7374   if (NILP (obj)
7375       || CODING_SYSTEM_ID (obj) >= 0)
7376     return Qt;
7377   if (! SYMBOLP (obj)
7378       || NILP (Fget (obj, Qcoding_system_define_form)))
7379     return Qnil;
7380   return Qt;
7381 }
7382
7383 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
7384        Sread_non_nil_coding_system, 1, 1, 0,
7385        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
7386      (prompt)
7387      Lisp_Object prompt;
7388 {
7389   Lisp_Object val;
7390   do
7391     {
7392       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7393                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
7394     }
7395   while (SCHARS (val) == 0);
7396   return (Fintern (val, Qnil));
7397 }
7398
7399 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
7400        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
7401 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
7402 Ignores case when completing coding systems (all Emacs coding systems
7403 are lower-case).  */)
7404      (prompt, default_coding_system)
7405      Lisp_Object prompt, default_coding_system;
7406 {
7407   Lisp_Object val;
7408   int count = SPECPDL_INDEX ();
7409
7410   if (SYMBOLP (default_coding_system))
7411     default_coding_system = SYMBOL_NAME (default_coding_system);
7412   specbind (Qcompletion_ignore_case, Qt);
7413   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7414                           Qt, Qnil, Qcoding_system_history,
7415                           default_coding_system, Qnil);
7416   unbind_to (count, Qnil);
7417   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
7418 }
7419
7420 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
7421        1, 1, 0,
7422        doc: /* Check validity of CODING-SYSTEM.
7423 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
7424 It is valid if it is nil or a symbol defined as a coding system by the
7425 function `define-coding-system'.  */)
7426   (coding_system)
7427      Lisp_Object coding_system;
7428 {
7429   Lisp_Object define_form;
7430
7431   define_form = Fget (coding_system, Qcoding_system_define_form);
7432   if (! NILP (define_form))
7433     {
7434       Fput (coding_system, Qcoding_system_define_form, Qnil);
7435       safe_eval (define_form);
7436     }
7437   if (!NILP (Fcoding_system_p (coding_system)))
7438     return coding_system;
7439   xsignal1 (Qcoding_system_error, coding_system);
7440 }
7441
7442 \f
7443 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
7444    HIGHEST is nonzero, return the coding system of the highest
7445    priority among the detected coding systems.  Otherwize return a
7446    list of detected coding systems sorted by their priorities.  If
7447    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
7448    multibyte form but contains only ASCII and eight-bit chars.
7449    Otherwise, the bytes are raw bytes.
7450
7451    CODING-SYSTEM controls the detection as below:
7452
7453    If it is nil, detect both text-format and eol-format.  If the
7454    text-format part of CODING-SYSTEM is already specified
7455    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
7456    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
7457    detect only text-format.  */
7458
7459 Lisp_Object
7460 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
7461                       coding_system)
7462      const unsigned char *src;
7463      EMACS_INT src_chars, src_bytes;
7464      int highest;
7465      int multibytep;
7466      Lisp_Object coding_system;
7467 {
7468   const unsigned char *src_end = src + src_bytes;
7469   Lisp_Object attrs, eol_type;
7470   Lisp_Object val;
7471   struct coding_system coding;
7472   int id;
7473   struct coding_detection_info detect_info;
7474   enum coding_category base_category;
7475
7476   if (NILP (coding_system))
7477     coding_system = Qundecided;
7478   setup_coding_system (coding_system, &coding);
7479   attrs = CODING_ID_ATTRS (coding.id);
7480   eol_type = CODING_ID_EOL_TYPE (coding.id);
7481   coding_system = CODING_ATTR_BASE_NAME (attrs);
7482
7483   coding.source = src;
7484   coding.src_chars = src_chars;
7485   coding.src_bytes = src_bytes;
7486   coding.src_multibyte = multibytep;
7487   coding.consumed = 0;
7488   coding.mode |= CODING_MODE_LAST_BLOCK;
7489
7490   detect_info.checked = detect_info.found = detect_info.rejected = 0;
7491
7492   /* At first, detect text-format if necessary.  */
7493   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
7494   if (base_category == coding_category_undecided)
7495     {
7496       enum coding_category category;
7497       struct coding_system *this;
7498       int c, i;
7499
7500       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
7501       for (i = 0; src < src_end; i++, src++)
7502         {
7503           c = *src;
7504           if (c & 0x80)
7505             break;
7506           if (c < 0x20
7507               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
7508               && ! inhibit_iso_escape_detection)
7509             {
7510               coding.head_ascii = src - coding.source;
7511               if (detect_coding_iso_2022 (&coding, &detect_info))
7512                 {
7513                   /* We have scanned the whole data.  */
7514                   if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
7515                     /* We didn't find an 8-bit code.  */
7516                     src = src_end;
7517                   break;
7518                 }
7519             }
7520         }
7521       coding.head_ascii = src - coding.source;
7522
7523       if (src < src_end
7524           || detect_info.found)
7525         {
7526           if (src == src_end)
7527             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
7528             for (i = 0; i < coding_category_raw_text; i++)
7529               {
7530                 category = coding_priorities[i];
7531                 this = coding_categories + category;
7532                 if (detect_info.found & (1 << category))
7533                   break;
7534               }
7535           else
7536             for (i = 0; i < coding_category_raw_text; i++)
7537               {
7538                 category = coding_priorities[i];
7539                 this = coding_categories + category;
7540
7541                 if (this->id < 0)
7542                   {
7543                     /* No coding system of this category is defined.  */
7544                     detect_info.rejected |= (1 << category);
7545                   }
7546                 else if (category >= coding_category_raw_text)
7547                   continue;
7548                 else if (detect_info.checked & (1 << category))
7549                   {
7550                     if (highest
7551                         && (detect_info.found & (1 << category)))
7552                       break;
7553                   }
7554                 else
7555                   {
7556                     if ((*(this->detector)) (&coding, &detect_info)
7557                         && highest
7558                         && (detect_info.found & (1 << category)))
7559                       {
7560                         if (category == coding_category_utf_16_auto)
7561                           {
7562                             if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7563                               category = coding_category_utf_16_le;
7564                             else
7565                               category = coding_category_utf_16_be;
7566                           }
7567                         break;
7568                       }
7569                   }
7570               }
7571         }
7572
7573       if (detect_info.rejected == CATEGORY_MASK_ANY)
7574         {
7575           detect_info.found = CATEGORY_MASK_RAW_TEXT;
7576           id = coding_categories[coding_category_raw_text].id;
7577           val = Fcons (make_number (id), Qnil);
7578         }
7579       else if (! detect_info.rejected && ! detect_info.found)
7580         {
7581           detect_info.found = CATEGORY_MASK_ANY;
7582           id = coding_categories[coding_category_undecided].id;
7583           val = Fcons (make_number (id), Qnil);
7584         }
7585       else if (highest)
7586         {
7587           if (detect_info.found)
7588             {
7589               detect_info.found = 1 << category;
7590               val = Fcons (make_number (this->id), Qnil);
7591             }
7592           else
7593             for (i = 0; i < coding_category_raw_text; i++)
7594               if (! (detect_info.rejected & (1 << coding_priorities[i])))
7595                 {
7596                   detect_info.found = 1 << coding_priorities[i];
7597                   id = coding_categories[coding_priorities[i]].id;
7598                   val = Fcons (make_number (id), Qnil);
7599                   break;
7600                 }
7601         }
7602       else
7603         {
7604           int mask = detect_info.rejected | detect_info.found;
7605           int found = 0;
7606           val = Qnil;
7607
7608           for (i = coding_category_raw_text - 1; i >= 0; i--)
7609             {
7610               category = coding_priorities[i];
7611               if (! (mask & (1 << category)))
7612                 {
7613                   found |= 1 << category;
7614                   id = coding_categories[category].id;
7615                   if (id >= 0)
7616                     val = Fcons (make_number (id), val);
7617                 }
7618             }
7619           for (i = coding_category_raw_text - 1; i >= 0; i--)
7620             {
7621               category = coding_priorities[i];
7622               if (detect_info.found & (1 << category))
7623                 {
7624                   id = coding_categories[category].id;
7625                   val = Fcons (make_number (id), val);
7626                 }
7627             }
7628           detect_info.found |= found;
7629         }
7630     }
7631   else if (base_category == coding_category_utf_16_auto)
7632     {
7633       if (detect_coding_utf_16 (&coding, &detect_info))
7634         {
7635           struct coding_system *this;
7636
7637           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7638             this = coding_categories + coding_category_utf_16_le;
7639           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
7640             this = coding_categories + coding_category_utf_16_be;
7641           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
7642             this = coding_categories + coding_category_utf_16_be_nosig;
7643           else
7644             this = coding_categories + coding_category_utf_16_le_nosig;
7645           val = Fcons (make_number (this->id), Qnil);
7646         }
7647     }
7648   else
7649     {
7650       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
7651       val = Fcons (make_number (coding.id), Qnil);
7652     }
7653
7654   /* Then, detect eol-format if necessary.  */
7655   {
7656     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
7657     Lisp_Object tail;
7658
7659     if (VECTORP (eol_type))
7660       {
7661         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
7662           normal_eol = detect_eol (coding.source, src_bytes,
7663                                    coding_category_raw_text);
7664         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
7665                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
7666           utf_16_be_eol = detect_eol (coding.source, src_bytes,
7667                                       coding_category_utf_16_be);
7668         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
7669                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
7670           utf_16_le_eol = detect_eol (coding.source, src_bytes,
7671                                       coding_category_utf_16_le);
7672       }
7673     else
7674       {
7675         if (EQ (eol_type, Qunix))
7676           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
7677         else if (EQ (eol_type, Qdos))
7678           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
7679         else
7680           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
7681       }
7682
7683     for (tail = val; CONSP (tail); tail = XCDR (tail))
7684       {
7685         enum coding_category category;
7686         int this_eol;
7687
7688         id = XINT (XCAR (tail));
7689         attrs = CODING_ID_ATTRS (id);
7690         category = XINT (CODING_ATTR_CATEGORY (attrs));
7691         eol_type = CODING_ID_EOL_TYPE (id);
7692         if (VECTORP (eol_type))
7693           {
7694             if (category == coding_category_utf_16_be
7695                 || category == coding_category_utf_16_be_nosig)
7696               this_eol = utf_16_be_eol;
7697             else if (category == coding_category_utf_16_le
7698                      || category == coding_category_utf_16_le_nosig)
7699               this_eol = utf_16_le_eol;
7700             else
7701               this_eol = normal_eol;
7702
7703             if (this_eol == EOL_SEEN_LF)
7704               XSETCAR (tail, AREF (eol_type, 0));
7705             else if (this_eol == EOL_SEEN_CRLF)
7706               XSETCAR (tail, AREF (eol_type, 1));
7707             else if (this_eol == EOL_SEEN_CR)
7708               XSETCAR (tail, AREF (eol_type, 2));
7709             else
7710               XSETCAR (tail, CODING_ID_NAME (id));
7711           }
7712         else
7713           XSETCAR (tail, CODING_ID_NAME (id));
7714       }
7715   }
7716
7717   return (highest ? XCAR (val) : val);
7718 }
7719
7720
7721 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
7722        2, 3, 0,
7723        doc: /* Detect coding system of the text in the region between START and END.
7724 Return a list of possible coding systems ordered by priority.
7725
7726 If only ASCII characters are found (except for such ISO-2022 control
7727 characters ISO-2022 as ESC), it returns a list of single element
7728 `undecided' or its subsidiary coding system according to a detected
7729 end-of-line format.
7730
7731 If optional argument HIGHEST is non-nil, return the coding system of
7732 highest priority.  */)
7733      (start, end, highest)
7734      Lisp_Object start, end, highest;
7735 {
7736   int from, to;
7737   int from_byte, to_byte;
7738
7739   CHECK_NUMBER_COERCE_MARKER (start);
7740   CHECK_NUMBER_COERCE_MARKER (end);
7741
7742   validate_region (&start, &end);
7743   from = XINT (start), to = XINT (end);
7744   from_byte = CHAR_TO_BYTE (from);
7745   to_byte = CHAR_TO_BYTE (to);
7746
7747   if (from < GPT && to >= GPT)
7748     move_gap_both (to, to_byte);
7749
7750   return detect_coding_system (BYTE_POS_ADDR (from_byte),
7751                                to - from, to_byte - from_byte,
7752                                !NILP (highest),
7753                                !NILP (current_buffer
7754                                       ->enable_multibyte_characters),
7755                                Qnil);
7756 }
7757
7758 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
7759        1, 2, 0,
7760        doc: /* Detect coding system of the text in STRING.
7761 Return a list of possible coding systems ordered by priority.
7762
7763 If only ASCII characters are found (except for such ISO-2022 control
7764 characters ISO-2022 as ESC), it returns a list of single element
7765 `undecided' or its subsidiary coding system according to a detected
7766 end-of-line format.
7767
7768 If optional argument HIGHEST is non-nil, return the coding system of
7769 highest priority.  */)
7770      (string, highest)
7771      Lisp_Object string, highest;
7772 {
7773   CHECK_STRING (string);
7774
7775   return detect_coding_system (SDATA (string),
7776                                SCHARS (string), SBYTES (string),
7777                                !NILP (highest), STRING_MULTIBYTE (string),
7778                                Qnil);
7779 }
7780
7781
7782 static INLINE int
7783 char_encodable_p (c, attrs)
7784      int c;
7785      Lisp_Object attrs;
7786 {
7787   Lisp_Object tail;
7788   struct charset *charset;
7789   Lisp_Object translation_table;
7790
7791   translation_table = CODING_ATTR_TRANS_TBL (attrs);
7792   if (! NILP (translation_table))
7793     c = translate_char (translation_table, c);
7794   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
7795        CONSP (tail); tail = XCDR (tail))
7796     {
7797       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7798       if (CHAR_CHARSET_P (c, charset))
7799         break;
7800     }
7801   return (! NILP (tail));
7802 }
7803
7804
7805 /* Return a list of coding systems that safely encode the text between
7806    START and END.  If EXCLUDE is non-nil, it is a list of coding
7807    systems not to check.  The returned list doesn't contain any such
7808    coding systems.  In any case, if the text contains only ASCII or is
7809    unibyte, return t.  */
7810
7811 DEFUN ("find-coding-systems-region-internal",
7812        Ffind_coding_systems_region_internal,
7813        Sfind_coding_systems_region_internal, 2, 3, 0,
7814        doc: /* Internal use only.  */)
7815      (start, end, exclude)
7816      Lisp_Object start, end, exclude;
7817 {
7818   Lisp_Object coding_attrs_list, safe_codings;
7819   EMACS_INT start_byte, end_byte;
7820   const unsigned char *p, *pbeg, *pend;
7821   int c;
7822   Lisp_Object tail, elt;
7823
7824   if (STRINGP (start))
7825     {
7826       if (!STRING_MULTIBYTE (start)
7827           || SCHARS (start) == SBYTES (start))
7828         return Qt;
7829       start_byte = 0;
7830       end_byte = SBYTES (start);
7831     }
7832   else
7833     {
7834       CHECK_NUMBER_COERCE_MARKER (start);
7835       CHECK_NUMBER_COERCE_MARKER (end);
7836       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7837         args_out_of_range (start, end);
7838       if (NILP (current_buffer->enable_multibyte_characters))
7839         return Qt;
7840       start_byte = CHAR_TO_BYTE (XINT (start));
7841       end_byte = CHAR_TO_BYTE (XINT (end));
7842       if (XINT (end) - XINT (start) == end_byte - start_byte)
7843         return Qt;
7844
7845       if (XINT (start) < GPT && XINT (end) > GPT)
7846         {
7847           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7848             move_gap_both (XINT (start), start_byte);
7849           else
7850             move_gap_both (XINT (end), end_byte);
7851         }
7852     }
7853
7854   coding_attrs_list = Qnil;
7855   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
7856     if (NILP (exclude)
7857         || NILP (Fmemq (XCAR (tail), exclude)))
7858       {
7859         Lisp_Object attrs;
7860
7861         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
7862         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
7863             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7864           {
7865             ASET (attrs, coding_attr_trans_tbl,
7866                   get_translation_table (attrs, 1, NULL));
7867             coding_attrs_list = Fcons (attrs, coding_attrs_list);
7868           }
7869       }
7870
7871   if (STRINGP (start))
7872     p = pbeg = SDATA (start);
7873   else
7874     p = pbeg = BYTE_POS_ADDR (start_byte);
7875   pend = p + (end_byte - start_byte);
7876
7877   while (p < pend && ASCII_BYTE_P (*p)) p++;
7878   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7879
7880   while (p < pend)
7881     {
7882       if (ASCII_BYTE_P (*p))
7883         p++;
7884       else
7885         {
7886           c = STRING_CHAR_ADVANCE (p);
7887
7888           charset_map_loaded = 0;
7889           for (tail = coding_attrs_list; CONSP (tail);)
7890             {
7891               elt = XCAR (tail);
7892               if (NILP (elt))
7893                 tail = XCDR (tail);
7894               else if (char_encodable_p (c, elt))
7895                 tail = XCDR (tail);
7896               else if (CONSP (XCDR (tail)))
7897                 {
7898                   XSETCAR (tail, XCAR (XCDR (tail)));
7899                   XSETCDR (tail, XCDR (XCDR (tail)));
7900                 }
7901               else
7902                 {
7903                   XSETCAR (tail, Qnil);
7904                   tail = XCDR (tail);
7905                 }
7906             }
7907           if (charset_map_loaded)
7908             {
7909               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7910
7911               if (STRINGP (start))
7912                 pbeg = SDATA (start);
7913               else
7914                 pbeg = BYTE_POS_ADDR (start_byte);
7915               p = pbeg + p_offset;
7916               pend = pbeg + pend_offset;
7917             }
7918         }
7919     }
7920
7921   safe_codings = list2 (Qraw_text, Qno_conversion);
7922   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
7923     if (! NILP (XCAR (tail)))
7924       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
7925
7926   return safe_codings;
7927 }
7928
7929
7930 DEFUN ("unencodable-char-position", Funencodable_char_position,
7931        Sunencodable_char_position, 3, 5, 0,
7932        doc: /*
7933 Return position of first un-encodable character in a region.
7934 START and END specfiy the region and CODING-SYSTEM specifies the
7935 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
7936
7937 If optional 4th argument COUNT is non-nil, it specifies at most how
7938 many un-encodable characters to search.  In this case, the value is a
7939 list of positions.
7940
7941 If optional 5th argument STRING is non-nil, it is a string to search
7942 for un-encodable characters.  In that case, START and END are indexes
7943 to the string.  */)
7944      (start, end, coding_system, count, string)
7945      Lisp_Object start, end, coding_system, count, string;
7946 {
7947   int n;
7948   struct coding_system coding;
7949   Lisp_Object attrs, charset_list, translation_table;
7950   Lisp_Object positions;
7951   int from, to;
7952   const unsigned char *p, *stop, *pend;
7953   int ascii_compatible;
7954
7955   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
7956   attrs = CODING_ID_ATTRS (coding.id);
7957   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
7958     return Qnil;
7959   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
7960   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7961   translation_table = get_translation_table (attrs, 1, NULL);
7962
7963   if (NILP (string))
7964     {
7965       validate_region (&start, &end);
7966       from = XINT (start);
7967       to = XINT (end);
7968       if (NILP (current_buffer->enable_multibyte_characters)
7969           || (ascii_compatible
7970               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
7971         return Qnil;
7972       p = CHAR_POS_ADDR (from);
7973       pend = CHAR_POS_ADDR (to);
7974       if (from < GPT && to >= GPT)
7975         stop = GPT_ADDR;
7976       else
7977         stop = pend;
7978     }
7979   else
7980     {
7981       CHECK_STRING (string);
7982       CHECK_NATNUM (start);
7983       CHECK_NATNUM (end);
7984       from = XINT (start);
7985       to = XINT (end);
7986       if (from > to
7987           || to > SCHARS (string))
7988         args_out_of_range_3 (string, start, end);
7989       if (! STRING_MULTIBYTE (string))
7990         return Qnil;
7991       p = SDATA (string) + string_char_to_byte (string, from);
7992       stop = pend = SDATA (string) + string_char_to_byte (string, to);
7993       if (ascii_compatible && (to - from) == (pend - p))
7994         return Qnil;
7995     }
7996
7997   if (NILP (count))
7998     n = 1;
7999   else
8000     {
8001       CHECK_NATNUM (count);
8002       n = XINT (count);
8003     }
8004
8005   positions = Qnil;
8006   while (1)
8007     {
8008       int c;
8009
8010       if (ascii_compatible)
8011         while (p < stop && ASCII_BYTE_P (*p))
8012           p++, from++;
8013       if (p >= stop)
8014         {
8015           if (p >= pend)
8016             break;
8017           stop = pend;
8018           p = GAP_END_ADDR;
8019         }
8020
8021       c = STRING_CHAR_ADVANCE (p);
8022       if (! (ASCII_CHAR_P (c) && ascii_compatible)
8023           && ! char_charset (translate_char (translation_table, c),
8024                              charset_list, NULL))
8025         {
8026           positions = Fcons (make_number (from), positions);
8027           n--;
8028           if (n == 0)
8029             break;
8030         }
8031
8032       from++;
8033     }
8034
8035   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
8036 }
8037
8038
8039 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
8040        Scheck_coding_systems_region, 3, 3, 0,
8041        doc: /* Check if the region is encodable by coding systems.
8042
8043 START and END are buffer positions specifying the region.
8044 CODING-SYSTEM-LIST is a list of coding systems to check.
8045
8046 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
8047 CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
8048 whole region, POS0, POS1, ... are buffer positions where non-encodable
8049 characters are found.
8050
8051 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
8052 value is nil.
8053
8054 START may be a string.  In that case, check if the string is
8055 encodable, and the value contains indices to the string instead of
8056 buffer positions.  END is ignored.  */)
8057      (start, end, coding_system_list)
8058      Lisp_Object start, end, coding_system_list;
8059 {
8060   Lisp_Object list;
8061   EMACS_INT start_byte, end_byte;
8062   int pos;
8063   const unsigned char *p, *pbeg, *pend;
8064   int c;
8065   Lisp_Object tail, elt, attrs;
8066
8067   if (STRINGP (start))
8068     {
8069       if (!STRING_MULTIBYTE (start)
8070           && SCHARS (start) != SBYTES (start))
8071         return Qnil;
8072       start_byte = 0;
8073       end_byte = SBYTES (start);
8074       pos = 0;
8075     }
8076   else
8077     {
8078       CHECK_NUMBER_COERCE_MARKER (start);
8079       CHECK_NUMBER_COERCE_MARKER (end);
8080       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
8081         args_out_of_range (start, end);
8082       if (NILP (current_buffer->enable_multibyte_characters))
8083         return Qnil;
8084       start_byte = CHAR_TO_BYTE (XINT (start));
8085       end_byte = CHAR_TO_BYTE (XINT (end));
8086       if (XINT (end) - XINT (start) == end_byte - start_byte)
8087         return Qt;
8088
8089       if (XINT (start) < GPT && XINT (end) > GPT)
8090         {
8091           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8092             move_gap_both (XINT (start), start_byte);
8093           else
8094             move_gap_both (XINT (end), end_byte);
8095         }
8096       pos = XINT (start);
8097     }
8098
8099   list = Qnil;
8100   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
8101     {
8102       elt = XCAR (tail);
8103       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
8104       ASET (attrs, coding_attr_trans_tbl,
8105             get_translation_table (attrs, 1, NULL));
8106       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
8107     }
8108
8109   if (STRINGP (start))
8110     p = pbeg = SDATA (start);
8111   else
8112     p = pbeg = BYTE_POS_ADDR (start_byte);
8113   pend = p + (end_byte - start_byte);
8114
8115   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8116   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8117
8118   while (p < pend)
8119     {
8120       if (ASCII_BYTE_P (*p))
8121         p++;
8122       else
8123         {
8124           c = STRING_CHAR_ADVANCE (p);
8125
8126           charset_map_loaded = 0;
8127           for (tail = list; CONSP (tail); tail = XCDR (tail))
8128             {
8129               elt = XCDR (XCAR (tail));
8130               if (! char_encodable_p (c, XCAR (elt)))
8131                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8132             }
8133           if (charset_map_loaded)
8134             {
8135               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8136
8137               if (STRINGP (start))
8138                 pbeg = SDATA (start);
8139               else
8140                 pbeg = BYTE_POS_ADDR (start_byte);
8141               p = pbeg + p_offset;
8142               pend = pbeg + pend_offset;
8143             }
8144         }
8145       pos++;
8146     }
8147
8148   tail = list;
8149   list = Qnil;
8150   for (; CONSP (tail); tail = XCDR (tail))
8151     {
8152       elt = XCAR (tail);
8153       if (CONSP (XCDR (XCDR (elt))))
8154         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8155                       list);
8156     }
8157
8158   return list;
8159 }
8160
8161
8162 Lisp_Object
8163 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
8164      Lisp_Object start, end, coding_system, dst_object;
8165      int encodep, norecord;
8166 {
8167   struct coding_system coding;
8168   EMACS_INT from, from_byte, to, to_byte;
8169   Lisp_Object src_object;
8170
8171   CHECK_NUMBER_COERCE_MARKER (start);
8172   CHECK_NUMBER_COERCE_MARKER (end);
8173   if (NILP (coding_system))
8174     coding_system = Qno_conversion;
8175   else
8176     CHECK_CODING_SYSTEM (coding_system);
8177   src_object = Fcurrent_buffer ();
8178   if (NILP (dst_object))
8179     dst_object = src_object;
8180   else if (! EQ (dst_object, Qt))
8181     CHECK_BUFFER (dst_object);
8182
8183   validate_region (&start, &end);
8184   from = XFASTINT (start);
8185   from_byte = CHAR_TO_BYTE (from);
8186   to = XFASTINT (end);
8187   to_byte = CHAR_TO_BYTE (to);
8188
8189   setup_coding_system (coding_system, &coding);
8190   coding.mode |= CODING_MODE_LAST_BLOCK;
8191
8192   if (encodep)
8193     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8194                           dst_object);
8195   else
8196     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8197                           dst_object);
8198   if (! norecord)
8199     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8200
8201   return (BUFFERP (dst_object)
8202           ? make_number (coding.produced_char)
8203           : coding.dst_object);
8204 }
8205
8206
8207 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
8208        3, 4, "r\nzCoding system: ",
8209        doc: /* Decode the current region from the specified coding system.
8210 When called from a program, takes four arguments:
8211         START, END, CODING-SYSTEM, and DESTINATION.
8212 START and END are buffer positions.
8213
8214 Optional 4th arguments DESTINATION specifies where the decoded text goes.
8215 If nil, the region between START and END is replaced by the decoded text.
8216 If buffer, the decoded text is inserted in the buffer.
8217 If t, the decoded text is returned.
8218
8219 This function sets `last-coding-system-used' to the precise coding system
8220 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8221 not fully specified.)
8222 It returns the length of the decoded text.  */)
8223      (start, end, coding_system, destination)
8224      Lisp_Object start, end, coding_system, destination;
8225 {
8226   return code_convert_region (start, end, coding_system, destination, 0, 0);
8227 }
8228
8229 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
8230        3, 4, "r\nzCoding system: ",
8231        doc: /* Encode the current region by specified coding system.
8232 When called from a program, takes three arguments:
8233 START, END, and CODING-SYSTEM.  START and END are buffer positions.
8234
8235 Optional 4th arguments DESTINATION specifies where the encoded text goes.
8236 If nil, the region between START and END is replace by the encoded text.
8237 If buffer, the encoded text is inserted in the buffer.
8238 If t, the encoded text is returned.
8239
8240 This function sets `last-coding-system-used' to the precise coding system
8241 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8242 not fully specified.)
8243 It returns the length of the encoded text.  */)
8244   (start, end, coding_system, destination)
8245      Lisp_Object start, end, coding_system, destination;
8246 {
8247   return code_convert_region (start, end, coding_system, destination, 1, 0);
8248 }
8249
8250 Lisp_Object
8251 code_convert_string (string, coding_system, dst_object,
8252                      encodep, nocopy, norecord)
8253      Lisp_Object string, coding_system, dst_object;
8254      int encodep, nocopy, norecord;
8255 {
8256   struct coding_system coding;
8257   EMACS_INT chars, bytes;
8258
8259   CHECK_STRING (string);
8260   if (NILP (coding_system))
8261     {
8262       if (! norecord)
8263         Vlast_coding_system_used = Qno_conversion;
8264       if (NILP (dst_object))
8265         return (nocopy ? Fcopy_sequence (string) : string);
8266     }
8267
8268   if (NILP (coding_system))
8269     coding_system = Qno_conversion;
8270   else
8271     CHECK_CODING_SYSTEM (coding_system);
8272   if (NILP (dst_object))
8273     dst_object = Qt;
8274   else if (! EQ (dst_object, Qt))
8275     CHECK_BUFFER (dst_object);
8276
8277   setup_coding_system (coding_system, &coding);
8278   coding.mode |= CODING_MODE_LAST_BLOCK;
8279   chars = SCHARS (string);
8280   bytes = SBYTES (string);
8281   if (encodep)
8282     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8283   else
8284     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8285   if (! norecord)
8286     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8287
8288   return (BUFFERP (dst_object)
8289           ? make_number (coding.produced_char)
8290           : coding.dst_object);
8291 }
8292
8293
8294 /* Encode or decode STRING according to CODING_SYSTEM.
8295    Do not set Vlast_coding_system_used.
8296
8297    This function is called only from macros DECODE_FILE and
8298    ENCODE_FILE, thus we ignore character composition.  */
8299
8300 Lisp_Object
8301 code_convert_string_norecord (string, coding_system, encodep)
8302      Lisp_Object string, coding_system;
8303      int encodep;
8304 {
8305   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
8306 }
8307
8308
8309 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
8310        2, 4, 0,
8311        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
8312
8313 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
8314 if the decoding operation is trivial.
8315
8316 Optional fourth arg BUFFER non-nil meant that the decoded text is
8317 inserted in BUFFER instead of returned as a string.  In this case,
8318 the return value is BUFFER.
8319
8320 This function sets `last-coding-system-used' to the precise coding system
8321 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8322 not fully specified.  */)
8323   (string, coding_system, nocopy, buffer)
8324      Lisp_Object string, coding_system, nocopy, buffer;
8325 {
8326   return code_convert_string (string, coding_system, buffer,
8327                               0, ! NILP (nocopy), 0);
8328 }
8329
8330 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
8331        2, 4, 0,
8332        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
8333
8334 Optional third arg NOCOPY non-nil means it is OK to return STRING
8335 itself if the encoding operation is trivial.
8336
8337 Optional fourth arg BUFFER non-nil meant that the encoded text is
8338 inserted in BUFFER instead of returned as a string.  In this case,
8339 the return value is BUFFER.
8340
8341 This function sets `last-coding-system-used' to the precise coding system
8342 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8343 not fully specified.)  */)
8344      (string, coding_system, nocopy, buffer)
8345      Lisp_Object string, coding_system, nocopy, buffer;
8346 {
8347   return code_convert_string (string, coding_system, buffer,
8348                               1, ! NILP (nocopy), 1);
8349 }
8350
8351 \f
8352 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
8353        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
8354 Return the corresponding character.  */)
8355      (code)
8356      Lisp_Object code;
8357 {
8358   Lisp_Object spec, attrs, val;
8359   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
8360   int c;
8361
8362   CHECK_NATNUM (code);
8363   c = XFASTINT (code);
8364   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8365   attrs = AREF (spec, 0);
8366
8367   if (ASCII_BYTE_P (c)
8368       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8369     return code;
8370
8371   val = CODING_ATTR_CHARSET_LIST (attrs);
8372   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8373   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8374   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
8375
8376   if (c <= 0x7F)
8377     charset = charset_roman;
8378   else if (c >= 0xA0 && c < 0xDF)
8379     {
8380       charset = charset_kana;
8381       c -= 0x80;
8382     }
8383   else
8384     {
8385       int s1 = c >> 8, s2 = c & 0xFF;
8386
8387       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
8388           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
8389         error ("Invalid code: %d", code);
8390       SJIS_TO_JIS (c);
8391       charset = charset_kanji;
8392     }
8393   c = DECODE_CHAR (charset, c);
8394   if (c < 0)
8395     error ("Invalid code: %d", code);
8396   return make_number (c);
8397 }
8398
8399
8400 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
8401        doc: /* Encode a Japanese character CH to shift_jis encoding.
8402 Return the corresponding code in SJIS.  */)
8403      (ch)
8404     Lisp_Object ch;
8405 {
8406   Lisp_Object spec, attrs, charset_list;
8407   int c;
8408   struct charset *charset;
8409   unsigned code;
8410
8411   CHECK_CHARACTER (ch);
8412   c = XFASTINT (ch);
8413   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8414   attrs = AREF (spec, 0);
8415
8416   if (ASCII_CHAR_P (c)
8417       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8418     return ch;
8419
8420   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8421   charset = char_charset (c, charset_list, &code);
8422   if (code == CHARSET_INVALID_CODE (charset))
8423     error ("Can't encode by shift_jis encoding: %d", c);
8424   JIS_TO_SJIS (code);
8425
8426   return make_number (code);
8427 }
8428
8429 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
8430        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
8431 Return the corresponding character.  */)
8432      (code)
8433      Lisp_Object code;
8434 {
8435   Lisp_Object spec, attrs, val;
8436   struct charset *charset_roman, *charset_big5, *charset;
8437   int c;
8438
8439   CHECK_NATNUM (code);
8440   c = XFASTINT (code);
8441   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8442   attrs = AREF (spec, 0);
8443
8444   if (ASCII_BYTE_P (c)
8445       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8446     return code;
8447
8448   val = CODING_ATTR_CHARSET_LIST (attrs);
8449   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8450   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
8451
8452   if (c <= 0x7F)
8453     charset = charset_roman;
8454   else
8455     {
8456       int b1 = c >> 8, b2 = c & 0x7F;
8457       if (b1 < 0xA1 || b1 > 0xFE
8458           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
8459         error ("Invalid code: %d", code);
8460       charset = charset_big5;
8461     }
8462   c = DECODE_CHAR (charset, (unsigned )c);
8463   if (c < 0)
8464     error ("Invalid code: %d", code);
8465   return make_number (c);
8466 }
8467
8468 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
8469        doc: /* Encode the Big5 character CH to BIG5 coding system.
8470 Return the corresponding character code in Big5.  */)
8471      (ch)
8472      Lisp_Object ch;
8473 {
8474   Lisp_Object spec, attrs, charset_list;
8475   struct charset *charset;
8476   int c;
8477   unsigned code;
8478
8479   CHECK_CHARACTER (ch);
8480   c = XFASTINT (ch);
8481   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8482   attrs = AREF (spec, 0);
8483   if (ASCII_CHAR_P (c)
8484       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8485     return ch;
8486
8487   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8488   charset = char_charset (c, charset_list, &code);
8489   if (code == CHARSET_INVALID_CODE (charset))
8490     error ("Can't encode by Big5 encoding: %d", c);
8491
8492   return make_number (code);
8493 }
8494
8495 \f
8496 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
8497        Sset_terminal_coding_system_internal, 1, 2, 0,
8498        doc: /* Internal use only.  */)
8499      (coding_system, terminal)
8500      Lisp_Object coding_system;
8501      Lisp_Object terminal;
8502 {
8503   struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
8504   CHECK_SYMBOL (coding_system);
8505   setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
8506   /* We had better not send unsafe characters to terminal.  */
8507   terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
8508   /* Characer composition should be disabled.  */
8509   terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8510   terminal_coding->src_multibyte = 1;
8511   terminal_coding->dst_multibyte = 0;
8512   return Qnil;
8513 }
8514
8515 DEFUN ("set-safe-terminal-coding-system-internal",
8516        Fset_safe_terminal_coding_system_internal,
8517        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
8518        doc: /* Internal use only.  */)
8519      (coding_system)
8520      Lisp_Object coding_system;
8521 {
8522   CHECK_SYMBOL (coding_system);
8523   setup_coding_system (Fcheck_coding_system (coding_system),
8524                        &safe_terminal_coding);
8525   /* Characer composition should be disabled.  */
8526   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8527   safe_terminal_coding.src_multibyte = 1;
8528   safe_terminal_coding.dst_multibyte = 0;
8529   return Qnil;
8530 }
8531
8532 DEFUN ("terminal-coding-system", Fterminal_coding_system,
8533        Sterminal_coding_system, 0, 1, 0,
8534        doc: /* Return coding system specified for terminal output on the given terminal.
8535 TERMINAL may be a terminal id, a frame, or nil for the selected
8536 frame's terminal device.  */)
8537      (terminal)
8538      Lisp_Object terminal;
8539 {
8540   struct coding_system *terminal_coding
8541     = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
8542   Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
8543
8544   /* For backward compatibility, return nil if it is `undecided'. */
8545   return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
8546 }
8547
8548 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
8549        Sset_keyboard_coding_system_internal, 1, 2, 0,
8550        doc: /* Internal use only.  */)
8551      (coding_system, terminal)
8552      Lisp_Object coding_system;
8553      Lisp_Object terminal;
8554 {
8555   struct terminal *t = get_terminal (terminal, 1);
8556   CHECK_SYMBOL (coding_system);
8557   setup_coding_system (Fcheck_coding_system (coding_system),
8558                        TERMINAL_KEYBOARD_CODING (t));
8559   /* Characer composition should be disabled.  */
8560   TERMINAL_KEYBOARD_CODING (t)->common_flags
8561     &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8562   return Qnil;
8563 }
8564
8565 DEFUN ("keyboard-coding-system",
8566        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
8567        doc: /* Return coding system specified for decoding keyboard input.  */)
8568      (terminal)
8569      Lisp_Object terminal;
8570 {
8571   return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
8572                          (get_terminal (terminal, 1))->id);
8573 }
8574
8575 \f
8576 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
8577        Sfind_operation_coding_system,  1, MANY, 0,
8578        doc: /* Choose a coding system for an operation based on the target name.
8579 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
8580 DECODING-SYSTEM is the coding system to use for decoding
8581 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
8582 for encoding (in case OPERATION does encoding).
8583
8584 The first argument OPERATION specifies an I/O primitive:
8585   For file I/O, `insert-file-contents' or `write-region'.
8586   For process I/O, `call-process', `call-process-region', or `start-process'.
8587   For network I/O, `open-network-stream'.
8588
8589 The remaining arguments should be the same arguments that were passed
8590 to the primitive.  Depending on which primitive, one of those arguments
8591 is selected as the TARGET.  For example, if OPERATION does file I/O,
8592 whichever argument specifies the file name is TARGET.
8593
8594 TARGET has a meaning which depends on OPERATION:
8595   For file I/O, TARGET is a file name (except for the special case below).
8596   For process I/O, TARGET is a process name.
8597   For network I/O, TARGET is a service name or a port number
8598
8599 This function looks up what specified for TARGET in,
8600 `file-coding-system-alist', `process-coding-system-alist',
8601 or `network-coding-system-alist' depending on OPERATION.
8602 They may specify a coding system, a cons of coding systems,
8603 or a function symbol to call.
8604 In the last case, we call the function with one argument,
8605 which is a list of all the arguments given to this function.
8606 If the function can't decide a coding system, it can return
8607 `undecided' so that the normal code-detection is performed.
8608
8609 If OPERATION is `insert-file-contents', the argument corresponding to
8610 TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
8611 file name to look up, and BUFFER is a buffer that contains the file's
8612 contents (not yet decoded).  If `file-coding-system-alist' specifies a
8613 function to call for FILENAME, that function should examine the
8614 contents of BUFFER instead of reading the file.
8615
8616 usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
8617      (nargs, args)
8618      int nargs;
8619      Lisp_Object *args;
8620 {
8621   Lisp_Object operation, target_idx, target, val;
8622   register Lisp_Object chain;
8623
8624   if (nargs < 2)
8625     error ("Too few arguments");
8626   operation = args[0];
8627   if (!SYMBOLP (operation)
8628       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
8629     error ("Invalid first arguement");
8630   if (nargs < 1 + XINT (target_idx))
8631     error ("Too few arguments for operation: %s",
8632            SDATA (SYMBOL_NAME (operation)));
8633   target = args[XINT (target_idx) + 1];
8634   if (!(STRINGP (target)
8635         || (EQ (operation, Qinsert_file_contents) && CONSP (target)
8636             && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
8637         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
8638     error ("Invalid %dth argument", XINT (target_idx) + 1);
8639   if (CONSP (target))
8640     target = XCAR (target);
8641
8642   chain = ((EQ (operation, Qinsert_file_contents)
8643             || EQ (operation, Qwrite_region))
8644            ? Vfile_coding_system_alist
8645            : (EQ (operation, Qopen_network_stream)
8646               ? Vnetwork_coding_system_alist
8647               : Vprocess_coding_system_alist));
8648   if (NILP (chain))
8649     return Qnil;
8650
8651   for (; CONSP (chain); chain = XCDR (chain))
8652     {
8653       Lisp_Object elt;
8654
8655       elt = XCAR (chain);
8656       if (CONSP (elt)
8657           && ((STRINGP (target)
8658                && STRINGP (XCAR (elt))
8659                && fast_string_match (XCAR (elt), target) >= 0)
8660               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
8661         {
8662           val = XCDR (elt);
8663           /* Here, if VAL is both a valid coding system and a valid
8664              function symbol, we return VAL as a coding system.  */
8665           if (CONSP (val))
8666             return val;
8667           if (! SYMBOLP (val))
8668             return Qnil;
8669           if (! NILP (Fcoding_system_p (val)))
8670             return Fcons (val, val);
8671           if (! NILP (Ffboundp (val)))
8672             {
8673               /* We use call1 rather than safe_call1
8674                  so as to get bug reports about functions called here
8675                  which don't handle the current interface.  */
8676               val = call1 (val, Flist (nargs, args));
8677               if (CONSP (val))
8678                 return val;
8679               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
8680                 return Fcons (val, val);
8681             }
8682           return Qnil;
8683         }
8684     }
8685   return Qnil;
8686 }
8687
8688 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
8689        Sset_coding_system_priority, 0, MANY, 0,
8690        doc: /* Assign higher priority to the coding systems given as arguments.
8691 If multiple coding systems belongs to the same category,
8692 all but the first one are ignored.
8693
8694 usage: (set-coding-system-priority ...)  */)
8695      (nargs, args)
8696      int nargs;
8697      Lisp_Object *args;
8698 {
8699   int i, j;
8700   int changed[coding_category_max];
8701   enum coding_category priorities[coding_category_max];
8702
8703   bzero (changed, sizeof changed);
8704
8705   for (i = j = 0; i < nargs; i++)
8706     {
8707       enum coding_category category;
8708       Lisp_Object spec, attrs;
8709
8710       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
8711       attrs = AREF (spec, 0);
8712       category = XINT (CODING_ATTR_CATEGORY (attrs));
8713       if (changed[category])
8714         /* Ignore this coding system because a coding system of the
8715            same category already had a higher priority.  */
8716         continue;
8717       changed[category] = 1;
8718       priorities[j++] = category;
8719       if (coding_categories[category].id >= 0
8720           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
8721         setup_coding_system (args[i], &coding_categories[category]);
8722       Fset (AREF (Vcoding_category_table, category), args[i]);
8723     }
8724
8725   /* Now we have decided top J priorities.  Reflect the order of the
8726      original priorities to the remaining priorities.  */
8727
8728   for (i = j, j = 0; i < coding_category_max; i++, j++)
8729     {
8730       while (j < coding_category_max
8731              && changed[coding_priorities[j]])
8732         j++;
8733       if (j == coding_category_max)
8734         abort ();
8735       priorities[i] = coding_priorities[j];
8736     }
8737
8738   bcopy (priorities, coding_priorities, sizeof priorities);
8739
8740   /* Update `coding-category-list'.  */
8741   Vcoding_category_list = Qnil;
8742   for (i = coding_category_max - 1; i >= 0; i--)
8743     Vcoding_category_list
8744       = Fcons (AREF (Vcoding_category_table, priorities[i]),
8745                Vcoding_category_list);
8746
8747   return Qnil;
8748 }
8749
8750 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
8751        Scoding_system_priority_list, 0, 1, 0,
8752        doc: /* Return a list of coding systems ordered by their priorities.
8753 HIGHESTP non-nil means just return the highest priority one.  */)
8754      (highestp)
8755      Lisp_Object highestp;
8756 {
8757   int i;
8758   Lisp_Object val;
8759
8760   for (i = 0, val = Qnil; i < coding_category_max; i++)
8761     {
8762       enum coding_category category = coding_priorities[i];
8763       int id = coding_categories[category].id;
8764       Lisp_Object attrs;
8765
8766       if (id < 0)
8767         continue;
8768       attrs = CODING_ID_ATTRS (id);
8769       if (! NILP (highestp))
8770         return CODING_ATTR_BASE_NAME (attrs);
8771       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
8772     }
8773   return Fnreverse (val);
8774 }
8775
8776 static char *suffixes[] = { "-unix", "-dos", "-mac" };
8777
8778 static Lisp_Object
8779 make_subsidiaries (base)
8780      Lisp_Object base;
8781 {
8782   Lisp_Object subsidiaries;
8783   int base_name_len = SBYTES (SYMBOL_NAME (base));
8784   char *buf = (char *) alloca (base_name_len + 6);
8785   int i;
8786
8787   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
8788   subsidiaries = Fmake_vector (make_number (3), Qnil);
8789   for (i = 0; i < 3; i++)
8790     {
8791       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
8792       ASET (subsidiaries, i, intern (buf));
8793     }
8794   return subsidiaries;
8795 }
8796
8797
8798 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
8799        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
8800        doc: /* For internal use only.
8801 usage: (define-coding-system-internal ...)  */)
8802      (nargs, args)
8803      int nargs;
8804      Lisp_Object *args;
8805 {
8806   Lisp_Object name;
8807   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
8808   Lisp_Object attrs;            /* Vector of attributes.  */
8809   Lisp_Object eol_type;
8810   Lisp_Object aliases;
8811   Lisp_Object coding_type, charset_list, safe_charsets;
8812   enum coding_category category;
8813   Lisp_Object tail, val;
8814   int max_charset_id = 0;
8815   int i;
8816
8817   if (nargs < coding_arg_max)
8818     goto short_args;
8819
8820   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
8821
8822   name = args[coding_arg_name];
8823   CHECK_SYMBOL (name);
8824   CODING_ATTR_BASE_NAME (attrs) = name;
8825
8826   val = args[coding_arg_mnemonic];
8827   if (! STRINGP (val))
8828     CHECK_CHARACTER (val);
8829   CODING_ATTR_MNEMONIC (attrs) = val;
8830
8831   coding_type = args[coding_arg_coding_type];
8832   CHECK_SYMBOL (coding_type);
8833   CODING_ATTR_TYPE (attrs) = coding_type;
8834
8835   charset_list = args[coding_arg_charset_list];
8836   if (SYMBOLP (charset_list))
8837     {
8838       if (EQ (charset_list, Qiso_2022))
8839         {
8840           if (! EQ (coding_type, Qiso_2022))
8841             error ("Invalid charset-list");
8842           charset_list = Viso_2022_charset_list;
8843         }
8844       else if (EQ (charset_list, Qemacs_mule))
8845         {
8846           if (! EQ (coding_type, Qemacs_mule))
8847             error ("Invalid charset-list");
8848           charset_list = Vemacs_mule_charset_list;
8849         }
8850       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8851         if (max_charset_id < XFASTINT (XCAR (tail)))
8852           max_charset_id = XFASTINT (XCAR (tail));
8853     }
8854   else
8855     {
8856       charset_list = Fcopy_sequence (charset_list);
8857       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8858         {
8859           struct charset *charset;
8860
8861           val = XCAR (tail);
8862           CHECK_CHARSET_GET_CHARSET (val, charset);
8863           if (EQ (coding_type, Qiso_2022)
8864               ? CHARSET_ISO_FINAL (charset) < 0
8865               : EQ (coding_type, Qemacs_mule)
8866               ? CHARSET_EMACS_MULE_ID (charset) < 0
8867               : 0)
8868             error ("Can't handle charset `%s'",
8869                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8870
8871           XSETCAR (tail, make_number (charset->id));
8872           if (max_charset_id < charset->id)
8873             max_charset_id = charset->id;
8874         }
8875     }
8876   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
8877
8878   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
8879                                 make_number (255));
8880   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8881     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
8882   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
8883
8884   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
8885
8886   val = args[coding_arg_decode_translation_table];
8887   if (! CHAR_TABLE_P (val) && ! CONSP (val))
8888     CHECK_SYMBOL (val);
8889   CODING_ATTR_DECODE_TBL (attrs) = val;
8890
8891   val = args[coding_arg_encode_translation_table];
8892   if (! CHAR_TABLE_P (val) && ! CONSP (val))
8893     CHECK_SYMBOL (val);
8894   CODING_ATTR_ENCODE_TBL (attrs) = val;
8895
8896   val = args[coding_arg_post_read_conversion];
8897   CHECK_SYMBOL (val);
8898   CODING_ATTR_POST_READ (attrs) = val;
8899
8900   val = args[coding_arg_pre_write_conversion];
8901   CHECK_SYMBOL (val);
8902   CODING_ATTR_PRE_WRITE (attrs) = val;
8903
8904   val = args[coding_arg_default_char];
8905   if (NILP (val))
8906     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
8907   else
8908     {
8909       CHECK_CHARACTER (val);
8910       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8911     }
8912
8913   val = args[coding_arg_for_unibyte];
8914   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
8915
8916   val = args[coding_arg_plist];
8917   CHECK_LIST (val);
8918   CODING_ATTR_PLIST (attrs) = val;
8919
8920   if (EQ (coding_type, Qcharset))
8921     {
8922       /* Generate a lisp vector of 256 elements.  Each element is nil,
8923          integer, or a list of charset IDs.
8924
8925          If Nth element is nil, the byte code N is invalid in this
8926          coding system.
8927
8928          If Nth element is a number NUM, N is the first byte of a
8929          charset whose ID is NUM.
8930
8931          If Nth element is a list of charset IDs, N is the first byte
8932          of one of them.  The list is sorted by dimensions of the
8933          charsets.  A charset of smaller dimension comes firtst. */
8934       val = Fmake_vector (make_number (256), Qnil);
8935
8936       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8937         {
8938           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
8939           int dim = CHARSET_DIMENSION (charset);
8940           int idx = (dim - 1) * 4;
8941
8942           if (CHARSET_ASCII_COMPATIBLE_P (charset))
8943             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8944
8945           for (i = charset->code_space[idx];
8946                i <= charset->code_space[idx + 1]; i++)
8947             {
8948               Lisp_Object tmp, tmp2;
8949               int dim2;
8950
8951               tmp = AREF (val, i);
8952               if (NILP (tmp))
8953                 tmp = XCAR (tail);
8954               else if (NUMBERP (tmp))
8955                 {
8956                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
8957                   if (dim < dim2)
8958                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
8959                   else
8960                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
8961                 }
8962               else
8963                 {
8964                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
8965                     {
8966                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
8967                       if (dim < dim2)
8968                         break;
8969                     }
8970                   if (NILP (tmp2))
8971                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
8972                   else
8973                     {
8974                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
8975                       XSETCAR (tmp2, XCAR (tail));
8976                     }
8977                 }
8978               ASET (val, i, tmp);
8979             }
8980         }
8981       ASET (attrs, coding_attr_charset_valids, val);
8982       category = coding_category_charset;
8983     }
8984   else if (EQ (coding_type, Qccl))
8985     {
8986       Lisp_Object valids;
8987
8988       if (nargs < coding_arg_ccl_max)
8989         goto short_args;
8990
8991       val = args[coding_arg_ccl_decoder];
8992       CHECK_CCL_PROGRAM (val);
8993       if (VECTORP (val))
8994         val = Fcopy_sequence (val);
8995       ASET (attrs, coding_attr_ccl_decoder, val);
8996
8997       val = args[coding_arg_ccl_encoder];
8998       CHECK_CCL_PROGRAM (val);
8999       if (VECTORP (val))
9000         val = Fcopy_sequence (val);
9001       ASET (attrs, coding_attr_ccl_encoder, val);
9002
9003       val = args[coding_arg_ccl_valids];
9004       valids = Fmake_string (make_number (256), make_number (0));
9005       for (tail = val; !NILP (tail); tail = Fcdr (tail))
9006         {
9007           int from, to;
9008
9009           val = Fcar (tail);
9010           if (INTEGERP (val))
9011             {
9012               from = to = XINT (val);
9013               if (from < 0 || from > 255)
9014                 args_out_of_range_3 (val, make_number (0), make_number (255));
9015             }
9016           else
9017             {
9018               CHECK_CONS (val);
9019               CHECK_NATNUM_CAR (val);
9020               CHECK_NATNUM_CDR (val);
9021               from = XINT (XCAR (val));
9022               if (from > 255)
9023                 args_out_of_range_3 (XCAR (val),
9024                                      make_number (0), make_number (255));
9025               to = XINT (XCDR (val));
9026               if (to < from || to > 255)
9027                 args_out_of_range_3 (XCDR (val),
9028                                      XCAR (val), make_number (255));
9029             }
9030           for (i = from; i <= to; i++)
9031             SSET (valids, i, 1);
9032         }
9033       ASET (attrs, coding_attr_ccl_valids, valids);
9034
9035       category = coding_category_ccl;
9036     }
9037   else if (EQ (coding_type, Qutf_16))
9038     {
9039       Lisp_Object bom, endian;
9040
9041       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9042
9043       if (nargs < coding_arg_utf16_max)
9044         goto short_args;
9045
9046       bom = args[coding_arg_utf16_bom];
9047       if (! NILP (bom) && ! EQ (bom, Qt))
9048         {
9049           CHECK_CONS (bom);
9050           val = XCAR (bom);
9051           CHECK_CODING_SYSTEM (val);
9052           val = XCDR (bom);
9053           CHECK_CODING_SYSTEM (val);
9054         }
9055       ASET (attrs, coding_attr_utf_16_bom, bom);
9056
9057       endian = args[coding_arg_utf16_endian];
9058       CHECK_SYMBOL (endian);
9059       if (NILP (endian))
9060         endian = Qbig;
9061       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
9062         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
9063       ASET (attrs, coding_attr_utf_16_endian, endian);
9064
9065       category = (CONSP (bom)
9066                   ? coding_category_utf_16_auto
9067                   : NILP (bom)
9068                   ? (EQ (endian, Qbig)
9069                      ? coding_category_utf_16_be_nosig
9070                      : coding_category_utf_16_le_nosig)
9071                   : (EQ (endian, Qbig)
9072                      ? coding_category_utf_16_be
9073                      : coding_category_utf_16_le));
9074     }
9075   else if (EQ (coding_type, Qiso_2022))
9076     {
9077       Lisp_Object initial, reg_usage, request, flags;
9078       int i;
9079
9080       if (nargs < coding_arg_iso2022_max)
9081         goto short_args;
9082
9083       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
9084       CHECK_VECTOR (initial);
9085       for (i = 0; i < 4; i++)
9086         {
9087           val = Faref (initial, make_number (i));
9088           if (! NILP (val))
9089             {
9090               struct charset *charset;
9091
9092               CHECK_CHARSET_GET_CHARSET (val, charset);
9093               ASET (initial, i, make_number (CHARSET_ID (charset)));
9094               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9095                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9096             }
9097           else
9098             ASET (initial, i, make_number (-1));
9099         }
9100
9101       reg_usage = args[coding_arg_iso2022_reg_usage];
9102       CHECK_CONS (reg_usage);
9103       CHECK_NUMBER_CAR (reg_usage);
9104       CHECK_NUMBER_CDR (reg_usage);
9105
9106       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9107       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
9108         {
9109           int id;
9110           Lisp_Object tmp;
9111
9112           val = Fcar (tail);
9113           CHECK_CONS (val);
9114           tmp = XCAR (val);
9115           CHECK_CHARSET_GET_ID (tmp, id);
9116           CHECK_NATNUM_CDR (val);
9117           if (XINT (XCDR (val)) >= 4)
9118             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
9119           XSETCAR (val, make_number (id));
9120         }
9121
9122       flags = args[coding_arg_iso2022_flags];
9123       CHECK_NATNUM (flags);
9124       i = XINT (flags);
9125       if (EQ (args[coding_arg_charset_list], Qiso_2022))
9126         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9127
9128       ASET (attrs, coding_attr_iso_initial, initial);
9129       ASET (attrs, coding_attr_iso_usage, reg_usage);
9130       ASET (attrs, coding_attr_iso_request, request);
9131       ASET (attrs, coding_attr_iso_flags, flags);
9132       setup_iso_safe_charsets (attrs);
9133
9134       if (i & CODING_ISO_FLAG_SEVEN_BITS)
9135         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9136                           | CODING_ISO_FLAG_SINGLE_SHIFT))
9137                     ? coding_category_iso_7_else
9138                     : EQ (args[coding_arg_charset_list], Qiso_2022)
9139                     ? coding_category_iso_7
9140                     : coding_category_iso_7_tight);
9141       else
9142         {
9143           int id = XINT (AREF (initial, 1));
9144
9145           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
9146                        || EQ (args[coding_arg_charset_list], Qiso_2022)
9147                        || id < 0)
9148                       ? coding_category_iso_8_else
9149                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9150                       ? coding_category_iso_8_1
9151                       : coding_category_iso_8_2);
9152         }
9153       if (category != coding_category_iso_8_1
9154           && category != coding_category_iso_8_2)
9155         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9156     }
9157   else if (EQ (coding_type, Qemacs_mule))
9158     {
9159       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9160         ASET (attrs, coding_attr_emacs_mule_full, Qt);
9161       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9162       category = coding_category_emacs_mule;
9163     }
9164   else if (EQ (coding_type, Qshift_jis))
9165     {
9166
9167       struct charset *charset;
9168
9169       if (XINT (Flength (charset_list)) != 3
9170           && XINT (Flength (charset_list)) != 4)
9171         error ("There should be three or four charsets");
9172
9173       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9174       if (CHARSET_DIMENSION (charset) != 1)
9175         error ("Dimension of charset %s is not one",
9176                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9177       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9178         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9179
9180       charset_list = XCDR (charset_list);
9181       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9182       if (CHARSET_DIMENSION (charset) != 1)
9183         error ("Dimension of charset %s is not one",
9184                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9185
9186       charset_list = XCDR (charset_list);
9187       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9188       if (CHARSET_DIMENSION (charset) != 2)
9189         error ("Dimension of charset %s is not two",
9190                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9191
9192       charset_list = XCDR (charset_list);
9193       if (! NILP (charset_list))
9194         {
9195           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9196           if (CHARSET_DIMENSION (charset) != 2)
9197             error ("Dimension of charset %s is not two",
9198                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9199         }
9200
9201       category = coding_category_sjis;
9202       Vsjis_coding_system = name;
9203     }
9204   else if (EQ (coding_type, Qbig5))
9205     {
9206       struct charset *charset;
9207
9208       if (XINT (Flength (charset_list)) != 2)
9209         error ("There should be just two charsets");
9210
9211       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9212       if (CHARSET_DIMENSION (charset) != 1)
9213         error ("Dimension of charset %s is not one",
9214                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9215       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9216         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9217
9218       charset_list = XCDR (charset_list);
9219       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9220       if (CHARSET_DIMENSION (charset) != 2)
9221         error ("Dimension of charset %s is not two",
9222                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9223
9224       category = coding_category_big5;
9225       Vbig5_coding_system = name;
9226     }
9227   else if (EQ (coding_type, Qraw_text))
9228     {
9229       category = coding_category_raw_text;
9230       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9231     }
9232   else if (EQ (coding_type, Qutf_8))
9233     {
9234       category = coding_category_utf_8;
9235       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9236     }
9237   else if (EQ (coding_type, Qundecided))
9238     category = coding_category_undecided;
9239   else
9240     error ("Invalid coding system type: %s",
9241            SDATA (SYMBOL_NAME (coding_type)));
9242
9243   CODING_ATTR_CATEGORY (attrs) = make_number (category);
9244   CODING_ATTR_PLIST (attrs)
9245     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
9246                                 CODING_ATTR_PLIST (attrs)));
9247   CODING_ATTR_PLIST (attrs)
9248     = Fcons (QCascii_compatible_p,
9249              Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
9250                     CODING_ATTR_PLIST (attrs)));
9251
9252   eol_type = args[coding_arg_eol_type];
9253   if (! NILP (eol_type)
9254       && ! EQ (eol_type, Qunix)
9255       && ! EQ (eol_type, Qdos)
9256       && ! EQ (eol_type, Qmac))
9257     error ("Invalid eol-type");
9258
9259   aliases = Fcons (name, Qnil);
9260
9261   if (NILP (eol_type))
9262     {
9263       eol_type = make_subsidiaries (name);
9264       for (i = 0; i < 3; i++)
9265         {
9266           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
9267
9268           this_name = AREF (eol_type, i);
9269           this_aliases = Fcons (this_name, Qnil);
9270           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
9271           this_spec = Fmake_vector (make_number (3), attrs);
9272           ASET (this_spec, 1, this_aliases);
9273           ASET (this_spec, 2, this_eol_type);
9274           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
9275           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
9276           val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
9277           if (NILP (val))
9278             Vcoding_system_alist
9279               = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
9280                        Vcoding_system_alist);
9281         }
9282     }
9283
9284   spec_vec = Fmake_vector (make_number (3), attrs);
9285   ASET (spec_vec, 1, aliases);
9286   ASET (spec_vec, 2, eol_type);
9287
9288   Fputhash (name, spec_vec, Vcoding_system_hash_table);
9289   Vcoding_system_list = Fcons (name, Vcoding_system_list);
9290   val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
9291   if (NILP (val))
9292     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
9293                                   Vcoding_system_alist);
9294
9295   {
9296     int id = coding_categories[category].id;
9297
9298     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
9299       setup_coding_system (name, &coding_categories[category]);
9300   }
9301
9302   return Qnil;
9303
9304  short_args:
9305   return Fsignal (Qwrong_number_of_arguments,
9306                   Fcons (intern ("define-coding-system-internal"),
9307                          make_number (nargs)));
9308 }
9309
9310
9311 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
9312        3, 3, 0,
9313        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
9314   (coding_system, prop, val)
9315      Lisp_Object coding_system, prop, val;
9316 {
9317   Lisp_Object spec, attrs;
9318
9319   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9320   attrs = AREF (spec, 0);
9321   if (EQ (prop, QCmnemonic))
9322     {
9323       if (! STRINGP (val))
9324         CHECK_CHARACTER (val);
9325       CODING_ATTR_MNEMONIC (attrs) = val;
9326     }
9327   else if (EQ (prop, QCdefalut_char))
9328     {
9329       if (NILP (val))
9330         val = make_number (' ');
9331       else
9332         CHECK_CHARACTER (val);
9333       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9334     }
9335   else if (EQ (prop, QCdecode_translation_table))
9336     {
9337       if (! CHAR_TABLE_P (val) && ! CONSP (val))
9338         CHECK_SYMBOL (val);
9339       CODING_ATTR_DECODE_TBL (attrs) = val;
9340     }
9341   else if (EQ (prop, QCencode_translation_table))
9342     {
9343       if (! CHAR_TABLE_P (val) && ! CONSP (val))
9344         CHECK_SYMBOL (val);
9345       CODING_ATTR_ENCODE_TBL (attrs) = val;
9346     }
9347   else if (EQ (prop, QCpost_read_conversion))
9348     {
9349       CHECK_SYMBOL (val);
9350       CODING_ATTR_POST_READ (attrs) = val;
9351     }
9352   else if (EQ (prop, QCpre_write_conversion))
9353     {
9354       CHECK_SYMBOL (val);
9355       CODING_ATTR_PRE_WRITE (attrs) = val;
9356     }
9357   else if (EQ (prop, QCascii_compatible_p))
9358     {
9359       CODING_ATTR_ASCII_COMPAT (attrs) = val;
9360     }
9361
9362   CODING_ATTR_PLIST (attrs)
9363     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
9364   return val;
9365 }
9366
9367
9368 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
9369        Sdefine_coding_system_alias, 2, 2, 0,
9370        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
9371      (alias, coding_system)
9372      Lisp_Object alias, coding_system;
9373 {
9374   Lisp_Object spec, aliases, eol_type, val;
9375
9376   CHECK_SYMBOL (alias);
9377   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9378   aliases = AREF (spec, 1);
9379   /* ALISES should be a list of length more than zero, and the first
9380      element is a base coding system.  Append ALIAS at the tail of the
9381      list.  */
9382   while (!NILP (XCDR (aliases)))
9383     aliases = XCDR (aliases);
9384   XSETCDR (aliases, Fcons (alias, Qnil));
9385
9386   eol_type = AREF (spec, 2);
9387   if (VECTORP (eol_type))
9388     {
9389       Lisp_Object subsidiaries;
9390       int i;
9391
9392       subsidiaries = make_subsidiaries (alias);
9393       for (i = 0; i < 3; i++)
9394         Fdefine_coding_system_alias (AREF (subsidiaries, i),
9395                                      AREF (eol_type, i));
9396     }
9397
9398   Fputhash (alias, spec, Vcoding_system_hash_table);
9399   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
9400   val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
9401   if (NILP (val))
9402     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
9403                                   Vcoding_system_alist);
9404
9405   return Qnil;
9406 }
9407
9408 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
9409        1, 1, 0,
9410        doc: /* Return the base of CODING-SYSTEM.
9411 Any alias or subsidiary coding system is not a base coding system.  */)
9412   (coding_system)
9413      Lisp_Object coding_system;
9414 {
9415   Lisp_Object spec, attrs;
9416
9417   if (NILP (coding_system))
9418     return (Qno_conversion);
9419   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9420   attrs = AREF (spec, 0);
9421   return CODING_ATTR_BASE_NAME (attrs);
9422 }
9423
9424 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
9425        1, 1, 0,
9426        doc: "Return the property list of CODING-SYSTEM.")
9427      (coding_system)
9428      Lisp_Object coding_system;
9429 {
9430   Lisp_Object spec, attrs;
9431
9432   if (NILP (coding_system))
9433     coding_system = Qno_conversion;
9434   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9435   attrs = AREF (spec, 0);
9436   return CODING_ATTR_PLIST (attrs);
9437 }
9438
9439
9440 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
9441        1, 1, 0,
9442        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
9443      (coding_system)
9444      Lisp_Object coding_system;
9445 {
9446   Lisp_Object spec;
9447
9448   if (NILP (coding_system))
9449     coding_system = Qno_conversion;
9450   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9451   return AREF (spec, 1);
9452 }
9453
9454 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
9455        Scoding_system_eol_type, 1, 1, 0,
9456        doc: /* Return eol-type of CODING-SYSTEM.
9457 An eol-type is integer 0, 1, 2, or a vector of coding systems.
9458
9459 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
9460 and CR respectively.
9461
9462 A vector value indicates that a format of end-of-line should be
9463 detected automatically.  Nth element of the vector is the subsidiary
9464 coding system whose eol-type is N.  */)
9465      (coding_system)
9466      Lisp_Object coding_system;
9467 {
9468   Lisp_Object spec, eol_type;
9469   int n;
9470
9471   if (NILP (coding_system))
9472     coding_system = Qno_conversion;
9473   if (! CODING_SYSTEM_P (coding_system))
9474     return Qnil;
9475   spec = CODING_SYSTEM_SPEC (coding_system);
9476   eol_type = AREF (spec, 2);
9477   if (VECTORP (eol_type))
9478     return Fcopy_sequence (eol_type);
9479   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
9480   return make_number (n);
9481 }
9482
9483 #endif /* emacs */
9484
9485 \f
9486 /*** 9. Post-amble ***/
9487
9488 void
9489 init_coding_once ()
9490 {
9491   int i;
9492
9493   for (i = 0; i < coding_category_max; i++)
9494     {
9495       coding_categories[i].id = -1;
9496       coding_priorities[i] = i;
9497     }
9498
9499   /* ISO2022 specific initialize routine.  */
9500   for (i = 0; i < 0x20; i++)
9501     iso_code_class[i] = ISO_control_0;
9502   for (i = 0x21; i < 0x7F; i++)
9503     iso_code_class[i] = ISO_graphic_plane_0;
9504   for (i = 0x80; i < 0xA0; i++)
9505     iso_code_class[i] = ISO_control_1;
9506   for (i = 0xA1; i < 0xFF; i++)
9507     iso_code_class[i] = ISO_graphic_plane_1;
9508   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
9509   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
9510   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
9511   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
9512   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
9513   iso_code_class[ISO_CODE_ESC] = ISO_escape;
9514   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
9515   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
9516   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
9517
9518   for (i = 0; i < 256; i++)
9519     {
9520       emacs_mule_bytes[i] = 1;
9521     }
9522   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
9523   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
9524   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
9525   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
9526 }
9527
9528 #ifdef emacs
9529
9530 void
9531 syms_of_coding ()
9532 {
9533   staticpro (&Vcoding_system_hash_table);
9534   {
9535     Lisp_Object args[2];
9536     args[0] = QCtest;
9537     args[1] = Qeq;
9538     Vcoding_system_hash_table = Fmake_hash_table (2, args);
9539   }
9540
9541   staticpro (&Vsjis_coding_system);
9542   Vsjis_coding_system = Qnil;
9543
9544   staticpro (&Vbig5_coding_system);
9545   Vbig5_coding_system = Qnil;
9546
9547   staticpro (&Vcode_conversion_reused_workbuf);
9548   Vcode_conversion_reused_workbuf = Qnil;
9549
9550   staticpro (&Vcode_conversion_workbuf_name);
9551   Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
9552
9553   reused_workbuf_in_use = 0;
9554
9555   DEFSYM (Qcharset, "charset");
9556   DEFSYM (Qtarget_idx, "target-idx");
9557   DEFSYM (Qcoding_system_history, "coding-system-history");
9558   Fset (Qcoding_system_history, Qnil);
9559
9560   /* Target FILENAME is the first argument.  */
9561   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9562   /* Target FILENAME is the third argument.  */
9563   Fput (Qwrite_region, Qtarget_idx, make_number (2));
9564
9565   DEFSYM (Qcall_process, "call-process");
9566   /* Target PROGRAM is the first argument.  */
9567   Fput (Qcall_process, Qtarget_idx, make_number (0));
9568
9569   DEFSYM (Qcall_process_region, "call-process-region");
9570   /* Target PROGRAM is the third argument.  */
9571   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
9572
9573   DEFSYM (Qstart_process, "start-process");
9574   /* Target PROGRAM is the third argument.  */
9575   Fput (Qstart_process, Qtarget_idx, make_number (2));
9576
9577   DEFSYM (Qopen_network_stream, "open-network-stream");
9578   /* Target SERVICE is the fourth argument.  */
9579   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
9580
9581   DEFSYM (Qcoding_system, "coding-system");
9582   DEFSYM (Qcoding_aliases, "coding-aliases");
9583
9584   DEFSYM (Qeol_type, "eol-type");
9585   DEFSYM (Qunix, "unix");
9586   DEFSYM (Qdos, "dos");
9587
9588   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
9589   DEFSYM (Qpost_read_conversion, "post-read-conversion");
9590   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
9591   DEFSYM (Qdefault_char, "default-char");
9592   DEFSYM (Qundecided, "undecided");
9593   DEFSYM (Qno_conversion, "no-conversion");
9594   DEFSYM (Qraw_text, "raw-text");
9595
9596   DEFSYM (Qiso_2022, "iso-2022");
9597
9598   DEFSYM (Qutf_8, "utf-8");
9599   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
9600
9601   DEFSYM (Qutf_16, "utf-16");
9602   DEFSYM (Qbig, "big");
9603   DEFSYM (Qlittle, "little");
9604
9605   DEFSYM (Qshift_jis, "shift-jis");
9606   DEFSYM (Qbig5, "big5");
9607
9608   DEFSYM (Qcoding_system_p, "coding-system-p");
9609
9610   DEFSYM (Qcoding_system_error, "coding-system-error");
9611   Fput (Qcoding_system_error, Qerror_conditions,
9612         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
9613   Fput (Qcoding_system_error, Qerror_message,
9614         build_string ("Invalid coding system"));
9615
9616   /* Intern this now in case it isn't already done.
9617      Setting this variable twice is harmless.
9618      But don't staticpro it here--that is done in alloc.c.  */
9619   Qchar_table_extra_slots = intern ("char-table-extra-slots");
9620
9621   DEFSYM (Qtranslation_table, "translation-table");
9622   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
9623   DEFSYM (Qtranslation_table_id, "translation-table-id");
9624   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
9625   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
9626
9627   DEFSYM (Qvalid_codes, "valid-codes");
9628
9629   DEFSYM (Qemacs_mule, "emacs-mule");
9630
9631   DEFSYM (QCcategory, ":category");
9632   DEFSYM (QCmnemonic, ":mnemonic");
9633   DEFSYM (QCdefalut_char, ":default-char");
9634   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
9635   DEFSYM (QCencode_translation_table, ":encode-translation-table");
9636   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
9637   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
9638   DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
9639
9640   Vcoding_category_table
9641     = Fmake_vector (make_number (coding_category_max), Qnil);
9642   staticpro (&Vcoding_category_table);
9643   /* Followings are target of code detection.  */
9644   ASET (Vcoding_category_table, coding_category_iso_7,
9645         intern ("coding-category-iso-7"));
9646   ASET (Vcoding_category_table, coding_category_iso_7_tight,
9647         intern ("coding-category-iso-7-tight"));
9648   ASET (Vcoding_category_table, coding_category_iso_8_1,
9649         intern ("coding-category-iso-8-1"));
9650   ASET (Vcoding_category_table, coding_category_iso_8_2,
9651         intern ("coding-category-iso-8-2"));
9652   ASET (Vcoding_category_table, coding_category_iso_7_else,
9653         intern ("coding-category-iso-7-else"));
9654   ASET (Vcoding_category_table, coding_category_iso_8_else,
9655         intern ("coding-category-iso-8-else"));
9656   ASET (Vcoding_category_table, coding_category_utf_8,
9657         intern ("coding-category-utf-8"));
9658   ASET (Vcoding_category_table, coding_category_utf_16_be,
9659         intern ("coding-category-utf-16-be"));
9660   ASET (Vcoding_category_table, coding_category_utf_16_auto,
9661         intern ("coding-category-utf-16-auto"));
9662   ASET (Vcoding_category_table, coding_category_utf_16_le,
9663         intern ("coding-category-utf-16-le"));
9664   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
9665         intern ("coding-category-utf-16-be-nosig"));
9666   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
9667         intern ("coding-category-utf-16-le-nosig"));
9668   ASET (Vcoding_category_table, coding_category_charset,
9669         intern ("coding-category-charset"));
9670   ASET (Vcoding_category_table, coding_category_sjis,
9671         intern ("coding-category-sjis"));
9672   ASET (Vcoding_category_table, coding_category_big5,
9673         intern ("coding-category-big5"));
9674   ASET (Vcoding_category_table, coding_category_ccl,
9675         intern ("coding-category-ccl"));
9676   ASET (Vcoding_category_table, coding_category_emacs_mule,
9677         intern ("coding-category-emacs-mule"));
9678   /* Followings are NOT target of code detection.  */
9679   ASET (Vcoding_category_table, coding_category_raw_text,
9680         intern ("coding-category-raw-text"));
9681   ASET (Vcoding_category_table, coding_category_undecided,
9682         intern ("coding-category-undecided"));
9683
9684   DEFSYM (Qinsufficient_source, "insufficient-source");
9685   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
9686   DEFSYM (Qinvalid_source, "invalid-source");
9687   DEFSYM (Qinterrupted, "interrupted");
9688   DEFSYM (Qinsufficient_memory, "insufficient-memory");
9689   DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
9690
9691   defsubr (&Scoding_system_p);
9692   defsubr (&Sread_coding_system);
9693   defsubr (&Sread_non_nil_coding_system);
9694   defsubr (&Scheck_coding_system);
9695   defsubr (&Sdetect_coding_region);
9696   defsubr (&Sdetect_coding_string);
9697   defsubr (&Sfind_coding_systems_region_internal);
9698   defsubr (&Sunencodable_char_position);
9699   defsubr (&Scheck_coding_systems_region);
9700   defsubr (&Sdecode_coding_region);
9701   defsubr (&Sencode_coding_region);
9702   defsubr (&Sdecode_coding_string);
9703   defsubr (&Sencode_coding_string);
9704   defsubr (&Sdecode_sjis_char);
9705   defsubr (&Sencode_sjis_char);
9706   defsubr (&Sdecode_big5_char);
9707   defsubr (&Sencode_big5_char);
9708   defsubr (&Sset_terminal_coding_system_internal);
9709   defsubr (&Sset_safe_terminal_coding_system_internal);
9710   defsubr (&Sterminal_coding_system);
9711   defsubr (&Sset_keyboard_coding_system_internal);
9712   defsubr (&Skeyboard_coding_system);
9713   defsubr (&Sfind_operation_coding_system);
9714   defsubr (&Sset_coding_system_priority);
9715   defsubr (&Sdefine_coding_system_internal);
9716   defsubr (&Sdefine_coding_system_alias);
9717   defsubr (&Scoding_system_put);
9718   defsubr (&Scoding_system_base);
9719   defsubr (&Scoding_system_plist);
9720   defsubr (&Scoding_system_aliases);
9721   defsubr (&Scoding_system_eol_type);
9722   defsubr (&Scoding_system_priority_list);
9723
9724   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
9725                doc: /* List of coding systems.
9726
9727 Do not alter the value of this variable manually.  This variable should be
9728 updated by the functions `define-coding-system' and
9729 `define-coding-system-alias'.  */);
9730   Vcoding_system_list = Qnil;
9731
9732   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
9733                doc: /* Alist of coding system names.
9734 Each element is one element list of coding system name.
9735 This variable is given to `completing-read' as TABLE argument.
9736
9737 Do not alter the value of this variable manually.  This variable should be
9738 updated by the functions `make-coding-system' and
9739 `define-coding-system-alias'.  */);
9740   Vcoding_system_alist = Qnil;
9741
9742   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
9743                doc: /* List of coding-categories (symbols) ordered by priority.
9744
9745 On detecting a coding system, Emacs tries code detection algorithms
9746 associated with each coding-category one by one in this order.  When
9747 one algorithm agrees with a byte sequence of source text, the coding
9748 system bound to the corresponding coding-category is selected.
9749
9750 Don't modify this variable directly, but use `set-coding-priority'.  */);
9751   {
9752     int i;
9753
9754     Vcoding_category_list = Qnil;
9755     for (i = coding_category_max - 1; i >= 0; i--)
9756       Vcoding_category_list
9757         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
9758                  Vcoding_category_list);
9759   }
9760
9761   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
9762                doc: /* Specify the coding system for read operations.
9763 It is useful to bind this variable with `let', but do not set it globally.
9764 If the value is a coding system, it is used for decoding on read operation.
9765 If not, an appropriate element is used from one of the coding system alists:
9766 There are three such tables, `file-coding-system-alist',
9767 `process-coding-system-alist', and `network-coding-system-alist'.  */);
9768   Vcoding_system_for_read = Qnil;
9769
9770   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
9771                doc: /* Specify the coding system for write operations.
9772 Programs bind this variable with `let', but you should not set it globally.
9773 If the value is a coding system, it is used for encoding of output,
9774 when writing it to a file and when sending it to a file or subprocess.
9775
9776 If this does not specify a coding system, an appropriate element
9777 is used from one of the coding system alists:
9778 There are three such tables, `file-coding-system-alist',
9779 `process-coding-system-alist', and `network-coding-system-alist'.
9780 For output to files, if the above procedure does not specify a coding system,
9781 the value of `buffer-file-coding-system' is used.  */);
9782   Vcoding_system_for_write = Qnil;
9783
9784   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
9785                doc: /*
9786 Coding system used in the latest file or process I/O.  */);
9787   Vlast_coding_system_used = Qnil;
9788
9789   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
9790                doc: /*
9791 Error status of the last code conversion.
9792
9793 When an error was detected in the last code conversion, this variable
9794 is set to one of the following symbols.
9795   `insufficient-source'
9796   `inconsistent-eol'
9797   `invalid-source'
9798   `interrupted'
9799   `insufficient-memory'
9800 When no error was detected, the value doesn't change.  So, to check
9801 the error status of a code conversion by this variable, you must
9802 explicitly set this variable to nil before performing code
9803 conversion.  */);
9804   Vlast_code_conversion_error = Qnil;
9805
9806   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
9807                doc: /*
9808 *Non-nil means always inhibit code conversion of end-of-line format.
9809 See info node `Coding Systems' and info node `Text and Binary' concerning
9810 such conversion.  */);
9811   inhibit_eol_conversion = 0;
9812
9813   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
9814                doc: /*
9815 Non-nil means process buffer inherits coding system of process output.
9816 Bind it to t if the process output is to be treated as if it were a file
9817 read from some filesystem.  */);
9818   inherit_process_coding_system = 0;
9819
9820   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
9821                doc: /*
9822 Alist to decide a coding system to use for a file I/O operation.
9823 The format is ((PATTERN . VAL) ...),
9824 where PATTERN is a regular expression matching a file name,
9825 VAL is a coding system, a cons of coding systems, or a function symbol.
9826 If VAL is a coding system, it is used for both decoding and encoding
9827 the file contents.
9828 If VAL is a cons of coding systems, the car part is used for decoding,
9829 and the cdr part is used for encoding.
9830 If VAL is a function symbol, the function must return a coding system
9831 or a cons of coding systems which are used as above.  The function is
9832 called with an argument that is a list of the arguments with which
9833 `find-operation-coding-system' was called.  If the function can't decide
9834 a coding system, it can return `undecided' so that the normal
9835 code-detection is performed.
9836
9837 See also the function `find-operation-coding-system'
9838 and the variable `auto-coding-alist'.  */);
9839   Vfile_coding_system_alist = Qnil;
9840
9841   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
9842                doc: /*
9843 Alist to decide a coding system to use for a process I/O operation.
9844 The format is ((PATTERN . VAL) ...),
9845 where PATTERN is a regular expression matching a program name,
9846 VAL is a coding system, a cons of coding systems, or a function symbol.
9847 If VAL is a coding system, it is used for both decoding what received
9848 from the program and encoding what sent to the program.
9849 If VAL is a cons of coding systems, the car part is used for decoding,
9850 and the cdr part is used for encoding.
9851 If VAL is a function symbol, the function must return a coding system
9852 or a cons of coding systems which are used as above.
9853
9854 See also the function `find-operation-coding-system'.  */);
9855   Vprocess_coding_system_alist = Qnil;
9856
9857   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
9858                doc: /*
9859 Alist to decide a coding system to use for a network I/O operation.
9860 The format is ((PATTERN . VAL) ...),
9861 where PATTERN is a regular expression matching a network service name
9862 or is a port number to connect to,
9863 VAL is a coding system, a cons of coding systems, or a function symbol.
9864 If VAL is a coding system, it is used for both decoding what received
9865 from the network stream and encoding what sent to the network stream.
9866 If VAL is a cons of coding systems, the car part is used for decoding,
9867 and the cdr part is used for encoding.
9868 If VAL is a function symbol, the function must return a coding system
9869 or a cons of coding systems which are used as above.
9870
9871 See also the function `find-operation-coding-system'.  */);
9872   Vnetwork_coding_system_alist = Qnil;
9873
9874   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
9875                doc: /* Coding system to use with system messages.
9876 Also used for decoding keyboard input on X Window system.  */);
9877   Vlocale_coding_system = Qnil;
9878
9879   /* The eol mnemonics are reset in startup.el system-dependently.  */
9880   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
9881                doc: /*
9882 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
9883   eol_mnemonic_unix = build_string (":");
9884
9885   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
9886                doc: /*
9887 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
9888   eol_mnemonic_dos = build_string ("\\");
9889
9890   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
9891                doc: /*
9892 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
9893   eol_mnemonic_mac = build_string ("/");
9894
9895   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
9896                doc: /*
9897 *String displayed in mode line when end-of-line format is not yet determined.  */);
9898   eol_mnemonic_undecided = build_string (":");
9899
9900   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
9901                doc: /*
9902 *Non-nil enables character translation while encoding and decoding.  */);
9903   Venable_character_translation = Qt;
9904
9905   DEFVAR_LISP ("standard-translation-table-for-decode",
9906                &Vstandard_translation_table_for_decode,
9907                doc: /* Table for translating characters while decoding.  */);
9908   Vstandard_translation_table_for_decode = Qnil;
9909
9910   DEFVAR_LISP ("standard-translation-table-for-encode",
9911                &Vstandard_translation_table_for_encode,
9912                doc: /* Table for translating characters while encoding.  */);
9913   Vstandard_translation_table_for_encode = Qnil;
9914
9915   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
9916                doc: /* Alist of charsets vs revision numbers.
9917 While encoding, if a charset (car part of an element) is found,
9918 designate it with the escape sequence identifying revision (cdr part
9919 of the element).  */);
9920   Vcharset_revision_table = Qnil;
9921
9922   DEFVAR_LISP ("default-process-coding-system",
9923                &Vdefault_process_coding_system,
9924                doc: /* Cons of coding systems used for process I/O by default.
9925 The car part is used for decoding a process output,
9926 the cdr part is used for encoding a text to be sent to a process.  */);
9927   Vdefault_process_coding_system = Qnil;
9928
9929   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
9930                doc: /*
9931 Table of extra Latin codes in the range 128..159 (inclusive).
9932 This is a vector of length 256.
9933 If Nth element is non-nil, the existence of code N in a file
9934 \(or output of subprocess) doesn't prevent it to be detected as
9935 a coding system of ISO 2022 variant which has a flag
9936 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9937 or reading output of a subprocess.
9938 Only 128th through 159th elements has a meaning.  */);
9939   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
9940
9941   DEFVAR_LISP ("select-safe-coding-system-function",
9942                &Vselect_safe_coding_system_function,
9943                doc: /*
9944 Function to call to select safe coding system for encoding a text.
9945
9946 If set, this function is called to force a user to select a proper
9947 coding system which can encode the text in the case that a default
9948 coding system used in each operation can't encode the text.  The
9949 function should take care that the buffer is not modified while
9950 the coding system is being selected.
9951
9952 The default value is `select-safe-coding-system' (which see).  */);
9953   Vselect_safe_coding_system_function = Qnil;
9954
9955   DEFVAR_BOOL ("coding-system-require-warning",
9956                &coding_system_require_warning,
9957                doc: /* Internal use only.
9958 If non-nil, on writing a file, `select-safe-coding-system-function' is
9959 called even if `coding-system-for-write' is non-nil.  The command
9960 `universal-coding-system-argument' binds this variable to t temporarily.  */);
9961   coding_system_require_warning = 0;
9962
9963
9964   DEFVAR_BOOL ("inhibit-iso-escape-detection",
9965                &inhibit_iso_escape_detection,
9966                doc: /*
9967 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
9968
9969 By default, on reading a file, Emacs tries to detect how the text is
9970 encoded.  This code detection is sensitive to escape sequences.  If
9971 the sequence is valid as ISO2022, the code is determined as one of
9972 the ISO2022 encodings, and the file is decoded by the corresponding
9973 coding system (e.g. `iso-2022-7bit').
9974
9975 However, there may be a case that you want to read escape sequences in
9976 a file as is.  In such a case, you can set this variable to non-nil.
9977 Then, as the code detection ignores any escape sequences, no file is
9978 detected as encoded in some ISO2022 encoding.  The result is that all
9979 escape sequences become visible in a buffer.
9980
9981 The default value is nil, and it is strongly recommended not to change
9982 it.  That is because many Emacs Lisp source files that contain
9983 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9984 in Emacs's distribution, and they won't be decoded correctly on
9985 reading if you suppress escape sequence detection.
9986
9987 The other way to read escape sequences in a file without decoding is
9988 to explicitly specify some coding system that doesn't use ISO2022's
9989 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
9990   inhibit_iso_escape_detection = 0;
9991
9992   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
9993                doc: /* Char table for translating self-inserting characters.
9994 This is applied to the result of input methods, not their input.  See also
9995 `keyboard-translate-table'.  */);
9996     Vtranslation_table_for_input = Qnil;
9997
9998   {
9999     Lisp_Object args[coding_arg_max];
10000     Lisp_Object plist[16];
10001     int i;
10002
10003     for (i = 0; i < coding_arg_max; i++)
10004       args[i] = Qnil;
10005
10006     plist[0] = intern (":name");
10007     plist[1] = args[coding_arg_name] = Qno_conversion;
10008     plist[2] = intern (":mnemonic");
10009     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
10010     plist[4] = intern (":coding-type");
10011     plist[5] = args[coding_arg_coding_type] = Qraw_text;
10012     plist[6] = intern (":ascii-compatible-p");
10013     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
10014     plist[8] = intern (":default-char");
10015     plist[9] = args[coding_arg_default_char] = make_number (0);
10016     plist[10] = intern (":for-unibyte");
10017     plist[11] = args[coding_arg_for_unibyte] = Qt;
10018     plist[12] = intern (":docstring");
10019     plist[13] = build_string ("Do no conversion.\n\
10020 \n\
10021 When you visit a file with this coding, the file is read into a\n\
10022 unibyte buffer as is, thus each byte of a file is treated as a\n\
10023 character.");
10024     plist[14] = intern (":eol-type");
10025     plist[15] = args[coding_arg_eol_type] = Qunix;
10026     args[coding_arg_plist] = Flist (16, plist);
10027     Fdefine_coding_system_internal (coding_arg_max, args);
10028
10029     plist[1] = args[coding_arg_name] = Qundecided;
10030     plist[3] = args[coding_arg_mnemonic] = make_number ('-');
10031     plist[5] = args[coding_arg_coding_type] = Qundecided;
10032     /* This is already set.
10033        plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
10034     plist[8] = intern (":charset-list");
10035     plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
10036     plist[11] = args[coding_arg_for_unibyte] = Qnil;
10037     plist[13] = build_string ("No conversion on encoding, automatic conversion on decoding.");
10038     plist[15] = args[coding_arg_eol_type] = Qnil;
10039     args[coding_arg_plist] = Flist (16, plist);
10040     Fdefine_coding_system_internal (coding_arg_max, args);
10041   }
10042
10043   setup_coding_system (Qno_conversion, &safe_terminal_coding);
10044
10045   {
10046     int i;
10047
10048     for (i = 0; i < coding_category_max; i++)
10049       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
10050   }
10051 #if defined (MSDOS) || defined (WINDOWSNT)
10052   system_eol_type = Qdos;
10053 #else
10054   system_eol_type = Qunix;
10055 #endif
10056   staticpro (&system_eol_type);
10057 }
10058
10059 char *
10060 emacs_strerror (error_number)
10061      int error_number;
10062 {
10063   char *str;
10064
10065   synchronize_system_messages_locale ();
10066   str = strerror (error_number);
10067
10068   if (! NILP (Vlocale_coding_system))
10069     {
10070       Lisp_Object dec = code_convert_string_norecord (build_string (str),
10071                                                       Vlocale_coding_system,
10072                                                       0);
10073       str = (char *) SDATA (dec);
10074     }
10075
10076   return str;
10077 }
10078
10079 #endif /* emacs */
10080
10081 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
10082    (do not change this comment) */