src/coding.c

   1 /* Coding system handler (conversion, detection, etc).
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005,
   3                  2006, 2007, 2008 Free Software Foundation, Inc.
   4    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   5      2005, 2006, 2007, 2008
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H14PRO021
   8    Copyright (C) 2003
   9      National Institute of Advanced Industrial Science and Technology (AIST)
  10      Registration Number H13PRO009
  11
  12 This file is part of GNU Emacs.
  13
  14 GNU Emacs is free software; you can redistribute it and/or modify
  15 it under the terms of the GNU General Public License as published by
  16 the Free Software Foundation; either version 3, or (at your option)
  17 any later version.
  18
  19 GNU Emacs is distributed in the hope that it will be useful,
  20 but WITHOUT ANY WARRANTY; without even the implied warranty of
  21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22 GNU General Public License for more details.
  23
  24 You should have received a copy of the GNU General Public License
  25 along with GNU Emacs; see the file COPYING.  If not, write to
  26 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  27 Boston, MA 02110-1301, USA.  */
  28
  29 /*** TABLE OF CONTENTS ***
  30
  31   0. General comments
  32   1. Preamble
  33   2. Emacs' internal format (emacs-utf-8) handlers
  34   3. UTF-8 handlers
  35   4. UTF-16 handlers
  36   5. Charset-base coding systems handlers
  37   6. emacs-mule (old Emacs' internal format) handlers
  38   7. ISO2022 handlers
  39   8. Shift-JIS and BIG5 handlers
  40   9. CCL handlers
  41   10. C library functions
  42   11. Emacs Lisp library functions
  43   12. Postamble
  44
  45 */
  46
  47 /*** 0. General comments ***
  48
  49
  50 CODING SYSTEM
  51
  52   A coding system is an object for an encoding mechanism that contains
  53   information about how to convert byte sequences to character
  54   sequences and vice versa.  When we say "decode", it means converting
  55   a byte sequence of a specific coding system into a character
  56   sequence that is represented by Emacs' internal coding system
  57   `emacs-utf-8', and when we say "encode", it means converting a
  58   character sequence of emacs-utf-8 to a byte sequence of a specific
  59   coding system.
  60
  61   In Emacs Lisp, a coding system is represented by a Lisp symbol.  In
  62   C level, a coding system is represented by a vector of attributes
  63   stored in the hash table Vcharset_hash_table.  The conversion from
  64   coding system symbol to attributes vector is done by looking up
  65   Vcharset_hash_table by the symbol.
  66
  67   Coding systems are classified into the following types depending on
  68   the encoding mechanism.  Here's a brief description of the types.
  69
  70   o UTF-8
  71
  72   o UTF-16
  73
  74   o Charset-base coding system
  75
  76   A coding system defined by one or more (coded) character sets.
  77   Decoding and encoding are done by a code converter defined for each
  78   character set.
  79
  80   o Old Emacs internal format (emacs-mule)
  81
  82   The coding system adopted by old versions of Emacs (20 and 21).
  83
  84   o ISO2022-base coding system
  85
  86   The most famous coding system for multiple character sets.  X's
  87   Compound Text, various EUCs (Extended Unix Code), and coding systems
  88   used in the Internet communication such as ISO-2022-JP are all
  89   variants of ISO2022.
  90
  91   o SJIS (or Shift-JIS or MS-Kanji-Code)
  92
  93   A coding system to encode character sets: ASCII, JISX0201, and
  94   JISX0208.  Widely used for PC's in Japan.  Details are described in
  95   section 8.
  96
  97   o BIG5
  98
  99   A coding system to encode character sets: ASCII and Big5.  Widely
 100   used for Chinese (mainly in Taiwan and Hong Kong).  Details are
 101   described in section 8.  In this file, when we write "big5" (all
 102   lowercase), we mean the coding system, and when we write "Big5"
 103   (capitalized), we mean the character set.
 104
 105   o CCL
 106
 107   If a user wants to decode/encode text encoded in a coding system
 108   not listed above, he can supply a decoder and an encoder for it in
 109   CCL (Code Conversion Language) programs.  Emacs executes the CCL
 110   program while decoding/encoding.
 111
 112   o Raw-text
 113
 114   A coding system for text containing raw eight-bit data.  Emacs
 115   treats each byte of source text as a character (except for
 116   end-of-line conversion).
 117
 118   o No-conversion
 119
 120   Like raw text, but don't do end-of-line conversion.
 121
 122
 123 END-OF-LINE FORMAT
 124
 125   How text end-of-line is encoded depends on operating system.  For
 126   instance, Unix's format is just one byte of LF (line-feed) code,
 127   whereas DOS's format is two-byte sequence of `carriage-return' and
 128   `line-feed' codes.  MacOS's format is usually one byte of
 129   `carriage-return'.
 130
 131   Since text character encoding and end-of-line encoding are
 132   independent, any coding system described above can take any format
 133   of end-of-line (except for no-conversion).
 134
 135 STRUCT CODING_SYSTEM
 136
 137   Before using a coding system for code conversion (i.e. decoding and
 138   encoding), we setup a structure of type `struct coding_system'.
 139   This structure keeps various information about a specific code
 140   conversion (e.g. the location of source and destination data).
 141
 142 */
 143
 144 /* COMMON MACROS */
 145
 146
 147 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 148
 149   These functions check if a byte sequence specified as a source in
 150   CODING conforms to the format of XXX, and update the members of
 151   DETECT_INFO.
 152
 153   Return 1 if the byte sequence conforms to XXX, otherwise return 0.
 154
 155   Below is the template of these functions.  */
 156
 157 #if 0
 158 static int
 159 detect_coding_XXX (coding, detect_info)
 160      struct coding_system *coding;
 161      struct coding_detection_info *detect_info;
 162 {
 163   const unsigned char *src = coding->source;
 164   const unsigned char *src_end = coding->source + coding->src_bytes;
 165   int multibytep = coding->src_multibyte;
 166   int consumed_chars = 0;
 167   int found = 0;
 168   ...;
 169
 170   while (1)
 171     {
 172       /* Get one byte from the source.  If the souce is exausted, jump
 173          to no_more_source:.  */
 174       ONE_MORE_BYTE (c);
 175
 176       if (! __C_conforms_to_XXX___ (c))
 177         break;
 178       if (! __C_strongly_suggests_XXX__ (c))
 179         found = CATEGORY_MASK_XXX;
 180     }
 181   /* The byte sequence is invalid for XXX.  */
 182   detect_info->rejected |= CATEGORY_MASK_XXX;
 183   return 0;
 184
 185  no_more_source:
 186   /* The source exausted successfully.  */
 187   detect_info->found |= found;
 188   return 1;
 189 }
 190 #endif
 191
 192 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 193
 194   These functions decode a byte sequence specified as a source by
 195   CODING.  The resulting multibyte text goes to a place pointed to by
 196   CODING->charbuf, the length of which should not exceed
 197   CODING->charbuf_size;
 198
 199   These functions set the information of original and decoded texts in
 200   CODING->consumed, CODING->consumed_char, and CODING->charbuf_used.
 201   They also set CODING->result to one of CODING_RESULT_XXX indicating
 202   how the decoding is finished.
 203
 204   Below is the template of these functions.  */
 205
 206 #if 0
 207 static void
 208 decode_coding_XXXX (coding)
 209      struct coding_system *coding;
 210 {
 211   const unsigned char *src = coding->source + coding->consumed;
 212   const unsigned char *src_end = coding->source + coding->src_bytes;
 213   /* SRC_BASE remembers the start position in source in each loop.
 214      The loop will be exited when there's not enough source code, or
 215      when there's no room in CHARBUF for a decoded character.  */
 216   const unsigned char *src_base;
 217   /* A buffer to produce decoded characters.  */
 218   int *charbuf = coding->charbuf + coding->charbuf_used;
 219   int *charbuf_end = coding->charbuf + coding->charbuf_size;
 220   int multibytep = coding->src_multibyte;
 221
 222   while (1)
 223     {
 224       src_base = src;
 225       if (charbuf < charbuf_end)
 226         /* No more room to produce a decoded character.  */
 227         break;
 228       ONE_MORE_BYTE (c);
 229       /* Decode it. */
 230     }
 231
 232  no_more_source:
 233   if (src_base < src_end
 234       && coding->mode & CODING_MODE_LAST_BLOCK)
 235     /* If the source ends by partial bytes to construct a character,
 236        treat them as eight-bit raw data.  */
 237     while (src_base < src_end && charbuf < charbuf_end)
 238       *charbuf++ = *src_base++;
 239   /* Remember how many bytes and characters we consumed.  If the
 240      source is multibyte, the bytes and chars are not identical.  */
 241   coding->consumed = coding->consumed_char = src_base - coding->source;
 242   /* Remember how many characters we produced.  */
 243   coding->charbuf_used = charbuf - coding->charbuf;
 244 }
 245 #endif
 246
 247 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 248
 249   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 250   internal multibyte format by CODING.  The resulting byte sequence
 251   goes to a place pointed to by DESTINATION, the length of which
 252   should not exceed DST_BYTES.
 253
 254   These functions set the information of original and encoded texts in
 255   the members produced, produced_char, consumed, and consumed_char of
 256   the structure *CODING.  They also set the member result to one of
 257   CODING_RESULT_XXX indicating how the encoding finished.
 258
 259   DST_BYTES zero means that source area and destination area are
 260   overlapped, which means that we can produce a encoded text until it
 261   reaches at the head of not-yet-encoded source text.
 262
 263   Below is a template of these functions.  */
 264 #if 0
 265 static void
 266 encode_coding_XXX (coding)
 267      struct coding_system *coding;
 268 {
 269   int multibytep = coding->dst_multibyte;
 270   int *charbuf = coding->charbuf;
 271   int *charbuf_end = charbuf->charbuf + coding->charbuf_used;
 272   unsigned char *dst = coding->destination + coding->produced;
 273   unsigned char *dst_end = coding->destination + coding->dst_bytes;
 274   unsigned char *adjusted_dst_end = dst_end - _MAX_BYTES_PRODUCED_IN_LOOP_;
 275   int produced_chars = 0;
 276
 277   for (; charbuf < charbuf_end && dst < adjusted_dst_end; charbuf++)
 278     {
 279       int c = *charbuf;
 280       /* Encode C into DST, and increment DST.  */
 281     }
 282  label_no_more_destination:
 283   /* How many chars and bytes we produced.  */
 284   coding->produced_char += produced_chars;
 285   coding->produced = dst - coding->destination;
 286 }
 287 #endif
 288
 289 \f
 290 /*** 1. Preamble ***/
 291
 292 #include <config.h>
 293 #include <stdio.h>
 294
 295 #include "lisp.h"
 296 #include "buffer.h"
 297 #include "character.h"
 298 #include "charset.h"
 299 #include "ccl.h"
 300 #include "composite.h"
 301 #include "coding.h"
 302 #include "window.h"
 303 #include "frame.h"
 304 #include "termhooks.h"
 305
 306 Lisp_Object Vcoding_system_hash_table;
 307
 308 Lisp_Object Qcoding_system, Qcoding_aliases, Qeol_type;
 309 Lisp_Object Qunix, Qdos;
 310 extern Lisp_Object Qmac;        /* frame.c */
 311 Lisp_Object Qbuffer_file_coding_system;
 312 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 313 Lisp_Object Qdefault_char;
 314 Lisp_Object Qno_conversion, Qundecided;
 315 Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 316 Lisp_Object Qbig, Qlittle;
 317 Lisp_Object Qcoding_system_history;
 318 Lisp_Object Qvalid_codes;
 319 Lisp_Object QCcategory, QCmnemonic, QCdefalut_char;
 320 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 321 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 322 Lisp_Object QCascii_compatible_p;
 323
 324 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 325 Lisp_Object Qcall_process, Qcall_process_region;
 326 Lisp_Object Qstart_process, Qopen_network_stream;
 327 Lisp_Object Qtarget_idx;
 328
 329 Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
 330 Lisp_Object Qinterrupted, Qinsufficient_memory;
 331
 332 extern Lisp_Object Qcompletion_ignore_case;
 333
 334 /* If a symbol has this property, evaluate the value to define the
 335    symbol as a coding system.  */
 336 static Lisp_Object Qcoding_system_define_form;
 337
 338 int coding_system_require_warning;
 339
 340 Lisp_Object Vselect_safe_coding_system_function;
 341
 342 /* Mnemonic string for each format of end-of-line.  */
 343 Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 344 /* Mnemonic string to indicate format of end-of-line is not yet
 345    decided.  */
 346 Lisp_Object eol_mnemonic_undecided;
 347
 348 /* Format of end-of-line decided by system.  This is Qunix on
 349    Unix and Mac, Qdos on DOS/Windows.
 350    This has an effect only for external encoding (i.e. for output to
 351    file and process), not for in-buffer or Lisp string encoding.  */
 352 static Lisp_Object system_eol_type;
 353
 354 #ifdef emacs
 355
 356 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 357
 358 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 359
 360 /* Coding system emacs-mule and raw-text are for converting only
 361    end-of-line format.  */
 362 Lisp_Object Qemacs_mule, Qraw_text;
 363 Lisp_Object Qutf_8_emacs;
 364
 365 /* Coding-systems are handed between Emacs Lisp programs and C internal
 366    routines by the following three variables.  */
 367 /* Coding-system for reading files and receiving data from process.  */
 368 Lisp_Object Vcoding_system_for_read;
 369 /* Coding-system for writing files and sending data to process.  */
 370 Lisp_Object Vcoding_system_for_write;
 371 /* Coding-system actually used in the latest I/O.  */
 372 Lisp_Object Vlast_coding_system_used;
 373 /* Set to non-nil when an error is detected while code conversion.  */
 374 Lisp_Object Vlast_code_conversion_error;
 375 /* A vector of length 256 which contains information about special
 376    Latin codes (especially for dealing with Microsoft codes).  */
 377 Lisp_Object Vlatin_extra_code_table;
 378
 379 /* Flag to inhibit code conversion of end-of-line format.  */
 380 int inhibit_eol_conversion;
 381
 382 /* Flag to inhibit ISO2022 escape sequence detection.  */
 383 int inhibit_iso_escape_detection;
 384
 385 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 386 int inherit_process_coding_system;
 387
 388 /* Coding system to be used to encode text for terminal display when
 389    terminal coding system is nil.  */
 390 struct coding_system safe_terminal_coding;
 391
 392 Lisp_Object Vfile_coding_system_alist;
 393 Lisp_Object Vprocess_coding_system_alist;
 394 Lisp_Object Vnetwork_coding_system_alist;
 395
 396 Lisp_Object Vlocale_coding_system;
 397
 398 #endif /* emacs */
 399
 400 /* Flag to tell if we look up translation table on character code
 401    conversion.  */
 402 Lisp_Object Venable_character_translation;
 403 /* Standard translation table to look up on decoding (reading).  */
 404 Lisp_Object Vstandard_translation_table_for_decode;
 405 /* Standard translation table to look up on encoding (writing).  */
 406 Lisp_Object Vstandard_translation_table_for_encode;
 407
 408 Lisp_Object Qtranslation_table;
 409 Lisp_Object Qtranslation_table_id;
 410 Lisp_Object Qtranslation_table_for_decode;
 411 Lisp_Object Qtranslation_table_for_encode;
 412
 413 /* Alist of charsets vs revision number.  */
 414 static Lisp_Object Vcharset_revision_table;
 415
 416 /* Default coding systems used for process I/O.  */
 417 Lisp_Object Vdefault_process_coding_system;
 418
 419 /* Char table for translating Quail and self-inserting input.  */
 420 Lisp_Object Vtranslation_table_for_input;
 421
 422 /* Two special coding systems.  */
 423 Lisp_Object Vsjis_coding_system;
 424 Lisp_Object Vbig5_coding_system;
 425
 426 /* ISO2022 section */
 427
 428 #define CODING_ISO_INITIAL(coding, reg)                 \
 429   (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id),    \
 430                      coding_attr_iso_initial),          \
 431                reg)))
 432
 433
 434 #define CODING_ISO_REQUEST(coding, charset_id)  \
 435   ((charset_id <= (coding)->max_charset_id      \
 436     ? (coding)->safe_charsets[charset_id]       \
 437     : -1))
 438
 439
 440 #define CODING_ISO_FLAGS(coding)        \
 441   ((coding)->spec.iso_2022.flags)
 442 #define CODING_ISO_DESIGNATION(coding, reg)     \
 443   ((coding)->spec.iso_2022.current_designation[reg])
 444 #define CODING_ISO_INVOCATION(coding, plane)    \
 445   ((coding)->spec.iso_2022.current_invocation[plane])
 446 #define CODING_ISO_SINGLE_SHIFTING(coding)      \
 447   ((coding)->spec.iso_2022.single_shifting)
 448 #define CODING_ISO_BOL(coding)  \
 449   ((coding)->spec.iso_2022.bol)
 450 #define CODING_ISO_INVOKED_CHARSET(coding, plane)       \
 451   CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
 452
 453 /* Control characters of ISO2022.  */
 454                         /* code */      /* function */
 455 #define ISO_CODE_LF     0x0A            /* line-feed */
 456 #define ISO_CODE_CR     0x0D            /* carriage-return */
 457 #define ISO_CODE_SO     0x0E            /* shift-out */
 458 #define ISO_CODE_SI     0x0F            /* shift-in */
 459 #define ISO_CODE_SS2_7  0x19            /* single-shift-2 for 7-bit code */
 460 #define ISO_CODE_ESC    0x1B            /* escape */
 461 #define ISO_CODE_SS2    0x8E            /* single-shift-2 */
 462 #define ISO_CODE_SS3    0x8F            /* single-shift-3 */
 463 #define ISO_CODE_CSI    0x9B            /* control-sequence-introducer */
 464
 465 /* All code (1-byte) of ISO2022 is classified into one of the
 466    followings.  */
 467 enum iso_code_class_type
 468   {
 469     ISO_control_0,              /* Control codes in the range
 470                                    0x00..0x1F and 0x7F, except for the
 471                                    following 5 codes.  */
 472     ISO_shift_out,              /* ISO_CODE_SO (0x0E) */
 473     ISO_shift_in,               /* ISO_CODE_SI (0x0F) */
 474     ISO_single_shift_2_7,       /* ISO_CODE_SS2_7 (0x19) */
 475     ISO_escape,                 /* ISO_CODE_SO (0x1B) */
 476     ISO_control_1,              /* Control codes in the range
 477                                    0x80..0x9F, except for the
 478                                    following 3 codes.  */
 479     ISO_single_shift_2,         /* ISO_CODE_SS2 (0x8E) */
 480     ISO_single_shift_3,         /* ISO_CODE_SS3 (0x8F) */
 481     ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
 482     ISO_0x20_or_0x7F,           /* Codes of the values 0x20 or 0x7F.  */
 483     ISO_graphic_plane_0,        /* Graphic codes in the range 0x21..0x7E.  */
 484     ISO_0xA0_or_0xFF,           /* Codes of the values 0xA0 or 0xFF.  */
 485     ISO_graphic_plane_1         /* Graphic codes in the range 0xA1..0xFE.  */
 486   };
 487
 488 /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the
 489     `iso-flags' attribute of an iso2022 coding system.  */
 490
 491 /* If set, produce long-form designation sequence (e.g. ESC $ ( A)
 492    instead of the correct short-form sequence (e.g. ESC $ A).  */
 493 #define CODING_ISO_FLAG_LONG_FORM       0x0001
 494
 495 /* If set, reset graphic planes and registers at end-of-line to the
 496    initial state.  */
 497 #define CODING_ISO_FLAG_RESET_AT_EOL    0x0002
 498
 499 /* If set, reset graphic planes and registers before any control
 500    characters to the initial state.  */
 501 #define CODING_ISO_FLAG_RESET_AT_CNTL   0x0004
 502
 503 /* If set, encode by 7-bit environment.  */
 504 #define CODING_ISO_FLAG_SEVEN_BITS      0x0008
 505
 506 /* If set, use locking-shift function.  */
 507 #define CODING_ISO_FLAG_LOCKING_SHIFT   0x0010
 508
 509 /* If set, use single-shift function.  Overwrite
 510    CODING_ISO_FLAG_LOCKING_SHIFT.  */
 511 #define CODING_ISO_FLAG_SINGLE_SHIFT    0x0020
 512
 513 /* If set, use designation escape sequence.  */
 514 #define CODING_ISO_FLAG_DESIGNATION     0x0040
 515
 516 /* If set, produce revision number sequence.  */
 517 #define CODING_ISO_FLAG_REVISION        0x0080
 518
 519 /* If set, produce ISO6429's direction specifying sequence.  */
 520 #define CODING_ISO_FLAG_DIRECTION       0x0100
 521
 522 /* If set, assume designation states are reset at beginning of line on
 523    output.  */
 524 #define CODING_ISO_FLAG_INIT_AT_BOL     0x0200
 525
 526 /* If set, designation sequence should be placed at beginning of line
 527    on output.  */
 528 #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400
 529
 530 /* If set, do not encode unsafe charactes on output.  */
 531 #define CODING_ISO_FLAG_SAFE            0x0800
 532
 533 /* If set, extra latin codes (128..159) are accepted as a valid code
 534    on input.  */
 535 #define CODING_ISO_FLAG_LATIN_EXTRA     0x1000
 536
 537 #define CODING_ISO_FLAG_COMPOSITION     0x2000
 538
 539 #define CODING_ISO_FLAG_EUC_TW_SHIFT    0x4000
 540
 541 #define CODING_ISO_FLAG_USE_ROMAN       0x8000
 542
 543 #define CODING_ISO_FLAG_USE_OLDJIS      0x10000
 544
 545 #define CODING_ISO_FLAG_FULL_SUPPORT    0x100000
 546
 547 /* A character to be produced on output if encoding of the original
 548    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 549 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 550
 551
 552 /* UTF-16 section */
 553 #define CODING_UTF_16_BOM(coding)       \
 554   ((coding)->spec.utf_16.bom)
 555
 556 #define CODING_UTF_16_ENDIAN(coding)    \
 557   ((coding)->spec.utf_16.endian)
 558
 559 #define CODING_UTF_16_SURROGATE(coding) \
 560   ((coding)->spec.utf_16.surrogate)
 561
 562
 563 /* CCL section */
 564 #define CODING_CCL_DECODER(coding)      \
 565   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder)
 566 #define CODING_CCL_ENCODER(coding)      \
 567   AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder)
 568 #define CODING_CCL_VALIDS(coding)                                          \
 569   (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)))
 570
 571 /* Index for each coding category in `coding_categories' */
 572
 573 enum coding_category
 574   {
 575     coding_category_iso_7,
 576     coding_category_iso_7_tight,
 577     coding_category_iso_8_1,
 578     coding_category_iso_8_2,
 579     coding_category_iso_7_else,
 580     coding_category_iso_8_else,
 581     coding_category_utf_8,
 582     coding_category_utf_16_auto,
 583     coding_category_utf_16_be,
 584     coding_category_utf_16_le,
 585     coding_category_utf_16_be_nosig,
 586     coding_category_utf_16_le_nosig,
 587     coding_category_charset,
 588     coding_category_sjis,
 589     coding_category_big5,
 590     coding_category_ccl,
 591     coding_category_emacs_mule,
 592     /* All above are targets of code detection.  */
 593     coding_category_raw_text,
 594     coding_category_undecided,
 595     coding_category_max
 596   };
 597
 598 /* Definitions of flag bits used in detect_coding_XXXX.  */
 599 #define CATEGORY_MASK_ISO_7             (1 << coding_category_iso_7)
 600 #define CATEGORY_MASK_ISO_7_TIGHT       (1 << coding_category_iso_7_tight)
 601 #define CATEGORY_MASK_ISO_8_1           (1 << coding_category_iso_8_1)
 602 #define CATEGORY_MASK_ISO_8_2           (1 << coding_category_iso_8_2)
 603 #define CATEGORY_MASK_ISO_7_ELSE        (1 << coding_category_iso_7_else)
 604 #define CATEGORY_MASK_ISO_8_ELSE        (1 << coding_category_iso_8_else)
 605 #define CATEGORY_MASK_UTF_8             (1 << coding_category_utf_8)
 606 #define CATEGORY_MASK_UTF_16_AUTO       (1 << coding_category_utf_16_auto)
 607 #define CATEGORY_MASK_UTF_16_BE         (1 << coding_category_utf_16_be)
 608 #define CATEGORY_MASK_UTF_16_LE         (1 << coding_category_utf_16_le)
 609 #define CATEGORY_MASK_UTF_16_BE_NOSIG   (1 << coding_category_utf_16_be_nosig)
 610 #define CATEGORY_MASK_UTF_16_LE_NOSIG   (1 << coding_category_utf_16_le_nosig)
 611 #define CATEGORY_MASK_CHARSET           (1 << coding_category_charset)
 612 #define CATEGORY_MASK_SJIS              (1 << coding_category_sjis)
 613 #define CATEGORY_MASK_BIG5              (1 << coding_category_big5)
 614 #define CATEGORY_MASK_CCL               (1 << coding_category_ccl)
 615 #define CATEGORY_MASK_EMACS_MULE        (1 << coding_category_emacs_mule)
 616 #define CATEGORY_MASK_RAW_TEXT          (1 << coding_category_raw_text)
 617
 618 /* This value is returned if detect_coding_mask () find nothing other
 619    than ASCII characters.  */
 620 #define CATEGORY_MASK_ANY               \
 621   (CATEGORY_MASK_ISO_7                  \
 622    | CATEGORY_MASK_ISO_7_TIGHT          \
 623    | CATEGORY_MASK_ISO_8_1              \
 624    | CATEGORY_MASK_ISO_8_2              \
 625    | CATEGORY_MASK_ISO_7_ELSE           \
 626    | CATEGORY_MASK_ISO_8_ELSE           \
 627    | CATEGORY_MASK_UTF_8                \
 628    | CATEGORY_MASK_UTF_16_BE            \
 629    | CATEGORY_MASK_UTF_16_LE            \
 630    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 631    | CATEGORY_MASK_UTF_16_LE_NOSIG      \
 632    | CATEGORY_MASK_CHARSET              \
 633    | CATEGORY_MASK_SJIS                 \
 634    | CATEGORY_MASK_BIG5                 \
 635    | CATEGORY_MASK_CCL                  \
 636    | CATEGORY_MASK_EMACS_MULE)
 637
 638
 639 #define CATEGORY_MASK_ISO_7BIT \
 640   (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT)
 641
 642 #define CATEGORY_MASK_ISO_8BIT \
 643   (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2)
 644
 645 #define CATEGORY_MASK_ISO_ELSE \
 646   (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE)
 647
 648 #define CATEGORY_MASK_ISO_ESCAPE        \
 649   (CATEGORY_MASK_ISO_7                  \
 650    | CATEGORY_MASK_ISO_7_TIGHT          \
 651    | CATEGORY_MASK_ISO_7_ELSE           \
 652    | CATEGORY_MASK_ISO_8_ELSE)
 653
 654 #define CATEGORY_MASK_ISO       \
 655   (  CATEGORY_MASK_ISO_7BIT     \
 656      | CATEGORY_MASK_ISO_8BIT   \
 657      | CATEGORY_MASK_ISO_ELSE)
 658
 659 #define CATEGORY_MASK_UTF_16            \
 660   (CATEGORY_MASK_UTF_16_BE              \
 661    | CATEGORY_MASK_UTF_16_LE            \
 662    | CATEGORY_MASK_UTF_16_BE_NOSIG      \
 663    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 664
 665
 666 /* List of symbols `coding-category-xxx' ordered by priority.  This
 667    variable is exposed to Emacs Lisp.  */
 668 static Lisp_Object Vcoding_category_list;
 669
 670 /* Table of coding categories (Lisp symbols).  This variable is for
 671    internal use oly.  */
 672 static Lisp_Object Vcoding_category_table;
 673
 674 /* Table of coding-categories ordered by priority.  */
 675 static enum coding_category coding_priorities[coding_category_max];
 676
 677 /* Nth element is a coding context for the coding system bound to the
 678    Nth coding category.  */
 679 static struct coding_system coding_categories[coding_category_max];
 680
 681 /*** Commonly used macros and functions ***/
 682
 683 #ifndef min
 684 #define min(a, b) ((a) < (b) ? (a) : (b))
 685 #endif
 686 #ifndef max
 687 #define max(a, b) ((a) > (b) ? (a) : (b))
 688 #endif
 689
 690 #define CODING_GET_INFO(coding, attrs, charset_list)    \
 691   do {                                                  \
 692     (attrs) = CODING_ID_ATTRS ((coding)->id);           \
 693     (charset_list) = CODING_ATTR_CHARSET_LIST (attrs);  \
 694   } while (0)
 695
 696
 697 /* Safely get one byte from the source text pointed by SRC which ends
 698    at SRC_END, and set C to that byte.  If there are not enough bytes
 699    in the source, it jumps to `no_more_source'.  If multibytep is
 700    nonzero, and a multibyte character is found at SRC, set C to the
 701    negative value of the character code.  The caller should declare
 702    and set these variables appropriately in advance:
 703         src, src_end, multibytep */
 704
 705 #define ONE_MORE_BYTE(c)                                \
 706   do {                                                  \
 707     if (src == src_end)                                 \
 708       {                                                 \
 709         if (src_base < src)                             \
 710           record_conversion_result                      \
 711             (coding, CODING_RESULT_INSUFFICIENT_SRC);   \
 712         goto no_more_source;                            \
 713       }                                                 \
 714     c = *src++;                                         \
 715     if (multibytep && (c & 0x80))                       \
 716       {                                                 \
 717         if ((c & 0xFE) == 0xC0)                         \
 718           c = ((c & 1) << 6) | *src++;                  \
 719         else                                            \
 720           {                                             \
 721             src--;                                      \
 722             c = - string_char (src, &src, NULL);        \
 723             record_conversion_result                    \
 724               (coding, CODING_RESULT_INVALID_SRC);      \
 725           }                                             \
 726       }                                                 \
 727     consumed_chars++;                                   \
 728   } while (0)
 729
 730
 731 #define ONE_MORE_BYTE_NO_CHECK(c)                       \
 732   do {                                                  \
 733     c = *src++;                                         \
 734     if (multibytep && (c & 0x80))                       \
 735       {                                                 \
 736         if ((c & 0xFE) == 0xC0)                         \
 737           c = ((c & 1) << 6) | *src++;                  \
 738         else                                            \
 739           {                                             \
 740             src--;                                      \
 741             c = - string_char (src, &src, NULL);        \
 742             record_conversion_result                    \
 743               (coding, CODING_RESULT_INVALID_SRC);      \
 744           }                                             \
 745       }                                                 \
 746     consumed_chars++;                                   \
 747   } while (0)
 748
 749
 750 /* Store a byte C in the place pointed by DST and increment DST to the
 751    next free point, and increment PRODUCED_CHARS.  The caller should
 752    assure that C is 0..127, and declare and set the variable `dst'
 753    appropriately in advance.
 754 */
 755
 756
 757 #define EMIT_ONE_ASCII_BYTE(c)  \
 758   do {                          \
 759     produced_chars++;           \
 760     *dst++ = (c);               \
 761   } while (0)
 762
 763
 764 /* Like EMIT_ONE_ASCII_BYTE byt store two bytes; C1 and C2.  */
 765
 766 #define EMIT_TWO_ASCII_BYTES(c1, c2)    \
 767   do {                                  \
 768     produced_chars += 2;                \
 769     *dst++ = (c1), *dst++ = (c2);       \
 770   } while (0)
 771
 772
 773 /* Store a byte C in the place pointed by DST and increment DST to the
 774    next free point, and increment PRODUCED_CHARS.  If MULTIBYTEP is
 775    nonzero, store in an appropriate multibyte from.  The caller should
 776    declare and set the variables `dst' and `multibytep' appropriately
 777    in advance.  */
 778
 779 #define EMIT_ONE_BYTE(c)                \
 780   do {                                  \
 781     produced_chars++;                   \
 782     if (multibytep)                     \
 783       {                                 \
 784         int ch = (c);                   \
 785         if (ch >= 0x80)                 \
 786           ch = BYTE8_TO_CHAR (ch);      \
 787         CHAR_STRING_ADVANCE (ch, dst);  \
 788       }                                 \
 789     else                                \
 790       *dst++ = (c);                     \
 791   } while (0)
 792
 793
 794 /* Like EMIT_ONE_BYTE, but emit two bytes; C1 and C2.  */
 795
 796 #define EMIT_TWO_BYTES(c1, c2)          \
 797   do {                                  \
 798     produced_chars += 2;                \
 799     if (multibytep)                     \
 800       {                                 \
 801         int ch;                         \
 802                                         \
 803         ch = (c1);                      \
 804         if (ch >= 0x80)                 \
 805           ch = BYTE8_TO_CHAR (ch);      \
 806         CHAR_STRING_ADVANCE (ch, dst);  \
 807         ch = (c2);                      \
 808         if (ch >= 0x80)                 \
 809           ch = BYTE8_TO_CHAR (ch);      \
 810         CHAR_STRING_ADVANCE (ch, dst);  \
 811       }                                 \
 812     else                                \
 813       {                                 \
 814         *dst++ = (c1);                  \
 815         *dst++ = (c2);                  \
 816       }                                 \
 817   } while (0)
 818
 819
 820 #define EMIT_THREE_BYTES(c1, c2, c3)    \
 821   do {                                  \
 822     EMIT_ONE_BYTE (c1);                 \
 823     EMIT_TWO_BYTES (c2, c3);            \
 824   } while (0)
 825
 826
 827 #define EMIT_FOUR_BYTES(c1, c2, c3, c4)         \
 828   do {                                          \
 829     EMIT_TWO_BYTES (c1, c2);                    \
 830     EMIT_TWO_BYTES (c3, c4);                    \
 831   } while (0)
 832
 833
 834 /* Prototypes for static functions.  */
 835 static void record_conversion_result P_ ((struct coding_system *coding,
 836                                           enum coding_result_code result));
 837 static int detect_coding_utf_8 P_ ((struct coding_system *,
 838                                     struct coding_detection_info *info));
 839 static void decode_coding_utf_8 P_ ((struct coding_system *));
 840 static int encode_coding_utf_8 P_ ((struct coding_system *));
 841
 842 static int detect_coding_utf_16 P_ ((struct coding_system *,
 843                                      struct coding_detection_info *info));
 844 static void decode_coding_utf_16 P_ ((struct coding_system *));
 845 static int encode_coding_utf_16 P_ ((struct coding_system *));
 846
 847 static int detect_coding_iso_2022 P_ ((struct coding_system *,
 848                                        struct coding_detection_info *info));
 849 static void decode_coding_iso_2022 P_ ((struct coding_system *));
 850 static int encode_coding_iso_2022 P_ ((struct coding_system *));
 851
 852 static int detect_coding_emacs_mule P_ ((struct coding_system *,
 853                                          struct coding_detection_info *info));
 854 static void decode_coding_emacs_mule P_ ((struct coding_system *));
 855 static int encode_coding_emacs_mule P_ ((struct coding_system *));
 856
 857 static int detect_coding_sjis P_ ((struct coding_system *,
 858                                    struct coding_detection_info *info));
 859 static void decode_coding_sjis P_ ((struct coding_system *));
 860 static int encode_coding_sjis P_ ((struct coding_system *));
 861
 862 static int detect_coding_big5 P_ ((struct coding_system *,
 863                                    struct coding_detection_info *info));
 864 static void decode_coding_big5 P_ ((struct coding_system *));
 865 static int encode_coding_big5 P_ ((struct coding_system *));
 866
 867 static int detect_coding_ccl P_ ((struct coding_system *,
 868                                   struct coding_detection_info *info));
 869 static void decode_coding_ccl P_ ((struct coding_system *));
 870 static int encode_coding_ccl P_ ((struct coding_system *));
 871
 872 static void decode_coding_raw_text P_ ((struct coding_system *));
 873 static int encode_coding_raw_text P_ ((struct coding_system *));
 874
 875 static void coding_set_source P_ ((struct coding_system *));
 876 static void coding_set_destination P_ ((struct coding_system *));
 877 static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT));
 878 static void coding_alloc_by_making_gap P_ ((struct coding_system *,
 879                                             EMACS_INT, EMACS_INT));
 880 static unsigned char *alloc_destination P_ ((struct coding_system *,
 881                                              EMACS_INT, unsigned char *));
 882 static void setup_iso_safe_charsets P_ ((Lisp_Object));
 883 static unsigned char *encode_designation_at_bol P_ ((struct coding_system *,
 884                                                      int *, int *,
 885                                                      unsigned char *));
 886 static int detect_eol P_ ((const unsigned char *,
 887                            EMACS_INT, enum coding_category));
 888 static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
 889 static void decode_eol P_ ((struct coding_system *));
 890 static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
 891 static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *,
 892                                         int, int *, int *));
 893 static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
 894 static INLINE void produce_composition P_ ((struct coding_system *, int *,
 895                                             EMACS_INT));
 896 static INLINE void produce_charset P_ ((struct coding_system *, int *,
 897                                         EMACS_INT));
 898 static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
 899 static int decode_coding P_ ((struct coding_system *));
 900 static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
 901                                                       struct coding_system *,
 902                                                       int *, EMACS_INT *));
 903 static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
 904                                                   struct coding_system *,
 905                                                   int *, EMACS_INT *));
 906 static void consume_chars P_ ((struct coding_system *, Lisp_Object, int));
 907 static int encode_coding P_ ((struct coding_system *));
 908 static Lisp_Object make_conversion_work_buffer P_ ((int));
 909 static Lisp_Object code_conversion_restore P_ ((Lisp_Object));
 910 static INLINE int char_encodable_p P_ ((int, Lisp_Object));
 911 static Lisp_Object make_subsidiaries P_ ((Lisp_Object));
 912
 913 static void
 914 record_conversion_result (struct coding_system *coding,
 915                           enum coding_result_code result)
 916 {
 917   coding->result = result;
 918   switch (result)
 919     {
 920     case CODING_RESULT_INSUFFICIENT_SRC:
 921       Vlast_code_conversion_error = Qinsufficient_source;
 922       break;
 923     case CODING_RESULT_INCONSISTENT_EOL:
 924       Vlast_code_conversion_error = Qinconsistent_eol;
 925       break;
 926     case CODING_RESULT_INVALID_SRC:
 927       Vlast_code_conversion_error = Qinvalid_source;
 928       break;
 929     case CODING_RESULT_INTERRUPT:
 930       Vlast_code_conversion_error = Qinterrupted;
 931       break;
 932     case CODING_RESULT_INSUFFICIENT_MEM:
 933       Vlast_code_conversion_error = Qinsufficient_memory;
 934       break;
 935     default:
 936       Vlast_code_conversion_error = intern ("Unknown error");
 937     }
 938 }
 939
 940 #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \
 941   do {                                                                       \
 942     charset_map_loaded = 0;                                                  \
 943     c = DECODE_CHAR (charset, code);                                         \
 944     if (charset_map_loaded)                                                  \
 945       {                                                                      \
 946         const unsigned char *orig = coding->source;                          \
 947         EMACS_INT offset;                                                    \
 948                                                                              \
 949         coding_set_source (coding);                                          \
 950         offset = coding->source - orig;                                      \
 951         src += offset;                                                       \
 952         src_base += offset;                                                  \
 953         src_end += offset;                                                   \
 954       }                                                                      \
 955   } while (0)
 956
 957
 958 /* If there are at least BYTES length of room at dst, allocate memory
 959    for coding->destination and update dst and dst_end.  We don't have
 960    to take care of coding->source which will be relocated.  It is
 961    handled by calling coding_set_source in encode_coding.  */
 962
 963 #define ASSURE_DESTINATION(bytes)                               \
 964   do {                                                          \
 965     if (dst + (bytes) >= dst_end)                               \
 966       {                                                         \
 967         int more_bytes = charbuf_end - charbuf + (bytes);       \
 968                                                                 \
 969         dst = alloc_destination (coding, more_bytes, dst);      \
 970         dst_end = coding->destination + coding->dst_bytes;      \
 971       }                                                         \
 972   } while (0)
 973
 974
 975
 976 static void
 977 coding_set_source (coding)
 978      struct coding_system *coding;
 979 {
 980   if (BUFFERP (coding->src_object))
 981     {
 982       struct buffer *buf = XBUFFER (coding->src_object);
 983
 984       if (coding->src_pos < 0)
 985         coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte;
 986       else
 987         coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte);
 988     }
 989   else if (STRINGP (coding->src_object))
 990     {
 991       coding->source = SDATA (coding->src_object) + coding->src_pos_byte;
 992     }
 993   else
 994     /* Otherwise, the source is C string and is never relocated
 995        automatically.  Thus we don't have to update anything.  */
 996     ;
 997 }
 998
 999 static void
1000 coding_set_destination (coding)
1001      struct coding_system *coding;
1002 {
1003   if (BUFFERP (coding->dst_object))
1004     {
1005       if (coding->src_pos < 0)
1006         {
1007           coding->destination = BEG_ADDR + coding->dst_pos_byte - BEG_BYTE;
1008           coding->dst_bytes = (GAP_END_ADDR
1009                                - (coding->src_bytes - coding->consumed)
1010                                - coding->destination);
1011         }
1012       else
1013         {
1014           /* We are sure that coding->dst_pos_byte is before the gap
1015              of the buffer. */
1016           coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object))
1017                                  + coding->dst_pos_byte - BEG_BYTE);
1018           coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object))
1019                                - coding->destination);
1020         }
1021     }
1022   else
1023     /* Otherwise, the destination is C string and is never relocated
1024        automatically.  Thus we don't have to update anything.  */
1025     ;
1026 }
1027
1028
1029 static void
1030 coding_alloc_by_realloc (coding, bytes)
1031      struct coding_system *coding;
1032      EMACS_INT bytes;
1033 {
1034   coding->destination = (unsigned char *) xrealloc (coding->destination,
1035                                                     coding->dst_bytes + bytes);
1036   coding->dst_bytes += bytes;
1037 }
1038
1039 static void
1040 coding_alloc_by_making_gap (coding, offset, bytes)
1041      struct coding_system *coding;
1042      EMACS_INT offset, bytes;
1043 {
1044   if (BUFFERP (coding->dst_object)
1045       && EQ (coding->src_object, coding->dst_object))
1046     {
1047       EMACS_INT add = offset + (coding->src_bytes - coding->consumed);
1048
1049       GPT += offset, GPT_BYTE += offset;
1050       GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
1051       make_gap (bytes);
1052       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
1053       GPT -= offset, GPT_BYTE -= offset;
1054     }
1055   else
1056     {
1057       Lisp_Object this_buffer;
1058
1059       this_buffer = Fcurrent_buffer ();
1060       set_buffer_internal (XBUFFER (coding->dst_object));
1061       make_gap (bytes);
1062       set_buffer_internal (XBUFFER (this_buffer));
1063     }
1064 }
1065
1066
1067 static unsigned char *
1068 alloc_destination (coding, nbytes, dst)
1069      struct coding_system *coding;
1070      EMACS_INT nbytes;
1071      unsigned char *dst;
1072 {
1073   EMACS_INT offset = dst - coding->destination;
1074
1075   if (BUFFERP (coding->dst_object))
1076     coding_alloc_by_making_gap (coding, offset, nbytes);
1077   else
1078     coding_alloc_by_realloc (coding, nbytes);
1079   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1080   coding_set_destination (coding);
1081   dst = coding->destination + offset;
1082   return dst;
1083 }
1084
1085 /** Macros for annotations.  */
1086
1087 /* Maximum length of annotation data (sum of annotations for
1088    composition and charset).  */
1089 #define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
1090
1091 /* An annotation data is stored in the array coding->charbuf in this
1092    format:
1093      [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1094    LENGTH is the number of elements in the annotation.
1095    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1096    NCHARS is the number of characters in the text annotated.
1097
1098    The format of the following elements depend on ANNOTATION_MASK.
1099
1100    In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1101    follows:
1102      ... METHOD [ COMPOSITION-COMPONENTS ... ]
1103    METHOD is one of enum composition_method.
1104    Optionnal COMPOSITION-COMPONENTS are characters and composition
1105    rules.
1106
1107    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1108    follows.  */
1109
1110 #define ADD_ANNOTATION_DATA(buf, len, mask, nchars)     \
1111   do {                                                  \
1112     *(buf)++ = -(len);                                  \
1113     *(buf)++ = (mask);                                  \
1114     *(buf)++ = (nchars);                                \
1115     coding->annotated = 1;                              \
1116   } while (0);
1117
1118 #define ADD_COMPOSITION_DATA(buf, nchars, method)                           \
1119   do {                                                                      \
1120     ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
1121     *buf++ = method;                                                        \
1122   } while (0)
1123
1124
1125 #define ADD_CHARSET_DATA(buf, nchars, id)                               \
1126   do {                                                                  \
1127     ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars); \
1128     *buf++ = id;                                                        \
1129   } while (0)
1130
1131 \f
1132 /*** 2. Emacs' internal format (emacs-utf-8) ***/
1133
1134
1135
1136 \f
1137 /*** 3. UTF-8 ***/
1138
1139 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1140    Check if a text is encoded in UTF-8.  If it is, return 1, else
1141    return 0.  */
1142
1143 #define UTF_8_1_OCTET_P(c)         ((c) < 0x80)
1144 #define UTF_8_EXTRA_OCTET_P(c)     (((c) & 0xC0) == 0x80)
1145 #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0)
1146 #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0)
1147 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
1148 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
1149
1150 static int
1151 detect_coding_utf_8 (coding, detect_info)
1152      struct coding_system *coding;
1153      struct coding_detection_info *detect_info;
1154 {
1155   const unsigned char *src = coding->source, *src_base;
1156   const unsigned char *src_end = coding->source + coding->src_bytes;
1157   int multibytep = coding->src_multibyte;
1158   int consumed_chars = 0;
1159   int found = 0;
1160
1161   detect_info->checked |= CATEGORY_MASK_UTF_8;
1162   /* A coding system of this category is always ASCII compatible.  */
1163   src += coding->head_ascii;
1164
1165   while (1)
1166     {
1167       int c, c1, c2, c3, c4;
1168
1169       src_base = src;
1170       ONE_MORE_BYTE (c);
1171       if (c < 0 || UTF_8_1_OCTET_P (c))
1172         continue;
1173       ONE_MORE_BYTE (c1);
1174       if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
1175         break;
1176       if (UTF_8_2_OCTET_LEADING_P (c))
1177         {
1178           found = CATEGORY_MASK_UTF_8;
1179           continue;
1180         }
1181       ONE_MORE_BYTE (c2);
1182       if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1183         break;
1184       if (UTF_8_3_OCTET_LEADING_P (c))
1185         {
1186           found = CATEGORY_MASK_UTF_8;
1187           continue;
1188         }
1189       ONE_MORE_BYTE (c3);
1190       if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1191         break;
1192       if (UTF_8_4_OCTET_LEADING_P (c))
1193         {
1194           found = CATEGORY_MASK_UTF_8;
1195           continue;
1196         }
1197       ONE_MORE_BYTE (c4);
1198       if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1199         break;
1200       if (UTF_8_5_OCTET_LEADING_P (c))
1201         {
1202           found = CATEGORY_MASK_UTF_8;
1203           continue;
1204         }
1205       break;
1206     }
1207   detect_info->rejected |= CATEGORY_MASK_UTF_8;
1208   return 0;
1209
1210  no_more_source:
1211   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1212     {
1213       detect_info->rejected |= CATEGORY_MASK_UTF_8;
1214       return 0;
1215     }
1216   detect_info->found |= found;
1217   return 1;
1218 }
1219
1220
1221 static void
1222 decode_coding_utf_8 (coding)
1223      struct coding_system *coding;
1224 {
1225   const unsigned char *src = coding->source + coding->consumed;
1226   const unsigned char *src_end = coding->source + coding->src_bytes;
1227   const unsigned char *src_base;
1228   int *charbuf = coding->charbuf + coding->charbuf_used;
1229   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1230   int consumed_chars = 0, consumed_chars_base;
1231   int multibytep = coding->src_multibyte;
1232   Lisp_Object attr, charset_list;
1233   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1234   int byte_after_cr = -1;
1235
1236   CODING_GET_INFO (coding, attr, charset_list);
1237
1238   while (1)
1239     {
1240       int c, c1, c2, c3, c4, c5;
1241
1242       src_base = src;
1243       consumed_chars_base = consumed_chars;
1244
1245       if (charbuf >= charbuf_end)
1246         break;
1247
1248       if (byte_after_cr >= 0)
1249         c1 = byte_after_cr, byte_after_cr = -1;
1250       else
1251         ONE_MORE_BYTE (c1);
1252       if (c1 < 0)
1253         {
1254           c = - c1;
1255         }
1256       else if (UTF_8_1_OCTET_P(c1))
1257         {
1258           if (eol_crlf && c1 == '\r')
1259             ONE_MORE_BYTE (byte_after_cr);
1260           c = c1;
1261         }
1262       else
1263         {
1264           ONE_MORE_BYTE (c2);
1265           if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2))
1266             goto invalid_code;
1267           if (UTF_8_2_OCTET_LEADING_P (c1))
1268             {
1269               c = ((c1 & 0x1F) << 6) | (c2 & 0x3F);
1270               /* Reject overlong sequences here and below.  Encoders
1271                  producing them are incorrect, they can be misleading,
1272                  and they mess up read/write invariance.  */
1273               if (c < 128)
1274                 goto invalid_code;
1275             }
1276           else
1277             {
1278               ONE_MORE_BYTE (c3);
1279               if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3))
1280                 goto invalid_code;
1281               if (UTF_8_3_OCTET_LEADING_P (c1))
1282                 {
1283                   c = (((c1 & 0xF) << 12)
1284                        | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
1285                   if (c < 0x800
1286                       || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
1287                     goto invalid_code;
1288                 }
1289               else
1290                 {
1291                   ONE_MORE_BYTE (c4);
1292                   if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
1293                     goto invalid_code;
1294                   if (UTF_8_4_OCTET_LEADING_P (c1))
1295                     {
1296                     c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12)
1297                          | ((c3 & 0x3F) << 6) | (c4 & 0x3F));
1298                     if (c < 0x10000)
1299                       goto invalid_code;
1300                     }
1301                   else
1302                     {
1303                       ONE_MORE_BYTE (c5);
1304                       if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5))
1305                         goto invalid_code;
1306                       if (UTF_8_5_OCTET_LEADING_P (c1))
1307                         {
1308                           c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18)
1309                                | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6)
1310                                | (c5 & 0x3F));
1311                           if ((c > MAX_CHAR) || (c < 0x200000))
1312                             goto invalid_code;
1313                         }
1314                       else
1315                         goto invalid_code;
1316                     }
1317                 }
1318             }
1319         }
1320
1321       *charbuf++ = c;
1322       continue;
1323
1324     invalid_code:
1325       src = src_base;
1326       consumed_chars = consumed_chars_base;
1327       ONE_MORE_BYTE (c);
1328       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
1329       coding->errors++;
1330     }
1331
1332  no_more_source:
1333   coding->consumed_char += consumed_chars_base;
1334   coding->consumed = src_base - coding->source;
1335   coding->charbuf_used = charbuf - coding->charbuf;
1336 }
1337
1338
1339 static int
1340 encode_coding_utf_8 (coding)
1341      struct coding_system *coding;
1342 {
1343   int multibytep = coding->dst_multibyte;
1344   int *charbuf = coding->charbuf;
1345   int *charbuf_end = charbuf + coding->charbuf_used;
1346   unsigned char *dst = coding->destination + coding->produced;
1347   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1348   int produced_chars = 0;
1349   int c;
1350
1351   if (multibytep)
1352     {
1353       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
1354
1355       while (charbuf < charbuf_end)
1356         {
1357           unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str;
1358
1359           ASSURE_DESTINATION (safe_room);
1360           c = *charbuf++;
1361           if (CHAR_BYTE8_P (c))
1362             {
1363               c = CHAR_TO_BYTE8 (c);
1364               EMIT_ONE_BYTE (c);
1365             }
1366           else
1367             {
1368               CHAR_STRING_ADVANCE (c, pend);
1369               for (p = str; p < pend; p++)
1370                 EMIT_ONE_BYTE (*p);
1371             }
1372         }
1373     }
1374   else
1375     {
1376       int safe_room = MAX_MULTIBYTE_LENGTH;
1377
1378       while (charbuf < charbuf_end)
1379         {
1380           ASSURE_DESTINATION (safe_room);
1381           c = *charbuf++;
1382           if (CHAR_BYTE8_P (c))
1383             *dst++ = CHAR_TO_BYTE8 (c);
1384           else
1385             dst += CHAR_STRING (c, dst);
1386           produced_chars++;
1387         }
1388     }
1389   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1390   coding->produced_char += produced_chars;
1391   coding->produced = dst - coding->destination;
1392   return 0;
1393 }
1394
1395
1396 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1397    Check if a text is encoded in one of UTF-16 based coding systems.
1398    If it is, return 1, else return 0.  */
1399
1400 #define UTF_16_HIGH_SURROGATE_P(val) \
1401   (((val) & 0xFC00) == 0xD800)
1402
1403 #define UTF_16_LOW_SURROGATE_P(val) \
1404   (((val) & 0xFC00) == 0xDC00)
1405
1406 #define UTF_16_INVALID_P(val)   \
1407   (((val) == 0xFFFE)            \
1408    || ((val) == 0xFFFF)         \
1409    || UTF_16_LOW_SURROGATE_P (val))
1410
1411
1412 static int
1413 detect_coding_utf_16 (coding, detect_info)
1414      struct coding_system *coding;
1415      struct coding_detection_info *detect_info;
1416 {
1417   const unsigned char *src = coding->source, *src_base = src;
1418   const unsigned char *src_end = coding->source + coding->src_bytes;
1419   int multibytep = coding->src_multibyte;
1420   int consumed_chars = 0;
1421   int c1, c2;
1422
1423   detect_info->checked |= CATEGORY_MASK_UTF_16;
1424   if (coding->mode & CODING_MODE_LAST_BLOCK
1425       && (coding->src_chars & 1))
1426     {
1427       detect_info->rejected |= CATEGORY_MASK_UTF_16;
1428       return 0;
1429     }
1430
1431   ONE_MORE_BYTE (c1);
1432   ONE_MORE_BYTE (c2);
1433   if ((c1 == 0xFF) && (c2 == 0xFE))
1434     {
1435       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
1436                              | CATEGORY_MASK_UTF_16_AUTO);
1437       detect_info->rejected |= (CATEGORY_MASK_UTF_16_BE
1438                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1439                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1440     }
1441   else if ((c1 == 0xFE) && (c2 == 0xFF))
1442     {
1443       detect_info->found |= (CATEGORY_MASK_UTF_16_BE
1444                              | CATEGORY_MASK_UTF_16_AUTO);
1445       detect_info->rejected |= (CATEGORY_MASK_UTF_16_LE
1446                                 | CATEGORY_MASK_UTF_16_BE_NOSIG
1447                                 | CATEGORY_MASK_UTF_16_LE_NOSIG);
1448     }
1449   else if (c1 >= 0 && c2 >= 0)
1450     {
1451       detect_info->rejected
1452         |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
1453     }
1454  no_more_source:
1455   return 1;
1456 }
1457
1458 static void
1459 decode_coding_utf_16 (coding)
1460      struct coding_system *coding;
1461 {
1462   const unsigned char *src = coding->source + coding->consumed;
1463   const unsigned char *src_end = coding->source + coding->src_bytes;
1464   const unsigned char *src_base;
1465   int *charbuf = coding->charbuf + coding->charbuf_used;
1466   int *charbuf_end = coding->charbuf + coding->charbuf_size;
1467   int consumed_chars = 0, consumed_chars_base;
1468   int multibytep = coding->src_multibyte;
1469   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1470   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
1471   int surrogate = CODING_UTF_16_SURROGATE (coding);
1472   Lisp_Object attr, charset_list;
1473   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
1474   int byte_after_cr1 = -1, byte_after_cr2 = -1;
1475
1476   CODING_GET_INFO (coding, attr, charset_list);
1477
1478   if (bom == utf_16_with_bom)
1479     {
1480       int c, c1, c2;
1481
1482       src_base = src;
1483       ONE_MORE_BYTE (c1);
1484       ONE_MORE_BYTE (c2);
1485       c = (c1 << 8) | c2;
1486
1487       if (endian == utf_16_big_endian
1488           ? c != 0xFEFF : c != 0xFFFE)
1489         {
1490           /* The first two bytes are not BOM.  Treat them as bytes
1491              for a normal character.  */
1492           src = src_base;
1493           coding->errors++;
1494         }
1495       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1496     }
1497   else if (bom == utf_16_detect_bom)
1498     {
1499       /* We have already tried to detect BOM and failed in
1500          detect_coding.  */
1501       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1502     }
1503
1504   while (1)
1505     {
1506       int c, c1, c2;
1507
1508       src_base = src;
1509       consumed_chars_base = consumed_chars;
1510
1511       if (charbuf + 2 >= charbuf_end)
1512         break;
1513
1514       if (byte_after_cr1 >= 0)
1515         c1 = byte_after_cr1, byte_after_cr1 = -1;
1516       else
1517         ONE_MORE_BYTE (c1);
1518       if (c1 < 0)
1519         {
1520           *charbuf++ = -c1;
1521           continue;
1522         }
1523       if (byte_after_cr2 >= 0)
1524         c2 = byte_after_cr2, byte_after_cr2 = -1;
1525       else
1526         ONE_MORE_BYTE (c2);
1527       if (c2 < 0)
1528         {
1529           *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
1530           *charbuf++ = -c2;
1531           continue;
1532         }
1533       c = (endian == utf_16_big_endian
1534            ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
1535
1536       if (surrogate)
1537         {
1538           if (! UTF_16_LOW_SURROGATE_P (c))
1539             {
1540               if (endian == utf_16_big_endian)
1541                 c1 = surrogate >> 8, c2 = surrogate & 0xFF;
1542               else
1543                 c1 = surrogate & 0xFF, c2 = surrogate >> 8;
1544               *charbuf++ = c1;
1545               *charbuf++ = c2;
1546               coding->errors++;
1547               if (UTF_16_HIGH_SURROGATE_P (c))
1548                 CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1549               else
1550                 *charbuf++ = c;
1551             }
1552           else
1553             {
1554               c = ((surrogate - 0xD800) << 10) | (c - 0xDC00);
1555               CODING_UTF_16_SURROGATE (coding) = surrogate = 0;
1556               *charbuf++ = 0x10000 + c;
1557             }
1558         }
1559       else
1560         {
1561           if (UTF_16_HIGH_SURROGATE_P (c))
1562             CODING_UTF_16_SURROGATE (coding) = surrogate = c;
1563           else
1564             {
1565               if (eol_crlf && c == '\r')
1566                 {
1567                   ONE_MORE_BYTE (byte_after_cr1);
1568                   ONE_MORE_BYTE (byte_after_cr2);
1569                 }
1570               *charbuf++ = c;
1571             }
1572         }
1573     }
1574
1575  no_more_source:
1576   coding->consumed_char += consumed_chars_base;
1577   coding->consumed = src_base - coding->source;
1578   coding->charbuf_used = charbuf - coding->charbuf;
1579 }
1580
1581 static int
1582 encode_coding_utf_16 (coding)
1583      struct coding_system *coding;
1584 {
1585   int multibytep = coding->dst_multibyte;
1586   int *charbuf = coding->charbuf;
1587   int *charbuf_end = charbuf + coding->charbuf_used;
1588   unsigned char *dst = coding->destination + coding->produced;
1589   unsigned char *dst_end = coding->destination + coding->dst_bytes;
1590   int safe_room = 8;
1591   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
1592   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
1593   int produced_chars = 0;
1594   Lisp_Object attrs, charset_list;
1595   int c;
1596
1597   CODING_GET_INFO (coding, attrs, charset_list);
1598
1599   if (bom != utf_16_without_bom)
1600     {
1601       ASSURE_DESTINATION (safe_room);
1602       if (big_endian)
1603         EMIT_TWO_BYTES (0xFE, 0xFF);
1604       else
1605         EMIT_TWO_BYTES (0xFF, 0xFE);
1606       CODING_UTF_16_BOM (coding) = utf_16_without_bom;
1607     }
1608
1609   while (charbuf < charbuf_end)
1610     {
1611       ASSURE_DESTINATION (safe_room);
1612       c = *charbuf++;
1613       if (c >= MAX_UNICODE_CHAR)
1614         c = coding->default_char;
1615
1616       if (c < 0x10000)
1617         {
1618           if (big_endian)
1619             EMIT_TWO_BYTES (c >> 8, c & 0xFF);
1620           else
1621             EMIT_TWO_BYTES (c & 0xFF, c >> 8);
1622         }
1623       else
1624         {
1625           int c1, c2;
1626
1627           c -= 0x10000;
1628           c1 = (c >> 10) + 0xD800;
1629           c2 = (c & 0x3FF) + 0xDC00;
1630           if (big_endian)
1631             EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF);
1632           else
1633             EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8);
1634         }
1635     }
1636   record_conversion_result (coding, CODING_RESULT_SUCCESS);
1637   coding->produced = dst - coding->destination;
1638   coding->produced_char += produced_chars;
1639   return 0;
1640 }
1641
1642 \f
1643 /*** 6. Old Emacs' internal format (emacs-mule) ***/
1644
1645 /* Emacs' internal format for representation of multiple character
1646    sets is a kind of multi-byte encoding, i.e. characters are
1647    represented by variable-length sequences of one-byte codes.
1648
1649    ASCII characters and control characters (e.g. `tab', `newline') are
1650    represented by one-byte sequences which are their ASCII codes, in
1651    the range 0x00 through 0x7F.
1652
1653    8-bit characters of the range 0x80..0x9F are represented by
1654    two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
1655    code + 0x20).
1656
1657    8-bit characters of the range 0xA0..0xFF are represented by
1658    one-byte sequences which are their 8-bit code.
1659
1660    The other characters are represented by a sequence of `base
1661    leading-code', optional `extended leading-code', and one or two
1662    `position-code's.  The length of the sequence is determined by the
1663    base leading-code.  Leading-code takes the range 0x81 through 0x9D,
1664    whereas extended leading-code and position-code take the range 0xA0
1665    through 0xFF.  See `charset.h' for more details about leading-code
1666    and position-code.
1667
1668    --- CODE RANGE of Emacs' internal format ---
1669    character set        range
1670    -------------        -----
1671    ascii                0x00..0x7F
1672    eight-bit-control    LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
1673    eight-bit-graphic    0xA0..0xBF
1674    ELSE                 0x81..0x9D + [0xA0..0xFF]+
1675    ---------------------------------------------
1676
1677    As this is the internal character representation, the format is
1678    usually not used externally (i.e. in a file or in a data sent to a
1679    process).  But, it is possible to have a text externally in this
1680    format (i.e. by encoding by the coding system `emacs-mule').
1681
1682    In that case, a sequence of one-byte codes has a slightly different
1683    form.
1684
1685    At first, all characters in eight-bit-control are represented by
1686    one-byte sequences which are their 8-bit code.
1687
1688    Next, character composition data are represented by the byte
1689    sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1690    where,
1691         METHOD is 0xF0 plus one of composition method (enum
1692         composition_method),
1693
1694         BYTES is 0xA0 plus a byte length of this composition data,
1695
1696         CHARS is 0x20 plus a number of characters composed by this
1697         data,
1698
1699         COMPONENTs are characters of multibye form or composition
1700         rules encoded by two-byte of ASCII codes.
1701
1702    In addition, for backward compatibility, the following formats are
1703    also recognized as composition data on decoding.
1704
1705    0x80 MSEQ ...
1706    0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ
1707
1708    Here,
1709         MSEQ is a multibyte form but in these special format:
1710           ASCII: 0xA0 ASCII_CODE+0x80,
1711           other: LEADING_CODE+0x20 FOLLOWING-BYTE ...,
1712         RULE is a one byte code of the range 0xA0..0xF0 that
1713         represents a composition rule.
1714   */
1715
1716 char emacs_mule_bytes[256];
1717
1718 int
1719 emacs_mule_char (coding, src, nbytes, nchars, id)
1720      struct coding_system *coding;
1721      const unsigned char *src;
1722      int *nbytes, *nchars, *id;
1723 {
1724   const unsigned char *src_end = coding->source + coding->src_bytes;
1725   const unsigned char *src_base = src;
1726   int multibytep = coding->src_multibyte;
1727   struct charset *charset;
1728   unsigned code;
1729   int c;
1730   int consumed_chars = 0;
1731
1732   ONE_MORE_BYTE (c);
1733   if (c < 0)
1734     {
1735       c = -c;
1736       charset = emacs_mule_charset[0];
1737     }
1738   else
1739     {
1740       if (c >= 0xA0)
1741         {
1742           /* Old style component character of a composition.  */
1743           if (c == 0xA0)
1744             {
1745               ONE_MORE_BYTE (c);
1746               c -= 0x80;
1747             }
1748           else
1749             c -= 0x20;
1750         }
1751
1752       switch (emacs_mule_bytes[c])
1753         {
1754         case 2:
1755           if (! (charset = emacs_mule_charset[c]))
1756             goto invalid_code;
1757           ONE_MORE_BYTE (c);
1758           if (c < 0xA0)
1759             goto invalid_code;
1760           code = c & 0x7F;
1761           break;
1762
1763         case 3:
1764           if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11
1765               || c == EMACS_MULE_LEADING_CODE_PRIVATE_12)
1766             {
1767               ONE_MORE_BYTE (c);
1768               if (c < 0xA0 || ! (charset = emacs_mule_charset[c]))
1769                 goto invalid_code;
1770               ONE_MORE_BYTE (c);
1771               if (c < 0xA0)
1772                 goto invalid_code;
1773               code = c & 0x7F;
1774             }
1775           else
1776             {
1777               if (! (charset = emacs_mule_charset[c]))
1778                 goto invalid_code;
1779               ONE_MORE_BYTE (c);
1780               if (c < 0xA0)
1781                 goto invalid_code;
1782               code = (c & 0x7F) << 8;
1783               ONE_MORE_BYTE (c);
1784               if (c < 0xA0)
1785                 goto invalid_code;
1786               code |= c & 0x7F;
1787             }
1788           break;
1789
1790         case 4:
1791           ONE_MORE_BYTE (c);
1792           if (c < 0 || ! (charset = emacs_mule_charset[c]))
1793             goto invalid_code;
1794           ONE_MORE_BYTE (c);
1795           if (c < 0xA0)
1796             goto invalid_code;
1797           code = (c & 0x7F) << 8;
1798           ONE_MORE_BYTE (c);
1799           if (c < 0xA0)
1800             goto invalid_code;
1801           code |= c & 0x7F;
1802           break;
1803
1804         case 1:
1805           code = c;
1806           charset = CHARSET_FROM_ID (ASCII_BYTE_P (code)
1807                                      ? charset_ascii : charset_eight_bit);
1808           break;
1809
1810         default:
1811           abort ();
1812         }
1813       c = DECODE_CHAR (charset, code);
1814       if (c < 0)
1815         goto invalid_code;
1816     }
1817   *nbytes = src - src_base;
1818   *nchars = consumed_chars;
1819   if (id)
1820     *id = charset->id;
1821   return c;
1822
1823  no_more_source:
1824   return -2;
1825
1826  invalid_code:
1827   return -1;
1828 }
1829
1830
1831 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1832    Check if a text is encoded in `emacs-mule'.  If it is, return 1,
1833    else return 0.  */
1834
1835 static int
1836 detect_coding_emacs_mule (coding, detect_info)
1837      struct coding_system *coding;
1838      struct coding_detection_info *detect_info;
1839 {
1840   const unsigned char *src = coding->source, *src_base;
1841   const unsigned char *src_end = coding->source + coding->src_bytes;
1842   int multibytep = coding->src_multibyte;
1843   int consumed_chars = 0;
1844   int c;
1845   int found = 0;
1846
1847   detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1848   /* A coding system of this category is always ASCII compatible.  */
1849   src += coding->head_ascii;
1850
1851   while (1)
1852     {
1853       src_base = src;
1854       ONE_MORE_BYTE (c);
1855       if (c < 0)
1856         continue;
1857       if (c == 0x80)
1858         {
1859           /* Perhaps the start of composite character.  We simple skip
1860              it because analyzing it is too heavy for detecting.  But,
1861              at least, we check that the composite character
1862              constitues of more than 4 bytes.  */
1863           const unsigned char *src_base;
1864
1865         repeat:
1866           src_base = src;
1867           do
1868             {
1869               ONE_MORE_BYTE (c);
1870             }
1871           while (c >= 0xA0);
1872
1873           if (src - src_base <= 4)
1874             break;
1875           found = CATEGORY_MASK_EMACS_MULE;
1876           if (c == 0x80)
1877             goto repeat;
1878         }
1879
1880       if (c < 0x80)
1881         {
1882           if (c < 0x20
1883               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
1884             break;
1885         }
1886       else
1887         {
1888           int more_bytes = emacs_mule_bytes[*src_base] - 1;
1889
1890           while (more_bytes > 0)
1891             {
1892               ONE_MORE_BYTE (c);
1893               if (c < 0xA0)
1894                 {
1895                   src--;        /* Unread the last byte.  */
1896                   break;
1897                 }
1898               more_bytes--;
1899             }
1900           if (more_bytes != 0)
1901             break;
1902           found = CATEGORY_MASK_EMACS_MULE;
1903         }
1904     }
1905   detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1906   return 0;
1907
1908  no_more_source:
1909   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
1910     {
1911       detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
1912       return 0;
1913     }
1914   detect_info->found |= found;
1915   return 1;
1916 }
1917
1918
1919 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
1920
1921 /* Decode a character represented as a component of composition
1922    sequence of Emacs 20/21 style at SRC.  Set C to that character and
1923    update SRC to the head of next character (or an encoded composition
1924    rule).  If SRC doesn't points a composition component, set C to -1.
1925    If SRC points an invalid byte sequence, global exit by a return
1926    value 0.  */
1927
1928 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf)                 \
1929   do                                                            \
1930     {                                                           \
1931       int c;                                                    \
1932       int nbytes, nchars;                                       \
1933                                                                 \
1934       if (src == src_end)                                       \
1935         break;                                                  \
1936       c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\
1937       if (c < 0)                                                \
1938         {                                                       \
1939           if (c == -2)                                          \
1940             break;                                              \
1941           goto invalid_code;                                    \
1942         }                                                       \
1943       *buf++ = c;                                               \
1944       src += nbytes;                                            \
1945       consumed_chars += nchars;                                 \
1946     }                                                           \
1947   while (0)
1948
1949
1950 /* Decode a composition rule represented as a component of composition
1951    sequence of Emacs 20 style at SRC.  Store the decoded rule in *BUF,
1952    and increment BUF.  If SRC points an invalid byte sequence, set C
1953    to -1.  */
1954
1955 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf)      \
1956   do {                                                  \
1957     int c, gref, nref;                                  \
1958                                                         \
1959     if (src >= src_end)                                 \
1960       goto invalid_code;                                \
1961     ONE_MORE_BYTE_NO_CHECK (c);                         \
1962     c -= 0xA0;                                          \
1963     if (c < 0 || c >= 81)                               \
1964       goto invalid_code;                                \
1965                                                         \
1966     gref = c / 9, nref = c % 9;                         \
1967     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1968   } while (0)
1969
1970
1971 /* Decode a composition rule represented as a component of composition
1972    sequence of Emacs 21 style at SRC.  Store the decoded rule in *BUF,
1973    and increment BUF.  If SRC points an invalid byte sequence, set C
1974    to -1.  */
1975
1976 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf)      \
1977   do {                                                  \
1978     int gref, nref;                                     \
1979                                                         \
1980     if (src + 1>= src_end)                              \
1981       goto invalid_code;                                \
1982     ONE_MORE_BYTE_NO_CHECK (gref);                      \
1983     gref -= 0x20;                                       \
1984     ONE_MORE_BYTE_NO_CHECK (nref);                      \
1985     nref -= 0x20;                                       \
1986     if (gref < 0 || gref >= 81                          \
1987         || nref < 0 || nref >= 81)                      \
1988       goto invalid_code;                                \
1989     *buf++ = COMPOSITION_ENCODE_RULE (gref, nref);      \
1990   } while (0)
1991
1992
1993 #define DECODE_EMACS_MULE_21_COMPOSITION(c)                             \
1994   do {                                                                  \
1995     /* Emacs 21 style format.  The first three bytes at SRC are         \
1996        (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is  \
1997        the byte length of this composition information, CHARS is the    \
1998        number of characters composed by this composition.  */           \
1999     enum composition_method method = c - 0xF2;                          \
2000     int *charbuf_base = charbuf;                                        \
2001     int consumed_chars_limit;                                           \
2002     int nbytes, nchars;                                                 \
2003                                                                         \
2004     ONE_MORE_BYTE (c);                                                  \
2005     if (c < 0)                                                          \
2006       goto invalid_code;                                                \
2007     nbytes = c - 0xA0;                                                  \
2008     if (nbytes < 3)                                                     \
2009       goto invalid_code;                                                \
2010     ONE_MORE_BYTE (c);                                                  \
2011     if (c < 0)                                                          \
2012       goto invalid_code;                                                \
2013     nchars = c - 0xA0;                                                  \
2014     ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
2015     consumed_chars_limit = consumed_chars_base + nbytes;                \
2016     if (method != COMPOSITION_RELATIVE)                                 \
2017       {                                                                 \
2018         int i = 0;                                                      \
2019         while (consumed_chars < consumed_chars_limit)                   \
2020           {                                                             \
2021             if (i % 2 && method != COMPOSITION_WITH_ALTCHARS)           \
2022               DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf);          \
2023             else                                                        \
2024               DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf);             \
2025             i++;                                                        \
2026           }                                                             \
2027         if (consumed_chars < consumed_chars_limit)                      \
2028           goto invalid_code;                                            \
2029         charbuf_base[0] -= i;                                           \
2030       }                                                                 \
2031   } while (0)
2032
2033
2034 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c)                    \
2035   do {                                                                  \
2036     /* Emacs 20 style format for relative composition.  */              \
2037     /* Store multibyte form of characters to be composed.  */           \
2038     enum composition_method method = COMPOSITION_RELATIVE;              \
2039     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];                 \
2040     int *buf = components;                                              \
2041     int i, j;                                                           \
2042                                                                         \
2043     src = src_base;                                                     \
2044     ONE_MORE_BYTE (c);          /* skip 0x80 */                         \
2045     for (i = 0; *src >= 0xA0 && i < MAX_COMPOSITION_COMPONENTS; i++)    \
2046       DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                         \
2047     if (i < 2)                                                          \
2048       goto invalid_code;                                                \
2049     ADD_COMPOSITION_DATA (charbuf, i, method);                          \
2050     for (j = 0; j < i; j++)                                             \
2051       *charbuf++ = components[j];                                       \
2052   } while (0)
2053
2054
2055 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c)            \
2056   do {                                                          \
2057     /* Emacs 20 style format for rule-base composition.  */     \
2058     /* Store multibyte form of characters to be composed.  */   \
2059     enum composition_method method = COMPOSITION_WITH_RULE;     \
2060     int *charbuf_base = charbuf;                                \
2061     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];         \
2062     int *buf = components;                                      \
2063     int i, j;                                                   \
2064                                                                 \
2065     DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);                   \
2066     for (i = 1; i < MAX_COMPOSITION_COMPONENTS; i++)            \
2067       {                                                         \
2068         if (*src < 0xA0)                                        \
2069           break;                                                \
2070         DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf);            \
2071         DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);               \
2072       }                                                         \
2073     if (i <= 1 || (buf - components) % 2 == 0)                  \
2074       goto invalid_code;                                        \
2075     if (charbuf + i + (i / 2) + 1 >= charbuf_end)               \
2076       goto no_more_source;                                      \
2077     ADD_COMPOSITION_DATA (charbuf, i, method);                  \
2078     i = i * 2 - 1;                                              \
2079     for (j = 0; j < i; j++)                                     \
2080       *charbuf++ = components[j];                               \
2081     charbuf_base[0] -= i;                                       \
2082     for (j = 0; j < i; j += 2)                                  \
2083       *charbuf++ = components[j];                               \
2084   } while (0)
2085
2086
2087 static void
2088 decode_coding_emacs_mule (coding)
2089      struct coding_system *coding;
2090 {
2091   const unsigned char *src = coding->source + coding->consumed;
2092   const unsigned char *src_end = coding->source + coding->src_bytes;
2093   const unsigned char *src_base;
2094   int *charbuf = coding->charbuf + coding->charbuf_used;
2095   int *charbuf_end
2096     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
2097   int consumed_chars = 0, consumed_chars_base;
2098   int multibytep = coding->src_multibyte;
2099   Lisp_Object attrs, charset_list;
2100   int char_offset = coding->produced_char;
2101   int last_offset = char_offset;
2102   int last_id = charset_ascii;
2103   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2104   int byte_after_cr = -1;
2105
2106   CODING_GET_INFO (coding, attrs, charset_list);
2107
2108   while (1)
2109     {
2110       int c;
2111
2112       src_base = src;
2113       consumed_chars_base = consumed_chars;
2114
2115       if (charbuf >= charbuf_end)
2116         break;
2117
2118       if (byte_after_cr >= 0)
2119         c = byte_after_cr, byte_after_cr = -1;
2120       else
2121         ONE_MORE_BYTE (c);
2122       if (c < 0)
2123         {
2124           *charbuf++ = -c;
2125           char_offset++;
2126         }
2127       else if (c < 0x80)
2128         {
2129           if (eol_crlf && c == '\r')
2130             ONE_MORE_BYTE (byte_after_cr);
2131           *charbuf++ = c;
2132           char_offset++;
2133         }
2134       else if (c == 0x80)
2135         {
2136           ONE_MORE_BYTE (c);
2137           if (c < 0)
2138             goto invalid_code;
2139           if (c - 0xF2 >= COMPOSITION_RELATIVE
2140               && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS)
2141             DECODE_EMACS_MULE_21_COMPOSITION (c);
2142           else if (c < 0xC0)
2143             DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c);
2144           else if (c == 0xFF)
2145             DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c);
2146           else
2147             goto invalid_code;
2148         }
2149       else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2150         {
2151           int nbytes, nchars;
2152           int id;
2153
2154           src = src_base;
2155           consumed_chars = consumed_chars_base;
2156           c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
2157           if (c < 0)
2158             {
2159               if (c == -2)
2160                 break;
2161               goto invalid_code;
2162             }
2163           if (last_id != id)
2164             {
2165               if (last_id != charset_ascii)
2166                 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2167               last_id = id;
2168               last_offset = char_offset;
2169             }
2170           *charbuf++ = c;
2171           src += nbytes;
2172           consumed_chars += nchars;
2173           char_offset++;
2174         }
2175       else
2176         goto invalid_code;
2177       continue;
2178
2179     invalid_code:
2180       src = src_base;
2181       consumed_chars = consumed_chars_base;
2182       ONE_MORE_BYTE (c);
2183       *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2184       char_offset++;
2185       coding->errors++;
2186     }
2187
2188  no_more_source:
2189   if (last_id != charset_ascii)
2190     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2191   coding->consumed_char += consumed_chars_base;
2192   coding->consumed = src_base - coding->source;
2193   coding->charbuf_used = charbuf - coding->charbuf;
2194 }
2195
2196
2197 #define EMACS_MULE_LEADING_CODES(id, codes)     \
2198   do {                                          \
2199     if (id < 0xA0)                              \
2200       codes[0] = id, codes[1] = 0;              \
2201     else if (id < 0xE0)                         \
2202       codes[0] = 0x9A, codes[1] = id;           \
2203     else if (id < 0xF0)                         \
2204       codes[0] = 0x9B, codes[1] = id;           \
2205     else if (id < 0xF5)                         \
2206       codes[0] = 0x9C, codes[1] = id;           \
2207     else                                        \
2208       codes[0] = 0x9D, codes[1] = id;           \
2209   } while (0);
2210
2211
2212 static int
2213 encode_coding_emacs_mule (coding)
2214      struct coding_system *coding;
2215 {
2216   int multibytep = coding->dst_multibyte;
2217   int *charbuf = coding->charbuf;
2218   int *charbuf_end = charbuf + coding->charbuf_used;
2219   unsigned char *dst = coding->destination + coding->produced;
2220   unsigned char *dst_end = coding->destination + coding->dst_bytes;
2221   int safe_room = 8;
2222   int produced_chars = 0;
2223   Lisp_Object attrs, charset_list;
2224   int c;
2225   int preferred_charset_id = -1;
2226
2227   CODING_GET_INFO (coding, attrs, charset_list);
2228   if (! EQ (charset_list, Vemacs_mule_charset_list))
2229     {
2230       CODING_ATTR_CHARSET_LIST (attrs)
2231         = charset_list = Vemacs_mule_charset_list;
2232     }
2233
2234   while (charbuf < charbuf_end)
2235     {
2236       ASSURE_DESTINATION (safe_room);
2237       c = *charbuf++;
2238
2239       if (c < 0)
2240         {
2241           /* Handle an annotation.  */
2242           switch (*charbuf)
2243             {
2244             case CODING_ANNOTATE_COMPOSITION_MASK:
2245               /* Not yet implemented.  */
2246               break;
2247             case CODING_ANNOTATE_CHARSET_MASK:
2248               preferred_charset_id = charbuf[3];
2249               if (preferred_charset_id >= 0
2250                   && NILP (Fmemq (make_number (preferred_charset_id),
2251                                   charset_list)))
2252                 preferred_charset_id = -1;
2253               break;
2254             default:
2255               abort ();
2256             }
2257           charbuf += -c - 1;
2258           continue;
2259         }
2260
2261       if (ASCII_CHAR_P (c))
2262         EMIT_ONE_ASCII_BYTE (c);
2263       else if (CHAR_BYTE8_P (c))
2264         {
2265           c = CHAR_TO_BYTE8 (c);
2266           EMIT_ONE_BYTE (c);
2267         }
2268       else
2269         {
2270           struct charset *charset;
2271           unsigned code;
2272           int dimension;
2273           int emacs_mule_id;
2274           unsigned char leading_codes[2];
2275
2276           if (preferred_charset_id >= 0)
2277             {
2278               charset = CHARSET_FROM_ID (preferred_charset_id);
2279               if (! CHAR_CHARSET_P (c, charset))
2280                 charset = char_charset (c, charset_list, NULL);
2281             }
2282           else
2283             charset = char_charset (c, charset_list, &code);
2284           if (! charset)
2285             {
2286               c = coding->default_char;
2287               if (ASCII_CHAR_P (c))
2288                 {
2289                   EMIT_ONE_ASCII_BYTE (c);
2290                   continue;
2291                 }
2292               charset = char_charset (c, charset_list, &code);
2293             }
2294           dimension = CHARSET_DIMENSION (charset);
2295           emacs_mule_id = CHARSET_EMACS_MULE_ID (charset);
2296           EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes);
2297           EMIT_ONE_BYTE (leading_codes[0]);
2298           if (leading_codes[1])
2299             EMIT_ONE_BYTE (leading_codes[1]);
2300           if (dimension == 1)
2301             EMIT_ONE_BYTE (code | 0x80);
2302           else
2303             {
2304               code |= 0x8080;
2305               EMIT_ONE_BYTE (code >> 8);
2306               EMIT_ONE_BYTE (code & 0xFF);
2307             }
2308         }
2309     }
2310   record_conversion_result (coding, CODING_RESULT_SUCCESS);
2311   coding->produced_char += produced_chars;
2312   coding->produced = dst - coding->destination;
2313   return 0;
2314 }
2315
2316 \f
2317 /*** 7. ISO2022 handlers ***/
2318
2319 /* The following note describes the coding system ISO2022 briefly.
2320    Since the intention of this note is to help understand the
2321    functions in this file, some parts are NOT ACCURATE or are OVERLY
2322    SIMPLIFIED.  For thorough understanding, please refer to the
2323    original document of ISO2022.  This is equivalent to the standard
2324    ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*).
2325
2326    ISO2022 provides many mechanisms to encode several character sets
2327    in 7-bit and 8-bit environments.  For 7-bit environments, all text
2328    is encoded using bytes less than 128.  This may make the encoded
2329    text a little bit longer, but the text passes more easily through
2330    several types of gateway, some of which strip off the MSB (Most
2331    Significant Bit).
2332
2333    There are two kinds of character sets: control character sets and
2334    graphic character sets.  The former contain control characters such
2335    as `newline' and `escape' to provide control functions (control
2336    functions are also provided by escape sequences).  The latter
2337    contain graphic characters such as 'A' and '-'.  Emacs recognizes
2338    two control character sets and many graphic character sets.
2339
2340    Graphic character sets are classified into one of the following
2341    four classes, according to the number of bytes (DIMENSION) and
2342    number of characters in one dimension (CHARS) of the set:
2343    - DIMENSION1_CHARS94
2344    - DIMENSION1_CHARS96
2345    - DIMENSION2_CHARS94
2346    - DIMENSION2_CHARS96
2347
2348    In addition, each character set is assigned an identification tag,
2349    unique for each set, called the "final character" (denoted as <F>
2350    hereafter).  The <F> of each character set is decided by ECMA(*)
2351    when it is registered in ISO.  The code range of <F> is 0x30..0x7F
2352    (0x30..0x3F are for private use only).
2353
2354    Note (*): ECMA = European Computer Manufacturers Association
2355
2356    Here are examples of graphic character sets [NAME(<F>)]:
2357         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
2358         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
2359         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
2360         o DIMENSION2_CHARS96 -- none for the moment
2361
2362    A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR.
2363         C0 [0x00..0x1F] -- control character plane 0
2364         GL [0x20..0x7F] -- graphic character plane 0
2365         C1 [0x80..0x9F] -- control character plane 1
2366         GR [0xA0..0xFF] -- graphic character plane 1
2367
2368    A control character set is directly designated and invoked to C0 or
2369    C1 by an escape sequence.  The most common case is that:
2370    - ISO646's  control character set is designated/invoked to C0, and
2371    - ISO6429's control character set is designated/invoked to C1,
2372    and usually these designations/invocations are omitted in encoded
2373    text.  In a 7-bit environment, only C0 can be used, and a control
2374    character for C1 is encoded by an appropriate escape sequence to
2375    fit into the environment.  All control characters for C1 are
2376    defined to have corresponding escape sequences.
2377
2378    A graphic character set is at first designated to one of four
2379    graphic registers (G0 through G3), then these graphic registers are
2380    invoked to GL or GR.  These designations and invocations can be
2381    done independently.  The most common case is that G0 is invoked to
2382    GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
2383    these invocations and designations are omitted in encoded text.
2384    In a 7-bit environment, only GL can be used.
2385
2386    When a graphic character set of CHARS94 is invoked to GL, codes
2387    0x20 and 0x7F of the GL area work as control characters SPACE and
2388    DEL respectively, and codes 0xA0 and 0xFF of the GR area should not
2389    be used.
2390
2391    There are two ways of invocation: locking-shift and single-shift.
2392    With locking-shift, the invocation lasts until the next different
2393    invocation, whereas with single-shift, the invocation affects the
2394    following character only and doesn't affect the locking-shift
2395    state.  Invocations are done by the following control characters or
2396    escape sequences:
2397
2398    ----------------------------------------------------------------------
2399    abbrev  function                  cntrl escape seq   description
2400    ----------------------------------------------------------------------
2401    SI/LS0  (shift-in)                0x0F  none         invoke G0 into GL
2402    SO/LS1  (shift-out)               0x0E  none         invoke G1 into GL
2403    LS2     (locking-shift-2)         none  ESC 'n'      invoke G2 into GL
2404    LS3     (locking-shift-3)         none  ESC 'o'      invoke G3 into GL
2405    LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR (*)
2406    LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR (*)
2407    LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR (*)
2408    SS2     (single-shift-2)          0x8E  ESC 'N'      invoke G2 for one char
2409    SS3     (single-shift-3)          0x8F  ESC 'O'      invoke G3 for one char
2410    ----------------------------------------------------------------------
2411    (*) These are not used by any known coding system.
2412
2413    Control characters for these functions are defined by macros
2414    ISO_CODE_XXX in `coding.h'.
2415
2416    Designations are done by the following escape sequences:
2417    ----------------------------------------------------------------------
2418    escape sequence      description
2419    ----------------------------------------------------------------------
2420    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
2421    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
2422    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
2423    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
2424    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
2425    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
2426    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
2427    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
2428    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
2429    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
2430    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
2431    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
2432    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
2433    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
2434    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
2435    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
2436    ----------------------------------------------------------------------
2437
2438    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
2439    of dimension 1, chars 94, and final character <F>, etc...
2440
2441    Note (*): Although these designations are not allowed in ISO2022,
2442    Emacs accepts them on decoding, and produces them on encoding
2443    CHARS96 character sets in a coding system which is characterized as
2444    7-bit environment, non-locking-shift, and non-single-shift.
2445
2446    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
2447    '(' must be omitted.  We refer to this as "short-form" hereafter.
2448
2449    Now you may notice that there are a lot of ways of encoding the
2450    same multilingual text in ISO2022.  Actually, there exist many
2451    coding systems such as Compound Text (used in X11's inter client
2452    communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
2453    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
2454    localized platforms), and all of these are variants of ISO2022.
2455
2456    In addition to the above, Emacs handles two more kinds of escape
2457    sequences: ISO6429's direction specification and Emacs' private
2458    sequence for specifying character composition.
2459
2460    ISO6429's direction specification takes the following form:
2461         o CSI ']'      -- end of the current direction
2462         o CSI '0' ']'  -- end of the current direction
2463         o CSI '1' ']'  -- start of left-to-right text
2464         o CSI '2' ']'  -- start of right-to-left text
2465    The control character CSI (0x9B: control sequence introducer) is
2466    abbreviated to the escape sequence ESC '[' in a 7-bit environment.
2467
2468    Character composition specification takes the following form:
2469         o ESC '0' -- start relative composition
2470         o ESC '1' -- end composition
2471         o ESC '2' -- start rule-base composition (*)
2472         o ESC '3' -- start relative composition with alternate chars  (**)
2473         o ESC '4' -- start rule-base composition with alternate chars  (**)
2474   Since these are not standard escape sequences of any ISO standard,
2475   the use of them with these meanings is restricted to Emacs only.
2476
2477   (*) This form is used only in Emacs 20.7 and older versions,
2478   but newer versions can safely decode it.
2479   (**) This form is used only in Emacs 21.1 and newer versions,
2480   and older versions can't decode it.
2481
2482   Here's a list of example usages of these composition escape
2483   sequences (categorized by `enum composition_method').
2484
2485   COMPOSITION_RELATIVE:
2486         ESC 0 CHAR [ CHAR ] ESC 1
2487   COMPOSITION_WITH_RULE:
2488         ESC 2 CHAR [ RULE CHAR ] ESC 1
2489   COMPOSITION_WITH_ALTCHARS:
2490         ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
2491   COMPOSITION_WITH_RULE_ALTCHARS:
2492         ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
2493
2494 enum iso_code_class_type iso_code_class[256];
2495
2496 #define SAFE_CHARSET_P(coding, id)      \
2497   ((id) <= (coding)->max_charset_id     \
2498    && (coding)->safe_charsets[id] >= 0)
2499
2500
2501 #define SHIFT_OUT_OK(category)  \
2502   (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0)
2503
2504 static void
2505 setup_iso_safe_charsets (attrs)
2506      Lisp_Object attrs;
2507 {
2508   Lisp_Object charset_list, safe_charsets;
2509   Lisp_Object request;
2510   Lisp_Object reg_usage;
2511   Lisp_Object tail;
2512   int reg94, reg96;
2513   int flags = XINT (AREF (attrs, coding_attr_iso_flags));
2514   int max_charset_id;
2515
2516   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2517   if ((flags & CODING_ISO_FLAG_FULL_SUPPORT)
2518       && ! EQ (charset_list, Viso_2022_charset_list))
2519     {
2520       CODING_ATTR_CHARSET_LIST (attrs)
2521         = charset_list = Viso_2022_charset_list;
2522       ASET (attrs, coding_attr_safe_charsets, Qnil);
2523     }
2524
2525   if (STRINGP (AREF (attrs, coding_attr_safe_charsets)))
2526     return;
2527
2528   max_charset_id = 0;
2529   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2530     {
2531       int id = XINT (XCAR (tail));
2532       if (max_charset_id < id)
2533         max_charset_id = id;
2534     }
2535
2536   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
2537                                 make_number (255));
2538   request = AREF (attrs, coding_attr_iso_request);
2539   reg_usage = AREF (attrs, coding_attr_iso_usage);
2540   reg94 = XINT (XCAR (reg_usage));
2541   reg96 = XINT (XCDR (reg_usage));
2542
2543   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
2544     {
2545       Lisp_Object id;
2546       Lisp_Object reg;
2547       struct charset *charset;
2548
2549       id = XCAR (tail);
2550       charset = CHARSET_FROM_ID (XINT (id));
2551       reg = Fcdr (Fassq (id, request));
2552       if (! NILP (reg))
2553         SSET (safe_charsets, XINT (id), XINT (reg));
2554       else if (charset->iso_chars_96)
2555         {
2556           if (reg96 < 4)
2557             SSET (safe_charsets, XINT (id), reg96);
2558         }
2559       else
2560         {
2561           if (reg94 < 4)
2562             SSET (safe_charsets, XINT (id), reg94);
2563         }
2564     }
2565   ASET (attrs, coding_attr_safe_charsets, safe_charsets);
2566 }
2567
2568
2569 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2570    Check if a text is encoded in one of ISO-2022 based codig systems.
2571    If it is, return 1, else return 0.  */
2572
2573 static int
2574 detect_coding_iso_2022 (coding, detect_info)
2575      struct coding_system *coding;
2576      struct coding_detection_info *detect_info;
2577 {
2578   const unsigned char *src = coding->source, *src_base = src;
2579   const unsigned char *src_end = coding->source + coding->src_bytes;
2580   int multibytep = coding->src_multibyte;
2581   int single_shifting = 0;
2582   int id;
2583   int c, c1;
2584   int consumed_chars = 0;
2585   int i;
2586   int rejected = 0;
2587   int found = 0;
2588
2589   detect_info->checked |= CATEGORY_MASK_ISO;
2590
2591   for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++)
2592     {
2593       struct coding_system *this = &(coding_categories[i]);
2594       Lisp_Object attrs, val;
2595
2596       attrs = CODING_ID_ATTRS (this->id);
2597       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
2598           && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
2599         setup_iso_safe_charsets (attrs);
2600       val = CODING_ATTR_SAFE_CHARSETS (attrs);
2601       this->max_charset_id = SCHARS (val) - 1;
2602       this->safe_charsets = (char *) SDATA (val);
2603     }
2604
2605   /* A coding system of this category is always ASCII compatible.  */
2606   src += coding->head_ascii;
2607
2608   while (rejected != CATEGORY_MASK_ISO)
2609     {
2610       src_base = src;
2611       ONE_MORE_BYTE (c);
2612       switch (c)
2613         {
2614         case ISO_CODE_ESC:
2615           if (inhibit_iso_escape_detection)
2616             break;
2617           single_shifting = 0;
2618           ONE_MORE_BYTE (c);
2619           if (c >= '(' && c <= '/')
2620             {
2621               /* Designation sequence for a charset of dimension 1.  */
2622               ONE_MORE_BYTE (c1);
2623               if (c1 < ' ' || c1 >= 0x80
2624                   || (id = iso_charset_table[0][c >= ','][c1]) < 0)
2625                 /* Invalid designation sequence.  Just ignore.  */
2626                 break;
2627             }
2628           else if (c == '$')
2629             {
2630               /* Designation sequence for a charset of dimension 2.  */
2631               ONE_MORE_BYTE (c);
2632               if (c >= '@' && c <= 'B')
2633                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
2634                 id = iso_charset_table[1][0][c];
2635               else if (c >= '(' && c <= '/')
2636                 {
2637                   ONE_MORE_BYTE (c1);
2638                   if (c1 < ' ' || c1 >= 0x80
2639                       || (id = iso_charset_table[1][c >= ','][c1]) < 0)
2640                     /* Invalid designation sequence.  Just ignore.  */
2641                     break;
2642                 }
2643               else
2644                 /* Invalid designation sequence.  Just ignore it.  */
2645                 break;
2646             }
2647           else if (c == 'N' || c == 'O')
2648             {
2649               /* ESC <Fe> for SS2 or SS3.  */
2650               single_shifting = 1;
2651               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2652               break;
2653             }
2654           else if (c >= '0' && c <= '4')
2655             {
2656               /* ESC <Fp> for start/end composition.  */
2657               found |= CATEGORY_MASK_ISO;
2658               break;
2659             }
2660           else
2661             {
2662               /* Invalid escape sequence.  Just ignore it.  */
2663               break;
2664             }
2665
2666           /* We found a valid designation sequence for CHARSET.  */
2667           rejected |= CATEGORY_MASK_ISO_8BIT;
2668           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7],
2669                               id))
2670             found |= CATEGORY_MASK_ISO_7;
2671           else
2672             rejected |= CATEGORY_MASK_ISO_7;
2673           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_tight],
2674                               id))
2675             found |= CATEGORY_MASK_ISO_7_TIGHT;
2676           else
2677             rejected |= CATEGORY_MASK_ISO_7_TIGHT;
2678           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_7_else],
2679                               id))
2680             found |= CATEGORY_MASK_ISO_7_ELSE;
2681           else
2682             rejected |= CATEGORY_MASK_ISO_7_ELSE;
2683           if (SAFE_CHARSET_P (&coding_categories[coding_category_iso_8_else],
2684                               id))
2685             found |= CATEGORY_MASK_ISO_8_ELSE;
2686           else
2687             rejected |= CATEGORY_MASK_ISO_8_ELSE;
2688           break;
2689
2690         case ISO_CODE_SO:
2691         case ISO_CODE_SI:
2692           /* Locking shift out/in.  */
2693           if (inhibit_iso_escape_detection)
2694             break;
2695           single_shifting = 0;
2696           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
2697           break;
2698
2699         case ISO_CODE_CSI:
2700           /* Control sequence introducer.  */
2701           single_shifting = 0;
2702           rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2703           found |= CATEGORY_MASK_ISO_8_ELSE;
2704           goto check_extra_latin;
2705
2706         case ISO_CODE_SS2:
2707         case ISO_CODE_SS3:
2708           /* Single shift.   */
2709           if (inhibit_iso_escape_detection)
2710             break;
2711           single_shifting = 0;
2712           rejected |= CATEGORY_MASK_ISO_7BIT;
2713           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2714               & CODING_ISO_FLAG_SINGLE_SHIFT)
2715             found |= CATEGORY_MASK_ISO_8_1, single_shifting = 1;
2716           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2])
2717               & CODING_ISO_FLAG_SINGLE_SHIFT)
2718             found |= CATEGORY_MASK_ISO_8_2, single_shifting = 1;
2719           if (single_shifting)
2720             break;
2721           goto check_extra_latin;
2722
2723         default:
2724           if (c < 0)
2725             continue;
2726           if (c < 0x80)
2727             {
2728               single_shifting = 0;
2729               break;
2730             }
2731           if (c >= 0xA0)
2732             {
2733               rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE;
2734               found |= CATEGORY_MASK_ISO_8_1;
2735               /* Check the length of succeeding codes of the range
2736                  0xA0..0FF.  If the byte length is even, we include
2737                  CATEGORY_MASK_ISO_8_2 in `found'.  We can check this
2738                  only when we are not single shifting.  */
2739               if (! single_shifting
2740                   && ! (rejected & CATEGORY_MASK_ISO_8_2))
2741                 {
2742                   int i = 1;
2743                   while (src < src_end)
2744                     {
2745                       ONE_MORE_BYTE (c);
2746                       if (c < 0xA0)
2747                         break;
2748                       i++;
2749                     }
2750
2751                   if (i & 1 && src < src_end)
2752                     rejected |= CATEGORY_MASK_ISO_8_2;
2753                   else
2754                     found |= CATEGORY_MASK_ISO_8_2;
2755                 }
2756               break;
2757             }
2758         check_extra_latin:
2759           single_shifting = 0;
2760           if (! VECTORP (Vlatin_extra_code_table)
2761               || NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
2762             {
2763               rejected = CATEGORY_MASK_ISO;
2764               break;
2765             }
2766           if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
2767               & CODING_ISO_FLAG_LATIN_EXTRA)
2768             found |= CATEGORY_MASK_ISO_8_1;
2769           else
2770             rejected |= CATEGORY_MASK_ISO_8_1;
2771           rejected |= CATEGORY_MASK_ISO_8_2;
2772         }
2773     }
2774   detect_info->rejected |= CATEGORY_MASK_ISO;
2775   return 0;
2776
2777  no_more_source:
2778   detect_info->rejected |= rejected;
2779   detect_info->found |= (found & ~rejected);
2780   return 1;
2781 }
2782
2783
2784 /* Set designation state into CODING.  Set CHARS_96 to -1 if the
2785    escape sequence should be kept.  */
2786 #define DECODE_DESIGNATION(reg, dim, chars_96, final)                   \
2787   do {                                                                  \
2788     int id, prev;                                                       \
2789                                                                         \
2790     if (final < '0' || final >= 128                                     \
2791         || ((id = ISO_CHARSET_TABLE (dim, chars_96, final)) < 0)        \
2792         || !SAFE_CHARSET_P (coding, id))                                \
2793       {                                                                 \
2794         CODING_ISO_DESIGNATION (coding, reg) = -2;                      \
2795         chars_96 = -1;                                                  \
2796         break;                                                          \
2797       }                                                                 \
2798     prev = CODING_ISO_DESIGNATION (coding, reg);                        \
2799     if (id == charset_jisx0201_roman)                                   \
2800       {                                                                 \
2801         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)      \
2802           id = charset_ascii;                                           \
2803       }                                                                 \
2804     else if (id == charset_jisx0208_1978)                               \
2805       {                                                                 \
2806         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)     \
2807           id = charset_jisx0208;                                        \
2808       }                                                                 \
2809     CODING_ISO_DESIGNATION (coding, reg) = id;                          \
2810     /* If there was an invalid designation to REG previously, and this  \
2811        designation is ASCII to REG, we should keep this designation     \
2812        sequence.  */                                                    \
2813     if (prev == -2 && id == charset_ascii)                              \
2814       chars_96 = -1;                                                    \
2815   } while (0)
2816
2817
2818 #define MAYBE_FINISH_COMPOSITION()                              \
2819   do {                                                          \
2820     int i;                                                      \
2821     if (composition_state == COMPOSING_NO)                      \
2822       break;                                                    \
2823     /* It is assured that we have enough room for producing     \
2824        characters stored in the table `components'.  */         \
2825     if (charbuf + component_idx > charbuf_end)                  \
2826       goto no_more_source;                                      \
2827     composition_state = COMPOSING_NO;                           \
2828     if (method == COMPOSITION_RELATIVE                          \
2829         || method == COMPOSITION_WITH_ALTCHARS)                 \
2830       {                                                         \
2831         for (i = 0; i < component_idx; i++)                     \
2832           *charbuf++ = components[i];                           \
2833         char_offset += component_idx;                           \
2834       }                                                         \
2835     else                                                        \
2836       {                                                         \
2837         for (i = 0; i < component_idx; i += 2)                  \
2838           *charbuf++ = components[i];                           \
2839         char_offset += (component_idx / 2) + 1;                 \
2840       }                                                         \
2841   } while (0)
2842
2843
2844 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
2845    ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
2846    ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
2847    ESC 3 : altchar composition :  ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
2848    ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
2849   */
2850
2851 #define DECODE_COMPOSITION_START(c1)                                    \
2852   do {                                                                  \
2853     if (c1 == '0'                                                       \
2854         && composition_state == COMPOSING_COMPONENT_RULE)               \
2855       {                                                                 \
2856         component_len = component_idx;                                  \
2857         composition_state = COMPOSING_CHAR;                             \
2858       }                                                                 \
2859     else                                                                \
2860       {                                                                 \
2861         const unsigned char *p;                                         \
2862                                                                         \
2863         MAYBE_FINISH_COMPOSITION ();                                    \
2864         if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end)         \
2865           goto no_more_source;                                          \
2866         for (p = src; p < src_end - 1; p++)                             \
2867           if (*p == ISO_CODE_ESC && p[1] == '1')                        \
2868             break;                                                      \
2869         if (p == src_end - 1)                                           \
2870           {                                                             \
2871             /* The current composition doesn't end in the current       \
2872                source.  */                                              \
2873             record_conversion_result                                    \
2874               (coding, CODING_RESULT_INSUFFICIENT_SRC);                 \
2875             goto no_more_source;                                        \
2876           }                                                             \
2877                                                                         \
2878         /* This is surely the start of a composition.  */               \
2879         method = (c1 == '0' ? COMPOSITION_RELATIVE                      \
2880                   : c1 == '2' ? COMPOSITION_WITH_RULE                   \
2881                   : c1 == '3' ? COMPOSITION_WITH_ALTCHARS               \
2882                   : COMPOSITION_WITH_RULE_ALTCHARS);                    \
2883         composition_state = (c1 <= '2' ? COMPOSING_CHAR                 \
2884                              : COMPOSING_COMPONENT_CHAR);               \
2885         component_idx = component_len = 0;                              \
2886       }                                                                 \
2887   } while (0)
2888
2889
2890 /* Handle compositoin end sequence ESC 1.  */
2891
2892 #define DECODE_COMPOSITION_END()                                        \
2893   do {                                                                  \
2894     int nchars = (component_len > 0 ? component_idx - component_len     \
2895                   : method == COMPOSITION_RELATIVE ? component_idx      \
2896                   : (component_idx + 1) / 2);                           \
2897     int i;                                                              \
2898     int *saved_charbuf = charbuf;                                       \
2899                                                                         \
2900     ADD_COMPOSITION_DATA (charbuf, nchars, method);                     \
2901     if (method != COMPOSITION_RELATIVE)                                 \
2902       {                                                                 \
2903         if (component_len == 0)                                         \
2904           for (i = 0; i < component_idx; i++)                           \
2905             *charbuf++ = components[i];                                 \
2906         else                                                            \
2907           for (i = 0; i < component_len; i++)                           \
2908             *charbuf++ = components[i];                                 \
2909         *saved_charbuf = saved_charbuf - charbuf;                       \
2910       }                                                                 \
2911     if (method == COMPOSITION_WITH_RULE)                                \
2912       for (i = 0; i < component_idx; i += 2, char_offset++)             \
2913         *charbuf++ = components[i];                                     \
2914     else                                                                \
2915       for (i = component_len; i < component_idx; i++, char_offset++)    \
2916         *charbuf++ = components[i];                                     \
2917     coding->annotated = 1;                                              \
2918     composition_state = COMPOSING_NO;                                   \
2919   } while (0)
2920
2921
2922 /* Decode a composition rule from the byte C1 (and maybe one more byte
2923    from SRC) and store one encoded composition rule in
2924    coding->cmp_data.  */
2925
2926 #define DECODE_COMPOSITION_RULE(c1)                                     \
2927   do {                                                                  \
2928     (c1) -= 32;                                                         \
2929     if (c1 < 81)                /* old format (before ver.21) */        \
2930       {                                                                 \
2931         int gref = (c1) / 9;                                            \
2932         int nref = (c1) % 9;                                            \
2933         if (gref == 4) gref = 10;                                       \
2934         if (nref == 4) nref = 10;                                       \
2935         c1 = COMPOSITION_ENCODE_RULE (gref, nref);                      \
2936       }                                                                 \
2937     else if (c1 < 93)           /* new format (after ver.21) */         \
2938       {                                                                 \
2939         ONE_MORE_BYTE (c2);                                             \
2940         c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32);                \
2941       }                                                                 \
2942     else                                                                \
2943       c1 = 0;                                                           \
2944   } while (0)
2945
2946
2947 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
2948
2949 static void
2950 decode_coding_iso_2022 (coding)
2951      struct coding_system *coding;
2952 {
2953   const unsigned char *src = coding->source + coding->consumed;
2954   const unsigned char *src_end = coding->source + coding->src_bytes;
2955   const unsigned char *src_base;
2956   int *charbuf = coding->charbuf + coding->charbuf_used;
2957   int *charbuf_end
2958     = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
2959   int consumed_chars = 0, consumed_chars_base;
2960   int multibytep = coding->src_multibyte;
2961   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
2962   int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
2963   int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
2964   int charset_id_2, charset_id_3;
2965   struct charset *charset;
2966   int c;
2967   /* For handling composition sequence.  */
2968 #define COMPOSING_NO                    0
2969 #define COMPOSING_CHAR                  1
2970 #define COMPOSING_RULE                  2
2971 #define COMPOSING_COMPONENT_CHAR        3
2972 #define COMPOSING_COMPONENT_RULE        4
2973
2974   int composition_state = COMPOSING_NO;
2975   enum composition_method method;
2976   int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
2977   int component_idx;
2978   int component_len;
2979   Lisp_Object attrs, charset_list;
2980   int char_offset = coding->produced_char;
2981   int last_offset = char_offset;
2982   int last_id = charset_ascii;
2983   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2984   int byte_after_cr = -1;
2985
2986   CODING_GET_INFO (coding, attrs, charset_list);
2987   setup_iso_safe_charsets (attrs);
2988   /* Charset list may have been changed.  */
2989   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
2990   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
2991
2992   while (1)
2993     {
2994       int c1, c2;
2995
2996       src_base = src;
2997       consumed_chars_base = consumed_chars;
2998
2999       if (charbuf >= charbuf_end)
3000         break;
3001
3002       if (byte_after_cr >= 0)
3003         c1 = byte_after_cr, byte_after_cr = -1;
3004       else
3005         ONE_MORE_BYTE (c1);
3006       if (c1 < 0)
3007         goto invalid_code;
3008
3009       /* We produce at most one character.  */
3010       switch (iso_code_class [c1])
3011         {
3012         case ISO_0x20_or_0x7F:
3013           if (composition_state != COMPOSING_NO)
3014             {
3015               if (composition_state == COMPOSING_RULE
3016                   || composition_state == COMPOSING_COMPONENT_RULE)
3017                 {
3018                   DECODE_COMPOSITION_RULE (c1);
3019                   components[component_idx++] = c1;
3020                   composition_state--;
3021                   continue;
3022                 }
3023             }
3024           if (charset_id_0 < 0
3025               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3026             /* This is SPACE or DEL.  */
3027             charset = CHARSET_FROM_ID (charset_ascii);
3028           else
3029             charset = CHARSET_FROM_ID (charset_id_0);
3030           break;
3031
3032         case ISO_graphic_plane_0:
3033           if (composition_state != COMPOSING_NO)
3034             {
3035               if (composition_state == COMPOSING_RULE
3036                   || composition_state == COMPOSING_COMPONENT_RULE)
3037                 {
3038                   DECODE_COMPOSITION_RULE (c1);
3039                   components[component_idx++] = c1;
3040                   composition_state--;
3041                   continue;
3042                 }
3043             }
3044           if (charset_id_0 < 0)
3045             charset = CHARSET_FROM_ID (charset_ascii);
3046           else
3047             charset = CHARSET_FROM_ID (charset_id_0);
3048           break;
3049
3050         case ISO_0xA0_or_0xFF:
3051           if (charset_id_1 < 0
3052               || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_1))
3053               || CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
3054             goto invalid_code;
3055           /* This is a graphic character, we fall down ... */
3056
3057         case ISO_graphic_plane_1:
3058           if (charset_id_1 < 0)
3059             goto invalid_code;
3060           charset = CHARSET_FROM_ID (charset_id_1);
3061           break;
3062
3063         case ISO_control_0:
3064           if (eol_crlf && c1 == '\r')
3065             ONE_MORE_BYTE (byte_after_cr);
3066           MAYBE_FINISH_COMPOSITION ();
3067           charset = CHARSET_FROM_ID (charset_ascii);
3068           break;
3069
3070         case ISO_control_1:
3071           MAYBE_FINISH_COMPOSITION ();
3072           goto invalid_code;
3073
3074         case ISO_shift_out:
3075           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3076               || CODING_ISO_DESIGNATION (coding, 1) < 0)
3077             goto invalid_code;
3078           CODING_ISO_INVOCATION (coding, 0) = 1;
3079           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3080           continue;
3081
3082         case ISO_shift_in:
3083           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT))
3084             goto invalid_code;
3085           CODING_ISO_INVOCATION (coding, 0) = 0;
3086           charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3087           continue;
3088
3089         case ISO_single_shift_2_7:
3090         case ISO_single_shift_2:
3091           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3092             goto invalid_code;
3093           /* SS2 is handled as an escape sequence of ESC 'N' */
3094           c1 = 'N';
3095           goto label_escape_sequence;
3096
3097         case ISO_single_shift_3:
3098           if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT))
3099             goto invalid_code;
3100           /* SS2 is handled as an escape sequence of ESC 'O' */
3101           c1 = 'O';
3102           goto label_escape_sequence;
3103
3104         case ISO_control_sequence_introducer:
3105           /* CSI is handled as an escape sequence of ESC '[' ...  */
3106           c1 = '[';
3107           goto label_escape_sequence;
3108
3109         case ISO_escape:
3110           ONE_MORE_BYTE (c1);
3111         label_escape_sequence:
3112           /* Escape sequences handled here are invocation,
3113              designation, direction specification, and character
3114              composition specification.  */
3115           switch (c1)
3116             {
3117             case '&':           /* revision of following character set */
3118               ONE_MORE_BYTE (c1);
3119               if (!(c1 >= '@' && c1 <= '~'))
3120                 goto invalid_code;
3121               ONE_MORE_BYTE (c1);
3122               if (c1 != ISO_CODE_ESC)
3123                 goto invalid_code;
3124               ONE_MORE_BYTE (c1);
3125               goto label_escape_sequence;
3126
3127             case '$':           /* designation of 2-byte character set */
3128               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3129                 goto invalid_code;
3130               {
3131                 int reg, chars96;
3132
3133                 ONE_MORE_BYTE (c1);
3134                 if (c1 >= '@' && c1 <= 'B')
3135                   {     /* designation of JISX0208.1978, GB2312.1980,
3136                            or JISX0208.1980 */
3137                     reg = 0, chars96 = 0;
3138                   }
3139                 else if (c1 >= 0x28 && c1 <= 0x2B)
3140                   { /* designation of DIMENSION2_CHARS94 character set */
3141                     reg = c1 - 0x28, chars96 = 0;
3142                     ONE_MORE_BYTE (c1);
3143                   }
3144                 else if (c1 >= 0x2C && c1 <= 0x2F)
3145                   { /* designation of DIMENSION2_CHARS96 character set */
3146                     reg = c1 - 0x2C, chars96 = 1;
3147                     ONE_MORE_BYTE (c1);
3148                   }
3149                 else
3150                   goto invalid_code;
3151                 DECODE_DESIGNATION (reg, 2, chars96, c1);
3152                 /* We must update these variables now.  */
3153                 if (reg == 0)
3154                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3155                 else if (reg == 1)
3156                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3157                 if (chars96 < 0)
3158                   goto invalid_code;
3159               }
3160               continue;
3161
3162             case 'n':           /* invocation of locking-shift-2 */
3163               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3164                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3165                 goto invalid_code;
3166               CODING_ISO_INVOCATION (coding, 0) = 2;
3167               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3168               continue;
3169
3170             case 'o':           /* invocation of locking-shift-3 */
3171               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3172                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3173                 goto invalid_code;
3174               CODING_ISO_INVOCATION (coding, 0) = 3;
3175               charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3176               continue;
3177
3178             case 'N':           /* invocation of single-shift-2 */
3179               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3180                   || CODING_ISO_DESIGNATION (coding, 2) < 0)
3181                 goto invalid_code;
3182               charset_id_2 = CODING_ISO_DESIGNATION (coding, 2);
3183               if (charset_id_2 < 0)
3184                 charset = CHARSET_FROM_ID (charset_ascii);
3185               else
3186                 charset = CHARSET_FROM_ID (charset_id_2);
3187               ONE_MORE_BYTE (c1);
3188               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3189                 goto invalid_code;
3190               break;
3191
3192             case 'O':           /* invocation of single-shift-3 */
3193               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3194                   || CODING_ISO_DESIGNATION (coding, 3) < 0)
3195                 goto invalid_code;
3196               charset_id_3 = CODING_ISO_DESIGNATION (coding, 3);
3197               if (charset_id_3 < 0)
3198                 charset = CHARSET_FROM_ID (charset_ascii);
3199               else
3200                 charset = CHARSET_FROM_ID (charset_id_3);
3201               ONE_MORE_BYTE (c1);
3202               if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
3203                 goto invalid_code;
3204               break;
3205
3206             case '0': case '2': case '3': case '4': /* start composition */
3207               if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3208                 goto invalid_code;
3209               DECODE_COMPOSITION_START (c1);
3210               continue;
3211
3212             case '1':           /* end composition */
3213               if (composition_state == COMPOSING_NO)
3214                 goto invalid_code;
3215               DECODE_COMPOSITION_END ();
3216               continue;
3217
3218             case '[':           /* specification of direction */
3219               if (! CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DIRECTION)
3220                 goto invalid_code;
3221               /* For the moment, nested direction is not supported.
3222                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
3223                  left-to-right, and nozero means right-to-left.  */
3224               ONE_MORE_BYTE (c1);
3225               switch (c1)
3226                 {
3227                 case ']':       /* end of the current direction */
3228                   coding->mode &= ~CODING_MODE_DIRECTION;
3229
3230                 case '0':       /* end of the current direction */
3231                 case '1':       /* start of left-to-right direction */
3232                   ONE_MORE_BYTE (c1);
3233                   if (c1 == ']')
3234                     coding->mode &= ~CODING_MODE_DIRECTION;
3235                   else
3236                     goto invalid_code;
3237                   break;
3238
3239                 case '2':       /* start of right-to-left direction */
3240                   ONE_MORE_BYTE (c1);
3241                   if (c1 == ']')
3242                     coding->mode |= CODING_MODE_DIRECTION;
3243                   else
3244                     goto invalid_code;
3245                   break;
3246
3247                 default:
3248                   goto invalid_code;
3249                 }
3250               continue;
3251
3252             case '%':
3253               ONE_MORE_BYTE (c1);
3254               if (c1 == '/')
3255                 {
3256                   /* CTEXT extended segment:
3257                      ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
3258                      We keep these bytes as is for the moment.
3259                      They may be decoded by post-read-conversion.  */
3260                   int dim, M, L;
3261                   int size;
3262
3263                   ONE_MORE_BYTE (dim);
3264                   ONE_MORE_BYTE (M);
3265                   ONE_MORE_BYTE (L);
3266                   size = ((M - 128) * 128) + (L - 128);
3267                   if (charbuf + 8 + size > charbuf_end)
3268                     goto break_loop;
3269                   *charbuf++ = ISO_CODE_ESC;
3270                   *charbuf++ = '%';
3271                   *charbuf++ = '/';
3272                   *charbuf++ = dim;
3273                   *charbuf++ = BYTE8_TO_CHAR (M);
3274                   *charbuf++ = BYTE8_TO_CHAR (L);
3275                   while (size-- > 0)
3276                     {
3277                       ONE_MORE_BYTE (c1);
3278                       *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3279                     }
3280                 }
3281               else if (c1 == 'G')
3282                 {
3283                   /* XFree86 extension for embedding UTF-8 in CTEXT:
3284                      ESC % G --UTF-8-BYTES-- ESC % @
3285                      We keep these bytes as is for the moment.
3286                      They may be decoded by post-read-conversion.  */
3287                   int *p = charbuf;
3288
3289                   if (p + 6 > charbuf_end)
3290                     goto break_loop;
3291                   *p++ = ISO_CODE_ESC;
3292                   *p++ = '%';
3293                   *p++ = 'G';
3294                   while (p < charbuf_end)
3295                     {
3296                       ONE_MORE_BYTE (c1);
3297                       if (c1 == ISO_CODE_ESC
3298                           && src + 1 < src_end
3299                           && src[0] == '%'
3300                           && src[1] == '@')
3301                         {
3302                           src += 2;
3303                           break;
3304                         }
3305                       *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3306                     }
3307                   if (p + 3 > charbuf_end)
3308                     goto break_loop;
3309                   *p++ = ISO_CODE_ESC;
3310                   *p++ = '%';
3311                   *p++ = '@';
3312                   charbuf = p;
3313                 }
3314               else
3315                 goto invalid_code;
3316               continue;
3317               break;
3318
3319             default:
3320               if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATION))
3321                 goto invalid_code;
3322               {
3323                 int reg, chars96;
3324
3325                 if (c1 >= 0x28 && c1 <= 0x2B)
3326                   { /* designation of DIMENSION1_CHARS94 character set */
3327                     reg = c1 - 0x28, chars96 = 0;
3328                     ONE_MORE_BYTE (c1);
3329                   }
3330                 else if (c1 >= 0x2C && c1 <= 0x2F)
3331                   { /* designation of DIMENSION1_CHARS96 character set */
3332                     reg = c1 - 0x2C, chars96 = 1;
3333                     ONE_MORE_BYTE (c1);
3334                   }
3335                 else
3336                   goto invalid_code;
3337                 DECODE_DESIGNATION (reg, 1, chars96, c1);
3338                 /* We must update these variables now.  */
3339                 if (reg == 0)
3340                   charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3341                 else if (reg == 1)
3342                   charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3343                 if (chars96 < 0)
3344                   goto invalid_code;
3345               }
3346               continue;
3347             }
3348         }
3349
3350       if (charset->id != charset_ascii
3351           && last_id != charset->id)
3352         {
3353           if (last_id != charset_ascii)
3354             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3355           last_id = charset->id;
3356           last_offset = char_offset;
3357         }
3358
3359       /* Now we know CHARSET and 1st position code C1 of a character.
3360          Produce a decoded character while getting 2nd position code
3361          C2 if necessary.  */
3362       c1 &= 0x7F;
3363       if (CHARSET_DIMENSION (charset) > 1)
3364         {
3365           ONE_MORE_BYTE (c2);
3366           if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3367             /* C2 is not in a valid range.  */
3368             goto invalid_code;
3369           c1 = (c1 << 8) | (c2 & 0x7F);
3370           if (CHARSET_DIMENSION (charset) > 2)
3371             {
3372               ONE_MORE_BYTE (c2);
3373               if (c2 < 0x20 || (c2 >= 0x80 && c2 < 0xA0))
3374                 /* C2 is not in a valid range.  */
3375                 goto invalid_code;
3376               c1 = (c1 << 8) | (c2 & 0x7F);
3377             }
3378         }
3379
3380       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c1, c);
3381       if (c < 0)
3382         {
3383           MAYBE_FINISH_COMPOSITION ();
3384           for (; src_base < src; src_base++, char_offset++)
3385             {
3386               if (ASCII_BYTE_P (*src_base))
3387                 *charbuf++ = *src_base;
3388               else
3389                 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3390             }
3391         }
3392       else if (composition_state == COMPOSING_NO)
3393         {
3394           *charbuf++ = c;
3395           char_offset++;
3396         }
3397       else
3398         {
3399           components[component_idx++] = c;
3400           if (method == COMPOSITION_WITH_RULE
3401               || (method == COMPOSITION_WITH_RULE_ALTCHARS
3402                   && composition_state == COMPOSING_COMPONENT_CHAR))
3403             composition_state++;
3404         }
3405       continue;
3406
3407     invalid_code:
3408       MAYBE_FINISH_COMPOSITION ();
3409       src = src_base;
3410       consumed_chars = consumed_chars_base;
3411       ONE_MORE_BYTE (c);
3412       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
3413       char_offset++;
3414       coding->errors++;
3415       continue;
3416
3417     break_loop:
3418       break;
3419     }
3420
3421  no_more_source:
3422   if (last_id != charset_ascii)
3423     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3424   coding->consumed_char += consumed_chars_base;
3425   coding->consumed = src_base - coding->source;
3426   coding->charbuf_used = charbuf - coding->charbuf;
3427 }
3428
3429
3430 /* ISO2022 encoding stuff.  */
3431
3432 /*
3433    It is not enough to say just "ISO2022" on encoding, we have to
3434    specify more details.  In Emacs, each coding system of ISO2022
3435    variant has the following specifications:
3436         1. Initial designation to G0 thru G3.
3437         2. Allows short-form designation?
3438         3. ASCII should be designated to G0 before control characters?
3439         4. ASCII should be designated to G0 at end of line?
3440         5. 7-bit environment or 8-bit environment?
3441         6. Use locking-shift?
3442         7. Use Single-shift?
3443    And the following two are only for Japanese:
3444         8. Use ASCII in place of JIS0201-1976-Roman?
3445         9. Use JISX0208-1983 in place of JISX0208-1978?
3446    These specifications are encoded in CODING_ISO_FLAGS (coding) as flag bits
3447    defined by macros CODING_ISO_FLAG_XXX.  See `coding.h' for more
3448    details.
3449 */
3450
3451 /* Produce codes (escape sequence) for designating CHARSET to graphic
3452    register REG at DST, and increment DST.  If <final-char> of CHARSET is
3453    '@', 'A', or 'B' and the coding system CODING allows, produce
3454    designation sequence of short-form.  */
3455
3456 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
3457   do {                                                                  \
3458     unsigned char final_char = CHARSET_ISO_FINAL (charset);             \
3459     char *intermediate_char_94 = "()*+";                                \
3460     char *intermediate_char_96 = ",-./";                                \
3461     int revision = -1;                                                  \
3462     int c;                                                              \
3463                                                                         \
3464     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_REVISION)           \
3465       revision = CHARSET_ISO_REVISION (charset);                        \
3466                                                                         \
3467     if (revision >= 0)                                                  \
3468       {                                                                 \
3469         EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '&');                       \
3470         EMIT_ONE_BYTE ('@' + revision);                                 \
3471       }                                                                 \
3472     EMIT_ONE_ASCII_BYTE (ISO_CODE_ESC);                                 \
3473     if (CHARSET_DIMENSION (charset) == 1)                               \
3474       {                                                                 \
3475         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3476           c = intermediate_char_94[reg];                                \
3477         else                                                            \
3478           c = intermediate_char_96[reg];                                \
3479         EMIT_ONE_ASCII_BYTE (c);                                        \
3480       }                                                                 \
3481     else                                                                \
3482       {                                                                 \
3483         EMIT_ONE_ASCII_BYTE ('$');                                      \
3484         if (! CHARSET_ISO_CHARS_96 (charset))                           \
3485           {                                                             \
3486             if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LONG_FORM   \
3487                 || reg != 0                                             \
3488                 || final_char < '@' || final_char > 'B')                \
3489               EMIT_ONE_ASCII_BYTE (intermediate_char_94[reg]);          \
3490           }                                                             \
3491         else                                                            \
3492           EMIT_ONE_ASCII_BYTE (intermediate_char_96[reg]);              \
3493       }                                                                 \
3494     EMIT_ONE_ASCII_BYTE (final_char);                                   \
3495                                                                         \
3496     CODING_ISO_DESIGNATION (coding, reg) = CHARSET_ID (charset);        \
3497   } while (0)
3498
3499
3500 /* The following two macros produce codes (control character or escape
3501    sequence) for ISO2022 single-shift functions (single-shift-2 and
3502    single-shift-3).  */
3503
3504 #define ENCODE_SINGLE_SHIFT_2                                           \
3505   do {                                                                  \
3506     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3507       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'N');                         \
3508     else                                                                \
3509       EMIT_ONE_BYTE (ISO_CODE_SS2);                                     \
3510     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3511   } while (0)
3512
3513
3514 #define ENCODE_SINGLE_SHIFT_3                                           \
3515   do {                                                                  \
3516     if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)         \
3517       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'O');                         \
3518     else                                                                \
3519       EMIT_ONE_BYTE (ISO_CODE_SS3);                                     \
3520     CODING_ISO_SINGLE_SHIFTING (coding) = 1;                            \
3521   } while (0)
3522
3523
3524 /* The following four macros produce codes (control character or
3525    escape sequence) for ISO2022 locking-shift functions (shift-in,
3526    shift-out, locking-shift-2, and locking-shift-3).  */
3527
3528 #define ENCODE_SHIFT_IN                                 \
3529   do {                                                  \
3530     EMIT_ONE_ASCII_BYTE (ISO_CODE_SI);                  \
3531     CODING_ISO_INVOCATION (coding, 0) = 0;              \
3532   } while (0)
3533
3534
3535 #define ENCODE_SHIFT_OUT                                \
3536   do {                                                  \
3537     EMIT_ONE_ASCII_BYTE (ISO_CODE_SO);                  \
3538     CODING_ISO_INVOCATION (coding, 0) = 1;              \
3539   } while (0)
3540
3541
3542 #define ENCODE_LOCKING_SHIFT_2                          \
3543   do {                                                  \
3544     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3545     CODING_ISO_INVOCATION (coding, 0) = 2;              \
3546   } while (0)
3547
3548
3549 #define ENCODE_LOCKING_SHIFT_3                          \
3550   do {                                                  \
3551     EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, 'n');           \
3552     CODING_ISO_INVOCATION (coding, 0) = 3;              \
3553   } while (0)
3554
3555
3556 /* Produce codes for a DIMENSION1 character whose character set is
3557    CHARSET and whose position-code is C1.  Designation and invocation
3558    sequences are also produced in advance if necessary.  */
3559
3560 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
3561   do {                                                                  \
3562     int id = CHARSET_ID (charset);                                      \
3563                                                                         \
3564     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_ROMAN)         \
3565         && id == charset_ascii)                                         \
3566       {                                                                 \
3567         id = charset_jisx0201_roman;                                    \
3568         charset = CHARSET_FROM_ID (id);                                 \
3569       }                                                                 \
3570                                                                         \
3571     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3572       {                                                                 \
3573         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3574           EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                              \
3575         else                                                            \
3576           EMIT_ONE_BYTE (c1 | 0x80);                                    \
3577         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3578         break;                                                          \
3579       }                                                                 \
3580     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3581       {                                                                 \
3582         EMIT_ONE_ASCII_BYTE (c1 & 0x7F);                                \
3583         break;                                                          \
3584       }                                                                 \
3585     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3586       {                                                                 \
3587         EMIT_ONE_BYTE (c1 | 0x80);                                      \
3588         break;                                                          \
3589       }                                                                 \
3590     else                                                                \
3591       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3592          must invoke it, or, at first, designate it to some graphic     \
3593          register.  Then repeat the loop to actually produce the        \
3594          character.  */                                                 \
3595       dst = encode_invocation_designation (charset, coding, dst,        \
3596                                            &produced_chars);            \
3597   } while (1)
3598
3599
3600 /* Produce codes for a DIMENSION2 character whose character set is
3601    CHARSET and whose position-codes are C1 and C2.  Designation and
3602    invocation codes are also produced in advance if necessary.  */
3603
3604 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
3605   do {                                                                  \
3606     int id = CHARSET_ID (charset);                                      \
3607                                                                         \
3608     if ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_USE_OLDJIS)        \
3609         && id == charset_jisx0208)                                      \
3610       {                                                                 \
3611         id = charset_jisx0208_1978;                                     \
3612         charset = CHARSET_FROM_ID (id);                                 \
3613       }                                                                 \
3614                                                                         \
3615     if (CODING_ISO_SINGLE_SHIFTING (coding))                            \
3616       {                                                                 \
3617         if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)     \
3618           EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);              \
3619         else                                                            \
3620           EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                    \
3621         CODING_ISO_SINGLE_SHIFTING (coding) = 0;                        \
3622         break;                                                          \
3623       }                                                                 \
3624     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 0))              \
3625       {                                                                 \
3626         EMIT_TWO_ASCII_BYTES ((c1) & 0x7F, (c2) & 0x7F);                \
3627         break;                                                          \
3628       }                                                                 \
3629     else if (id == CODING_ISO_INVOKED_CHARSET (coding, 1))              \
3630       {                                                                 \
3631         EMIT_TWO_BYTES ((c1) | 0x80, (c2) | 0x80);                      \
3632         break;                                                          \
3633       }                                                                 \
3634     else                                                                \
3635       /* Since CHARSET is not yet invoked to any graphic planes, we     \
3636          must invoke it, or, at first, designate it to some graphic     \
3637          register.  Then repeat the loop to actually produce the        \
3638          character.  */                                                 \
3639       dst = encode_invocation_designation (charset, coding, dst,        \
3640                                            &produced_chars);            \
3641   } while (1)
3642
3643
3644 #define ENCODE_ISO_CHARACTER(charset, c)                                   \
3645   do {                                                                     \
3646     int code = ENCODE_CHAR ((charset),(c));                                \
3647                                                                            \
3648     if (CHARSET_DIMENSION (charset) == 1)                                  \
3649       ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code);                   \
3650     else                                                                   \
3651       ENCODE_ISO_CHARACTER_DIMENSION2 ((charset), code >> 8, code & 0xFF); \
3652   } while (0)
3653
3654
3655 /* Produce designation and invocation codes at a place pointed by DST
3656    to use CHARSET.  The element `spec.iso_2022' of *CODING is updated.
3657    Return new DST.  */
3658
3659 unsigned char *
3660 encode_invocation_designation (charset, coding, dst, p_nchars)
3661      struct charset *charset;
3662      struct coding_system *coding;
3663      unsigned char *dst;
3664      int *p_nchars;
3665 {
3666   int multibytep = coding->dst_multibyte;
3667   int produced_chars = *p_nchars;
3668   int reg;                      /* graphic register number */
3669   int id = CHARSET_ID (charset);
3670
3671   /* At first, check designations.  */
3672   for (reg = 0; reg < 4; reg++)
3673     if (id == CODING_ISO_DESIGNATION (coding, reg))
3674       break;
3675
3676   if (reg >= 4)
3677     {
3678       /* CHARSET is not yet designated to any graphic registers.  */
3679       /* At first check the requested designation.  */
3680       reg = CODING_ISO_REQUEST (coding, id);
3681       if (reg < 0)
3682         /* Since CHARSET requests no special designation, designate it
3683            to graphic register 0.  */
3684         reg = 0;
3685
3686       ENCODE_DESIGNATION (charset, reg, coding);
3687     }
3688
3689   if (CODING_ISO_INVOCATION (coding, 0) != reg
3690       && CODING_ISO_INVOCATION (coding, 1) != reg)
3691     {
3692       /* Since the graphic register REG is not invoked to any graphic
3693          planes, invoke it to graphic plane 0.  */
3694       switch (reg)
3695         {
3696         case 0:                 /* graphic register 0 */
3697           ENCODE_SHIFT_IN;
3698           break;
3699
3700         case 1:                 /* graphic register 1 */
3701           ENCODE_SHIFT_OUT;
3702           break;
3703
3704         case 2:                 /* graphic register 2 */
3705           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3706             ENCODE_SINGLE_SHIFT_2;
3707           else
3708             ENCODE_LOCKING_SHIFT_2;
3709           break;
3710
3711         case 3:                 /* graphic register 3 */
3712           if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SINGLE_SHIFT)
3713             ENCODE_SINGLE_SHIFT_3;
3714           else
3715             ENCODE_LOCKING_SHIFT_3;
3716           break;
3717         }
3718     }
3719
3720   *p_nchars = produced_chars;
3721   return dst;
3722 }
3723
3724 /* The following three macros produce codes for indicating direction
3725    of text.  */
3726 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER                              \
3727   do {                                                                  \
3728     if (CODING_ISO_FLAGS (coding) == CODING_ISO_FLAG_SEVEN_BITS)        \
3729       EMIT_TWO_ASCII_BYTES (ISO_CODE_ESC, '[');                         \
3730     else                                                                \
3731       EMIT_ONE_BYTE (ISO_CODE_CSI);                                     \
3732   } while (0)
3733
3734
3735 #define ENCODE_DIRECTION_R2L()                  \
3736   do {                                          \
3737     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3738     EMIT_TWO_ASCII_BYTES ('2', ']');            \
3739   } while (0)
3740
3741
3742 #define ENCODE_DIRECTION_L2R()                  \
3743   do {                                          \
3744     ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst);   \
3745     EMIT_TWO_ASCII_BYTES ('0', ']');            \
3746   } while (0)
3747
3748
3749 /* Produce codes for designation and invocation to reset the graphic
3750    planes and registers to initial state.  */
3751 #define ENCODE_RESET_PLANE_AND_REGISTER()                               \
3752   do {                                                                  \
3753     int reg;                                                            \
3754     struct charset *charset;                                            \
3755                                                                         \
3756     if (CODING_ISO_INVOCATION (coding, 0) != 0)                         \
3757       ENCODE_SHIFT_IN;                                                  \
3758     for (reg = 0; reg < 4; reg++)                                       \
3759       if (CODING_ISO_INITIAL (coding, reg) >= 0                         \
3760           && (CODING_ISO_DESIGNATION (coding, reg)                      \
3761               != CODING_ISO_INITIAL (coding, reg)))                     \
3762         {                                                               \
3763           charset = CHARSET_FROM_ID (CODING_ISO_INITIAL (coding, reg)); \
3764           ENCODE_DESIGNATION (charset, reg, coding);                    \
3765         }                                                               \
3766   } while (0)
3767
3768
3769 /* Produce designation sequences of charsets in the line started from
3770    SRC to a place pointed by DST, and return updated DST.
3771
3772    If the current block ends before any end-of-line, we may fail to
3773    find all the necessary designations.  */
3774
3775 static unsigned char *
3776 encode_designation_at_bol (coding, charbuf, charbuf_end, dst)
3777      struct coding_system *coding;
3778      int *charbuf, *charbuf_end;
3779      unsigned char *dst;
3780 {
3781   struct charset *charset;
3782   /* Table of charsets to be designated to each graphic register.  */
3783   int r[4];
3784   int c, found = 0, reg;
3785   int produced_chars = 0;
3786   int multibytep = coding->dst_multibyte;
3787   Lisp_Object attrs;
3788   Lisp_Object charset_list;
3789
3790   attrs = CODING_ID_ATTRS (coding->id);
3791   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3792   if (EQ (charset_list, Qiso_2022))
3793     charset_list = Viso_2022_charset_list;
3794
3795   for (reg = 0; reg < 4; reg++)
3796     r[reg] = -1;
3797
3798   while (found < 4)
3799     {
3800       int id;
3801
3802       c = *charbuf++;
3803       if (c == '\n')
3804         break;
3805       charset = char_charset (c, charset_list, NULL);
3806       id = CHARSET_ID (charset);
3807       reg = CODING_ISO_REQUEST (coding, id);
3808       if (reg >= 0 && r[reg] < 0)
3809         {
3810           found++;
3811           r[reg] = id;
3812         }
3813     }
3814
3815   if (found)
3816     {
3817       for (reg = 0; reg < 4; reg++)
3818         if (r[reg] >= 0
3819             && CODING_ISO_DESIGNATION (coding, reg) != r[reg])
3820           ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding);
3821     }
3822
3823   return dst;
3824 }
3825
3826 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
3827
3828 static int
3829 encode_coding_iso_2022 (coding)
3830      struct coding_system *coding;
3831 {
3832   int multibytep = coding->dst_multibyte;
3833   int *charbuf = coding->charbuf;
3834   int *charbuf_end = charbuf + coding->charbuf_used;
3835   unsigned char *dst = coding->destination + coding->produced;
3836   unsigned char *dst_end = coding->destination + coding->dst_bytes;
3837   int safe_room = 16;
3838   int bol_designation
3839     = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL
3840        && CODING_ISO_BOL (coding));
3841   int produced_chars = 0;
3842   Lisp_Object attrs, eol_type, charset_list;
3843   int ascii_compatible;
3844   int c;
3845   int preferred_charset_id = -1;
3846
3847   CODING_GET_INFO (coding, attrs, charset_list);
3848   eol_type = CODING_ID_EOL_TYPE (coding->id);
3849   if (VECTORP (eol_type))
3850     eol_type = Qunix;
3851
3852   setup_iso_safe_charsets (attrs);
3853   /* Charset list may have been changed.  */
3854   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3855   coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs));
3856
3857   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
3858
3859   while (charbuf < charbuf_end)
3860     {
3861       ASSURE_DESTINATION (safe_room);
3862
3863       if (bol_designation)
3864         {
3865           unsigned char *dst_prev = dst;
3866
3867           /* We have to produce designation sequences if any now.  */
3868           dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst);
3869           bol_designation = 0;
3870           /* We are sure that designation sequences are all ASCII bytes.  */
3871           produced_chars += dst - dst_prev;
3872         }
3873
3874       c = *charbuf++;
3875
3876       if (c < 0)
3877         {
3878           /* Handle an annotation.  */
3879           switch (*charbuf)
3880             {
3881             case CODING_ANNOTATE_COMPOSITION_MASK:
3882               /* Not yet implemented.  */
3883               break;
3884             case CODING_ANNOTATE_CHARSET_MASK:
3885               preferred_charset_id = charbuf[2];
3886               if (preferred_charset_id >= 0
3887                   && NILP (Fmemq (make_number (preferred_charset_id),
3888                                   charset_list)))
3889                 preferred_charset_id = -1;
3890               break;
3891             default:
3892               abort ();
3893             }
3894           charbuf += -c - 1;
3895           continue;
3896         }
3897
3898       /* Now encode the character C.  */
3899       if (c < 0x20 || c == 0x7F)
3900         {
3901           if (c == '\n'
3902               || (c == '\r' && EQ (eol_type, Qmac)))
3903             {
3904               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3905                 ENCODE_RESET_PLANE_AND_REGISTER ();
3906               if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_INIT_AT_BOL)
3907                 {
3908                   int i;
3909
3910                   for (i = 0; i < 4; i++)
3911                     CODING_ISO_DESIGNATION (coding, i)
3912                       = CODING_ISO_INITIAL (coding, i);
3913                 }
3914               bol_designation
3915                 = CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL;
3916             }
3917           else if (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_CNTL)
3918             ENCODE_RESET_PLANE_AND_REGISTER ();
3919           EMIT_ONE_ASCII_BYTE (c);
3920         }
3921       else if (ASCII_CHAR_P (c))
3922         {
3923           if (ascii_compatible)
3924             EMIT_ONE_ASCII_BYTE (c);
3925           else
3926             {
3927               struct charset *charset = CHARSET_FROM_ID (charset_ascii);
3928               ENCODE_ISO_CHARACTER (charset, c);
3929             }
3930         }
3931       else if (CHAR_BYTE8_P (c))
3932         {
3933           c = CHAR_TO_BYTE8 (c);
3934           EMIT_ONE_BYTE (c);
3935         }
3936       else
3937         {
3938           struct charset *charset;
3939
3940           if (preferred_charset_id >= 0)
3941             {
3942               charset = CHARSET_FROM_ID (preferred_charset_id);
3943               if (! CHAR_CHARSET_P (c, charset))
3944                 charset = char_charset (c, charset_list, NULL);
3945             }
3946           else
3947             charset = char_charset (c, charset_list, NULL);
3948           if (!charset)
3949             {
3950               if (coding->mode & CODING_MODE_SAFE_ENCODING)
3951                 {
3952                   c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
3953                   charset = CHARSET_FROM_ID (charset_ascii);
3954                 }
3955               else
3956                 {
3957                   c = coding->default_char;
3958                   charset = char_charset (c, charset_list, NULL);
3959                 }
3960             }
3961           ENCODE_ISO_CHARACTER (charset, c);
3962         }
3963     }
3964
3965   if (coding->mode & CODING_MODE_LAST_BLOCK
3966       && CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_RESET_AT_EOL)
3967     {
3968       ASSURE_DESTINATION (safe_room);
3969       ENCODE_RESET_PLANE_AND_REGISTER ();
3970     }
3971   record_conversion_result (coding, CODING_RESULT_SUCCESS);
3972   CODING_ISO_BOL (coding) = bol_designation;
3973   coding->produced_char += produced_chars;
3974   coding->produced = dst - coding->destination;
3975   return 0;
3976 }
3977
3978 \f
3979 /*** 8,9. SJIS and BIG5 handlers ***/
3980
3981 /* Although SJIS and BIG5 are not ISO's coding system, they are used
3982    quite widely.  So, for the moment, Emacs supports them in the bare
3983    C code.  But, in the future, they may be supported only by CCL.  */
3984
3985 /* SJIS is a coding system encoding three character sets: ASCII, right
3986    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
3987    as is.  A character of charset katakana-jisx0201 is encoded by
3988    "position-code + 0x80".  A character of charset japanese-jisx0208
3989    is encoded in 2-byte but two position-codes are divided and shifted
3990    so that it fit in the range below.
3991
3992    --- CODE RANGE of SJIS ---
3993    (character set)      (range)
3994    ASCII                0x00 .. 0x7F
3995    KATAKANA-JISX0201    0xA0 .. 0xDF
3996    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
3997             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
3998    -------------------------------
3999
4000 */
4001
4002 /* BIG5 is a coding system encoding two character sets: ASCII and
4003    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
4004    character set and is encoded in two-byte.
4005
4006    --- CODE RANGE of BIG5 ---
4007    (character set)      (range)
4008    ASCII                0x00 .. 0x7F
4009    Big5 (1st byte)      0xA1 .. 0xFE
4010         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
4011    --------------------------
4012
4013   */
4014
4015 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4016    Check if a text is encoded in SJIS.  If it is, return
4017    CATEGORY_MASK_SJIS, else return 0.  */
4018
4019 static int
4020 detect_coding_sjis (coding, detect_info)
4021      struct coding_system *coding;
4022      struct coding_detection_info *detect_info;
4023 {
4024   const unsigned char *src = coding->source, *src_base;
4025   const unsigned char *src_end = coding->source + coding->src_bytes;
4026   int multibytep = coding->src_multibyte;
4027   int consumed_chars = 0;
4028   int found = 0;
4029   int c;
4030
4031   detect_info->checked |= CATEGORY_MASK_SJIS;
4032   /* A coding system of this category is always ASCII compatible.  */
4033   src += coding->head_ascii;
4034
4035   while (1)
4036     {
4037       src_base = src;
4038       ONE_MORE_BYTE (c);
4039       if (c < 0x80)
4040         continue;
4041       if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF))
4042         {
4043           ONE_MORE_BYTE (c);
4044           if (c < 0x40 || c == 0x7F || c > 0xFC)
4045             break;
4046           found = CATEGORY_MASK_SJIS;
4047         }
4048       else if (c >= 0xA0 && c < 0xE0)
4049         found = CATEGORY_MASK_SJIS;
4050       else
4051         break;
4052     }
4053   detect_info->rejected |= CATEGORY_MASK_SJIS;
4054   return 0;
4055
4056  no_more_source:
4057   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4058     {
4059       detect_info->rejected |= CATEGORY_MASK_SJIS;
4060       return 0;
4061     }
4062   detect_info->found |= found;
4063   return 1;
4064 }
4065
4066 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4067    Check if a text is encoded in BIG5.  If it is, return
4068    CATEGORY_MASK_BIG5, else return 0.  */
4069
4070 static int
4071 detect_coding_big5 (coding, detect_info)
4072      struct coding_system *coding;
4073      struct coding_detection_info *detect_info;
4074 {
4075   const unsigned char *src = coding->source, *src_base;
4076   const unsigned char *src_end = coding->source + coding->src_bytes;
4077   int multibytep = coding->src_multibyte;
4078   int consumed_chars = 0;
4079   int found = 0;
4080   int c;
4081
4082   detect_info->checked |= CATEGORY_MASK_BIG5;
4083   /* A coding system of this category is always ASCII compatible.  */
4084   src += coding->head_ascii;
4085
4086   while (1)
4087     {
4088       src_base = src;
4089       ONE_MORE_BYTE (c);
4090       if (c < 0x80)
4091         continue;
4092       if (c >= 0xA1)
4093         {
4094           ONE_MORE_BYTE (c);
4095           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
4096             return 0;
4097           found = CATEGORY_MASK_BIG5;
4098         }
4099       else
4100         break;
4101     }
4102   detect_info->rejected |= CATEGORY_MASK_BIG5;
4103   return 0;
4104
4105  no_more_source:
4106   if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
4107     {
4108       detect_info->rejected |= CATEGORY_MASK_BIG5;
4109       return 0;
4110     }
4111   detect_info->found |= found;
4112   return 1;
4113 }
4114
4115 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
4116    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
4117
4118 static void
4119 decode_coding_sjis (coding)
4120      struct coding_system *coding;
4121 {
4122   const unsigned char *src = coding->source + coding->consumed;
4123   const unsigned char *src_end = coding->source + coding->src_bytes;
4124   const unsigned char *src_base;
4125   int *charbuf = coding->charbuf + coding->charbuf_used;
4126   int *charbuf_end
4127     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4128   int consumed_chars = 0, consumed_chars_base;
4129   int multibytep = coding->src_multibyte;
4130   struct charset *charset_roman, *charset_kanji, *charset_kana;
4131   struct charset *charset_kanji2;
4132   Lisp_Object attrs, charset_list, val;
4133   int char_offset = coding->produced_char;
4134   int last_offset = char_offset;
4135   int last_id = charset_ascii;
4136   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4137   int byte_after_cr = -1;
4138
4139   CODING_GET_INFO (coding, attrs, charset_list);
4140
4141   val = charset_list;
4142   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4143   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4144   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4145   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4146
4147   while (1)
4148     {
4149       int c, c1;
4150       struct charset *charset;
4151
4152       src_base = src;
4153       consumed_chars_base = consumed_chars;
4154
4155       if (charbuf >= charbuf_end)
4156         break;
4157
4158       if (byte_after_cr >= 0)
4159         c = byte_after_cr, byte_after_cr = -1;
4160       else
4161         ONE_MORE_BYTE (c);
4162       if (c < 0)
4163         goto invalid_code;
4164       if (c < 0x80)
4165         {
4166           if (eol_crlf && c == '\r')
4167             ONE_MORE_BYTE (byte_after_cr);
4168           charset = charset_roman;
4169         }
4170       else if (c == 0x80 || c == 0xA0)
4171         goto invalid_code;
4172       else if (c >= 0xA1 && c <= 0xDF)
4173         {
4174           /* SJIS -> JISX0201-Kana */
4175           c &= 0x7F;
4176           charset = charset_kana;
4177         }
4178       else if (c <= 0xEF)
4179         {
4180           /* SJIS -> JISX0208 */
4181           ONE_MORE_BYTE (c1);
4182           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4183             goto invalid_code;
4184           c = (c << 8) | c1;
4185           SJIS_TO_JIS (c);
4186           charset = charset_kanji;
4187         }
4188       else if (c <= 0xFC && charset_kanji2)
4189         {
4190           /* SJIS -> JISX0213-2 */
4191           ONE_MORE_BYTE (c1);
4192           if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC)
4193             goto invalid_code;
4194           c = (c << 8) | c1;
4195           SJIS_TO_JIS2 (c);
4196           charset = charset_kanji2;
4197         }
4198       else
4199         goto invalid_code;
4200       if (charset->id != charset_ascii
4201           && last_id != charset->id)
4202         {
4203           if (last_id != charset_ascii)
4204             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4205           last_id = charset->id;
4206           last_offset = char_offset;
4207         }
4208       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4209       *charbuf++ = c;
4210       char_offset++;
4211       continue;
4212
4213     invalid_code:
4214       src = src_base;
4215       consumed_chars = consumed_chars_base;
4216       ONE_MORE_BYTE (c);
4217       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4218       char_offset++;
4219       coding->errors++;
4220     }
4221
4222  no_more_source:
4223   if (last_id != charset_ascii)
4224     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4225   coding->consumed_char += consumed_chars_base;
4226   coding->consumed = src_base - coding->source;
4227   coding->charbuf_used = charbuf - coding->charbuf;
4228 }
4229
4230 static void
4231 decode_coding_big5 (coding)
4232      struct coding_system *coding;
4233 {
4234   const unsigned char *src = coding->source + coding->consumed;
4235   const unsigned char *src_end = coding->source + coding->src_bytes;
4236   const unsigned char *src_base;
4237   int *charbuf = coding->charbuf + coding->charbuf_used;
4238   int *charbuf_end
4239     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4240   int consumed_chars = 0, consumed_chars_base;
4241   int multibytep = coding->src_multibyte;
4242   struct charset *charset_roman, *charset_big5;
4243   Lisp_Object attrs, charset_list, val;
4244   int char_offset = coding->produced_char;
4245   int last_offset = char_offset;
4246   int last_id = charset_ascii;
4247   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4248   int byte_after_cr = -1;
4249
4250   CODING_GET_INFO (coding, attrs, charset_list);
4251   val = charset_list;
4252   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4253   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4254
4255   while (1)
4256     {
4257       int c, c1;
4258       struct charset *charset;
4259
4260       src_base = src;
4261       consumed_chars_base = consumed_chars;
4262
4263       if (charbuf >= charbuf_end)
4264         break;
4265
4266       if (byte_after_cr >= 0)
4267         c = byte_after_cr, byte_after_cr = -1;
4268       else
4269         ONE_MORE_BYTE (c);
4270
4271       if (c < 0)
4272         goto invalid_code;
4273       if (c < 0x80)
4274         {
4275           if (eol_crlf && c == '\r')
4276             ONE_MORE_BYTE (byte_after_cr);
4277           charset = charset_roman;
4278         }
4279       else
4280         {
4281           /* BIG5 -> Big5 */
4282           if (c < 0xA1 || c > 0xFE)
4283             goto invalid_code;
4284           ONE_MORE_BYTE (c1);
4285           if (c1 < 0x40 || (c1 > 0x7E && c1 < 0xA1) || c1 > 0xFE)
4286             goto invalid_code;
4287           c = c << 8 | c1;
4288           charset = charset_big5;
4289         }
4290       if (charset->id != charset_ascii
4291           && last_id != charset->id)
4292         {
4293           if (last_id != charset_ascii)
4294             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4295           last_id = charset->id;
4296           last_offset = char_offset;
4297         }
4298       CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c);
4299       *charbuf++ = c;
4300       char_offset++;
4301       continue;
4302
4303     invalid_code:
4304       src = src_base;
4305       consumed_chars = consumed_chars_base;
4306       ONE_MORE_BYTE (c);
4307       *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c);
4308       char_offset++;
4309       coding->errors++;
4310     }
4311
4312  no_more_source:
4313   if (last_id != charset_ascii)
4314     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4315   coding->consumed_char += consumed_chars_base;
4316   coding->consumed = src_base - coding->source;
4317   coding->charbuf_used = charbuf - coding->charbuf;
4318 }
4319
4320 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
4321    This function can encode charsets `ascii', `katakana-jisx0201',
4322    `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'.  We
4323    are sure that all these charsets are registered as official charset
4324    (i.e. do not have extended leading-codes).  Characters of other
4325    charsets are produced without any encoding.  If SJIS_P is 1, encode
4326    SJIS text, else encode BIG5 text.  */
4327
4328 static int
4329 encode_coding_sjis (coding)
4330      struct coding_system *coding;
4331 {
4332   int multibytep = coding->dst_multibyte;
4333   int *charbuf = coding->charbuf;
4334   int *charbuf_end = charbuf + coding->charbuf_used;
4335   unsigned char *dst = coding->destination + coding->produced;
4336   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4337   int safe_room = 4;
4338   int produced_chars = 0;
4339   Lisp_Object attrs, charset_list, val;
4340   int ascii_compatible;
4341   struct charset *charset_roman, *charset_kanji, *charset_kana;
4342   struct charset *charset_kanji2;
4343   int c;
4344
4345   CODING_GET_INFO (coding, attrs, charset_list);
4346   val = charset_list;
4347   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4348   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4349   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4350   charset_kanji2 = NILP (val) ? NULL : CHARSET_FROM_ID (XINT (XCAR (val)));
4351
4352   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4353
4354   while (charbuf < charbuf_end)
4355     {
4356       ASSURE_DESTINATION (safe_room);
4357       c = *charbuf++;
4358       /* Now encode the character C.  */
4359       if (ASCII_CHAR_P (c) && ascii_compatible)
4360         EMIT_ONE_ASCII_BYTE (c);
4361       else if (CHAR_BYTE8_P (c))
4362         {
4363           c = CHAR_TO_BYTE8 (c);
4364           EMIT_ONE_BYTE (c);
4365         }
4366       else
4367         {
4368           unsigned code;
4369           struct charset *charset = char_charset (c, charset_list, &code);
4370
4371           if (!charset)
4372             {
4373               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4374                 {
4375                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4376                   charset = CHARSET_FROM_ID (charset_ascii);
4377                 }
4378               else
4379                 {
4380                   c = coding->default_char;
4381                   charset = char_charset (c, charset_list, &code);
4382                 }
4383             }
4384           if (code == CHARSET_INVALID_CODE (charset))
4385             abort ();
4386           if (charset == charset_kanji)
4387             {
4388               int c1, c2;
4389               JIS_TO_SJIS (code);
4390               c1 = code >> 8, c2 = code & 0xFF;
4391               EMIT_TWO_BYTES (c1, c2);
4392             }
4393           else if (charset == charset_kana)
4394             EMIT_ONE_BYTE (code | 0x80);
4395           else if (charset_kanji2 && charset == charset_kanji2)
4396             {
4397               int c1, c2;
4398
4399               c1 = code >> 8;
4400               if (c1 == 0x21 || (c1 >= 0x23 && c1 < 0x25)
4401                   || (c1 >= 0x2C && c1 <= 0x2F) || c1 >= 0x6E)
4402                 {
4403                   JIS_TO_SJIS2 (code);
4404                   c1 = code >> 8, c2 = code & 0xFF;
4405                   EMIT_TWO_BYTES (c1, c2);
4406                 }
4407               else
4408                 EMIT_ONE_ASCII_BYTE (code & 0x7F);
4409             }
4410           else
4411             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4412         }
4413     }
4414   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4415   coding->produced_char += produced_chars;
4416   coding->produced = dst - coding->destination;
4417   return 0;
4418 }
4419
4420 static int
4421 encode_coding_big5 (coding)
4422      struct coding_system *coding;
4423 {
4424   int multibytep = coding->dst_multibyte;
4425   int *charbuf = coding->charbuf;
4426   int *charbuf_end = charbuf + coding->charbuf_used;
4427   unsigned char *dst = coding->destination + coding->produced;
4428   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4429   int safe_room = 4;
4430   int produced_chars = 0;
4431   Lisp_Object attrs, charset_list, val;
4432   int ascii_compatible;
4433   struct charset *charset_roman, *charset_big5;
4434   int c;
4435
4436   CODING_GET_INFO (coding, attrs, charset_list);
4437   val = charset_list;
4438   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
4439   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
4440   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
4441
4442   while (charbuf < charbuf_end)
4443     {
4444       ASSURE_DESTINATION (safe_room);
4445       c = *charbuf++;
4446       /* Now encode the character C.  */
4447       if (ASCII_CHAR_P (c) && ascii_compatible)
4448         EMIT_ONE_ASCII_BYTE (c);
4449       else if (CHAR_BYTE8_P (c))
4450         {
4451           c = CHAR_TO_BYTE8 (c);
4452           EMIT_ONE_BYTE (c);
4453         }
4454       else
4455         {
4456           unsigned code;
4457           struct charset *charset = char_charset (c, charset_list, &code);
4458
4459           if (! charset)
4460             {
4461               if (coding->mode & CODING_MODE_SAFE_ENCODING)
4462                 {
4463                   code = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
4464                   charset = CHARSET_FROM_ID (charset_ascii);
4465                 }
4466               else
4467                 {
4468                   c = coding->default_char;
4469                   charset = char_charset (c, charset_list, &code);
4470                 }
4471             }
4472           if (code == CHARSET_INVALID_CODE (charset))
4473             abort ();
4474           if (charset == charset_big5)
4475             {
4476               int c1, c2;
4477
4478               c1 = code >> 8, c2 = code & 0xFF;
4479               EMIT_TWO_BYTES (c1, c2);
4480             }
4481           else
4482             EMIT_ONE_ASCII_BYTE (code & 0x7F);
4483         }
4484     }
4485   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4486   coding->produced_char += produced_chars;
4487   coding->produced = dst - coding->destination;
4488   return 0;
4489 }
4490
4491 \f
4492 /*** 10. CCL handlers ***/
4493
4494 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4495    Check if a text is encoded in a coding system of which
4496    encoder/decoder are written in CCL program.  If it is, return
4497    CATEGORY_MASK_CCL, else return 0.  */
4498
4499 static int
4500 detect_coding_ccl (coding, detect_info)
4501      struct coding_system *coding;
4502      struct coding_detection_info *detect_info;
4503 {
4504   const unsigned char *src = coding->source, *src_base;
4505   const unsigned char *src_end = coding->source + coding->src_bytes;
4506   int multibytep = coding->src_multibyte;
4507   int consumed_chars = 0;
4508   int found = 0;
4509   unsigned char *valids;
4510   int head_ascii = coding->head_ascii;
4511   Lisp_Object attrs;
4512
4513   detect_info->checked |= CATEGORY_MASK_CCL;
4514
4515   coding = &coding_categories[coding_category_ccl];
4516   valids = CODING_CCL_VALIDS (coding);
4517   attrs = CODING_ID_ATTRS (coding->id);
4518   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4519     src += head_ascii;
4520
4521   while (1)
4522     {
4523       int c;
4524
4525       src_base = src;
4526       ONE_MORE_BYTE (c);
4527       if (c < 0 || ! valids[c])
4528         break;
4529       if ((valids[c] > 1))
4530         found = CATEGORY_MASK_CCL;
4531     }
4532   detect_info->rejected |= CATEGORY_MASK_CCL;
4533   return 0;
4534
4535  no_more_source:
4536   detect_info->found |= found;
4537   return 1;
4538 }
4539
4540 static void
4541 decode_coding_ccl (coding)
4542      struct coding_system *coding;
4543 {
4544   const unsigned char *src = coding->source + coding->consumed;
4545   const unsigned char *src_end = coding->source + coding->src_bytes;
4546   int *charbuf = coding->charbuf + coding->charbuf_used;
4547   int *charbuf_end = coding->charbuf + coding->charbuf_size;
4548   int consumed_chars = 0;
4549   int multibytep = coding->src_multibyte;
4550   struct ccl_program ccl;
4551   int source_charbuf[1024];
4552   int source_byteidx[1024];
4553   Lisp_Object attrs, charset_list;
4554
4555   CODING_GET_INFO (coding, attrs, charset_list);
4556   setup_ccl_program (&ccl, CODING_CCL_DECODER (coding));
4557
4558   while (src < src_end)
4559     {
4560       const unsigned char *p = src;
4561       int *source, *source_end;
4562       int i = 0;
4563
4564       if (multibytep)
4565         while (i < 1024 && p < src_end)
4566           {
4567             source_byteidx[i] = p - src;
4568             source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
4569           }
4570       else
4571         while (i < 1024 && p < src_end)
4572           source_charbuf[i++] = *p++;
4573
4574       if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK)
4575         ccl.last_block = 1;
4576
4577       source = source_charbuf;
4578       source_end = source + i;
4579       while (source < source_end)
4580         {
4581           ccl_driver (&ccl, source, charbuf,
4582                       source_end - source, charbuf_end - charbuf,
4583                       charset_list);
4584           source += ccl.consumed;
4585           charbuf += ccl.produced;
4586           if (ccl.status != CCL_STAT_SUSPEND_BY_DST)
4587             break;
4588         }
4589       if (source < source_end)
4590         src += source_byteidx[source - source_charbuf];
4591       else
4592         src = p;
4593       consumed_chars += source - source_charbuf;
4594
4595       if (ccl.status != CCL_STAT_SUSPEND_BY_SRC
4596           && ccl.status != CODING_RESULT_INSUFFICIENT_SRC)
4597         break;
4598     }
4599
4600   switch (ccl.status)
4601     {
4602     case CCL_STAT_SUSPEND_BY_SRC:
4603       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4604       break;
4605     case CCL_STAT_SUSPEND_BY_DST:
4606       break;
4607     case CCL_STAT_QUIT:
4608     case CCL_STAT_INVALID_CMD:
4609       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4610       break;
4611     default:
4612       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4613       break;
4614     }
4615   coding->consumed_char += consumed_chars;
4616   coding->consumed = src - coding->source;
4617   coding->charbuf_used = charbuf - coding->charbuf;
4618 }
4619
4620 static int
4621 encode_coding_ccl (coding)
4622      struct coding_system *coding;
4623 {
4624   struct ccl_program ccl;
4625   int multibytep = coding->dst_multibyte;
4626   int *charbuf = coding->charbuf;
4627   int *charbuf_end = charbuf + coding->charbuf_used;
4628   unsigned char *dst = coding->destination + coding->produced;
4629   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4630   int destination_charbuf[1024];
4631   int i, produced_chars = 0;
4632   Lisp_Object attrs, charset_list;
4633
4634   CODING_GET_INFO (coding, attrs, charset_list);
4635   setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding));
4636
4637   ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK;
4638   ccl.dst_multibyte = coding->dst_multibyte;
4639
4640   while (charbuf < charbuf_end)
4641     {
4642       ccl_driver (&ccl, charbuf, destination_charbuf,
4643                   charbuf_end - charbuf, 1024, charset_list);
4644       if (multibytep)
4645         {
4646           ASSURE_DESTINATION (ccl.produced * 2);
4647           for (i = 0; i < ccl.produced; i++)
4648             EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF);
4649         }
4650       else
4651         {
4652           ASSURE_DESTINATION (ccl.produced);
4653           for (i = 0; i < ccl.produced; i++)
4654             *dst++ = destination_charbuf[i] & 0xFF;
4655           produced_chars += ccl.produced;
4656         }
4657       charbuf += ccl.consumed;
4658       if (ccl.status == CCL_STAT_QUIT
4659           || ccl.status == CCL_STAT_INVALID_CMD)
4660         break;
4661     }
4662
4663   switch (ccl.status)
4664     {
4665     case CCL_STAT_SUSPEND_BY_SRC:
4666       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4667       break;
4668     case CCL_STAT_SUSPEND_BY_DST:
4669       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST);
4670       break;
4671     case CCL_STAT_QUIT:
4672     case CCL_STAT_INVALID_CMD:
4673       record_conversion_result (coding, CODING_RESULT_INTERRUPT);
4674       break;
4675     default:
4676       record_conversion_result (coding, CODING_RESULT_SUCCESS);
4677       break;
4678     }
4679
4680   coding->produced_char += produced_chars;
4681   coding->produced = dst - coding->destination;
4682   return 0;
4683 }
4684
4685
4686 \f
4687 /*** 10, 11. no-conversion handlers ***/
4688
4689 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
4690
4691 static void
4692 decode_coding_raw_text (coding)
4693      struct coding_system *coding;
4694 {
4695   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4696
4697   coding->chars_at_source = 1;
4698   coding->consumed_char = coding->src_chars;
4699   coding->consumed = coding->src_bytes;
4700   if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r')
4701     {
4702       coding->consumed_char--;
4703       coding->consumed--;
4704       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
4705     }
4706   else
4707     record_conversion_result (coding, CODING_RESULT_SUCCESS);
4708 }
4709
4710 static int
4711 encode_coding_raw_text (coding)
4712      struct coding_system *coding;
4713 {
4714   int multibytep = coding->dst_multibyte;
4715   int *charbuf = coding->charbuf;
4716   int *charbuf_end = coding->charbuf + coding->charbuf_used;
4717   unsigned char *dst = coding->destination + coding->produced;
4718   unsigned char *dst_end = coding->destination + coding->dst_bytes;
4719   int produced_chars = 0;
4720   int c;
4721
4722   if (multibytep)
4723     {
4724       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
4725
4726       if (coding->src_multibyte)
4727         while (charbuf < charbuf_end)
4728           {
4729             ASSURE_DESTINATION (safe_room);
4730             c = *charbuf++;
4731             if (ASCII_CHAR_P (c))
4732               EMIT_ONE_ASCII_BYTE (c);
4733             else if (CHAR_BYTE8_P (c))
4734               {
4735                 c = CHAR_TO_BYTE8 (c);
4736                 EMIT_ONE_BYTE (c);
4737               }
4738             else
4739               {
4740                 unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
4741
4742                 CHAR_STRING_ADVANCE (c, p1);
4743                 while (p0 < p1)
4744                   {
4745                     EMIT_ONE_BYTE (*p0);
4746                     p0++;
4747                   }
4748               }
4749           }
4750       else
4751         while (charbuf < charbuf_end)
4752           {
4753             ASSURE_DESTINATION (safe_room);
4754             c = *charbuf++;
4755             EMIT_ONE_BYTE (c);
4756           }
4757     }
4758   else
4759     {
4760       if (coding->src_multibyte)
4761         {
4762           int safe_room = MAX_MULTIBYTE_LENGTH;
4763
4764           while (charbuf < charbuf_end)
4765             {
4766               ASSURE_DESTINATION (safe_room);
4767               c = *charbuf++;
4768               if (ASCII_CHAR_P (c))
4769                 *dst++ = c;
4770               else if (CHAR_BYTE8_P (c))
4771                 *dst++ = CHAR_TO_BYTE8 (c);
4772               else
4773                 CHAR_STRING_ADVANCE (c, dst);
4774               produced_chars++;
4775             }
4776         }
4777       else
4778         {
4779           ASSURE_DESTINATION (charbuf_end - charbuf);
4780           while (charbuf < charbuf_end && dst < dst_end)
4781             *dst++ = *charbuf++;
4782           produced_chars = dst - (coding->destination + coding->dst_bytes);
4783         }
4784     }
4785   record_conversion_result (coding, CODING_RESULT_SUCCESS);
4786   coding->produced_char += produced_chars;
4787   coding->produced = dst - coding->destination;
4788   return 0;
4789 }
4790
4791 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
4792    Check if a text is encoded in a charset-based coding system.  If it
4793    is, return 1, else return 0.  */
4794
4795 static int
4796 detect_coding_charset (coding, detect_info)
4797      struct coding_system *coding;
4798      struct coding_detection_info *detect_info;
4799 {
4800   const unsigned char *src = coding->source, *src_base;
4801   const unsigned char *src_end = coding->source + coding->src_bytes;
4802   int multibytep = coding->src_multibyte;
4803   int consumed_chars = 0;
4804   Lisp_Object attrs, valids;
4805   int found = 0;
4806   int head_ascii = coding->head_ascii;
4807
4808   detect_info->checked |= CATEGORY_MASK_CHARSET;
4809
4810   coding = &coding_categories[coding_category_charset];
4811   attrs = CODING_ID_ATTRS (coding->id);
4812   valids = AREF (attrs, coding_attr_charset_valids);
4813
4814   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
4815     src += head_ascii;
4816
4817   while (1)
4818     {
4819       int c;
4820       Lisp_Object val;
4821       struct charset *charset;
4822       int dim, idx;
4823
4824       src_base = src;
4825       ONE_MORE_BYTE (c);
4826       if (c < 0)
4827         continue;
4828       val = AREF (valids, c);
4829       if (NILP (val))
4830         break;
4831       if (c >= 0x80)
4832         found = CATEGORY_MASK_CHARSET;
4833       if (INTEGERP (val))
4834         {
4835           charset = CHARSET_FROM_ID (XFASTINT (val));
4836           dim = CHARSET_DIMENSION (charset);
4837           for (idx = 1; idx < dim; idx++)
4838             {
4839               if (src == src_end)
4840                 goto too_short;
4841               ONE_MORE_BYTE (c);
4842               if (c < charset->code_space[(dim - 1 - idx) * 2]
4843                   || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
4844                 break;
4845             }
4846           if (idx < dim)
4847             break;
4848         }
4849       else
4850         {
4851           idx = 1;
4852           for (; CONSP (val); val = XCDR (val))
4853             {
4854               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4855               dim = CHARSET_DIMENSION (charset);
4856               while (idx < dim)
4857                 {
4858                   if (src == src_end)
4859                     goto too_short;
4860                   ONE_MORE_BYTE (c);
4861                   if (c < charset->code_space[(dim - 1 - idx) * 4]
4862                       || c > charset->code_space[(dim - 1 - idx) * 4 + 1])
4863                     break;
4864                   idx++;
4865                 }
4866               if (idx == dim)
4867                 {
4868                   val = Qnil;
4869                   break;
4870                 }
4871             }
4872           if (CONSP (val))
4873             break;
4874         }
4875     }
4876  too_short:
4877   detect_info->rejected |= CATEGORY_MASK_CHARSET;
4878   return 0;
4879
4880  no_more_source:
4881   detect_info->found |= found;
4882   return 1;
4883 }
4884
4885 static void
4886 decode_coding_charset (coding)
4887      struct coding_system *coding;
4888 {
4889   const unsigned char *src = coding->source + coding->consumed;
4890   const unsigned char *src_end = coding->source + coding->src_bytes;
4891   const unsigned char *src_base;
4892   int *charbuf = coding->charbuf + coding->charbuf_used;
4893   int *charbuf_end
4894     = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
4895   int consumed_chars = 0, consumed_chars_base;
4896   int multibytep = coding->src_multibyte;
4897   Lisp_Object attrs, charset_list, valids;
4898   int char_offset = coding->produced_char;
4899   int last_offset = char_offset;
4900   int last_id = charset_ascii;
4901   int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
4902   int byte_after_cr = -1;
4903
4904   CODING_GET_INFO (coding, attrs, charset_list);
4905   valids = AREF (attrs, coding_attr_charset_valids);
4906
4907   while (1)
4908     {
4909       int c;
4910       Lisp_Object val;
4911       struct charset *charset;
4912       int dim;
4913       int len = 1;
4914       unsigned code;
4915
4916       src_base = src;
4917       consumed_chars_base = consumed_chars;
4918
4919       if (charbuf >= charbuf_end)
4920         break;
4921
4922       if (byte_after_cr >= 0)
4923         {
4924           c = byte_after_cr;
4925           byte_after_cr = -1;
4926         }
4927       else
4928         {
4929           ONE_MORE_BYTE (c);
4930           if (eol_crlf && c == '\r')
4931             ONE_MORE_BYTE (byte_after_cr);
4932         }
4933       if (c < 0)
4934         goto invalid_code;
4935       code = c;
4936
4937       val = AREF (valids, c);
4938       if (NILP (val))
4939         goto invalid_code;
4940       if (INTEGERP (val))
4941         {
4942           charset = CHARSET_FROM_ID (XFASTINT (val));
4943           dim = CHARSET_DIMENSION (charset);
4944           while (len < dim)
4945             {
4946               ONE_MORE_BYTE (c);
4947               code = (code << 8) | c;
4948               len++;
4949             }
4950           CODING_DECODE_CHAR (coding, src, src_base, src_end,
4951                               charset, code, c);
4952         }
4953       else
4954         {
4955           /* VAL is a list of charset IDs.  It is assured that the
4956              list is sorted by charset dimensions (smaller one
4957              comes first).  */
4958           while (CONSP (val))
4959             {
4960               charset = CHARSET_FROM_ID (XFASTINT (XCAR (val)));
4961               dim = CHARSET_DIMENSION (charset);
4962               while (len < dim)
4963                 {
4964                   ONE_MORE_BYTE (c);
4965                   code = (code << 8) | c;
4966                   len++;
4967                 }
4968               CODING_DECODE_CHAR (coding, src, src_base,
4969                                   src_end, charset, code, c);
4970               if (c >= 0)
4971                 break;
4972               val = XCDR (val);
4973             }
4974         }
4975       if (c < 0)
4976         goto invalid_code;
4977       if (charset->id != charset_ascii
4978           && last_id != charset->id)
4979         {
4980           if (last_id != charset_ascii)
4981             ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
4982           last_id = charset->id;
4983           last_offset = char_offset;
4984         }
4985
4986       *charbuf++ = c;
4987       char_offset++;
4988       continue;
4989
4990     invalid_code:
4991       src = src_base;
4992       consumed_chars = consumed_chars_base;
4993       ONE_MORE_BYTE (c);
4994       *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
4995       char_offset++;
4996       coding->errors++;
4997     }
4998
4999  no_more_source:
5000   if (last_id != charset_ascii)
5001     ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
5002   coding->consumed_char += consumed_chars_base;
5003   coding->consumed = src_base - coding->source;
5004   coding->charbuf_used = charbuf - coding->charbuf;
5005 }
5006
5007 static int
5008 encode_coding_charset (coding)
5009      struct coding_system *coding;
5010 {
5011   int multibytep = coding->dst_multibyte;
5012   int *charbuf = coding->charbuf;
5013   int *charbuf_end = charbuf + coding->charbuf_used;
5014   unsigned char *dst = coding->destination + coding->produced;
5015   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5016   int safe_room = MAX_MULTIBYTE_LENGTH;
5017   int produced_chars = 0;
5018   Lisp_Object attrs, charset_list;
5019   int ascii_compatible;
5020   int c;
5021
5022   CODING_GET_INFO (coding, attrs, charset_list);
5023   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
5024
5025   while (charbuf < charbuf_end)
5026     {
5027       struct charset *charset;
5028       unsigned code;
5029
5030       ASSURE_DESTINATION (safe_room);
5031       c = *charbuf++;
5032       if (ascii_compatible && ASCII_CHAR_P (c))
5033         EMIT_ONE_ASCII_BYTE (c);
5034       else if (CHAR_BYTE8_P (c))
5035         {
5036           c = CHAR_TO_BYTE8 (c);
5037           EMIT_ONE_BYTE (c);
5038         }
5039       else
5040         {
5041           charset = char_charset (c, charset_list, &code);
5042           if (charset)
5043             {
5044               if (CHARSET_DIMENSION (charset) == 1)
5045                 EMIT_ONE_BYTE (code);
5046               else if (CHARSET_DIMENSION (charset) == 2)
5047                 EMIT_TWO_BYTES (code >> 8, code & 0xFF);
5048               else if (CHARSET_DIMENSION (charset) == 3)
5049                 EMIT_THREE_BYTES (code >> 16, (code >> 8) & 0xFF, code & 0xFF);
5050               else
5051                 EMIT_FOUR_BYTES (code >> 24, (code >> 16) & 0xFF,
5052                                  (code >> 8) & 0xFF, code & 0xFF);
5053             }
5054           else
5055             {
5056               if (coding->mode & CODING_MODE_SAFE_ENCODING)
5057                 c = CODING_INHIBIT_CHARACTER_SUBSTITUTION;
5058               else
5059                 c = coding->default_char;
5060               EMIT_ONE_BYTE (c);
5061             }
5062         }
5063     }
5064
5065   record_conversion_result (coding, CODING_RESULT_SUCCESS);
5066   coding->produced_char += produced_chars;
5067   coding->produced = dst - coding->destination;
5068   return 0;
5069 }
5070
5071 \f
5072 /*** 7. C library functions ***/
5073
5074 /* Setup coding context CODING from information about CODING_SYSTEM.
5075    If CODING_SYSTEM is nil, `no-conversion' is assumed.  If
5076    CODING_SYSTEM is invalid, signal an error.  */
5077
5078 void
5079 setup_coding_system (coding_system, coding)
5080      Lisp_Object coding_system;
5081      struct coding_system *coding;
5082 {
5083   Lisp_Object attrs;
5084   Lisp_Object eol_type;
5085   Lisp_Object coding_type;
5086   Lisp_Object val;
5087
5088   if (NILP (coding_system))
5089     coding_system = Qundecided;
5090
5091   CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id);
5092
5093   attrs = CODING_ID_ATTRS (coding->id);
5094   eol_type = CODING_ID_EOL_TYPE (coding->id);
5095
5096   coding->mode = 0;
5097   coding->head_ascii = -1;
5098   if (VECTORP (eol_type))
5099     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5100                             | CODING_REQUIRE_DETECTION_MASK);
5101   else if (! EQ (eol_type, Qunix))
5102     coding->common_flags = (CODING_REQUIRE_DECODING_MASK
5103                             | CODING_REQUIRE_ENCODING_MASK);
5104   else
5105     coding->common_flags = 0;
5106   if (! NILP (CODING_ATTR_POST_READ (attrs)))
5107     coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5108   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
5109     coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5110   if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs)))
5111     coding->common_flags |= CODING_FOR_UNIBYTE_MASK;
5112
5113   val = CODING_ATTR_SAFE_CHARSETS (attrs);
5114   coding->max_charset_id = SCHARS (val) - 1;
5115   coding->safe_charsets = (char *) SDATA (val);
5116   coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs));
5117
5118   coding_type = CODING_ATTR_TYPE (attrs);
5119   if (EQ (coding_type, Qundecided))
5120     {
5121       coding->detector = NULL;
5122       coding->decoder = decode_coding_raw_text;
5123       coding->encoder = encode_coding_raw_text;
5124       coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5125     }
5126   else if (EQ (coding_type, Qiso_2022))
5127     {
5128       int i;
5129       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5130
5131       /* Invoke graphic register 0 to plane 0.  */
5132       CODING_ISO_INVOCATION (coding, 0) = 0;
5133       /* Invoke graphic register 1 to plane 1 if we can use 8-bit.  */
5134       CODING_ISO_INVOCATION (coding, 1)
5135         = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1);
5136       /* Setup the initial status of designation.  */
5137       for (i = 0; i < 4; i++)
5138         CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i);
5139       /* Not single shifting initially.  */
5140       CODING_ISO_SINGLE_SHIFTING (coding) = 0;
5141       /* Beginning of buffer should also be regarded as bol. */
5142       CODING_ISO_BOL (coding) = 1;
5143       coding->detector = detect_coding_iso_2022;
5144       coding->decoder = decode_coding_iso_2022;
5145       coding->encoder = encode_coding_iso_2022;
5146       if (flags & CODING_ISO_FLAG_SAFE)
5147         coding->mode |= CODING_MODE_SAFE_ENCODING;
5148       coding->common_flags
5149         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5150             | CODING_REQUIRE_FLUSHING_MASK);
5151       if (flags & CODING_ISO_FLAG_COMPOSITION)
5152         coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK;
5153       if (flags & CODING_ISO_FLAG_DESIGNATION)
5154         coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK;
5155       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5156         {
5157           setup_iso_safe_charsets (attrs);
5158           val = CODING_ATTR_SAFE_CHARSETS (attrs);
5159           coding->max_charset_id = SCHARS (val) - 1;
5160           coding->safe_charsets = (char *) SDATA (val);
5161         }
5162       CODING_ISO_FLAGS (coding) = flags;
5163     }
5164   else if (EQ (coding_type, Qcharset))
5165     {
5166       coding->detector = detect_coding_charset;
5167       coding->decoder = decode_coding_charset;
5168       coding->encoder = encode_coding_charset;
5169       coding->common_flags
5170         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5171     }
5172   else if (EQ (coding_type, Qutf_8))
5173     {
5174       coding->detector = detect_coding_utf_8;
5175       coding->decoder = decode_coding_utf_8;
5176       coding->encoder = encode_coding_utf_8;
5177       coding->common_flags
5178         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5179     }
5180   else if (EQ (coding_type, Qutf_16))
5181     {
5182       val = AREF (attrs, coding_attr_utf_16_bom);
5183       CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
5184                                     : EQ (val, Qt) ? utf_16_with_bom
5185                                     : utf_16_without_bom);
5186       val = AREF (attrs, coding_attr_utf_16_endian);
5187       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
5188                                        : utf_16_little_endian);
5189       CODING_UTF_16_SURROGATE (coding) = 0;
5190       coding->detector = detect_coding_utf_16;
5191       coding->decoder = decode_coding_utf_16;
5192       coding->encoder = encode_coding_utf_16;
5193       coding->common_flags
5194         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5195       if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
5196         coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
5197     }
5198   else if (EQ (coding_type, Qccl))
5199     {
5200       coding->detector = detect_coding_ccl;
5201       coding->decoder = decode_coding_ccl;
5202       coding->encoder = encode_coding_ccl;
5203       coding->common_flags
5204         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK
5205             | CODING_REQUIRE_FLUSHING_MASK);
5206     }
5207   else if (EQ (coding_type, Qemacs_mule))
5208     {
5209       coding->detector = detect_coding_emacs_mule;
5210       coding->decoder = decode_coding_emacs_mule;
5211       coding->encoder = encode_coding_emacs_mule;
5212       coding->common_flags
5213         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5214       if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5215           && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5216         {
5217           Lisp_Object tail, safe_charsets;
5218           int max_charset_id = 0;
5219
5220           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5221                tail = XCDR (tail))
5222             if (max_charset_id < XFASTINT (XCAR (tail)))
5223               max_charset_id = XFASTINT (XCAR (tail));
5224           safe_charsets = Fmake_string (make_number (max_charset_id + 1),
5225                                         make_number (255));
5226           for (tail = Vemacs_mule_charset_list; CONSP (tail);
5227                tail = XCDR (tail))
5228             SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5229           coding->max_charset_id = max_charset_id;
5230           coding->safe_charsets = (char *) SDATA (safe_charsets);
5231         }
5232     }
5233   else if (EQ (coding_type, Qshift_jis))
5234     {
5235       coding->detector = detect_coding_sjis;
5236       coding->decoder = decode_coding_sjis;
5237       coding->encoder = encode_coding_sjis;
5238       coding->common_flags
5239         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5240     }
5241   else if (EQ (coding_type, Qbig5))
5242     {
5243       coding->detector = detect_coding_big5;
5244       coding->decoder = decode_coding_big5;
5245       coding->encoder = encode_coding_big5;
5246       coding->common_flags
5247         |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5248     }
5249   else                          /* EQ (coding_type, Qraw_text) */
5250     {
5251       coding->detector = NULL;
5252       coding->decoder = decode_coding_raw_text;
5253       coding->encoder = encode_coding_raw_text;
5254       if (! EQ (eol_type, Qunix))
5255         {
5256           coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
5257           if (! VECTORP (eol_type))
5258             coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
5259         }
5260
5261     }
5262
5263   return;
5264 }
5265
5266 /* Return a list of charsets supported by CODING.  */
5267
5268 Lisp_Object
5269 coding_charset_list (coding)
5270      struct coding_system *coding;
5271 {
5272   Lisp_Object attrs, charset_list;
5273
5274   CODING_GET_INFO (coding, attrs, charset_list);
5275   if (EQ (CODING_ATTR_TYPE (attrs), Qiso_2022))
5276     {
5277       int flags = XINT (AREF (attrs, coding_attr_iso_flags));
5278
5279       if (flags & CODING_ISO_FLAG_FULL_SUPPORT)
5280         charset_list = Viso_2022_charset_list;
5281     }
5282   else if (EQ (CODING_ATTR_TYPE (attrs), Qemacs_mule))
5283     {
5284       charset_list = Vemacs_mule_charset_list;
5285     }
5286   return charset_list;
5287 }
5288
5289
5290 /* Return raw-text or one of its subsidiaries that has the same
5291    eol_type as CODING-SYSTEM.  */
5292
5293 Lisp_Object
5294 raw_text_coding_system (coding_system)
5295      Lisp_Object coding_system;
5296 {
5297   Lisp_Object spec, attrs;
5298   Lisp_Object eol_type, raw_text_eol_type;
5299
5300   if (NILP (coding_system))
5301     return Qraw_text;
5302   spec = CODING_SYSTEM_SPEC (coding_system);
5303   attrs = AREF (spec, 0);
5304
5305   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
5306     return coding_system;
5307
5308   eol_type = AREF (spec, 2);
5309   if (VECTORP (eol_type))
5310     return Qraw_text;
5311   spec = CODING_SYSTEM_SPEC (Qraw_text);
5312   raw_text_eol_type = AREF (spec, 2);
5313   return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0)
5314           : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1)
5315           : AREF (raw_text_eol_type, 2));
5316 }
5317
5318
5319 /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT
5320    does, return one of the subsidiary that has the same eol-spec as
5321    PARENT.  Otherwise, return CODING_SYSTEM.  If PARENT is nil,
5322    inherit end-of-line format from the system's setting
5323    (system_eol_type).  */
5324
5325 Lisp_Object
5326 coding_inherit_eol_type (coding_system, parent)
5327      Lisp_Object coding_system, parent;
5328 {
5329   Lisp_Object spec, eol_type;
5330
5331   if (NILP (coding_system))
5332     coding_system = Qraw_text;
5333   spec = CODING_SYSTEM_SPEC (coding_system);
5334   eol_type = AREF (spec, 2);
5335   if (VECTORP (eol_type))
5336     {
5337       Lisp_Object parent_eol_type;
5338
5339       if (! NILP (parent))
5340         {
5341           Lisp_Object parent_spec;
5342
5343           parent_spec = CODING_SYSTEM_SPEC (parent);
5344           parent_eol_type = AREF (parent_spec, 2);
5345         }
5346       else
5347         parent_eol_type = system_eol_type;
5348       if (EQ (parent_eol_type, Qunix))
5349         coding_system = AREF (eol_type, 0);
5350       else if (EQ (parent_eol_type, Qdos))
5351         coding_system = AREF (eol_type, 1);
5352       else if (EQ (parent_eol_type, Qmac))
5353         coding_system = AREF (eol_type, 2);
5354     }
5355   return coding_system;
5356 }
5357
5358 /* Emacs has a mechanism to automatically detect a coding system if it
5359    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
5360    it's impossible to distinguish some coding systems accurately
5361    because they use the same range of codes.  So, at first, coding
5362    systems are categorized into 7, those are:
5363
5364    o coding-category-emacs-mule
5365
5366         The category for a coding system which has the same code range
5367         as Emacs' internal format.  Assigned the coding-system (Lisp
5368         symbol) `emacs-mule' by default.
5369
5370    o coding-category-sjis
5371
5372         The category for a coding system which has the same code range
5373         as SJIS.  Assigned the coding-system (Lisp
5374         symbol) `japanese-shift-jis' by default.
5375
5376    o coding-category-iso-7
5377
5378         The category for a coding system which has the same code range
5379         as ISO2022 of 7-bit environment.  This doesn't use any locking
5380         shift and single shift functions.  This can encode/decode all
5381         charsets.  Assigned the coding-system (Lisp symbol)
5382         `iso-2022-7bit' by default.
5383
5384    o coding-category-iso-7-tight
5385
5386         Same as coding-category-iso-7 except that this can
5387         encode/decode only the specified charsets.
5388
5389    o coding-category-iso-8-1
5390
5391         The category for a coding system which has the same code range
5392         as ISO2022 of 8-bit environment and graphic plane 1 used only
5393         for DIMENSION1 charset.  This doesn't use any locking shift
5394         and single shift functions.  Assigned the coding-system (Lisp
5395         symbol) `iso-latin-1' by default.
5396
5397    o coding-category-iso-8-2
5398
5399         The category for a coding system which has the same code range
5400         as ISO2022 of 8-bit environment and graphic plane 1 used only
5401         for DIMENSION2 charset.  This doesn't use any locking shift
5402         and single shift functions.  Assigned the coding-system (Lisp
5403         symbol) `japanese-iso-8bit' by default.
5404
5405    o coding-category-iso-7-else
5406
5407         The category for a coding system which has the same code range
5408         as ISO2022 of 7-bit environemnt but uses locking shift or
5409         single shift functions.  Assigned the coding-system (Lisp
5410         symbol) `iso-2022-7bit-lock' by default.
5411
5412    o coding-category-iso-8-else
5413
5414         The category for a coding system which has the same code range
5415         as ISO2022 of 8-bit environemnt but uses locking shift or
5416         single shift functions.  Assigned the coding-system (Lisp
5417         symbol) `iso-2022-8bit-ss2' by default.
5418
5419    o coding-category-big5
5420
5421         The category for a coding system which has the same code range
5422         as BIG5.  Assigned the coding-system (Lisp symbol)
5423         `cn-big5' by default.
5424
5425    o coding-category-utf-8
5426
5427         The category for a coding system which has the same code range
5428         as UTF-8 (cf. RFC3629).  Assigned the coding-system (Lisp
5429         symbol) `utf-8' by default.
5430
5431    o coding-category-utf-16-be
5432
5433         The category for a coding system in which a text has an
5434         Unicode signature (cf. Unicode Standard) in the order of BIG
5435         endian at the head.  Assigned the coding-system (Lisp symbol)
5436         `utf-16-be' by default.
5437
5438    o coding-category-utf-16-le
5439
5440         The category for a coding system in which a text has an
5441         Unicode signature (cf. Unicode Standard) in the order of
5442         LITTLE endian at the head.  Assigned the coding-system (Lisp
5443         symbol) `utf-16-le' by default.
5444
5445    o coding-category-ccl
5446
5447         The category for a coding system of which encoder/decoder is
5448         written in CCL programs.  The default value is nil, i.e., no
5449         coding system is assigned.
5450
5451    o coding-category-binary
5452
5453         The category for a coding system not categorized in any of the
5454         above.  Assigned the coding-system (Lisp symbol)
5455         `no-conversion' by default.
5456
5457    Each of them is a Lisp symbol and the value is an actual
5458    `coding-system's (this is also a Lisp symbol) assigned by a user.
5459    What Emacs does actually is to detect a category of coding system.
5460    Then, it uses a `coding-system' assigned to it.  If Emacs can't
5461    decide only one possible category, it selects a category of the
5462    highest priority.  Priorities of categories are also specified by a
5463    user in a Lisp variable `coding-category-list'.
5464
5465 */
5466
5467 #define EOL_SEEN_NONE   0
5468 #define EOL_SEEN_LF     1
5469 #define EOL_SEEN_CR     2
5470 #define EOL_SEEN_CRLF   4
5471
5472 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
5473    SOURCE is encoded.  If CATEGORY is one of
5474    coding_category_utf_16_XXXX, assume that CR and LF are encoded by
5475    two-byte, else they are encoded by one-byte.
5476
5477    Return one of EOL_SEEN_XXX.  */
5478
5479 #define MAX_EOL_CHECK_COUNT 3
5480
5481 static int
5482 detect_eol (source, src_bytes, category)
5483      const unsigned char *source;
5484      EMACS_INT src_bytes;
5485      enum coding_category category;
5486 {
5487   const unsigned char *src = source, *src_end = src + src_bytes;
5488   unsigned char c;
5489   int total  = 0;
5490   int eol_seen = EOL_SEEN_NONE;
5491
5492   if ((1 << category) & CATEGORY_MASK_UTF_16)
5493     {
5494       int msb, lsb;
5495
5496       msb = category == (coding_category_utf_16_le
5497                          | coding_category_utf_16_le_nosig);
5498       lsb = 1 - msb;
5499
5500       while (src + 1 < src_end)
5501         {
5502           c = src[lsb];
5503           if (src[msb] == 0 && (c == '\n' || c == '\r'))
5504             {
5505               int this_eol;
5506
5507               if (c == '\n')
5508                 this_eol = EOL_SEEN_LF;
5509               else if (src + 3 >= src_end
5510                        || src[msb + 2] != 0
5511                        || src[lsb + 2] != '\n')
5512                 this_eol = EOL_SEEN_CR;
5513               else
5514                 this_eol = EOL_SEEN_CRLF;
5515
5516               if (eol_seen == EOL_SEEN_NONE)
5517                 /* This is the first end-of-line.  */
5518                 eol_seen = this_eol;
5519               else if (eol_seen != this_eol)
5520                 {
5521                   /* The found type is different from what found before.  */
5522                   eol_seen = EOL_SEEN_LF;
5523                   break;
5524                 }
5525               if (++total == MAX_EOL_CHECK_COUNT)
5526                 break;
5527             }
5528           src += 2;
5529         }
5530     }
5531   else
5532     {
5533       while (src < src_end)
5534         {
5535           c = *src++;
5536           if (c == '\n' || c == '\r')
5537             {
5538               int this_eol;
5539
5540               if (c == '\n')
5541                 this_eol = EOL_SEEN_LF;
5542               else if (src >= src_end || *src != '\n')
5543                 this_eol = EOL_SEEN_CR;
5544               else
5545                 this_eol = EOL_SEEN_CRLF, src++;
5546
5547               if (eol_seen == EOL_SEEN_NONE)
5548                 /* This is the first end-of-line.  */
5549                 eol_seen = this_eol;
5550               else if (eol_seen != this_eol)
5551                 {
5552                   /* The found type is different from what found before.  */
5553                   eol_seen = EOL_SEEN_LF;
5554                   break;
5555                 }
5556               if (++total == MAX_EOL_CHECK_COUNT)
5557                 break;
5558             }
5559         }
5560     }
5561   return eol_seen;
5562 }
5563
5564
5565 static Lisp_Object
5566 adjust_coding_eol_type (coding, eol_seen)
5567      struct coding_system *coding;
5568      int eol_seen;
5569 {
5570   Lisp_Object eol_type;
5571
5572   eol_type = CODING_ID_EOL_TYPE (coding->id);
5573   if (eol_seen & EOL_SEEN_LF)
5574     {
5575       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
5576       eol_type = Qunix;
5577     }
5578   else if (eol_seen & EOL_SEEN_CRLF)
5579     {
5580       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1));
5581       eol_type = Qdos;
5582     }
5583   else if (eol_seen & EOL_SEEN_CR)
5584     {
5585       coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2));
5586       eol_type = Qmac;
5587     }
5588   return eol_type;
5589 }
5590
5591 /* Detect how a text specified in CODING is encoded.  If a coding
5592    system is detected, update fields of CODING by the detected coding
5593    system.  */
5594
5595 void
5596 detect_coding (coding)
5597      struct coding_system *coding;
5598 {
5599   const unsigned char *src, *src_end;
5600
5601   coding->consumed = coding->consumed_char = 0;
5602   coding->produced = coding->produced_char = 0;
5603   coding_set_source (coding);
5604
5605   src_end = coding->source + coding->src_bytes;
5606
5607   /* If we have not yet decided the text encoding type, detect it
5608      now.  */
5609   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
5610     {
5611       int c, i;
5612       struct coding_detection_info detect_info;
5613
5614       detect_info.checked = detect_info.found = detect_info.rejected = 0;
5615       for (i = 0, src = coding->source; src < src_end; i++, src++)
5616         {
5617           c = *src;
5618           if (c & 0x80)
5619             break;
5620           if (c < 0x20
5621               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
5622               && ! inhibit_iso_escape_detection
5623               && ! detect_info.checked)
5624             {
5625               coding->head_ascii = src - (coding->source + coding->consumed);
5626               if (detect_coding_iso_2022 (coding, &detect_info))
5627                 {
5628                   /* We have scanned the whole data.  */
5629                   if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
5630                     /* We didn't find an 8-bit code.  */
5631                     src = src_end;
5632                   break;
5633                 }
5634             }
5635         }
5636       coding->head_ascii = src - (coding->source + coding->consumed);
5637
5638       if (coding->head_ascii < coding->src_bytes
5639           || detect_info.found)
5640         {
5641           enum coding_category category;
5642           struct coding_system *this;
5643
5644           if (coding->head_ascii == coding->src_bytes)
5645             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
5646             for (i = 0; i < coding_category_raw_text; i++)
5647               {
5648                 category = coding_priorities[i];
5649                 this = coding_categories + category;
5650                 if (detect_info.found & (1 << category))
5651                   break;
5652               }
5653           else
5654             for (i = 0; i < coding_category_raw_text; i++)
5655               {
5656                 category = coding_priorities[i];
5657                 this = coding_categories + category;
5658                 if (this->id < 0)
5659                   {
5660                     /* No coding system of this category is defined.  */
5661                     detect_info.rejected |= (1 << category);
5662                   }
5663                 else if (category >= coding_category_raw_text)
5664                   continue;
5665                 else if (detect_info.checked & (1 << category))
5666                   {
5667                     if (detect_info.found & (1 << category))
5668                       break;
5669                   }
5670                 else if ((*(this->detector)) (coding, &detect_info)
5671                          && detect_info.found & (1 << category))
5672                   {
5673                     if (category == coding_category_utf_16_auto)
5674                       {
5675                         if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5676                           category = coding_category_utf_16_le;
5677                         else
5678                           category = coding_category_utf_16_be;
5679                       }
5680                     break;
5681                   }
5682               }
5683
5684           if (i < coding_category_raw_text)
5685             setup_coding_system (CODING_ID_NAME (this->id), coding);
5686           else if (detect_info.rejected == CATEGORY_MASK_ANY)
5687             setup_coding_system (Qraw_text, coding);
5688           else if (detect_info.rejected)
5689             for (i = 0; i < coding_category_raw_text; i++)
5690               if (! (detect_info.rejected & (1 << coding_priorities[i])))
5691                 {
5692                   this = coding_categories + coding_priorities[i];
5693                   setup_coding_system (CODING_ID_NAME (this->id), coding);
5694                   break;
5695                 }
5696         }
5697     }
5698   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
5699            == coding_category_utf_16_auto)
5700     {
5701       Lisp_Object coding_systems;
5702       struct coding_detection_info detect_info;
5703
5704       coding_systems
5705         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
5706       detect_info.found = detect_info.rejected = 0;
5707       if (CONSP (coding_systems)
5708           && detect_coding_utf_16 (coding, &detect_info))
5709         {
5710           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
5711             setup_coding_system (XCAR (coding_systems), coding);
5712           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
5713             setup_coding_system (XCDR (coding_systems), coding);
5714         }
5715     }
5716 }
5717
5718
5719 static void
5720 decode_eol (coding)
5721      struct coding_system *coding;
5722 {
5723   Lisp_Object eol_type;
5724   unsigned char *p, *pbeg, *pend;
5725
5726   eol_type = CODING_ID_EOL_TYPE (coding->id);
5727   if (EQ (eol_type, Qunix))
5728     return;
5729
5730   if (NILP (coding->dst_object))
5731     pbeg = coding->destination;
5732   else
5733     pbeg = BYTE_POS_ADDR (coding->dst_pos_byte);
5734   pend = pbeg + coding->produced;
5735
5736   if (VECTORP (eol_type))
5737     {
5738       int eol_seen = EOL_SEEN_NONE;
5739
5740       for (p = pbeg; p < pend; p++)
5741         {
5742           if (*p == '\n')
5743             eol_seen |= EOL_SEEN_LF;
5744           else if (*p == '\r')
5745             {
5746               if (p + 1 < pend && *(p + 1) == '\n')
5747                 {
5748                   eol_seen |= EOL_SEEN_CRLF;
5749                   p++;
5750                 }
5751               else
5752                 eol_seen |= EOL_SEEN_CR;
5753             }
5754         }
5755       if (eol_seen != EOL_SEEN_NONE
5756           && eol_seen != EOL_SEEN_LF
5757           && eol_seen != EOL_SEEN_CRLF
5758           && eol_seen != EOL_SEEN_CR)
5759         eol_seen = EOL_SEEN_LF;
5760       if (eol_seen != EOL_SEEN_NONE)
5761         eol_type = adjust_coding_eol_type (coding, eol_seen);
5762     }
5763
5764   if (EQ (eol_type, Qmac))
5765     {
5766       for (p = pbeg; p < pend; p++)
5767         if (*p == '\r')
5768           *p = '\n';
5769     }
5770   else if (EQ (eol_type, Qdos))
5771     {
5772       int n = 0;
5773
5774       if (NILP (coding->dst_object))
5775         {
5776           /* Start deleting '\r' from the tail to minimize the memory
5777              movement.  */
5778           for (p = pend - 2; p >= pbeg; p--)
5779             if (*p == '\r')
5780               {
5781                 safe_bcopy ((char *) (p + 1), (char *) p, pend-- - p - 1);
5782                 n++;
5783               }
5784         }
5785       else
5786         {
5787           int pos_byte = coding->dst_pos_byte;
5788           int pos = coding->dst_pos;
5789           int pos_end = pos + coding->produced_char - 1;
5790
5791           while (pos < pos_end)
5792             {
5793               p = BYTE_POS_ADDR (pos_byte);
5794               if (*p == '\r' && p[1] == '\n')
5795                 {
5796                   del_range_2 (pos, pos_byte, pos + 1, pos_byte + 1, 0);
5797                   n++;
5798                   pos_end--;
5799                 }
5800               pos++;
5801               if (coding->dst_multibyte)
5802                 pos_byte += BYTES_BY_CHAR_HEAD (*p);
5803               else
5804                 pos_byte++;
5805             }
5806         }
5807       coding->produced -= n;
5808       coding->produced_char -= n;
5809     }
5810 }
5811
5812
5813 /* Return a translation table (or list of them) from coding system
5814    attribute vector ATTRS for encoding (ENCODEP is nonzero) or
5815    decoding (ENCODEP is zero). */
5816
5817 static Lisp_Object
5818 get_translation_table (attrs, encodep, max_lookup)
5819      Lisp_Object attrs;
5820      int encodep, *max_lookup;
5821 {
5822   Lisp_Object standard, translation_table;
5823   Lisp_Object val;
5824
5825   if (encodep)
5826     translation_table = CODING_ATTR_ENCODE_TBL (attrs),
5827       standard = Vstandard_translation_table_for_encode;
5828   else
5829     translation_table = CODING_ATTR_DECODE_TBL (attrs),
5830       standard = Vstandard_translation_table_for_decode;
5831   if (NILP (translation_table))
5832     translation_table = standard;
5833   else
5834     {
5835       if (SYMBOLP (translation_table))
5836         translation_table = Fget (translation_table, Qtranslation_table);
5837       else if (CONSP (translation_table))
5838         {
5839           translation_table = Fcopy_sequence (translation_table);
5840           for (val = translation_table; CONSP (val); val = XCDR (val))
5841             if (SYMBOLP (XCAR (val)))
5842               XSETCAR (val, Fget (XCAR (val), Qtranslation_table));
5843         }
5844       if (CHAR_TABLE_P (standard))
5845         {
5846           if (CONSP (translation_table))
5847             translation_table = nconc2 (translation_table,
5848                                         Fcons (standard, Qnil));
5849           else
5850             translation_table = Fcons (translation_table,
5851                                        Fcons (standard, Qnil));
5852         }
5853     }
5854
5855   if (max_lookup)
5856     {
5857       *max_lookup = 1;
5858       if (CHAR_TABLE_P (translation_table)
5859           && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (translation_table)) > 1)
5860         {
5861           val = XCHAR_TABLE (translation_table)->extras[1];
5862           if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5863             *max_lookup = XFASTINT (val);
5864         }
5865       else if (CONSP (translation_table))
5866         {
5867           Lisp_Object tail, val;
5868
5869           for (tail = translation_table; CONSP (tail); tail = XCDR (tail))
5870             if (CHAR_TABLE_P (XCAR (tail))
5871                 && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (XCAR (tail))) > 1)
5872               {
5873                 val = XCHAR_TABLE (XCAR (tail))->extras[1];
5874                 if (NATNUMP (val) && *max_lookup < XFASTINT (val))
5875                   *max_lookup = XFASTINT (val);
5876               }
5877         }
5878     }
5879   return translation_table;
5880 }
5881
5882 #define LOOKUP_TRANSLATION_TABLE(table, c, trans)               \
5883   do {                                                          \
5884     trans = Qnil;                                               \
5885     if (CHAR_TABLE_P (table))                                   \
5886       {                                                         \
5887         trans = CHAR_TABLE_REF (table, c);                      \
5888         if (CHARACTERP (trans))                                 \
5889           c = XFASTINT (trans), trans = Qnil;                   \
5890       }                                                         \
5891     else if (CONSP (table))                                     \
5892       {                                                         \
5893         Lisp_Object tail;                                       \
5894                                                                 \
5895         for (tail = table; CONSP (tail); tail = XCDR (tail))    \
5896           if (CHAR_TABLE_P (XCAR (tail)))                       \
5897             {                                                   \
5898               trans = CHAR_TABLE_REF (XCAR (tail), c);          \
5899               if (CHARACTERP (trans))                           \
5900                 c = XFASTINT (trans), trans = Qnil;             \
5901               else if (! NILP (trans))                          \
5902                 break;                                          \
5903             }                                                   \
5904       }                                                         \
5905   } while (0)
5906
5907
5908 static Lisp_Object
5909 get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
5910      Lisp_Object val;
5911      int *buf, *buf_end;
5912      int last_block;
5913      int *from_nchars, *to_nchars;
5914 {
5915   /* VAL is TO or (([FROM-CHAR ...] .  TO) ...) where TO is TO-CHAR or
5916      [TO-CHAR ...].  */
5917   if (CONSP (val))
5918     {
5919       Lisp_Object from, tail;
5920       int i, len;
5921
5922       for (tail = val; CONSP (tail); tail = XCDR (tail))
5923         {
5924           val = XCAR (tail);
5925           from = XCAR (val);
5926           len = ASIZE (from);
5927           for (i = 0; i < len; i++)
5928             {
5929               if (buf + i == buf_end)
5930                 {
5931                   if (! last_block)
5932                     return Qt;
5933                   break;
5934                 }
5935               if (XINT (AREF (from, i)) != buf[i])
5936                 break;
5937             }
5938           if (i == len)
5939             {
5940               val = XCDR (val);
5941               *from_nchars = len;
5942               break;
5943             }
5944         }
5945       if (! CONSP (tail))
5946         return Qnil;
5947     }
5948   if (VECTORP (val))
5949     *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
5950   else
5951     *buf = XINT (val);
5952   return val;
5953 }
5954
5955
5956 static int
5957 produce_chars (coding, translation_table, last_block)
5958      struct coding_system *coding;
5959      Lisp_Object translation_table;
5960      int last_block;
5961 {
5962   unsigned char *dst = coding->destination + coding->produced;
5963   unsigned char *dst_end = coding->destination + coding->dst_bytes;
5964   EMACS_INT produced;
5965   EMACS_INT produced_chars = 0;
5966   int carryover = 0;
5967
5968   if (! coding->chars_at_source)
5969     {
5970       /* Source characters are in coding->charbuf.  */
5971       int *buf = coding->charbuf;
5972       int *buf_end = buf + coding->charbuf_used;
5973
5974       if (BUFFERP (coding->src_object)
5975           && EQ (coding->src_object, coding->dst_object))
5976         dst_end = ((unsigned char *) coding->source) + coding->consumed;
5977
5978       while (buf < buf_end)
5979         {
5980           int c = *buf, i;
5981
5982           if (c >= 0)
5983             {
5984               int from_nchars = 1, to_nchars = 1;
5985               Lisp_Object trans = Qnil;
5986
5987               LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
5988               if (! NILP (trans))
5989                 {
5990                   trans = get_translation (trans, buf, buf_end, last_block,
5991                                            &from_nchars, &to_nchars);
5992                   if (EQ (trans, Qt))
5993                     break;
5994                   c = *buf;
5995                 }
5996
5997               if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
5998                 {
5999                   dst = alloc_destination (coding,
6000                                            buf_end - buf
6001                                            + MAX_MULTIBYTE_LENGTH * to_nchars,
6002                                            dst);
6003                   dst_end = coding->destination + coding->dst_bytes;
6004                 }
6005
6006               for (i = 0; i < to_nchars; i++)
6007                 {
6008                   if (i > 0)
6009                     c = XINT (AREF (trans, i));
6010                   if (coding->dst_multibyte
6011                       || ! CHAR_BYTE8_P (c))
6012                     CHAR_STRING_ADVANCE (c, dst);
6013                   else
6014                     *dst++ = CHAR_TO_BYTE8 (c);
6015                 }
6016               produced_chars += to_nchars;
6017               *buf++ = to_nchars;
6018               while (--from_nchars > 0)
6019                 *buf++ = 0;
6020             }
6021           else
6022             /* This is an annotation datum.  (-C) is the length.  */
6023             buf += -c;
6024         }
6025       carryover = buf_end - buf;
6026     }
6027   else
6028     {
6029       /* Source characters are at coding->source.  */
6030       const unsigned char *src = coding->source;
6031       const unsigned char *src_end = src + coding->consumed;
6032
6033       if (coding->src_multibyte != coding->dst_multibyte)
6034         {
6035           if (coding->src_multibyte)
6036             {
6037               int multibytep = 1;
6038               EMACS_INT consumed_chars;
6039
6040               while (1)
6041                 {
6042                   const unsigned char *src_base = src;
6043                   int c;
6044
6045                   ONE_MORE_BYTE (c);
6046                   if (dst == dst_end)
6047                     {
6048                       if (EQ (coding->src_object, coding->dst_object))
6049                         dst_end = (unsigned char *) src;
6050                       if (dst == dst_end)
6051                         {
6052                           EMACS_INT offset = src - coding->source;
6053
6054                           dst = alloc_destination (coding, src_end - src + 1,
6055                                                    dst);
6056                           dst_end = coding->destination + coding->dst_bytes;
6057                           coding_set_source (coding);
6058                           src = coding->source + offset;
6059                           src_end = coding->source + coding->src_bytes;
6060                         }
6061                     }
6062                   *dst++ = c;
6063                   produced_chars++;
6064                 }
6065             no_more_source:
6066               ;
6067             }
6068           else
6069             while (src < src_end)
6070               {
6071                 int multibytep = 1;
6072                 int c = *src++;
6073
6074                 if (dst >= dst_end - 1)
6075                   {
6076                     if (EQ (coding->src_object, coding->dst_object))
6077                       dst_end = (unsigned char *) src;
6078                     if (dst >= dst_end - 1)
6079                       {
6080                         EMACS_INT offset = src - coding->source;
6081
6082                         dst = alloc_destination (coding, src_end - src + 2,
6083                                                  dst);
6084                         dst_end = coding->destination + coding->dst_bytes;
6085                         coding_set_source (coding);
6086                         src = coding->source + offset;
6087                         src_end = coding->source + coding->src_bytes;
6088                       }
6089                   }
6090                 EMIT_ONE_BYTE (c);
6091               }
6092         }
6093       else
6094         {
6095           if (!EQ (coding->src_object, coding->dst_object))
6096             {
6097               EMACS_INT require = coding->src_bytes - coding->dst_bytes;
6098
6099               if (require > 0)
6100                 {
6101                   EMACS_INT offset = src - coding->source;
6102
6103                   dst = alloc_destination (coding, require, dst);
6104                   coding_set_source (coding);
6105                   src = coding->source + offset;
6106                   src_end = coding->source + coding->src_bytes;
6107                 }
6108             }
6109           produced_chars = coding->consumed_char;
6110           while (src < src_end)
6111             *dst++ = *src++;
6112         }
6113     }
6114
6115   produced = dst - (coding->destination + coding->produced);
6116   if (BUFFERP (coding->dst_object) && produced_chars > 0)
6117     insert_from_gap (produced_chars, produced);
6118   coding->produced += produced;
6119   coding->produced_char += produced_chars;
6120   return carryover;
6121 }
6122
6123 /* Compose text in CODING->object according to the annotation data at
6124    CHARBUF.  CHARBUF is an array:
6125      [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ]
6126  */
6127
6128 static INLINE void
6129 produce_composition (coding, charbuf, pos)
6130      struct coding_system *coding;
6131      int *charbuf;
6132      EMACS_INT pos;
6133 {
6134   int len;
6135   EMACS_INT to;
6136   enum composition_method method;
6137   Lisp_Object components;
6138
6139   len = -charbuf[0];
6140   to = pos + charbuf[2];
6141   if (to <= pos)
6142     return;
6143   method = (enum composition_method) (charbuf[3]);
6144
6145   if (method == COMPOSITION_RELATIVE)
6146     components = Qnil;
6147   else if (method >= COMPOSITION_WITH_RULE
6148            && method <= COMPOSITION_WITH_RULE_ALTCHARS)
6149     {
6150       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6151       int i;
6152
6153       len -= 4;
6154       charbuf += 4;
6155       for (i = 0; i < len; i++)
6156         {
6157           args[i] = make_number (charbuf[i]);
6158           if (charbuf[i] < 0)
6159             return;
6160         }
6161       components = (method == COMPOSITION_WITH_ALTCHARS
6162                     ? Fstring (len, args) : Fvector (len, args));
6163     }
6164   else
6165     return;
6166   compose_text (pos, to, components, Qnil, coding->dst_object);
6167 }
6168
6169
6170 /* Put `charset' property on text in CODING->object according to
6171    the annotation data at CHARBUF.  CHARBUF is an array:
6172      [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
6173  */
6174
6175 static INLINE void
6176 produce_charset (coding, charbuf, pos)
6177      struct coding_system *coding;
6178      int *charbuf;
6179      EMACS_INT pos;
6180 {
6181   EMACS_INT from = pos - charbuf[2];
6182   struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
6183
6184   Fput_text_property (make_number (from), make_number (pos),
6185                       Qcharset, CHARSET_NAME (charset),
6186                       coding->dst_object);
6187 }
6188
6189
6190 #define CHARBUF_SIZE 0x4000
6191
6192 #define ALLOC_CONVERSION_WORK_AREA(coding)                              \
6193   do {                                                                  \
6194     int size = CHARBUF_SIZE;;                                           \
6195                                                                         \
6196     coding->charbuf = NULL;                                             \
6197     while (size > 1024)                                                 \
6198       {                                                                 \
6199         coding->charbuf = (int *) alloca (sizeof (int) * size);         \
6200         if (coding->charbuf)                                            \
6201           break;                                                        \
6202         size >>= 1;                                                     \
6203       }                                                                 \
6204     if (! coding->charbuf)                                              \
6205       {                                                                 \
6206         record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
6207         return coding->result;                                          \
6208       }                                                                 \
6209     coding->charbuf_size = size;                                        \
6210   } while (0)
6211
6212
6213 static void
6214 produce_annotation (coding, pos)
6215      struct coding_system *coding;
6216      EMACS_INT pos;
6217 {
6218   int *charbuf = coding->charbuf;
6219   int *charbuf_end = charbuf + coding->charbuf_used;
6220
6221   if (NILP (coding->dst_object))
6222     return;
6223
6224   while (charbuf < charbuf_end)
6225     {
6226       if (*charbuf >= 0)
6227         pos += *charbuf++;
6228       else
6229         {
6230           int len = -*charbuf;
6231           switch (charbuf[1])
6232             {
6233             case CODING_ANNOTATE_COMPOSITION_MASK:
6234               produce_composition (coding, charbuf, pos);
6235               break;
6236             case CODING_ANNOTATE_CHARSET_MASK:
6237               produce_charset (coding, charbuf, pos);
6238               break;
6239             default:
6240               abort ();
6241             }
6242           charbuf += len;
6243         }
6244     }
6245 }
6246
6247 /* Decode the data at CODING->src_object into CODING->dst_object.
6248    CODING->src_object is a buffer, a string, or nil.
6249    CODING->dst_object is a buffer.
6250
6251    If CODING->src_object is a buffer, it must be the current buffer.
6252    In this case, if CODING->src_pos is positive, it is a position of
6253    the source text in the buffer, otherwise, the source text is in the
6254    gap area of the buffer, and CODING->src_pos specifies the offset of
6255    the text from GPT (which must be the same as PT).  If this is the
6256    same buffer as CODING->dst_object, CODING->src_pos must be
6257    negative.
6258
6259    If CODING->src_object is a string, CODING->src_pos is an index to
6260    that string.
6261
6262    If CODING->src_object is nil, CODING->source must already point to
6263    the non-relocatable memory area.  In this case, CODING->src_pos is
6264    an offset from CODING->source.
6265
6266    The decoded data is inserted at the current point of the buffer
6267    CODING->dst_object.
6268 */
6269
6270 static int
6271 decode_coding (coding)
6272      struct coding_system *coding;
6273 {
6274   Lisp_Object attrs;
6275   Lisp_Object undo_list;
6276   Lisp_Object translation_table;
6277   int carryover;
6278   int i;
6279
6280   if (BUFFERP (coding->src_object)
6281       && coding->src_pos > 0
6282       && coding->src_pos < GPT
6283       && coding->src_pos + coding->src_chars > GPT)
6284     move_gap_both (coding->src_pos, coding->src_pos_byte);
6285
6286   undo_list = Qt;
6287   if (BUFFERP (coding->dst_object))
6288     {
6289       if (current_buffer != XBUFFER (coding->dst_object))
6290         set_buffer_internal (XBUFFER (coding->dst_object));
6291       if (GPT != PT)
6292         move_gap_both (PT, PT_BYTE);
6293       undo_list = current_buffer->undo_list;
6294       current_buffer->undo_list = Qt;
6295     }
6296
6297   coding->consumed = coding->consumed_char = 0;
6298   coding->produced = coding->produced_char = 0;
6299   coding->chars_at_source = 0;
6300   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6301   coding->errors = 0;
6302
6303   ALLOC_CONVERSION_WORK_AREA (coding);
6304
6305   attrs = CODING_ID_ATTRS (coding->id);
6306   translation_table = get_translation_table (attrs, 0, NULL);
6307
6308   carryover = 0;
6309   do
6310     {
6311       EMACS_INT pos = coding->dst_pos + coding->produced_char;
6312
6313       coding_set_source (coding);
6314       coding->annotated = 0;
6315       coding->charbuf_used = carryover;
6316       (*(coding->decoder)) (coding);
6317       coding_set_destination (coding);
6318       carryover = produce_chars (coding, translation_table, 0);
6319       if (coding->annotated)
6320         produce_annotation (coding, pos);
6321       for (i = 0; i < carryover; i++)
6322         coding->charbuf[i]
6323           = coding->charbuf[coding->charbuf_used - carryover + i];
6324     }
6325   while (coding->consumed < coding->src_bytes
6326          && (coding->result == CODING_RESULT_SUCCESS
6327              || coding->result == CODING_RESULT_INVALID_SRC));
6328
6329   if (carryover > 0)
6330     {
6331       coding_set_destination (coding);
6332       coding->charbuf_used = carryover;
6333       produce_chars (coding, translation_table, 1);
6334     }
6335
6336   coding->carryover_bytes = 0;
6337   if (coding->consumed < coding->src_bytes)
6338     {
6339       int nbytes = coding->src_bytes - coding->consumed;
6340       const unsigned char *src;
6341
6342       coding_set_source (coding);
6343       coding_set_destination (coding);
6344       src = coding->source + coding->consumed;
6345
6346       if (coding->mode & CODING_MODE_LAST_BLOCK)
6347         {
6348           /* Flush out unprocessed data as binary chars.  We are sure
6349              that the number of data is less than the size of
6350              coding->charbuf.  */
6351           coding->charbuf_used = 0;
6352           while (nbytes-- > 0)
6353             {
6354               int c = *src++;
6355
6356               if (c & 0x80)
6357                 c = BYTE8_TO_CHAR (c);
6358               coding->charbuf[coding->charbuf_used++] = c;
6359             }
6360           produce_chars (coding, Qnil, 1);
6361         }
6362       else
6363         {
6364           /* Record unprocessed bytes in coding->carryover.  We are
6365              sure that the number of data is less than the size of
6366              coding->carryover.  */
6367           unsigned char *p = coding->carryover;
6368
6369           coding->carryover_bytes = nbytes;
6370           while (nbytes-- > 0)
6371             *p++ = *src++;
6372         }
6373       coding->consumed = coding->src_bytes;
6374     }
6375
6376   if (! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix))
6377     decode_eol (coding);
6378   if (BUFFERP (coding->dst_object))
6379     {
6380       current_buffer->undo_list = undo_list;
6381       record_insert (coding->dst_pos, coding->produced_char);
6382     }
6383   return coding->result;
6384 }
6385
6386
6387 /* Extract an annotation datum from a composition starting at POS and
6388    ending before LIMIT of CODING->src_object (buffer or string), store
6389    the data in BUF, set *STOP to a starting position of the next
6390    composition (if any) or to LIMIT, and return the address of the
6391    next element of BUF.
6392
6393    If such an annotation is not found, set *STOP to a starting
6394    position of a composition after POS (if any) or to LIMIT, and
6395    return BUF.  */
6396
6397 static INLINE int *
6398 handle_composition_annotation (pos, limit, coding, buf, stop)
6399      EMACS_INT pos, limit;
6400      struct coding_system *coding;
6401      int *buf;
6402      EMACS_INT *stop;
6403 {
6404   EMACS_INT start, end;
6405   Lisp_Object prop;
6406
6407   if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object)
6408       || end > limit)
6409     *stop = limit;
6410   else if (start > pos)
6411     *stop = start;
6412   else
6413     {
6414       if (start == pos)
6415         {
6416           /* We found a composition.  Store the corresponding
6417              annotation data in BUF.  */
6418           int *head = buf;
6419           enum composition_method method = COMPOSITION_METHOD (prop);
6420           int nchars = COMPOSITION_LENGTH (prop);
6421
6422           ADD_COMPOSITION_DATA (buf, nchars, method);
6423           if (method != COMPOSITION_RELATIVE)
6424             {
6425               Lisp_Object components;
6426               int len, i, i_byte;
6427
6428               components = COMPOSITION_COMPONENTS (prop);
6429               if (VECTORP (components))
6430                 {
6431                   len = XVECTOR (components)->size;
6432                   for (i = 0; i < len; i++)
6433                     *buf++ = XINT (AREF (components, i));
6434                 }
6435               else if (STRINGP (components))
6436                 {
6437                   len = SCHARS (components);
6438                   i = i_byte = 0;
6439                   while (i < len)
6440                     {
6441                       FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
6442                       buf++;
6443                     }
6444                 }
6445               else if (INTEGERP (components))
6446                 {
6447                   len = 1;
6448                   *buf++ = XINT (components);
6449                 }
6450               else if (CONSP (components))
6451                 {
6452                   for (len = 0; CONSP (components);
6453                        len++, components = XCDR (components))
6454                     *buf++ = XINT (XCAR (components));
6455                 }
6456               else
6457                 abort ();
6458               *head -= len;
6459             }
6460         }
6461
6462       if (find_composition (end, limit, &start, &end, &prop,
6463                             coding->src_object)
6464           && end <= limit)
6465         *stop = start;
6466       else
6467         *stop = limit;
6468     }
6469   return buf;
6470 }
6471
6472
6473 /* Extract an annotation datum from a text property `charset' at POS of
6474    CODING->src_object (buffer of string), store the data in BUF, set
6475    *STOP to the position where the value of `charset' property changes
6476    (limiting by LIMIT), and return the address of the next element of
6477    BUF.
6478
6479    If the property value is nil, set *STOP to the position where the
6480    property value is non-nil (limiting by LIMIT), and return BUF.  */
6481
6482 static INLINE int *
6483 handle_charset_annotation (pos, limit, coding, buf, stop)
6484      EMACS_INT pos, limit;
6485      struct coding_system *coding;
6486      int *buf;
6487      EMACS_INT *stop;
6488 {
6489   Lisp_Object val, next;
6490   int id;
6491
6492   val = Fget_text_property (make_number (pos), Qcharset, coding->src_object);
6493   if (! NILP (val) && CHARSETP (val))
6494     id = XINT (CHARSET_SYMBOL_ID (val));
6495   else
6496     id = -1;
6497   ADD_CHARSET_DATA (buf, 0, id);
6498   next = Fnext_single_property_change (make_number (pos), Qcharset,
6499                                        coding->src_object,
6500                                        make_number (limit));
6501   *stop = XINT (next);
6502   return buf;
6503 }
6504
6505
6506 static void
6507 consume_chars (coding, translation_table, max_lookup)
6508      struct coding_system *coding;
6509      Lisp_Object translation_table;
6510      int max_lookup;
6511 {
6512   int *buf = coding->charbuf;
6513   int *buf_end = coding->charbuf + coding->charbuf_size;
6514   const unsigned char *src = coding->source + coding->consumed;
6515   const unsigned char *src_end = coding->source + coding->src_bytes;
6516   EMACS_INT pos = coding->src_pos + coding->consumed_char;
6517   EMACS_INT end_pos = coding->src_pos + coding->src_chars;
6518   int multibytep = coding->src_multibyte;
6519   Lisp_Object eol_type;
6520   int c;
6521   EMACS_INT stop, stop_composition, stop_charset;
6522   int *lookup_buf = NULL;
6523
6524   if (! NILP (translation_table))
6525     lookup_buf = alloca (sizeof (int) * max_lookup);
6526
6527   eol_type = CODING_ID_EOL_TYPE (coding->id);
6528   if (VECTORP (eol_type))
6529     eol_type = Qunix;
6530
6531   /* Note: composition handling is not yet implemented.  */
6532   coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
6533
6534   if (NILP (coding->src_object))
6535     stop = stop_composition = stop_charset = end_pos;
6536   else
6537     {
6538       if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)
6539         stop = stop_composition = pos;
6540       else
6541         stop = stop_composition = end_pos;
6542       if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK)
6543         stop = stop_charset = pos;
6544       else
6545         stop_charset = end_pos;
6546     }
6547
6548   /* Compensate for CRLF and conversion.  */
6549   buf_end -= 1 + MAX_ANNOTATION_LENGTH;
6550   while (buf < buf_end)
6551     {
6552       Lisp_Object trans;
6553
6554       if (pos == stop)
6555         {
6556           if (pos == end_pos)
6557             break;
6558           if (pos == stop_composition)
6559             buf = handle_composition_annotation (pos, end_pos, coding,
6560                                                  buf, &stop_composition);
6561           if (pos == stop_charset)
6562             buf = handle_charset_annotation (pos, end_pos, coding,
6563                                              buf, &stop_charset);
6564           stop = (stop_composition < stop_charset
6565                   ? stop_composition : stop_charset);
6566         }
6567
6568       if (! multibytep)
6569         {
6570           EMACS_INT bytes;
6571
6572           if (coding->encoder == encode_coding_raw_text)
6573             c = *src++, pos++;
6574           else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
6575             c = STRING_CHAR_ADVANCE (src), pos += bytes;
6576           else
6577             c = BYTE8_TO_CHAR (*src), src++, pos++;
6578         }
6579       else
6580         c = STRING_CHAR_ADVANCE (src), pos++;
6581       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
6582         c = '\n';
6583       if (! EQ (eol_type, Qunix))
6584         {
6585           if (c == '\n')
6586             {
6587               if (EQ (eol_type, Qdos))
6588                 *buf++ = '\r';
6589               else
6590                 c = '\r';
6591             }
6592         }
6593
6594       trans = Qnil;
6595       LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6596       if (NILP (trans))
6597         *buf++ = c;
6598       else
6599         {
6600           int from_nchars = 1, to_nchars = 1;
6601           int *lookup_buf_end;
6602           const unsigned char *p = src;
6603           int i;
6604
6605           lookup_buf[0] = c;
6606           for (i = 1; i < max_lookup && p < src_end; i++)
6607             lookup_buf[i] = STRING_CHAR_ADVANCE (p);
6608           lookup_buf_end = lookup_buf + i;
6609           trans = get_translation (trans, lookup_buf, lookup_buf_end, 1,
6610                                    &from_nchars, &to_nchars);
6611           if (EQ (trans, Qt)
6612               || buf + to_nchars > buf_end)
6613             break;
6614           *buf++ = *lookup_buf;
6615           for (i = 1; i < to_nchars; i++)
6616             *buf++ = XINT (AREF (trans, i));
6617           for (i = 1; i < from_nchars; i++, pos++)
6618             src += MULTIBYTE_LENGTH_NO_CHECK (src);
6619         }
6620     }
6621
6622   coding->consumed = src - coding->source;
6623   coding->consumed_char = pos - coding->src_pos;
6624   coding->charbuf_used = buf - coding->charbuf;
6625   coding->chars_at_source = 0;
6626 }
6627
6628
6629 /* Encode the text at CODING->src_object into CODING->dst_object.
6630    CODING->src_object is a buffer or a string.
6631    CODING->dst_object is a buffer or nil.
6632
6633    If CODING->src_object is a buffer, it must be the current buffer.
6634    In this case, if CODING->src_pos is positive, it is a position of
6635    the source text in the buffer, otherwise. the source text is in the
6636    gap area of the buffer, and coding->src_pos specifies the offset of
6637    the text from GPT (which must be the same as PT).  If this is the
6638    same buffer as CODING->dst_object, CODING->src_pos must be
6639    negative and CODING should not have `pre-write-conversion'.
6640
6641    If CODING->src_object is a string, CODING should not have
6642    `pre-write-conversion'.
6643
6644    If CODING->dst_object is a buffer, the encoded data is inserted at
6645    the current point of that buffer.
6646
6647    If CODING->dst_object is nil, the encoded data is placed at the
6648    memory area specified by CODING->destination.  */
6649
6650 static int
6651 encode_coding (coding)
6652      struct coding_system *coding;
6653 {
6654   Lisp_Object attrs;
6655   Lisp_Object translation_table;
6656   int max_lookup;
6657
6658   attrs = CODING_ID_ATTRS (coding->id);
6659   if (coding->encoder == encode_coding_raw_text)
6660     translation_table = Qnil, max_lookup = 0;
6661   else
6662     translation_table = get_translation_table (attrs, 1, &max_lookup);
6663
6664   if (BUFFERP (coding->dst_object))
6665     {
6666       set_buffer_internal (XBUFFER (coding->dst_object));
6667       coding->dst_multibyte
6668         = ! NILP (current_buffer->enable_multibyte_characters);
6669     }
6670
6671   coding->consumed = coding->consumed_char = 0;
6672   coding->produced = coding->produced_char = 0;
6673   record_conversion_result (coding, CODING_RESULT_SUCCESS);
6674   coding->errors = 0;
6675
6676   ALLOC_CONVERSION_WORK_AREA (coding);
6677
6678   do {
6679     coding_set_source (coding);
6680     consume_chars (coding, translation_table, max_lookup);
6681     coding_set_destination (coding);
6682     (*(coding->encoder)) (coding);
6683   } while (coding->consumed_char < coding->src_chars);
6684
6685   if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
6686     insert_from_gap (coding->produced_char, coding->produced);
6687
6688   return (coding->result);
6689 }
6690
6691
6692 /* Name (or base name) of work buffer for code conversion.  */
6693 static Lisp_Object Vcode_conversion_workbuf_name;
6694
6695 /* A working buffer used by the top level conversion.  Once it is
6696    created, it is never destroyed.  It has the name
6697    Vcode_conversion_workbuf_name.  The other working buffers are
6698    destroyed after the use is finished, and their names are modified
6699    versions of Vcode_conversion_workbuf_name.  */
6700 static Lisp_Object Vcode_conversion_reused_workbuf;
6701
6702 /* 1 iff Vcode_conversion_reused_workbuf is already in use.  */
6703 static int reused_workbuf_in_use;
6704
6705
6706 /* Return a working buffer of code convesion.  MULTIBYTE specifies the
6707    multibyteness of returning buffer.  */
6708
6709 static Lisp_Object
6710 make_conversion_work_buffer (multibyte)
6711      int multibyte;
6712 {
6713   Lisp_Object name, workbuf;
6714   struct buffer *current;
6715
6716   if (reused_workbuf_in_use++)
6717     {
6718       name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
6719       workbuf = Fget_buffer_create (name);
6720     }
6721   else
6722     {
6723       name = Vcode_conversion_workbuf_name;
6724       workbuf = Fget_buffer_create (name);
6725       if (NILP (Vcode_conversion_reused_workbuf))
6726         Vcode_conversion_reused_workbuf = workbuf;
6727     }
6728   current = current_buffer;
6729   set_buffer_internal (XBUFFER (workbuf));
6730   Ferase_buffer ();
6731   current_buffer->undo_list = Qt;
6732   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
6733   set_buffer_internal (current);
6734   return workbuf;
6735 }
6736
6737
6738 static Lisp_Object
6739 code_conversion_restore (arg)
6740      Lisp_Object arg;
6741 {
6742   Lisp_Object current, workbuf;
6743   struct gcpro gcpro1;
6744
6745   GCPRO1 (arg);
6746   current = XCAR (arg);
6747   workbuf = XCDR (arg);
6748   if (! NILP (workbuf))
6749     {
6750       if (EQ (workbuf, Vcode_conversion_reused_workbuf))
6751         reused_workbuf_in_use = 0;
6752       else if (! NILP (Fbuffer_live_p (workbuf)))
6753         Fkill_buffer (workbuf);
6754     }
6755   set_buffer_internal (XBUFFER (current));
6756   UNGCPRO;
6757   return Qnil;
6758 }
6759
6760 Lisp_Object
6761 code_conversion_save (with_work_buf, multibyte)
6762      int with_work_buf, multibyte;
6763 {
6764   Lisp_Object workbuf = Qnil;
6765
6766   if (with_work_buf)
6767     workbuf = make_conversion_work_buffer (multibyte);
6768   record_unwind_protect (code_conversion_restore,
6769                          Fcons (Fcurrent_buffer (), workbuf));
6770   return workbuf;
6771 }
6772
6773 int
6774 decode_coding_gap (coding, chars, bytes)
6775      struct coding_system *coding;
6776      EMACS_INT chars, bytes;
6777 {
6778   int count = specpdl_ptr - specpdl;
6779   Lisp_Object attrs;
6780
6781   code_conversion_save (0, 0);
6782
6783   coding->src_object = Fcurrent_buffer ();
6784   coding->src_chars = chars;
6785   coding->src_bytes = bytes;
6786   coding->src_pos = -chars;
6787   coding->src_pos_byte = -bytes;
6788   coding->src_multibyte = chars < bytes;
6789   coding->dst_object = coding->src_object;
6790   coding->dst_pos = PT;
6791   coding->dst_pos_byte = PT_BYTE;
6792   coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters);
6793
6794   if (CODING_REQUIRE_DETECTION (coding))
6795     detect_coding (coding);
6796
6797   coding->mode |= CODING_MODE_LAST_BLOCK;
6798   current_buffer->text->inhibit_shrinking = 1;
6799   decode_coding (coding);
6800   current_buffer->text->inhibit_shrinking = 0;
6801
6802   attrs = CODING_ID_ATTRS (coding->id);
6803   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6804     {
6805       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6806       Lisp_Object val;
6807
6808       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6809       val = call1 (CODING_ATTR_POST_READ (attrs),
6810                    make_number (coding->produced_char));
6811       CHECK_NATNUM (val);
6812       coding->produced_char += Z - prev_Z;
6813       coding->produced += Z_BYTE - prev_Z_BYTE;
6814     }
6815
6816   unbind_to (count, Qnil);
6817   return coding->result;
6818 }
6819
6820 int
6821 encode_coding_gap (coding, chars, bytes)
6822      struct coding_system *coding;
6823      EMACS_INT chars, bytes;
6824 {
6825   int count = specpdl_ptr - specpdl;
6826
6827   code_conversion_save (0, 0);
6828
6829   coding->src_object = Fcurrent_buffer ();
6830   coding->src_chars = chars;
6831   coding->src_bytes = bytes;
6832   coding->src_pos = -chars;
6833   coding->src_pos_byte = -bytes;
6834   coding->src_multibyte = chars < bytes;
6835   coding->dst_object = coding->src_object;
6836   coding->dst_pos = PT;
6837   coding->dst_pos_byte = PT_BYTE;
6838
6839   encode_coding (coding);
6840
6841   unbind_to (count, Qnil);
6842   return coding->result;
6843 }
6844
6845
6846 /* Decode the text in the range FROM/FROM_BYTE and TO/TO_BYTE in
6847    SRC_OBJECT into DST_OBJECT by coding context CODING.
6848
6849    SRC_OBJECT is a buffer, a string, or Qnil.
6850
6851    If it is a buffer, the text is at point of the buffer.  FROM and TO
6852    are positions in the buffer.
6853
6854    If it is a string, the text is at the beginning of the string.
6855    FROM and TO are indices to the string.
6856
6857    If it is nil, the text is at coding->source.  FROM and TO are
6858    indices to coding->source.
6859
6860    DST_OBJECT is a buffer, Qt, or Qnil.
6861
6862    If it is a buffer, the decoded text is inserted at point of the
6863    buffer.  If the buffer is the same as SRC_OBJECT, the source text
6864    is deleted.
6865
6866    If it is Qt, a string is made from the decoded text, and
6867    set in CODING->dst_object.
6868
6869    If it is Qnil, the decoded text is stored at CODING->destination.
6870    The caller must allocate CODING->dst_bytes bytes at
6871    CODING->destination by xmalloc.  If the decoded text is longer than
6872    CODING->dst_bytes, CODING->destination is relocated by xrealloc.
6873  */
6874
6875 void
6876 decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
6877                       dst_object)
6878      struct coding_system *coding;
6879      Lisp_Object src_object;
6880      EMACS_INT from, from_byte, to, to_byte;
6881      Lisp_Object dst_object;
6882 {
6883   int count = specpdl_ptr - specpdl;
6884   unsigned char *destination;
6885   EMACS_INT dst_bytes;
6886   EMACS_INT chars = to - from;
6887   EMACS_INT bytes = to_byte - from_byte;
6888   Lisp_Object attrs;
6889   int saved_pt = -1, saved_pt_byte;
6890   int need_marker_adjustment = 0;
6891   Lisp_Object old_deactivate_mark;
6892
6893   old_deactivate_mark = Vdeactivate_mark;
6894
6895   if (NILP (dst_object))
6896     {
6897       destination = coding->destination;
6898       dst_bytes = coding->dst_bytes;
6899     }
6900
6901   coding->src_object = src_object;
6902   coding->src_chars = chars;
6903   coding->src_bytes = bytes;
6904   coding->src_multibyte = chars < bytes;
6905
6906   if (STRINGP (src_object))
6907     {
6908       coding->src_pos = from;
6909       coding->src_pos_byte = from_byte;
6910     }
6911   else if (BUFFERP (src_object))
6912     {
6913       set_buffer_internal (XBUFFER (src_object));
6914       if (from != GPT)
6915         move_gap_both (from, from_byte);
6916       if (EQ (src_object, dst_object))
6917         {
6918           struct Lisp_Marker *tail;
6919
6920           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
6921             {
6922               tail->need_adjustment
6923                 = tail->charpos == (tail->insertion_type ? from : to);
6924               need_marker_adjustment |= tail->need_adjustment;
6925             }
6926           saved_pt = PT, saved_pt_byte = PT_BYTE;
6927           TEMP_SET_PT_BOTH (from, from_byte);
6928           current_buffer->text->inhibit_shrinking = 1;
6929           del_range_both (from, from_byte, to, to_byte, 1);
6930           coding->src_pos = -chars;
6931           coding->src_pos_byte = -bytes;
6932         }
6933       else
6934         {
6935           coding->src_pos = from;
6936           coding->src_pos_byte = from_byte;
6937         }
6938     }
6939
6940   if (CODING_REQUIRE_DETECTION (coding))
6941     detect_coding (coding);
6942   attrs = CODING_ID_ATTRS (coding->id);
6943
6944   if (EQ (dst_object, Qt)
6945       || (! NILP (CODING_ATTR_POST_READ (attrs))
6946           && NILP (dst_object)))
6947     {
6948       coding->dst_object = code_conversion_save (1, 1);
6949       coding->dst_pos = BEG;
6950       coding->dst_pos_byte = BEG_BYTE;
6951       coding->dst_multibyte = 1;
6952     }
6953   else if (BUFFERP (dst_object))
6954     {
6955       code_conversion_save (0, 0);
6956       coding->dst_object = dst_object;
6957       coding->dst_pos = BUF_PT (XBUFFER (dst_object));
6958       coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
6959       coding->dst_multibyte
6960         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
6961     }
6962   else
6963     {
6964       code_conversion_save (0, 0);
6965       coding->dst_object = Qnil;
6966       coding->dst_multibyte = 1;
6967     }
6968
6969   decode_coding (coding);
6970
6971   if (BUFFERP (coding->dst_object))
6972     set_buffer_internal (XBUFFER (coding->dst_object));
6973
6974   if (! NILP (CODING_ATTR_POST_READ (attrs)))
6975     {
6976       struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
6977       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
6978       Lisp_Object val;
6979
6980       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
6981       GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
6982               old_deactivate_mark);
6983       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
6984                         make_number (coding->produced_char));
6985       UNGCPRO;
6986       CHECK_NATNUM (val);
6987       coding->produced_char += Z - prev_Z;
6988       coding->produced += Z_BYTE - prev_Z_BYTE;
6989     }
6990
6991   if (EQ (dst_object, Qt))
6992     {
6993       coding->dst_object = Fbuffer_string ();
6994     }
6995   else if (NILP (dst_object) && BUFFERP (coding->dst_object))
6996     {
6997       set_buffer_internal (XBUFFER (coding->dst_object));
6998       if (dst_bytes < coding->produced)
6999         {
7000           destination = xrealloc (destination, coding->produced);
7001           if (! destination)
7002             {
7003               record_conversion_result (coding,
7004                                         CODING_RESULT_INSUFFICIENT_DST);
7005               unbind_to (count, Qnil);
7006               return;
7007             }
7008           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
7009             move_gap_both (BEGV, BEGV_BYTE);
7010           bcopy (BEGV_ADDR, destination, coding->produced);
7011           coding->destination = destination;
7012         }
7013     }
7014
7015   if (saved_pt >= 0)
7016     {
7017       /* This is the case of:
7018          (BUFFERP (src_object) && EQ (src_object, dst_object))
7019          As we have moved PT while replacing the original buffer
7020          contents, we must recover it now.  */
7021       set_buffer_internal (XBUFFER (src_object));
7022       current_buffer->text->inhibit_shrinking = 0;
7023       if (saved_pt < from)
7024         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7025       else if (saved_pt < from + chars)
7026         TEMP_SET_PT_BOTH (from, from_byte);
7027       else if (! NILP (current_buffer->enable_multibyte_characters))
7028         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7029                           saved_pt_byte + (coding->produced - bytes));
7030       else
7031         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7032                           saved_pt_byte + (coding->produced - bytes));
7033
7034       if (need_marker_adjustment)
7035         {
7036           struct Lisp_Marker *tail;
7037
7038           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7039             if (tail->need_adjustment)
7040               {
7041                 tail->need_adjustment = 0;
7042                 if (tail->insertion_type)
7043                   {
7044                     tail->bytepos = from_byte;
7045                     tail->charpos = from;
7046                   }
7047                 else
7048                   {
7049                     tail->bytepos = from_byte + coding->produced;
7050                     tail->charpos
7051                       = (NILP (current_buffer->enable_multibyte_characters)
7052                          ? tail->bytepos : from + coding->produced_char);
7053                   }
7054               }
7055         }
7056     }
7057
7058   Vdeactivate_mark = old_deactivate_mark;
7059   unbind_to (count, coding->dst_object);
7060 }
7061
7062
7063 void
7064 encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
7065                       dst_object)
7066      struct coding_system *coding;
7067      Lisp_Object src_object;
7068      EMACS_INT from, from_byte, to, to_byte;
7069      Lisp_Object dst_object;
7070 {
7071   int count = specpdl_ptr - specpdl;
7072   EMACS_INT chars = to - from;
7073   EMACS_INT bytes = to_byte - from_byte;
7074   Lisp_Object attrs;
7075   int saved_pt = -1, saved_pt_byte;
7076   int need_marker_adjustment = 0;
7077   int kill_src_buffer = 0;
7078   Lisp_Object old_deactivate_mark;
7079
7080   old_deactivate_mark = Vdeactivate_mark;
7081
7082   coding->src_object = src_object;
7083   coding->src_chars = chars;
7084   coding->src_bytes = bytes;
7085   coding->src_multibyte = chars < bytes;
7086
7087   attrs = CODING_ID_ATTRS (coding->id);
7088
7089   if (EQ (src_object, dst_object))
7090     {
7091       struct Lisp_Marker *tail;
7092
7093       for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7094         {
7095           tail->need_adjustment
7096             = tail->charpos == (tail->insertion_type ? from : to);
7097           need_marker_adjustment |= tail->need_adjustment;
7098         }
7099     }
7100
7101   if (! NILP (CODING_ATTR_PRE_WRITE (attrs)))
7102     {
7103       coding->src_object = code_conversion_save (1, coding->src_multibyte);
7104       set_buffer_internal (XBUFFER (coding->src_object));
7105       if (STRINGP (src_object))
7106         insert_from_string (src_object, from, from_byte, chars, bytes, 0);
7107       else if (BUFFERP (src_object))
7108         insert_from_buffer (XBUFFER (src_object), from, chars, 0);
7109       else
7110         insert_1_both (coding->source + from, chars, bytes, 0, 0, 0);
7111
7112       if (EQ (src_object, dst_object))
7113         {
7114           set_buffer_internal (XBUFFER (src_object));
7115           saved_pt = PT, saved_pt_byte = PT_BYTE;
7116           del_range_both (from, from_byte, to, to_byte, 1);
7117           set_buffer_internal (XBUFFER (coding->src_object));
7118         }
7119
7120       {
7121         Lisp_Object args[3];
7122         struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
7123
7124         GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
7125                 old_deactivate_mark);
7126         args[0] = CODING_ATTR_PRE_WRITE (attrs);
7127         args[1] = make_number (BEG);
7128         args[2] = make_number (Z);
7129         safe_call (3, args);
7130         UNGCPRO;
7131       }
7132       if (XBUFFER (coding->src_object) != current_buffer)
7133         kill_src_buffer = 1;
7134       coding->src_object = Fcurrent_buffer ();
7135       if (BEG != GPT)
7136         move_gap_both (BEG, BEG_BYTE);
7137       coding->src_chars = Z - BEG;
7138       coding->src_bytes = Z_BYTE - BEG_BYTE;
7139       coding->src_pos = BEG;
7140       coding->src_pos_byte = BEG_BYTE;
7141       coding->src_multibyte = Z < Z_BYTE;
7142     }
7143   else if (STRINGP (src_object))
7144     {
7145       code_conversion_save (0, 0);
7146       coding->src_pos = from;
7147       coding->src_pos_byte = from_byte;
7148     }
7149   else if (BUFFERP (src_object))
7150     {
7151       code_conversion_save (0, 0);
7152       set_buffer_internal (XBUFFER (src_object));
7153       if (EQ (src_object, dst_object))
7154         {
7155           saved_pt = PT, saved_pt_byte = PT_BYTE;
7156           coding->src_object = del_range_1 (from, to, 1, 1);
7157           coding->src_pos = 0;
7158           coding->src_pos_byte = 0;
7159         }
7160       else
7161         {
7162           if (from < GPT && to >= GPT)
7163             move_gap_both (from, from_byte);
7164           coding->src_pos = from;
7165           coding->src_pos_byte = from_byte;
7166         }
7167     }
7168   else
7169     code_conversion_save (0, 0);
7170
7171   if (BUFFERP (dst_object))
7172     {
7173       coding->dst_object = dst_object;
7174       if (EQ (src_object, dst_object))
7175         {
7176           coding->dst_pos = from;
7177           coding->dst_pos_byte = from_byte;
7178         }
7179       else
7180         {
7181           coding->dst_pos = BUF_PT (XBUFFER (dst_object));
7182           coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
7183         }
7184       coding->dst_multibyte
7185         = ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
7186     }
7187   else if (EQ (dst_object, Qt))
7188     {
7189       coding->dst_object = Qnil;
7190       coding->dst_bytes = coding->src_chars;
7191       if (coding->dst_bytes == 0)
7192         coding->dst_bytes = 1;
7193       coding->destination = (unsigned char *) xmalloc (coding->dst_bytes);
7194       coding->dst_multibyte = 0;
7195     }
7196   else
7197     {
7198       coding->dst_object = Qnil;
7199       coding->dst_multibyte = 0;
7200     }
7201
7202   encode_coding (coding);
7203
7204   if (EQ (dst_object, Qt))
7205     {
7206       if (BUFFERP (coding->dst_object))
7207         coding->dst_object = Fbuffer_string ();
7208       else
7209         {
7210           coding->dst_object
7211             = make_unibyte_string ((char *) coding->destination,
7212                                    coding->produced);
7213           xfree (coding->destination);
7214         }
7215     }
7216
7217   if (saved_pt >= 0)
7218     {
7219       /* This is the case of:
7220          (BUFFERP (src_object) && EQ (src_object, dst_object))
7221          As we have moved PT while replacing the original buffer
7222          contents, we must recover it now.  */
7223       set_buffer_internal (XBUFFER (src_object));
7224       if (saved_pt < from)
7225         TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
7226       else if (saved_pt < from + chars)
7227         TEMP_SET_PT_BOTH (from, from_byte);
7228       else if (! NILP (current_buffer->enable_multibyte_characters))
7229         TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars),
7230                           saved_pt_byte + (coding->produced - bytes));
7231       else
7232         TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes),
7233                           saved_pt_byte + (coding->produced - bytes));
7234
7235       if (need_marker_adjustment)
7236         {
7237           struct Lisp_Marker *tail;
7238
7239           for (tail = BUF_MARKERS (current_buffer); tail; tail = tail->next)
7240             if (tail->need_adjustment)
7241               {
7242                 tail->need_adjustment = 0;
7243                 if (tail->insertion_type)
7244                   {
7245                     tail->bytepos = from_byte;
7246                     tail->charpos = from;
7247                   }
7248                 else
7249                   {
7250                     tail->bytepos = from_byte + coding->produced;
7251                     tail->charpos
7252                       = (NILP (current_buffer->enable_multibyte_characters)
7253                          ? tail->bytepos : from + coding->produced_char);
7254                   }
7255               }
7256         }
7257     }
7258
7259   if (kill_src_buffer)
7260     Fkill_buffer (coding->src_object);
7261
7262   Vdeactivate_mark = old_deactivate_mark;
7263   unbind_to (count, Qnil);
7264 }
7265
7266
7267 Lisp_Object
7268 preferred_coding_system ()
7269 {
7270   int id = coding_categories[coding_priorities[0]].id;
7271
7272   return CODING_ID_NAME (id);
7273 }
7274
7275 \f
7276 #ifdef emacs
7277 /*** 8. Emacs Lisp library functions ***/
7278
7279 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
7280        doc: /* Return t if OBJECT is nil or a coding-system.
7281 See the documentation of `define-coding-system' for information
7282 about coding-system objects.  */)
7283      (obj)
7284      Lisp_Object obj;
7285 {
7286   if (NILP (obj)
7287       || CODING_SYSTEM_ID (obj) >= 0)
7288     return Qt;
7289   if (! SYMBOLP (obj)
7290       || NILP (Fget (obj, Qcoding_system_define_form)))
7291     return Qnil;
7292   return Qt;
7293 }
7294
7295 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
7296        Sread_non_nil_coding_system, 1, 1, 0,
7297        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.  */)
7298      (prompt)
7299      Lisp_Object prompt;
7300 {
7301   Lisp_Object val;
7302   do
7303     {
7304       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7305                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
7306     }
7307   while (SCHARS (val) == 0);
7308   return (Fintern (val, Qnil));
7309 }
7310
7311 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
7312        doc: /* Read a coding system from the minibuffer, prompting with string PROMPT.
7313 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.
7314 Ignores case when completing coding systems (all Emacs coding systems
7315 are lower-case).  */)
7316      (prompt, default_coding_system)
7317      Lisp_Object prompt, default_coding_system;
7318 {
7319   Lisp_Object val;
7320   int count = SPECPDL_INDEX ();
7321
7322   if (SYMBOLP (default_coding_system))
7323     default_coding_system = SYMBOL_NAME (default_coding_system);
7324   specbind (Qcompletion_ignore_case, Qt);
7325   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
7326                           Qt, Qnil, Qcoding_system_history,
7327                           default_coding_system, Qnil);
7328   unbind_to (count, Qnil);
7329   return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil));
7330 }
7331
7332 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
7333        1, 1, 0,
7334        doc: /* Check validity of CODING-SYSTEM.
7335 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.
7336 It is valid if it is nil or a symbol defined as a coding system by the
7337 function `define-coding-system'.  */)
7338   (coding_system)
7339      Lisp_Object coding_system;
7340 {
7341   Lisp_Object define_form;
7342
7343   define_form = Fget (coding_system, Qcoding_system_define_form);
7344   if (! NILP (define_form))
7345     {
7346       Fput (coding_system, Qcoding_system_define_form, Qnil);
7347       safe_eval (define_form);
7348     }
7349   if (!NILP (Fcoding_system_p (coding_system)))
7350     return coding_system;
7351   xsignal1 (Qcoding_system_error, coding_system);
7352 }
7353
7354 \f
7355 /* Detect how the bytes at SRC of length SRC_BYTES are encoded.  If
7356    HIGHEST is nonzero, return the coding system of the highest
7357    priority among the detected coding systems.  Otherwize return a
7358    list of detected coding systems sorted by their priorities.  If
7359    MULTIBYTEP is nonzero, it is assumed that the bytes are in correct
7360    multibyte form but contains only ASCII and eight-bit chars.
7361    Otherwise, the bytes are raw bytes.
7362
7363    CODING-SYSTEM controls the detection as below:
7364
7365    If it is nil, detect both text-format and eol-format.  If the
7366    text-format part of CODING-SYSTEM is already specified
7367    (e.g. `iso-latin-1'), detect only eol-format.  If the eol-format
7368    part of CODING-SYSTEM is already specified (e.g. `undecided-unix'),
7369    detect only text-format.  */
7370
7371 Lisp_Object
7372 detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
7373                       coding_system)
7374      const unsigned char *src;
7375      EMACS_INT src_chars, src_bytes;
7376      int highest;
7377      int multibytep;
7378      Lisp_Object coding_system;
7379 {
7380   const unsigned char *src_end = src + src_bytes;
7381   Lisp_Object attrs, eol_type;
7382   Lisp_Object val;
7383   struct coding_system coding;
7384   int id;
7385   struct coding_detection_info detect_info;
7386   enum coding_category base_category;
7387
7388   if (NILP (coding_system))
7389     coding_system = Qundecided;
7390   setup_coding_system (coding_system, &coding);
7391   attrs = CODING_ID_ATTRS (coding.id);
7392   eol_type = CODING_ID_EOL_TYPE (coding.id);
7393   coding_system = CODING_ATTR_BASE_NAME (attrs);
7394
7395   coding.source = src;
7396   coding.src_chars = src_chars;
7397   coding.src_bytes = src_bytes;
7398   coding.src_multibyte = multibytep;
7399   coding.consumed = 0;
7400   coding.mode |= CODING_MODE_LAST_BLOCK;
7401
7402   detect_info.checked = detect_info.found = detect_info.rejected = 0;
7403
7404   /* At first, detect text-format if necessary.  */
7405   base_category = XINT (CODING_ATTR_CATEGORY (attrs));
7406   if (base_category == coding_category_undecided)
7407     {
7408       enum coding_category category;
7409       struct coding_system *this;
7410       int c, i;
7411
7412       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
7413       for (i = 0; src < src_end; i++, src++)
7414         {
7415           c = *src;
7416           if (c & 0x80)
7417             break;
7418           if (c < 0x20
7419               && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
7420               && ! inhibit_iso_escape_detection)
7421             {
7422               coding.head_ascii = src - coding.source;
7423               if (detect_coding_iso_2022 (&coding, &detect_info))
7424                 {
7425                   /* We have scanned the whole data.  */
7426                   if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
7427                     /* We didn't find an 8-bit code.  */
7428                     src = src_end;
7429                   break;
7430                 }
7431             }
7432         }
7433       coding.head_ascii = src - coding.source;
7434
7435       if (src < src_end
7436           || detect_info.found)
7437         {
7438           if (src == src_end)
7439             /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
7440             for (i = 0; i < coding_category_raw_text; i++)
7441               {
7442                 category = coding_priorities[i];
7443                 this = coding_categories + category;
7444                 if (detect_info.found & (1 << category))
7445                   break;
7446               }
7447           else
7448             for (i = 0; i < coding_category_raw_text; i++)
7449               {
7450                 category = coding_priorities[i];
7451                 this = coding_categories + category;
7452
7453                 if (this->id < 0)
7454                   {
7455                     /* No coding system of this category is defined.  */
7456                     detect_info.rejected |= (1 << category);
7457                   }
7458                 else if (category >= coding_category_raw_text)
7459                   continue;
7460                 else if (detect_info.checked & (1 << category))
7461                   {
7462                     if (highest
7463                         && (detect_info.found & (1 << category)))
7464                       break;
7465                   }
7466                 else
7467                   {
7468                     if ((*(this->detector)) (&coding, &detect_info)
7469                         && highest
7470                         && (detect_info.found & (1 << category)))
7471                       {
7472                         if (category == coding_category_utf_16_auto)
7473                           {
7474                             if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7475                               category = coding_category_utf_16_le;
7476                             else
7477                               category = coding_category_utf_16_be;
7478                           }
7479                         break;
7480                       }
7481                   }
7482               }
7483         }
7484
7485       if (detect_info.rejected == CATEGORY_MASK_ANY)
7486         {
7487           detect_info.found = CATEGORY_MASK_RAW_TEXT;
7488           id = coding_categories[coding_category_raw_text].id;
7489           val = Fcons (make_number (id), Qnil);
7490         }
7491       else if (! detect_info.rejected && ! detect_info.found)
7492         {
7493           detect_info.found = CATEGORY_MASK_ANY;
7494           id = coding_categories[coding_category_undecided].id;
7495           val = Fcons (make_number (id), Qnil);
7496         }
7497       else if (highest)
7498         {
7499           if (detect_info.found)
7500             {
7501               detect_info.found = 1 << category;
7502               val = Fcons (make_number (this->id), Qnil);
7503             }
7504           else
7505             for (i = 0; i < coding_category_raw_text; i++)
7506               if (! (detect_info.rejected & (1 << coding_priorities[i])))
7507                 {
7508                   detect_info.found = 1 << coding_priorities[i];
7509                   id = coding_categories[coding_priorities[i]].id;
7510                   val = Fcons (make_number (id), Qnil);
7511                   break;
7512                 }
7513         }
7514       else
7515         {
7516           int mask = detect_info.rejected | detect_info.found;
7517           int found = 0;
7518           val = Qnil;
7519
7520           for (i = coding_category_raw_text - 1; i >= 0; i--)
7521             {
7522               category = coding_priorities[i];
7523               if (! (mask & (1 << category)))
7524                 {
7525                   found |= 1 << category;
7526                   id = coding_categories[category].id;
7527                   if (id >= 0)
7528                     val = Fcons (make_number (id), val);
7529                 }
7530             }
7531           for (i = coding_category_raw_text - 1; i >= 0; i--)
7532             {
7533               category = coding_priorities[i];
7534               if (detect_info.found & (1 << category))
7535                 {
7536                   id = coding_categories[category].id;
7537                   val = Fcons (make_number (id), val);
7538                 }
7539             }
7540           detect_info.found |= found;
7541         }
7542     }
7543   else if (base_category == coding_category_utf_16_auto)
7544     {
7545       if (detect_coding_utf_16 (&coding, &detect_info))
7546         {
7547           struct coding_system *this;
7548
7549           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
7550             this = coding_categories + coding_category_utf_16_le;
7551           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
7552             this = coding_categories + coding_category_utf_16_be;
7553           else if (detect_info.rejected & CATEGORY_MASK_UTF_16_LE_NOSIG)
7554             this = coding_categories + coding_category_utf_16_be_nosig;
7555           else
7556             this = coding_categories + coding_category_utf_16_le_nosig;
7557           val = Fcons (make_number (this->id), Qnil);
7558         }
7559     }
7560   else
7561     {
7562       detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
7563       val = Fcons (make_number (coding.id), Qnil);
7564     }
7565
7566   /* Then, detect eol-format if necessary.  */
7567   {
7568     int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
7569     Lisp_Object tail;
7570
7571     if (VECTORP (eol_type))
7572       {
7573         if (detect_info.found & ~CATEGORY_MASK_UTF_16)
7574           normal_eol = detect_eol (coding.source, src_bytes,
7575                                    coding_category_raw_text);
7576         if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
7577                                  | CATEGORY_MASK_UTF_16_BE_NOSIG))
7578           utf_16_be_eol = detect_eol (coding.source, src_bytes,
7579                                       coding_category_utf_16_be);
7580         if (detect_info.found & (CATEGORY_MASK_UTF_16_LE
7581                                  | CATEGORY_MASK_UTF_16_LE_NOSIG))
7582           utf_16_le_eol = detect_eol (coding.source, src_bytes,
7583                                       coding_category_utf_16_le);
7584       }
7585     else
7586       {
7587         if (EQ (eol_type, Qunix))
7588           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF;
7589         else if (EQ (eol_type, Qdos))
7590           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF;
7591         else
7592           normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR;
7593       }
7594
7595     for (tail = val; CONSP (tail); tail = XCDR (tail))
7596       {
7597         enum coding_category category;
7598         int this_eol;
7599
7600         id = XINT (XCAR (tail));
7601         attrs = CODING_ID_ATTRS (id);
7602         category = XINT (CODING_ATTR_CATEGORY (attrs));
7603         eol_type = CODING_ID_EOL_TYPE (id);
7604         if (VECTORP (eol_type))
7605           {
7606             if (category == coding_category_utf_16_be
7607                 || category == coding_category_utf_16_be_nosig)
7608               this_eol = utf_16_be_eol;
7609             else if (category == coding_category_utf_16_le
7610                      || category == coding_category_utf_16_le_nosig)
7611               this_eol = utf_16_le_eol;
7612             else
7613               this_eol = normal_eol;
7614
7615             if (this_eol == EOL_SEEN_LF)
7616               XSETCAR (tail, AREF (eol_type, 0));
7617             else if (this_eol == EOL_SEEN_CRLF)
7618               XSETCAR (tail, AREF (eol_type, 1));
7619             else if (this_eol == EOL_SEEN_CR)
7620               XSETCAR (tail, AREF (eol_type, 2));
7621             else
7622               XSETCAR (tail, CODING_ID_NAME (id));
7623           }
7624         else
7625           XSETCAR (tail, CODING_ID_NAME (id));
7626       }
7627   }
7628
7629   return (highest ? XCAR (val) : val);
7630 }
7631
7632
7633 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
7634        2, 3, 0,
7635        doc: /* Detect coding system of the text in the region between START and END.
7636 Return a list of possible coding systems ordered by priority.
7637
7638 If only ASCII characters are found (except for such ISO-2022 control
7639 characters ISO-2022 as ESC), it returns a list of single element
7640 `undecided' or its subsidiary coding system according to a detected
7641 end-of-line format.
7642
7643 If optional argument HIGHEST is non-nil, return the coding system of
7644 highest priority.  */)
7645      (start, end, highest)
7646      Lisp_Object start, end, highest;
7647 {
7648   int from, to;
7649   int from_byte, to_byte;
7650
7651   CHECK_NUMBER_COERCE_MARKER (start);
7652   CHECK_NUMBER_COERCE_MARKER (end);
7653
7654   validate_region (&start, &end);
7655   from = XINT (start), to = XINT (end);
7656   from_byte = CHAR_TO_BYTE (from);
7657   to_byte = CHAR_TO_BYTE (to);
7658
7659   if (from < GPT && to >= GPT)
7660     move_gap_both (to, to_byte);
7661
7662   return detect_coding_system (BYTE_POS_ADDR (from_byte),
7663                                to - from, to_byte - from_byte,
7664                                !NILP (highest),
7665                                !NILP (current_buffer
7666                                       ->enable_multibyte_characters),
7667                                Qnil);
7668 }
7669
7670 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
7671        1, 2, 0,
7672        doc: /* Detect coding system of the text in STRING.
7673 Return a list of possible coding systems ordered by priority.
7674
7675 If only ASCII characters are found (except for such ISO-2022 control
7676 characters ISO-2022 as ESC), it returns a list of single element
7677 `undecided' or its subsidiary coding system according to a detected
7678 end-of-line format.
7679
7680 If optional argument HIGHEST is non-nil, return the coding system of
7681 highest priority.  */)
7682      (string, highest)
7683      Lisp_Object string, highest;
7684 {
7685   CHECK_STRING (string);
7686
7687   return detect_coding_system (SDATA (string),
7688                                SCHARS (string), SBYTES (string),
7689                                !NILP (highest), STRING_MULTIBYTE (string),
7690                                Qnil);
7691 }
7692
7693
7694 static INLINE int
7695 char_encodable_p (c, attrs)
7696      int c;
7697      Lisp_Object attrs;
7698 {
7699   Lisp_Object tail;
7700   struct charset *charset;
7701   Lisp_Object translation_table;
7702
7703   translation_table = CODING_ATTR_TRANS_TBL (attrs);
7704   if (! NILP (translation_table))
7705     c = translate_char (translation_table, c);
7706   for (tail = CODING_ATTR_CHARSET_LIST (attrs);
7707        CONSP (tail); tail = XCDR (tail))
7708     {
7709       charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7710       if (CHAR_CHARSET_P (c, charset))
7711         break;
7712     }
7713   return (! NILP (tail));
7714 }
7715
7716
7717 /* Return a list of coding systems that safely encode the text between
7718    START and END.  If EXCLUDE is non-nil, it is a list of coding
7719    systems not to check.  The returned list doesn't contain any such
7720    coding systems.  In any case, if the text contains only ASCII or is
7721    unibyte, return t.  */
7722
7723 DEFUN ("find-coding-systems-region-internal",
7724        Ffind_coding_systems_region_internal,
7725        Sfind_coding_systems_region_internal, 2, 3, 0,
7726        doc: /* Internal use only.  */)
7727      (start, end, exclude)
7728      Lisp_Object start, end, exclude;
7729 {
7730   Lisp_Object coding_attrs_list, safe_codings;
7731   EMACS_INT start_byte, end_byte;
7732   const unsigned char *p, *pbeg, *pend;
7733   int c;
7734   Lisp_Object tail, elt;
7735
7736   if (STRINGP (start))
7737     {
7738       if (!STRING_MULTIBYTE (start)
7739           || SCHARS (start) == SBYTES (start))
7740         return Qt;
7741       start_byte = 0;
7742       end_byte = SBYTES (start);
7743     }
7744   else
7745     {
7746       CHECK_NUMBER_COERCE_MARKER (start);
7747       CHECK_NUMBER_COERCE_MARKER (end);
7748       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7749         args_out_of_range (start, end);
7750       if (NILP (current_buffer->enable_multibyte_characters))
7751         return Qt;
7752       start_byte = CHAR_TO_BYTE (XINT (start));
7753       end_byte = CHAR_TO_BYTE (XINT (end));
7754       if (XINT (end) - XINT (start) == end_byte - start_byte)
7755         return Qt;
7756
7757       if (XINT (start) < GPT && XINT (end) > GPT)
7758         {
7759           if ((GPT - XINT (start)) < (XINT (end) - GPT))
7760             move_gap_both (XINT (start), start_byte);
7761           else
7762             move_gap_both (XINT (end), end_byte);
7763         }
7764     }
7765
7766   coding_attrs_list = Qnil;
7767   for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail))
7768     if (NILP (exclude)
7769         || NILP (Fmemq (XCAR (tail), exclude)))
7770       {
7771         Lisp_Object attrs;
7772
7773         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
7774         if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
7775             && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7776           {
7777             ASET (attrs, coding_attr_trans_tbl,
7778                   get_translation_table (attrs, 1, NULL));
7779             coding_attrs_list = Fcons (attrs, coding_attrs_list);
7780           }
7781       }
7782
7783   if (STRINGP (start))
7784     p = pbeg = SDATA (start);
7785   else
7786     p = pbeg = BYTE_POS_ADDR (start_byte);
7787   pend = p + (end_byte - start_byte);
7788
7789   while (p < pend && ASCII_BYTE_P (*p)) p++;
7790   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
7791
7792   while (p < pend)
7793     {
7794       if (ASCII_BYTE_P (*p))
7795         p++;
7796       else
7797         {
7798           c = STRING_CHAR_ADVANCE (p);
7799
7800           charset_map_loaded = 0;
7801           for (tail = coding_attrs_list; CONSP (tail);)
7802             {
7803               elt = XCAR (tail);
7804               if (NILP (elt))
7805                 tail = XCDR (tail);
7806               else if (char_encodable_p (c, elt))
7807                 tail = XCDR (tail);
7808               else if (CONSP (XCDR (tail)))
7809                 {
7810                   XSETCAR (tail, XCAR (XCDR (tail)));
7811                   XSETCDR (tail, XCDR (XCDR (tail)));
7812                 }
7813               else
7814                 {
7815                   XSETCAR (tail, Qnil);
7816                   tail = XCDR (tail);
7817                 }
7818             }
7819           if (charset_map_loaded)
7820             {
7821               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
7822
7823               if (STRINGP (start))
7824                 pbeg = SDATA (start);
7825               else
7826                 pbeg = BYTE_POS_ADDR (start_byte);
7827               p = pbeg + p_offset;
7828               pend = pbeg + pend_offset;
7829             }
7830         }
7831     }
7832
7833   safe_codings = list2 (Qraw_text, Qno_conversion);
7834   for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail))
7835     if (! NILP (XCAR (tail)))
7836       safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings);
7837
7838   return safe_codings;
7839 }
7840
7841
7842 DEFUN ("unencodable-char-position", Funencodable_char_position,
7843        Sunencodable_char_position, 3, 5, 0,
7844        doc: /*
7845 Return position of first un-encodable character in a region.
7846 START and END specfiy the region and CODING-SYSTEM specifies the
7847 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
7848
7849 If optional 4th argument COUNT is non-nil, it specifies at most how
7850 many un-encodable characters to search.  In this case, the value is a
7851 list of positions.
7852
7853 If optional 5th argument STRING is non-nil, it is a string to search
7854 for un-encodable characters.  In that case, START and END are indexes
7855 to the string.  */)
7856      (start, end, coding_system, count, string)
7857      Lisp_Object start, end, coding_system, count, string;
7858 {
7859   int n;
7860   struct coding_system coding;
7861   Lisp_Object attrs, charset_list, translation_table;
7862   Lisp_Object positions;
7863   int from, to;
7864   const unsigned char *p, *stop, *pend;
7865   int ascii_compatible;
7866
7867   setup_coding_system (Fcheck_coding_system (coding_system), &coding);
7868   attrs = CODING_ID_ATTRS (coding.id);
7869   if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
7870     return Qnil;
7871   ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
7872   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7873   translation_table = get_translation_table (attrs, 1, NULL);
7874
7875   if (NILP (string))
7876     {
7877       validate_region (&start, &end);
7878       from = XINT (start);
7879       to = XINT (end);
7880       if (NILP (current_buffer->enable_multibyte_characters)
7881           || (ascii_compatible
7882               && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from)))))
7883         return Qnil;
7884       p = CHAR_POS_ADDR (from);
7885       pend = CHAR_POS_ADDR (to);
7886       if (from < GPT && to >= GPT)
7887         stop = GPT_ADDR;
7888       else
7889         stop = pend;
7890     }
7891   else
7892     {
7893       CHECK_STRING (string);
7894       CHECK_NATNUM (start);
7895       CHECK_NATNUM (end);
7896       from = XINT (start);
7897       to = XINT (end);
7898       if (from > to
7899           || to > SCHARS (string))
7900         args_out_of_range_3 (string, start, end);
7901       if (! STRING_MULTIBYTE (string))
7902         return Qnil;
7903       p = SDATA (string) + string_char_to_byte (string, from);
7904       stop = pend = SDATA (string) + string_char_to_byte (string, to);
7905       if (ascii_compatible && (to - from) == (pend - p))
7906         return Qnil;
7907     }
7908
7909   if (NILP (count))
7910     n = 1;
7911   else
7912     {
7913       CHECK_NATNUM (count);
7914       n = XINT (count);
7915     }
7916
7917   positions = Qnil;
7918   while (1)
7919     {
7920       int c;
7921
7922       if (ascii_compatible)
7923         while (p < stop && ASCII_BYTE_P (*p))
7924           p++, from++;
7925       if (p >= stop)
7926         {
7927           if (p >= pend)
7928             break;
7929           stop = pend;
7930           p = GAP_END_ADDR;
7931         }
7932
7933       c = STRING_CHAR_ADVANCE (p);
7934       if (! (ASCII_CHAR_P (c) && ascii_compatible)
7935           && ! char_charset (translate_char (translation_table, c),
7936                              charset_list, NULL))
7937         {
7938           positions = Fcons (make_number (from), positions);
7939           n--;
7940           if (n == 0)
7941             break;
7942         }
7943
7944       from++;
7945     }
7946
7947   return (NILP (count) ? Fcar (positions) : Fnreverse (positions));
7948 }
7949
7950
7951 DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region,
7952        Scheck_coding_systems_region, 3, 3, 0,
7953        doc: /* Check if the region is encodable by coding systems.
7954
7955 START and END are buffer positions specifying the region.
7956 CODING-SYSTEM-LIST is a list of coding systems to check.
7957
7958 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
7959 CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
7960 whole region, POS0, POS1, ... are buffer positions where non-encodable
7961 characters are found.
7962
7963 If all coding systems in CODING-SYSTEM-LIST can encode the region, the
7964 value is nil.
7965
7966 START may be a string.  In that case, check if the string is
7967 encodable, and the value contains indices to the string instead of
7968 buffer positions.  END is ignored.  */)
7969      (start, end, coding_system_list)
7970      Lisp_Object start, end, coding_system_list;
7971 {
7972   Lisp_Object list;
7973   EMACS_INT start_byte, end_byte;
7974   int pos;
7975   const unsigned char *p, *pbeg, *pend;
7976   int c;
7977   Lisp_Object tail, elt, attrs;
7978
7979   if (STRINGP (start))
7980     {
7981       if (!STRING_MULTIBYTE (start)
7982           && SCHARS (start) != SBYTES (start))
7983         return Qnil;
7984       start_byte = 0;
7985       end_byte = SBYTES (start);
7986       pos = 0;
7987     }
7988   else
7989     {
7990       CHECK_NUMBER_COERCE_MARKER (start);
7991       CHECK_NUMBER_COERCE_MARKER (end);
7992       if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
7993         args_out_of_range (start, end);
7994       if (NILP (current_buffer->enable_multibyte_characters))
7995         return Qnil;
7996       start_byte = CHAR_TO_BYTE (XINT (start));
7997       end_byte = CHAR_TO_BYTE (XINT (end));
7998       if (XINT (end) - XINT (start) == end_byte - start_byte)
7999         return Qt;
8000
8001       if (XINT (start) < GPT && XINT (end) > GPT)
8002         {
8003           if ((GPT - XINT (start)) < (XINT (end) - GPT))
8004             move_gap_both (XINT (start), start_byte);
8005           else
8006             move_gap_both (XINT (end), end_byte);
8007         }
8008       pos = XINT (start);
8009     }
8010
8011   list = Qnil;
8012   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
8013     {
8014       elt = XCAR (tail);
8015       attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
8016       ASET (attrs, coding_attr_trans_tbl,
8017             get_translation_table (attrs, 1, NULL));
8018       list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
8019     }
8020
8021   if (STRINGP (start))
8022     p = pbeg = SDATA (start);
8023   else
8024     p = pbeg = BYTE_POS_ADDR (start_byte);
8025   pend = p + (end_byte - start_byte);
8026
8027   while (p < pend && ASCII_BYTE_P (*p)) p++, pos++;
8028   while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--;
8029
8030   while (p < pend)
8031     {
8032       if (ASCII_BYTE_P (*p))
8033         p++;
8034       else
8035         {
8036           c = STRING_CHAR_ADVANCE (p);
8037
8038           charset_map_loaded = 0;
8039           for (tail = list; CONSP (tail); tail = XCDR (tail))
8040             {
8041               elt = XCDR (XCAR (tail));
8042               if (! char_encodable_p (c, XCAR (elt)))
8043                 XSETCDR (elt, Fcons (make_number (pos), XCDR (elt)));
8044             }
8045           if (charset_map_loaded)
8046             {
8047               EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg;
8048
8049               if (STRINGP (start))
8050                 pbeg = SDATA (start);
8051               else
8052                 pbeg = BYTE_POS_ADDR (start_byte);
8053               p = pbeg + p_offset;
8054               pend = pbeg + pend_offset;
8055             }
8056         }
8057       pos++;
8058     }
8059
8060   tail = list;
8061   list = Qnil;
8062   for (; CONSP (tail); tail = XCDR (tail))
8063     {
8064       elt = XCAR (tail);
8065       if (CONSP (XCDR (XCDR (elt))))
8066         list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))),
8067                       list);
8068     }
8069
8070   return list;
8071 }
8072
8073
8074 Lisp_Object
8075 code_convert_region (start, end, coding_system, dst_object, encodep, norecord)
8076      Lisp_Object start, end, coding_system, dst_object;
8077      int encodep, norecord;
8078 {
8079   struct coding_system coding;
8080   EMACS_INT from, from_byte, to, to_byte;
8081   Lisp_Object src_object;
8082
8083   CHECK_NUMBER_COERCE_MARKER (start);
8084   CHECK_NUMBER_COERCE_MARKER (end);
8085   if (NILP (coding_system))
8086     coding_system = Qno_conversion;
8087   else
8088     CHECK_CODING_SYSTEM (coding_system);
8089   src_object = Fcurrent_buffer ();
8090   if (NILP (dst_object))
8091     dst_object = src_object;
8092   else if (! EQ (dst_object, Qt))
8093     CHECK_BUFFER (dst_object);
8094
8095   validate_region (&start, &end);
8096   from = XFASTINT (start);
8097   from_byte = CHAR_TO_BYTE (from);
8098   to = XFASTINT (end);
8099   to_byte = CHAR_TO_BYTE (to);
8100
8101   setup_coding_system (coding_system, &coding);
8102   coding.mode |= CODING_MODE_LAST_BLOCK;
8103
8104   if (encodep)
8105     encode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8106                           dst_object);
8107   else
8108     decode_coding_object (&coding, src_object, from, from_byte, to, to_byte,
8109                           dst_object);
8110   if (! norecord)
8111     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8112
8113   return (BUFFERP (dst_object)
8114           ? make_number (coding.produced_char)
8115           : coding.dst_object);
8116 }
8117
8118
8119 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
8120        3, 4, "r\nzCoding system: ",
8121        doc: /* Decode the current region from the specified coding system.
8122 When called from a program, takes four arguments:
8123         START, END, CODING-SYSTEM, and DESTINATION.
8124 START and END are buffer positions.
8125
8126 Optional 4th arguments DESTINATION specifies where the decoded text goes.
8127 If nil, the region between START and END is replaced by the decoded text.
8128 If buffer, the decoded text is inserted in the buffer.
8129 If t, the decoded text is returned.
8130
8131 This function sets `last-coding-system-used' to the precise coding system
8132 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8133 not fully specified.)
8134 It returns the length of the decoded text.  */)
8135      (start, end, coding_system, destination)
8136      Lisp_Object start, end, coding_system, destination;
8137 {
8138   return code_convert_region (start, end, coding_system, destination, 0, 0);
8139 }
8140
8141 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
8142        3, 4, "r\nzCoding system: ",
8143        doc: /* Encode the current region by specified coding system.
8144 When called from a program, takes three arguments:
8145 START, END, and CODING-SYSTEM.  START and END are buffer positions.
8146
8147 Optional 4th arguments DESTINATION specifies where the encoded text goes.
8148 If nil, the region between START and END is replace by the encoded text.
8149 If buffer, the encoded text is inserted in the buffer.
8150 If t, the encoded text is returned.
8151
8152 This function sets `last-coding-system-used' to the precise coding system
8153 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8154 not fully specified.)
8155 It returns the length of the encoded text.  */)
8156   (start, end, coding_system, destination)
8157      Lisp_Object start, end, coding_system, destination;
8158 {
8159   return code_convert_region (start, end, coding_system, destination, 1, 0);
8160 }
8161
8162 Lisp_Object
8163 code_convert_string (string, coding_system, dst_object,
8164                      encodep, nocopy, norecord)
8165      Lisp_Object string, coding_system, dst_object;
8166      int encodep, nocopy, norecord;
8167 {
8168   struct coding_system coding;
8169   EMACS_INT chars, bytes;
8170
8171   CHECK_STRING (string);
8172   if (NILP (coding_system))
8173     {
8174       if (! norecord)
8175         Vlast_coding_system_used = Qno_conversion;
8176       if (NILP (dst_object))
8177         return (nocopy ? Fcopy_sequence (string) : string);
8178     }
8179
8180   if (NILP (coding_system))
8181     coding_system = Qno_conversion;
8182   else
8183     CHECK_CODING_SYSTEM (coding_system);
8184   if (NILP (dst_object))
8185     dst_object = Qt;
8186   else if (! EQ (dst_object, Qt))
8187     CHECK_BUFFER (dst_object);
8188
8189   setup_coding_system (coding_system, &coding);
8190   coding.mode |= CODING_MODE_LAST_BLOCK;
8191   chars = SCHARS (string);
8192   bytes = SBYTES (string);
8193   if (encodep)
8194     encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8195   else
8196     decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object);
8197   if (! norecord)
8198     Vlast_coding_system_used = CODING_ID_NAME (coding.id);
8199
8200   return (BUFFERP (dst_object)
8201           ? make_number (coding.produced_char)
8202           : coding.dst_object);
8203 }
8204
8205
8206 /* Encode or decode STRING according to CODING_SYSTEM.
8207    Do not set Vlast_coding_system_used.
8208
8209    This function is called only from macros DECODE_FILE and
8210    ENCODE_FILE, thus we ignore character composition.  */
8211
8212 Lisp_Object
8213 code_convert_string_norecord (string, coding_system, encodep)
8214      Lisp_Object string, coding_system;
8215      int encodep;
8216 {
8217   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
8218 }
8219
8220
8221 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
8222        2, 4, 0,
8223        doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result.
8224
8225 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
8226 if the decoding operation is trivial.
8227
8228 Optional fourth arg BUFFER non-nil meant that the decoded text is
8229 inserted in BUFFER instead of returned as a string.  In this case,
8230 the return value is BUFFER.
8231
8232 This function sets `last-coding-system-used' to the precise coding system
8233 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8234 not fully specified.  */)
8235   (string, coding_system, nocopy, buffer)
8236      Lisp_Object string, coding_system, nocopy, buffer;
8237 {
8238   return code_convert_string (string, coding_system, buffer,
8239                               0, ! NILP (nocopy), 0);
8240 }
8241
8242 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
8243        2, 4, 0,
8244        doc: /* Encode STRING to CODING-SYSTEM, and return the result.
8245
8246 Optional third arg NOCOPY non-nil means it is OK to return STRING
8247 itself if the encoding operation is trivial.
8248
8249 Optional fourth arg BUFFER non-nil meant that the encoded text is
8250 inserted in BUFFER instead of returned as a string.  In this case,
8251 the return value is BUFFER.
8252
8253 This function sets `last-coding-system-used' to the precise coding system
8254 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
8255 not fully specified.)  */)
8256      (string, coding_system, nocopy, buffer)
8257      Lisp_Object string, coding_system, nocopy, buffer;
8258 {
8259   return code_convert_string (string, coding_system, buffer,
8260                               1, ! NILP (nocopy), 1);
8261 }
8262
8263 \f
8264 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
8265        doc: /* Decode a Japanese character which has CODE in shift_jis encoding.
8266 Return the corresponding character.  */)
8267      (code)
8268      Lisp_Object code;
8269 {
8270   Lisp_Object spec, attrs, val;
8271   struct charset *charset_roman, *charset_kanji, *charset_kana, *charset;
8272   int c;
8273
8274   CHECK_NATNUM (code);
8275   c = XFASTINT (code);
8276   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8277   attrs = AREF (spec, 0);
8278
8279   if (ASCII_BYTE_P (c)
8280       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8281     return code;
8282
8283   val = CODING_ATTR_CHARSET_LIST (attrs);
8284   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8285   charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8286   charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val)));
8287
8288   if (c <= 0x7F)
8289     charset = charset_roman;
8290   else if (c >= 0xA0 && c < 0xDF)
8291     {
8292       charset = charset_kana;
8293       c -= 0x80;
8294     }
8295   else
8296     {
8297       int s1 = c >> 8, s2 = c & 0xFF;
8298
8299       if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF
8300           || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)
8301         error ("Invalid code: %d", code);
8302       SJIS_TO_JIS (c);
8303       charset = charset_kanji;
8304     }
8305   c = DECODE_CHAR (charset, c);
8306   if (c < 0)
8307     error ("Invalid code: %d", code);
8308   return make_number (c);
8309 }
8310
8311
8312 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
8313        doc: /* Encode a Japanese character CH to shift_jis encoding.
8314 Return the corresponding code in SJIS.  */)
8315      (ch)
8316     Lisp_Object ch;
8317 {
8318   Lisp_Object spec, attrs, charset_list;
8319   int c;
8320   struct charset *charset;
8321   unsigned code;
8322
8323   CHECK_CHARACTER (ch);
8324   c = XFASTINT (ch);
8325   CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec);
8326   attrs = AREF (spec, 0);
8327
8328   if (ASCII_CHAR_P (c)
8329       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8330     return ch;
8331
8332   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8333   charset = char_charset (c, charset_list, &code);
8334   if (code == CHARSET_INVALID_CODE (charset))
8335     error ("Can't encode by shift_jis encoding: %d", c);
8336   JIS_TO_SJIS (code);
8337
8338   return make_number (code);
8339 }
8340
8341 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
8342        doc: /* Decode a Big5 character which has CODE in BIG5 coding system.
8343 Return the corresponding character.  */)
8344      (code)
8345      Lisp_Object code;
8346 {
8347   Lisp_Object spec, attrs, val;
8348   struct charset *charset_roman, *charset_big5, *charset;
8349   int c;
8350
8351   CHECK_NATNUM (code);
8352   c = XFASTINT (code);
8353   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8354   attrs = AREF (spec, 0);
8355
8356   if (ASCII_BYTE_P (c)
8357       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8358     return code;
8359
8360   val = CODING_ATTR_CHARSET_LIST (attrs);
8361   charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val);
8362   charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val)));
8363
8364   if (c <= 0x7F)
8365     charset = charset_roman;
8366   else
8367     {
8368       int b1 = c >> 8, b2 = c & 0x7F;
8369       if (b1 < 0xA1 || b1 > 0xFE
8370           || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)
8371         error ("Invalid code: %d", code);
8372       charset = charset_big5;
8373     }
8374   c = DECODE_CHAR (charset, (unsigned )c);
8375   if (c < 0)
8376     error ("Invalid code: %d", code);
8377   return make_number (c);
8378 }
8379
8380 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
8381        doc: /* Encode the Big5 character CH to BIG5 coding system.
8382 Return the corresponding character code in Big5.  */)
8383      (ch)
8384      Lisp_Object ch;
8385 {
8386   Lisp_Object spec, attrs, charset_list;
8387   struct charset *charset;
8388   int c;
8389   unsigned code;
8390
8391   CHECK_CHARACTER (ch);
8392   c = XFASTINT (ch);
8393   CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec);
8394   attrs = AREF (spec, 0);
8395   if (ASCII_CHAR_P (c)
8396       && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
8397     return ch;
8398
8399   charset_list = CODING_ATTR_CHARSET_LIST (attrs);
8400   charset = char_charset (c, charset_list, &code);
8401   if (code == CHARSET_INVALID_CODE (charset))
8402     error ("Can't encode by Big5 encoding: %d", c);
8403
8404   return make_number (code);
8405 }
8406
8407 \f
8408 DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal,
8409        Sset_terminal_coding_system_internal, 1, 2, 0,
8410        doc: /* Internal use only.  */)
8411      (coding_system, terminal)
8412      Lisp_Object coding_system;
8413      Lisp_Object terminal;
8414 {
8415   struct coding_system *terminal_coding = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
8416   CHECK_SYMBOL (coding_system);
8417   setup_coding_system (Fcheck_coding_system (coding_system), terminal_coding);
8418   /* We had better not send unsafe characters to terminal.  */
8419   terminal_coding->mode |= CODING_MODE_SAFE_ENCODING;
8420   /* Characer composition should be disabled.  */
8421   terminal_coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8422   terminal_coding->src_multibyte = 1;
8423   terminal_coding->dst_multibyte = 0;
8424   return Qnil;
8425 }
8426
8427 DEFUN ("set-safe-terminal-coding-system-internal",
8428        Fset_safe_terminal_coding_system_internal,
8429        Sset_safe_terminal_coding_system_internal, 1, 1, 0,
8430        doc: /* Internal use only.  */)
8431      (coding_system)
8432      Lisp_Object coding_system;
8433 {
8434   CHECK_SYMBOL (coding_system);
8435   setup_coding_system (Fcheck_coding_system (coding_system),
8436                        &safe_terminal_coding);
8437   /* Characer composition should be disabled.  */
8438   safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8439   safe_terminal_coding.src_multibyte = 1;
8440   safe_terminal_coding.dst_multibyte = 0;
8441   return Qnil;
8442 }
8443
8444 DEFUN ("terminal-coding-system", Fterminal_coding_system,
8445        Sterminal_coding_system, 0, 1, 0,
8446        doc: /* Return coding system specified for terminal output on the given terminal.
8447 TERMINAL may be a terminal id, a frame, or nil for the selected
8448 frame's terminal device.  */)
8449      (terminal)
8450      Lisp_Object terminal;
8451 {
8452   struct coding_system *terminal_coding
8453     = TERMINAL_TERMINAL_CODING (get_terminal (terminal, 1));
8454   Lisp_Object coding_system = CODING_ID_NAME (terminal_coding->id);
8455
8456   /* For backward compatibility, return nil if it is `undecided'. */
8457   return (! EQ (coding_system, Qundecided) ? coding_system : Qnil);
8458 }
8459
8460 DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal,
8461        Sset_keyboard_coding_system_internal, 1, 2, 0,
8462        doc: /* Internal use only.  */)
8463      (coding_system, terminal)
8464      Lisp_Object coding_system;
8465      Lisp_Object terminal;
8466 {
8467   struct terminal *t = get_terminal (terminal, 1);
8468   CHECK_SYMBOL (coding_system);
8469   setup_coding_system (Fcheck_coding_system (coding_system),
8470                        TERMINAL_KEYBOARD_CODING (t));
8471   /* Characer composition should be disabled.  */
8472   TERMINAL_KEYBOARD_CODING (t)->common_flags
8473     &= ~CODING_ANNOTATE_COMPOSITION_MASK;
8474   return Qnil;
8475 }
8476
8477 DEFUN ("keyboard-coding-system",
8478        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 1, 0,
8479        doc: /* Return coding system specified for decoding keyboard input.  */)
8480      (terminal)
8481      Lisp_Object terminal;
8482 {
8483   return CODING_ID_NAME (TERMINAL_KEYBOARD_CODING
8484                          (get_terminal (terminal, 1))->id);
8485 }
8486
8487 \f
8488 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
8489        Sfind_operation_coding_system,  1, MANY, 0,
8490        doc: /* Choose a coding system for an operation based on the target name.
8491 The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM).
8492 DECODING-SYSTEM is the coding system to use for decoding
8493 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system
8494 for encoding (in case OPERATION does encoding).
8495
8496 The first argument OPERATION specifies an I/O primitive:
8497   For file I/O, `insert-file-contents' or `write-region'.
8498   For process I/O, `call-process', `call-process-region', or `start-process'.
8499   For network I/O, `open-network-stream'.
8500
8501 The remaining arguments should be the same arguments that were passed
8502 to the primitive.  Depending on which primitive, one of those arguments
8503 is selected as the TARGET.  For example, if OPERATION does file I/O,
8504 whichever argument specifies the file name is TARGET.
8505
8506 TARGET has a meaning which depends on OPERATION:
8507   For file I/O, TARGET is a file name (except for the special case below).
8508   For process I/O, TARGET is a process name.
8509   For network I/O, TARGET is a service name or a port number
8510
8511 This function looks up what specified for TARGET in,
8512 `file-coding-system-alist', `process-coding-system-alist',
8513 or `network-coding-system-alist' depending on OPERATION.
8514 They may specify a coding system, a cons of coding systems,
8515 or a function symbol to call.
8516 In the last case, we call the function with one argument,
8517 which is a list of all the arguments given to this function.
8518 If the function can't decide a coding system, it can return
8519 `undecided' so that the normal code-detection is performed.
8520
8521 If OPERATION is `insert-file-contents', the argument corresponding to
8522 TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
8523 file name to look up, and BUFFER is a buffer that contains the file's
8524 contents (not yet decoded).  If `file-coding-system-alist' specifies a
8525 function to call for FILENAME, that function should examine the
8526 contents of BUFFER instead of reading the file.
8527
8528 usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
8529      (nargs, args)
8530      int nargs;
8531      Lisp_Object *args;
8532 {
8533   Lisp_Object operation, target_idx, target, val;
8534   register Lisp_Object chain;
8535
8536   if (nargs < 2)
8537     error ("Too few arguments");
8538   operation = args[0];
8539   if (!SYMBOLP (operation)
8540       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
8541     error ("Invalid first arguement");
8542   if (nargs < 1 + XINT (target_idx))
8543     error ("Too few arguments for operation: %s",
8544            SDATA (SYMBOL_NAME (operation)));
8545   target = args[XINT (target_idx) + 1];
8546   if (!(STRINGP (target)
8547         || (EQ (operation, Qinsert_file_contents) && CONSP (target)
8548             && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
8549         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
8550     error ("Invalid %dth argument", XINT (target_idx) + 1);
8551   if (CONSP (target))
8552     target = XCAR (target);
8553
8554   chain = ((EQ (operation, Qinsert_file_contents)
8555             || EQ (operation, Qwrite_region))
8556            ? Vfile_coding_system_alist
8557            : (EQ (operation, Qopen_network_stream)
8558               ? Vnetwork_coding_system_alist
8559               : Vprocess_coding_system_alist));
8560   if (NILP (chain))
8561     return Qnil;
8562
8563   for (; CONSP (chain); chain = XCDR (chain))
8564     {
8565       Lisp_Object elt;
8566
8567       elt = XCAR (chain);
8568       if (CONSP (elt)
8569           && ((STRINGP (target)
8570                && STRINGP (XCAR (elt))
8571                && fast_string_match (XCAR (elt), target) >= 0)
8572               || (INTEGERP (target) && EQ (target, XCAR (elt)))))
8573         {
8574           val = XCDR (elt);
8575           /* Here, if VAL is both a valid coding system and a valid
8576              function symbol, we return VAL as a coding system.  */
8577           if (CONSP (val))
8578             return val;
8579           if (! SYMBOLP (val))
8580             return Qnil;
8581           if (! NILP (Fcoding_system_p (val)))
8582             return Fcons (val, val);
8583           if (! NILP (Ffboundp (val)))
8584             {
8585               /* We use call1 rather than safe_call1
8586                  so as to get bug reports about functions called here
8587                  which don't handle the current interface.  */
8588               val = call1 (val, Flist (nargs, args));
8589               if (CONSP (val))
8590                 return val;
8591               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
8592                 return Fcons (val, val);
8593             }
8594           return Qnil;
8595         }
8596     }
8597   return Qnil;
8598 }
8599
8600 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
8601        Sset_coding_system_priority, 0, MANY, 0,
8602        doc: /* Assign higher priority to the coding systems given as arguments.
8603 If multiple coding systems belongs to the same category,
8604 all but the first one are ignored.
8605
8606 usage: (set-coding-system-priority ...)  */)
8607      (nargs, args)
8608      int nargs;
8609      Lisp_Object *args;
8610 {
8611   int i, j;
8612   int changed[coding_category_max];
8613   enum coding_category priorities[coding_category_max];
8614
8615   bzero (changed, sizeof changed);
8616
8617   for (i = j = 0; i < nargs; i++)
8618     {
8619       enum coding_category category;
8620       Lisp_Object spec, attrs;
8621
8622       CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec);
8623       attrs = AREF (spec, 0);
8624       category = XINT (CODING_ATTR_CATEGORY (attrs));
8625       if (changed[category])
8626         /* Ignore this coding system because a coding system of the
8627            same category already had a higher priority.  */
8628         continue;
8629       changed[category] = 1;
8630       priorities[j++] = category;
8631       if (coding_categories[category].id >= 0
8632           && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id)))
8633         setup_coding_system (args[i], &coding_categories[category]);
8634       Fset (AREF (Vcoding_category_table, category), args[i]);
8635     }
8636
8637   /* Now we have decided top J priorities.  Reflect the order of the
8638      original priorities to the remaining priorities.  */
8639
8640   for (i = j, j = 0; i < coding_category_max; i++, j++)
8641     {
8642       while (j < coding_category_max
8643              && changed[coding_priorities[j]])
8644         j++;
8645       if (j == coding_category_max)
8646         abort ();
8647       priorities[i] = coding_priorities[j];
8648     }
8649
8650   bcopy (priorities, coding_priorities, sizeof priorities);
8651
8652   /* Update `coding-category-list'.  */
8653   Vcoding_category_list = Qnil;
8654   for (i = coding_category_max - 1; i >= 0; i--)
8655     Vcoding_category_list
8656       = Fcons (AREF (Vcoding_category_table, priorities[i]),
8657                Vcoding_category_list);
8658
8659   return Qnil;
8660 }
8661
8662 DEFUN ("coding-system-priority-list", Fcoding_system_priority_list,
8663        Scoding_system_priority_list, 0, 1, 0,
8664        doc: /* Return a list of coding systems ordered by their priorities.
8665 HIGHESTP non-nil means just return the highest priority one.  */)
8666      (highestp)
8667      Lisp_Object highestp;
8668 {
8669   int i;
8670   Lisp_Object val;
8671
8672   for (i = 0, val = Qnil; i < coding_category_max; i++)
8673     {
8674       enum coding_category category = coding_priorities[i];
8675       int id = coding_categories[category].id;
8676       Lisp_Object attrs;
8677
8678       if (id < 0)
8679         continue;
8680       attrs = CODING_ID_ATTRS (id);
8681       if (! NILP (highestp))
8682         return CODING_ATTR_BASE_NAME (attrs);
8683       val = Fcons (CODING_ATTR_BASE_NAME (attrs), val);
8684     }
8685   return Fnreverse (val);
8686 }
8687
8688 static char *suffixes[] = { "-unix", "-dos", "-mac" };
8689
8690 static Lisp_Object
8691 make_subsidiaries (base)
8692      Lisp_Object base;
8693 {
8694   Lisp_Object subsidiaries;
8695   int base_name_len = SBYTES (SYMBOL_NAME (base));
8696   char *buf = (char *) alloca (base_name_len + 6);
8697   int i;
8698
8699   bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len);
8700   subsidiaries = Fmake_vector (make_number (3), Qnil);
8701   for (i = 0; i < 3; i++)
8702     {
8703       bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1);
8704       ASET (subsidiaries, i, intern (buf));
8705     }
8706   return subsidiaries;
8707 }
8708
8709
8710 DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal,
8711        Sdefine_coding_system_internal, coding_arg_max, MANY, 0,
8712        doc: /* For internal use only.
8713 usage: (define-coding-system-internal ...)  */)
8714      (nargs, args)
8715      int nargs;
8716      Lisp_Object *args;
8717 {
8718   Lisp_Object name;
8719   Lisp_Object spec_vec;         /* [ ATTRS ALIASE EOL_TYPE ] */
8720   Lisp_Object attrs;            /* Vector of attributes.  */
8721   Lisp_Object eol_type;
8722   Lisp_Object aliases;
8723   Lisp_Object coding_type, charset_list, safe_charsets;
8724   enum coding_category category;
8725   Lisp_Object tail, val;
8726   int max_charset_id = 0;
8727   int i;
8728
8729   if (nargs < coding_arg_max)
8730     goto short_args;
8731
8732   attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil);
8733
8734   name = args[coding_arg_name];
8735   CHECK_SYMBOL (name);
8736   CODING_ATTR_BASE_NAME (attrs) = name;
8737
8738   val = args[coding_arg_mnemonic];
8739   if (! STRINGP (val))
8740     CHECK_CHARACTER (val);
8741   CODING_ATTR_MNEMONIC (attrs) = val;
8742
8743   coding_type = args[coding_arg_coding_type];
8744   CHECK_SYMBOL (coding_type);
8745   CODING_ATTR_TYPE (attrs) = coding_type;
8746
8747   charset_list = args[coding_arg_charset_list];
8748   if (SYMBOLP (charset_list))
8749     {
8750       if (EQ (charset_list, Qiso_2022))
8751         {
8752           if (! EQ (coding_type, Qiso_2022))
8753             error ("Invalid charset-list");
8754           charset_list = Viso_2022_charset_list;
8755         }
8756       else if (EQ (charset_list, Qemacs_mule))
8757         {
8758           if (! EQ (coding_type, Qemacs_mule))
8759             error ("Invalid charset-list");
8760           charset_list = Vemacs_mule_charset_list;
8761         }
8762       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8763         if (max_charset_id < XFASTINT (XCAR (tail)))
8764           max_charset_id = XFASTINT (XCAR (tail));
8765     }
8766   else
8767     {
8768       charset_list = Fcopy_sequence (charset_list);
8769       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8770         {
8771           struct charset *charset;
8772
8773           val = XCAR (tail);
8774           CHECK_CHARSET_GET_CHARSET (val, charset);
8775           if (EQ (coding_type, Qiso_2022)
8776               ? CHARSET_ISO_FINAL (charset) < 0
8777               : EQ (coding_type, Qemacs_mule)
8778               ? CHARSET_EMACS_MULE_ID (charset) < 0
8779               : 0)
8780             error ("Can't handle charset `%s'",
8781                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8782
8783           XSETCAR (tail, make_number (charset->id));
8784           if (max_charset_id < charset->id)
8785             max_charset_id = charset->id;
8786         }
8787     }
8788   CODING_ATTR_CHARSET_LIST (attrs) = charset_list;
8789
8790   safe_charsets = Fmake_string (make_number (max_charset_id + 1),
8791                                 make_number (255));
8792   for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8793     SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
8794   CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
8795
8796   CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
8797
8798   val = args[coding_arg_decode_translation_table];
8799   if (! CHAR_TABLE_P (val) && ! CONSP (val))
8800     CHECK_SYMBOL (val);
8801   CODING_ATTR_DECODE_TBL (attrs) = val;
8802
8803   val = args[coding_arg_encode_translation_table];
8804   if (! CHAR_TABLE_P (val) && ! CONSP (val))
8805     CHECK_SYMBOL (val);
8806   CODING_ATTR_ENCODE_TBL (attrs) = val;
8807
8808   val = args[coding_arg_post_read_conversion];
8809   CHECK_SYMBOL (val);
8810   CODING_ATTR_POST_READ (attrs) = val;
8811
8812   val = args[coding_arg_pre_write_conversion];
8813   CHECK_SYMBOL (val);
8814   CODING_ATTR_PRE_WRITE (attrs) = val;
8815
8816   val = args[coding_arg_default_char];
8817   if (NILP (val))
8818     CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' ');
8819   else
8820     {
8821       CHECK_CHARACTER (val);
8822       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
8823     }
8824
8825   val = args[coding_arg_for_unibyte];
8826   CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt;
8827
8828   val = args[coding_arg_plist];
8829   CHECK_LIST (val);
8830   CODING_ATTR_PLIST (attrs) = val;
8831
8832   if (EQ (coding_type, Qcharset))
8833     {
8834       /* Generate a lisp vector of 256 elements.  Each element is nil,
8835          integer, or a list of charset IDs.
8836
8837          If Nth element is nil, the byte code N is invalid in this
8838          coding system.
8839
8840          If Nth element is a number NUM, N is the first byte of a
8841          charset whose ID is NUM.
8842
8843          If Nth element is a list of charset IDs, N is the first byte
8844          of one of them.  The list is sorted by dimensions of the
8845          charsets.  A charset of smaller dimension comes firtst. */
8846       val = Fmake_vector (make_number (256), Qnil);
8847
8848       for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
8849         {
8850           struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
8851           int dim = CHARSET_DIMENSION (charset);
8852           int idx = (dim - 1) * 4;
8853
8854           if (CHARSET_ASCII_COMPATIBLE_P (charset))
8855             CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
8856
8857           for (i = charset->code_space[idx];
8858                i <= charset->code_space[idx + 1]; i++)
8859             {
8860               Lisp_Object tmp, tmp2;
8861               int dim2;
8862
8863               tmp = AREF (val, i);
8864               if (NILP (tmp))
8865                 tmp = XCAR (tail);
8866               else if (NUMBERP (tmp))
8867                 {
8868                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
8869                   if (dim < dim2)
8870                     tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
8871                   else
8872                     tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
8873                 }
8874               else
8875                 {
8876                   for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2))
8877                     {
8878                       dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2))));
8879                       if (dim < dim2)
8880                         break;
8881                     }
8882                   if (NILP (tmp2))
8883                     tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
8884                   else
8885                     {
8886                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
8887                       XSETCAR (tmp2, XCAR (tail));
8888                     }
8889                 }
8890               ASET (val, i, tmp);
8891             }
8892         }
8893       ASET (attrs, coding_attr_charset_valids, val);
8894       category = coding_category_charset;
8895     }
8896   else if (EQ (coding_type, Qccl))
8897     {
8898       Lisp_Object valids;
8899
8900       if (nargs < coding_arg_ccl_max)
8901         goto short_args;
8902
8903       val = args[coding_arg_ccl_decoder];
8904       CHECK_CCL_PROGRAM (val);
8905       if (VECTORP (val))
8906         val = Fcopy_sequence (val);
8907       ASET (attrs, coding_attr_ccl_decoder, val);
8908
8909       val = args[coding_arg_ccl_encoder];
8910       CHECK_CCL_PROGRAM (val);
8911       if (VECTORP (val))
8912         val = Fcopy_sequence (val);
8913       ASET (attrs, coding_attr_ccl_encoder, val);
8914
8915       val = args[coding_arg_ccl_valids];
8916       valids = Fmake_string (make_number (256), make_number (0));
8917       for (tail = val; !NILP (tail); tail = Fcdr (tail))
8918         {
8919           int from, to;
8920
8921           val = Fcar (tail);
8922           if (INTEGERP (val))
8923             {
8924               from = to = XINT (val);
8925               if (from < 0 || from > 255)
8926                 args_out_of_range_3 (val, make_number (0), make_number (255));
8927             }
8928           else
8929             {
8930               CHECK_CONS (val);
8931               CHECK_NATNUM_CAR (val);
8932               CHECK_NATNUM_CDR (val);
8933               from = XINT (XCAR (val));
8934               if (from > 255)
8935                 args_out_of_range_3 (XCAR (val),
8936                                      make_number (0), make_number (255));
8937               to = XINT (XCDR (val));
8938               if (to < from || to > 255)
8939                 args_out_of_range_3 (XCDR (val),
8940                                      XCAR (val), make_number (255));
8941             }
8942           for (i = from; i <= to; i++)
8943             SSET (valids, i, 1);
8944         }
8945       ASET (attrs, coding_attr_ccl_valids, valids);
8946
8947       category = coding_category_ccl;
8948     }
8949   else if (EQ (coding_type, Qutf_16))
8950     {
8951       Lisp_Object bom, endian;
8952
8953       CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
8954
8955       if (nargs < coding_arg_utf16_max)
8956         goto short_args;
8957
8958       bom = args[coding_arg_utf16_bom];
8959       if (! NILP (bom) && ! EQ (bom, Qt))
8960         {
8961           CHECK_CONS (bom);
8962           val = XCAR (bom);
8963           CHECK_CODING_SYSTEM (val);
8964           val = XCDR (bom);
8965           CHECK_CODING_SYSTEM (val);
8966         }
8967       ASET (attrs, coding_attr_utf_16_bom, bom);
8968
8969       endian = args[coding_arg_utf16_endian];
8970       CHECK_SYMBOL (endian);
8971       if (NILP (endian))
8972         endian = Qbig;
8973       else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle))
8974         error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian)));
8975       ASET (attrs, coding_attr_utf_16_endian, endian);
8976
8977       category = (CONSP (bom)
8978                   ? coding_category_utf_16_auto
8979                   : NILP (bom)
8980                   ? (EQ (endian, Qbig)
8981                      ? coding_category_utf_16_be_nosig
8982                      : coding_category_utf_16_le_nosig)
8983                   : (EQ (endian, Qbig)
8984                      ? coding_category_utf_16_be
8985                      : coding_category_utf_16_le));
8986     }
8987   else if (EQ (coding_type, Qiso_2022))
8988     {
8989       Lisp_Object initial, reg_usage, request, flags;
8990       int i;
8991
8992       if (nargs < coding_arg_iso2022_max)
8993         goto short_args;
8994
8995       initial = Fcopy_sequence (args[coding_arg_iso2022_initial]);
8996       CHECK_VECTOR (initial);
8997       for (i = 0; i < 4; i++)
8998         {
8999           val = Faref (initial, make_number (i));
9000           if (! NILP (val))
9001             {
9002               struct charset *charset;
9003
9004               CHECK_CHARSET_GET_CHARSET (val, charset);
9005               ASET (initial, i, make_number (CHARSET_ID (charset)));
9006               if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
9007                 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9008             }
9009           else
9010             ASET (initial, i, make_number (-1));
9011         }
9012
9013       reg_usage = args[coding_arg_iso2022_reg_usage];
9014       CHECK_CONS (reg_usage);
9015       CHECK_NUMBER_CAR (reg_usage);
9016       CHECK_NUMBER_CDR (reg_usage);
9017
9018       request = Fcopy_sequence (args[coding_arg_iso2022_request]);
9019       for (tail = request; ! NILP (tail); tail = Fcdr (tail))
9020         {
9021           int id;
9022           Lisp_Object tmp;
9023
9024           val = Fcar (tail);
9025           CHECK_CONS (val);
9026           tmp = XCAR (val);
9027           CHECK_CHARSET_GET_ID (tmp, id);
9028           CHECK_NATNUM_CDR (val);
9029           if (XINT (XCDR (val)) >= 4)
9030             error ("Invalid graphic register number: %d", XINT (XCDR (val)));
9031           XSETCAR (val, make_number (id));
9032         }
9033
9034       flags = args[coding_arg_iso2022_flags];
9035       CHECK_NATNUM (flags);
9036       i = XINT (flags);
9037       if (EQ (args[coding_arg_charset_list], Qiso_2022))
9038         flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT);
9039
9040       ASET (attrs, coding_attr_iso_initial, initial);
9041       ASET (attrs, coding_attr_iso_usage, reg_usage);
9042       ASET (attrs, coding_attr_iso_request, request);
9043       ASET (attrs, coding_attr_iso_flags, flags);
9044       setup_iso_safe_charsets (attrs);
9045
9046       if (i & CODING_ISO_FLAG_SEVEN_BITS)
9047         category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT
9048                           | CODING_ISO_FLAG_SINGLE_SHIFT))
9049                     ? coding_category_iso_7_else
9050                     : EQ (args[coding_arg_charset_list], Qiso_2022)
9051                     ? coding_category_iso_7
9052                     : coding_category_iso_7_tight);
9053       else
9054         {
9055           int id = XINT (AREF (initial, 1));
9056
9057           category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT)
9058                        || EQ (args[coding_arg_charset_list], Qiso_2022)
9059                        || id < 0)
9060                       ? coding_category_iso_8_else
9061                       : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1)
9062                       ? coding_category_iso_8_1
9063                       : coding_category_iso_8_2);
9064         }
9065       if (category != coding_category_iso_8_1
9066           && category != coding_category_iso_8_2)
9067         CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
9068     }
9069   else if (EQ (coding_type, Qemacs_mule))
9070     {
9071       if (EQ (args[coding_arg_charset_list], Qemacs_mule))
9072         ASET (attrs, coding_attr_emacs_mule_full, Qt);
9073       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9074       category = coding_category_emacs_mule;
9075     }
9076   else if (EQ (coding_type, Qshift_jis))
9077     {
9078
9079       struct charset *charset;
9080
9081       if (XINT (Flength (charset_list)) != 3
9082           && XINT (Flength (charset_list)) != 4)
9083         error ("There should be three or four charsets");
9084
9085       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9086       if (CHARSET_DIMENSION (charset) != 1)
9087         error ("Dimension of charset %s is not one",
9088                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9089       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9090         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9091
9092       charset_list = XCDR (charset_list);
9093       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9094       if (CHARSET_DIMENSION (charset) != 1)
9095         error ("Dimension of charset %s is not one",
9096                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9097
9098       charset_list = XCDR (charset_list);
9099       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9100       if (CHARSET_DIMENSION (charset) != 2)
9101         error ("Dimension of charset %s is not two",
9102                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9103
9104       charset_list = XCDR (charset_list);
9105       if (! NILP (charset_list))
9106         {
9107           charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9108           if (CHARSET_DIMENSION (charset) != 2)
9109             error ("Dimension of charset %s is not two",
9110                    SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9111         }
9112
9113       category = coding_category_sjis;
9114       Vsjis_coding_system = name;
9115     }
9116   else if (EQ (coding_type, Qbig5))
9117     {
9118       struct charset *charset;
9119
9120       if (XINT (Flength (charset_list)) != 2)
9121         error ("There should be just two charsets");
9122
9123       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9124       if (CHARSET_DIMENSION (charset) != 1)
9125         error ("Dimension of charset %s is not one",
9126                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9127       if (CHARSET_ASCII_COMPATIBLE_P (charset))
9128         CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9129
9130       charset_list = XCDR (charset_list);
9131       charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
9132       if (CHARSET_DIMENSION (charset) != 2)
9133         error ("Dimension of charset %s is not two",
9134                SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
9135
9136       category = coding_category_big5;
9137       Vbig5_coding_system = name;
9138     }
9139   else if (EQ (coding_type, Qraw_text))
9140     {
9141       category = coding_category_raw_text;
9142       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9143     }
9144   else if (EQ (coding_type, Qutf_8))
9145     {
9146       category = coding_category_utf_8;
9147       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
9148     }
9149   else if (EQ (coding_type, Qundecided))
9150     category = coding_category_undecided;
9151   else
9152     error ("Invalid coding system type: %s",
9153            SDATA (SYMBOL_NAME (coding_type)));
9154
9155   CODING_ATTR_CATEGORY (attrs) = make_number (category);
9156   CODING_ATTR_PLIST (attrs)
9157     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
9158                                 CODING_ATTR_PLIST (attrs)));
9159   CODING_ATTR_PLIST (attrs)
9160     = Fcons (QCascii_compatible_p,
9161              Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
9162                     CODING_ATTR_PLIST (attrs)));
9163
9164   eol_type = args[coding_arg_eol_type];
9165   if (! NILP (eol_type)
9166       && ! EQ (eol_type, Qunix)
9167       && ! EQ (eol_type, Qdos)
9168       && ! EQ (eol_type, Qmac))
9169     error ("Invalid eol-type");
9170
9171   aliases = Fcons (name, Qnil);
9172
9173   if (NILP (eol_type))
9174     {
9175       eol_type = make_subsidiaries (name);
9176       for (i = 0; i < 3; i++)
9177         {
9178           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
9179
9180           this_name = AREF (eol_type, i);
9181           this_aliases = Fcons (this_name, Qnil);
9182           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
9183           this_spec = Fmake_vector (make_number (3), attrs);
9184           ASET (this_spec, 1, this_aliases);
9185           ASET (this_spec, 2, this_eol_type);
9186           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
9187           Vcoding_system_list = Fcons (this_name, Vcoding_system_list);
9188           val = Fassoc (Fsymbol_name (this_name), Vcoding_system_alist);
9189           if (NILP (val))
9190             Vcoding_system_alist
9191               = Fcons (Fcons (Fsymbol_name (this_name), Qnil),
9192                        Vcoding_system_alist);
9193         }
9194     }
9195
9196   spec_vec = Fmake_vector (make_number (3), attrs);
9197   ASET (spec_vec, 1, aliases);
9198   ASET (spec_vec, 2, eol_type);
9199
9200   Fputhash (name, spec_vec, Vcoding_system_hash_table);
9201   Vcoding_system_list = Fcons (name, Vcoding_system_list);
9202   val = Fassoc (Fsymbol_name (name), Vcoding_system_alist);
9203   if (NILP (val))
9204     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil),
9205                                   Vcoding_system_alist);
9206
9207   {
9208     int id = coding_categories[category].id;
9209
9210     if (id < 0 || EQ (name, CODING_ID_NAME (id)))
9211       setup_coding_system (name, &coding_categories[category]);
9212   }
9213
9214   return Qnil;
9215
9216  short_args:
9217   return Fsignal (Qwrong_number_of_arguments,
9218                   Fcons (intern ("define-coding-system-internal"),
9219                          make_number (nargs)));
9220 }
9221
9222
9223 DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
9224        3, 3, 0,
9225        doc: /* Change value in CODING-SYSTEM's property list PROP to VAL.  */)
9226   (coding_system, prop, val)
9227      Lisp_Object coding_system, prop, val;
9228 {
9229   Lisp_Object spec, attrs;
9230
9231   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9232   attrs = AREF (spec, 0);
9233   if (EQ (prop, QCmnemonic))
9234     {
9235       if (! STRINGP (val))
9236         CHECK_CHARACTER (val);
9237       CODING_ATTR_MNEMONIC (attrs) = val;
9238     }
9239   else if (EQ (prop, QCdefalut_char))
9240     {
9241       if (NILP (val))
9242         val = make_number (' ');
9243       else
9244         CHECK_CHARACTER (val);
9245       CODING_ATTR_DEFAULT_CHAR (attrs) = val;
9246     }
9247   else if (EQ (prop, QCdecode_translation_table))
9248     {
9249       if (! CHAR_TABLE_P (val) && ! CONSP (val))
9250         CHECK_SYMBOL (val);
9251       CODING_ATTR_DECODE_TBL (attrs) = val;
9252     }
9253   else if (EQ (prop, QCencode_translation_table))
9254     {
9255       if (! CHAR_TABLE_P (val) && ! CONSP (val))
9256         CHECK_SYMBOL (val);
9257       CODING_ATTR_ENCODE_TBL (attrs) = val;
9258     }
9259   else if (EQ (prop, QCpost_read_conversion))
9260     {
9261       CHECK_SYMBOL (val);
9262       CODING_ATTR_POST_READ (attrs) = val;
9263     }
9264   else if (EQ (prop, QCpre_write_conversion))
9265     {
9266       CHECK_SYMBOL (val);
9267       CODING_ATTR_PRE_WRITE (attrs) = val;
9268     }
9269   else if (EQ (prop, QCascii_compatible_p))
9270     {
9271       CODING_ATTR_ASCII_COMPAT (attrs) = val;
9272     }
9273
9274   CODING_ATTR_PLIST (attrs)
9275     = Fplist_put (CODING_ATTR_PLIST (attrs), prop, val);
9276   return val;
9277 }
9278
9279
9280 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
9281        Sdefine_coding_system_alias, 2, 2, 0,
9282        doc: /* Define ALIAS as an alias for CODING-SYSTEM.  */)
9283      (alias, coding_system)
9284      Lisp_Object alias, coding_system;
9285 {
9286   Lisp_Object spec, aliases, eol_type, val;
9287
9288   CHECK_SYMBOL (alias);
9289   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9290   aliases = AREF (spec, 1);
9291   /* ALISES should be a list of length more than zero, and the first
9292      element is a base coding system.  Append ALIAS at the tail of the
9293      list.  */
9294   while (!NILP (XCDR (aliases)))
9295     aliases = XCDR (aliases);
9296   XSETCDR (aliases, Fcons (alias, Qnil));
9297
9298   eol_type = AREF (spec, 2);
9299   if (VECTORP (eol_type))
9300     {
9301       Lisp_Object subsidiaries;
9302       int i;
9303
9304       subsidiaries = make_subsidiaries (alias);
9305       for (i = 0; i < 3; i++)
9306         Fdefine_coding_system_alias (AREF (subsidiaries, i),
9307                                      AREF (eol_type, i));
9308     }
9309
9310   Fputhash (alias, spec, Vcoding_system_hash_table);
9311   Vcoding_system_list = Fcons (alias, Vcoding_system_list);
9312   val = Fassoc (Fsymbol_name (alias), Vcoding_system_alist);
9313   if (NILP (val))
9314     Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil),
9315                                   Vcoding_system_alist);
9316
9317   return Qnil;
9318 }
9319
9320 DEFUN ("coding-system-base", Fcoding_system_base, Scoding_system_base,
9321        1, 1, 0,
9322        doc: /* Return the base of CODING-SYSTEM.
9323 Any alias or subsidiary coding system is not a base coding system.  */)
9324   (coding_system)
9325      Lisp_Object coding_system;
9326 {
9327   Lisp_Object spec, attrs;
9328
9329   if (NILP (coding_system))
9330     return (Qno_conversion);
9331   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9332   attrs = AREF (spec, 0);
9333   return CODING_ATTR_BASE_NAME (attrs);
9334 }
9335
9336 DEFUN ("coding-system-plist", Fcoding_system_plist, Scoding_system_plist,
9337        1, 1, 0,
9338        doc: "Return the property list of CODING-SYSTEM.")
9339      (coding_system)
9340      Lisp_Object coding_system;
9341 {
9342   Lisp_Object spec, attrs;
9343
9344   if (NILP (coding_system))
9345     coding_system = Qno_conversion;
9346   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9347   attrs = AREF (spec, 0);
9348   return CODING_ATTR_PLIST (attrs);
9349 }
9350
9351
9352 DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
9353        1, 1, 0,
9354        doc: /* Return the list of aliases of CODING-SYSTEM.  */)
9355      (coding_system)
9356      Lisp_Object coding_system;
9357 {
9358   Lisp_Object spec;
9359
9360   if (NILP (coding_system))
9361     coding_system = Qno_conversion;
9362   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
9363   return AREF (spec, 1);
9364 }
9365
9366 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
9367        Scoding_system_eol_type, 1, 1, 0,
9368        doc: /* Return eol-type of CODING-SYSTEM.
9369 An eol-type is integer 0, 1, 2, or a vector of coding systems.
9370
9371 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
9372 and CR respectively.
9373
9374 A vector value indicates that a format of end-of-line should be
9375 detected automatically.  Nth element of the vector is the subsidiary
9376 coding system whose eol-type is N.  */)
9377      (coding_system)
9378      Lisp_Object coding_system;
9379 {
9380   Lisp_Object spec, eol_type;
9381   int n;
9382
9383   if (NILP (coding_system))
9384     coding_system = Qno_conversion;
9385   if (! CODING_SYSTEM_P (coding_system))
9386     return Qnil;
9387   spec = CODING_SYSTEM_SPEC (coding_system);
9388   eol_type = AREF (spec, 2);
9389   if (VECTORP (eol_type))
9390     return Fcopy_sequence (eol_type);
9391   n = EQ (eol_type, Qunix) ? 0 : EQ (eol_type, Qdos) ? 1 : 2;
9392   return make_number (n);
9393 }
9394
9395 #endif /* emacs */
9396
9397 \f
9398 /*** 9. Post-amble ***/
9399
9400 void
9401 init_coding_once ()
9402 {
9403   int i;
9404
9405   for (i = 0; i < coding_category_max; i++)
9406     {
9407       coding_categories[i].id = -1;
9408       coding_priorities[i] = i;
9409     }
9410
9411   /* ISO2022 specific initialize routine.  */
9412   for (i = 0; i < 0x20; i++)
9413     iso_code_class[i] = ISO_control_0;
9414   for (i = 0x21; i < 0x7F; i++)
9415     iso_code_class[i] = ISO_graphic_plane_0;
9416   for (i = 0x80; i < 0xA0; i++)
9417     iso_code_class[i] = ISO_control_1;
9418   for (i = 0xA1; i < 0xFF; i++)
9419     iso_code_class[i] = ISO_graphic_plane_1;
9420   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
9421   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
9422   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
9423   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
9424   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
9425   iso_code_class[ISO_CODE_ESC] = ISO_escape;
9426   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
9427   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
9428   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
9429
9430   for (i = 0; i < 256; i++)
9431     {
9432       emacs_mule_bytes[i] = 1;
9433     }
9434   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_11] = 3;
9435   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_12] = 3;
9436   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_21] = 4;
9437   emacs_mule_bytes[EMACS_MULE_LEADING_CODE_PRIVATE_22] = 4;
9438 }
9439
9440 #ifdef emacs
9441
9442 void
9443 syms_of_coding ()
9444 {
9445   staticpro (&Vcoding_system_hash_table);
9446   {
9447     Lisp_Object args[2];
9448     args[0] = QCtest;
9449     args[1] = Qeq;
9450     Vcoding_system_hash_table = Fmake_hash_table (2, args);
9451   }
9452
9453   staticpro (&Vsjis_coding_system);
9454   Vsjis_coding_system = Qnil;
9455
9456   staticpro (&Vbig5_coding_system);
9457   Vbig5_coding_system = Qnil;
9458
9459   staticpro (&Vcode_conversion_reused_workbuf);
9460   Vcode_conversion_reused_workbuf = Qnil;
9461
9462   staticpro (&Vcode_conversion_workbuf_name);
9463   Vcode_conversion_workbuf_name = build_string (" *code-conversion-work*");
9464
9465   reused_workbuf_in_use = 0;
9466
9467   DEFSYM (Qcharset, "charset");
9468   DEFSYM (Qtarget_idx, "target-idx");
9469   DEFSYM (Qcoding_system_history, "coding-system-history");
9470   Fset (Qcoding_system_history, Qnil);
9471
9472   /* Target FILENAME is the first argument.  */
9473   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
9474   /* Target FILENAME is the third argument.  */
9475   Fput (Qwrite_region, Qtarget_idx, make_number (2));
9476
9477   DEFSYM (Qcall_process, "call-process");
9478   /* Target PROGRAM is the first argument.  */
9479   Fput (Qcall_process, Qtarget_idx, make_number (0));
9480
9481   DEFSYM (Qcall_process_region, "call-process-region");
9482   /* Target PROGRAM is the third argument.  */
9483   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
9484
9485   DEFSYM (Qstart_process, "start-process");
9486   /* Target PROGRAM is the third argument.  */
9487   Fput (Qstart_process, Qtarget_idx, make_number (2));
9488
9489   DEFSYM (Qopen_network_stream, "open-network-stream");
9490   /* Target SERVICE is the fourth argument.  */
9491   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
9492
9493   DEFSYM (Qcoding_system, "coding-system");
9494   DEFSYM (Qcoding_aliases, "coding-aliases");
9495
9496   DEFSYM (Qeol_type, "eol-type");
9497   DEFSYM (Qunix, "unix");
9498   DEFSYM (Qdos, "dos");
9499
9500   DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system");
9501   DEFSYM (Qpost_read_conversion, "post-read-conversion");
9502   DEFSYM (Qpre_write_conversion, "pre-write-conversion");
9503   DEFSYM (Qdefault_char, "default-char");
9504   DEFSYM (Qundecided, "undecided");
9505   DEFSYM (Qno_conversion, "no-conversion");
9506   DEFSYM (Qraw_text, "raw-text");
9507
9508   DEFSYM (Qiso_2022, "iso-2022");
9509
9510   DEFSYM (Qutf_8, "utf-8");
9511   DEFSYM (Qutf_8_emacs, "utf-8-emacs");
9512
9513   DEFSYM (Qutf_16, "utf-16");
9514   DEFSYM (Qbig, "big");
9515   DEFSYM (Qlittle, "little");
9516
9517   DEFSYM (Qshift_jis, "shift-jis");
9518   DEFSYM (Qbig5, "big5");
9519
9520   DEFSYM (Qcoding_system_p, "coding-system-p");
9521
9522   DEFSYM (Qcoding_system_error, "coding-system-error");
9523   Fput (Qcoding_system_error, Qerror_conditions,
9524         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
9525   Fput (Qcoding_system_error, Qerror_message,
9526         build_string ("Invalid coding system"));
9527
9528   /* Intern this now in case it isn't already done.
9529      Setting this variable twice is harmless.
9530      But don't staticpro it here--that is done in alloc.c.  */
9531   Qchar_table_extra_slots = intern ("char-table-extra-slots");
9532
9533   DEFSYM (Qtranslation_table, "translation-table");
9534   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
9535   DEFSYM (Qtranslation_table_id, "translation-table-id");
9536   DEFSYM (Qtranslation_table_for_decode, "translation-table-for-decode");
9537   DEFSYM (Qtranslation_table_for_encode, "translation-table-for-encode");
9538
9539   DEFSYM (Qvalid_codes, "valid-codes");
9540
9541   DEFSYM (Qemacs_mule, "emacs-mule");
9542
9543   DEFSYM (QCcategory, ":category");
9544   DEFSYM (QCmnemonic, ":mnemonic");
9545   DEFSYM (QCdefalut_char, ":default-char");
9546   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
9547   DEFSYM (QCencode_translation_table, ":encode-translation-table");
9548   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
9549   DEFSYM (QCpre_write_conversion, ":pre-write-conversion");
9550   DEFSYM (QCascii_compatible_p, ":ascii-compatible-p");
9551
9552   Vcoding_category_table
9553     = Fmake_vector (make_number (coding_category_max), Qnil);
9554   staticpro (&Vcoding_category_table);
9555   /* Followings are target of code detection.  */
9556   ASET (Vcoding_category_table, coding_category_iso_7,
9557         intern ("coding-category-iso-7"));
9558   ASET (Vcoding_category_table, coding_category_iso_7_tight,
9559         intern ("coding-category-iso-7-tight"));
9560   ASET (Vcoding_category_table, coding_category_iso_8_1,
9561         intern ("coding-category-iso-8-1"));
9562   ASET (Vcoding_category_table, coding_category_iso_8_2,
9563         intern ("coding-category-iso-8-2"));
9564   ASET (Vcoding_category_table, coding_category_iso_7_else,
9565         intern ("coding-category-iso-7-else"));
9566   ASET (Vcoding_category_table, coding_category_iso_8_else,
9567         intern ("coding-category-iso-8-else"));
9568   ASET (Vcoding_category_table, coding_category_utf_8,
9569         intern ("coding-category-utf-8"));
9570   ASET (Vcoding_category_table, coding_category_utf_16_be,
9571         intern ("coding-category-utf-16-be"));
9572   ASET (Vcoding_category_table, coding_category_utf_16_auto,
9573         intern ("coding-category-utf-16-auto"));
9574   ASET (Vcoding_category_table, coding_category_utf_16_le,
9575         intern ("coding-category-utf-16-le"));
9576   ASET (Vcoding_category_table, coding_category_utf_16_be_nosig,
9577         intern ("coding-category-utf-16-be-nosig"));
9578   ASET (Vcoding_category_table, coding_category_utf_16_le_nosig,
9579         intern ("coding-category-utf-16-le-nosig"));
9580   ASET (Vcoding_category_table, coding_category_charset,
9581         intern ("coding-category-charset"));
9582   ASET (Vcoding_category_table, coding_category_sjis,
9583         intern ("coding-category-sjis"));
9584   ASET (Vcoding_category_table, coding_category_big5,
9585         intern ("coding-category-big5"));
9586   ASET (Vcoding_category_table, coding_category_ccl,
9587         intern ("coding-category-ccl"));
9588   ASET (Vcoding_category_table, coding_category_emacs_mule,
9589         intern ("coding-category-emacs-mule"));
9590   /* Followings are NOT target of code detection.  */
9591   ASET (Vcoding_category_table, coding_category_raw_text,
9592         intern ("coding-category-raw-text"));
9593   ASET (Vcoding_category_table, coding_category_undecided,
9594         intern ("coding-category-undecided"));
9595
9596   DEFSYM (Qinsufficient_source, "insufficient-source");
9597   DEFSYM (Qinconsistent_eol, "inconsistent-eol");
9598   DEFSYM (Qinvalid_source, "invalid-source");
9599   DEFSYM (Qinterrupted, "interrupted");
9600   DEFSYM (Qinsufficient_memory, "insufficient-memory");
9601   DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
9602
9603   defsubr (&Scoding_system_p);
9604   defsubr (&Sread_coding_system);
9605   defsubr (&Sread_non_nil_coding_system);
9606   defsubr (&Scheck_coding_system);
9607   defsubr (&Sdetect_coding_region);
9608   defsubr (&Sdetect_coding_string);
9609   defsubr (&Sfind_coding_systems_region_internal);
9610   defsubr (&Sunencodable_char_position);
9611   defsubr (&Scheck_coding_systems_region);
9612   defsubr (&Sdecode_coding_region);
9613   defsubr (&Sencode_coding_region);
9614   defsubr (&Sdecode_coding_string);
9615   defsubr (&Sencode_coding_string);
9616   defsubr (&Sdecode_sjis_char);
9617   defsubr (&Sencode_sjis_char);
9618   defsubr (&Sdecode_big5_char);
9619   defsubr (&Sencode_big5_char);
9620   defsubr (&Sset_terminal_coding_system_internal);
9621   defsubr (&Sset_safe_terminal_coding_system_internal);
9622   defsubr (&Sterminal_coding_system);
9623   defsubr (&Sset_keyboard_coding_system_internal);
9624   defsubr (&Skeyboard_coding_system);
9625   defsubr (&Sfind_operation_coding_system);
9626   defsubr (&Sset_coding_system_priority);
9627   defsubr (&Sdefine_coding_system_internal);
9628   defsubr (&Sdefine_coding_system_alias);
9629   defsubr (&Scoding_system_put);
9630   defsubr (&Scoding_system_base);
9631   defsubr (&Scoding_system_plist);
9632   defsubr (&Scoding_system_aliases);
9633   defsubr (&Scoding_system_eol_type);
9634   defsubr (&Scoding_system_priority_list);
9635
9636   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
9637                doc: /* List of coding systems.
9638
9639 Do not alter the value of this variable manually.  This variable should be
9640 updated by the functions `define-coding-system' and
9641 `define-coding-system-alias'.  */);
9642   Vcoding_system_list = Qnil;
9643
9644   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
9645                doc: /* Alist of coding system names.
9646 Each element is one element list of coding system name.
9647 This variable is given to `completing-read' as TABLE argument.
9648
9649 Do not alter the value of this variable manually.  This variable should be
9650 updated by the functions `make-coding-system' and
9651 `define-coding-system-alias'.  */);
9652   Vcoding_system_alist = Qnil;
9653
9654   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
9655                doc: /* List of coding-categories (symbols) ordered by priority.
9656
9657 On detecting a coding system, Emacs tries code detection algorithms
9658 associated with each coding-category one by one in this order.  When
9659 one algorithm agrees with a byte sequence of source text, the coding
9660 system bound to the corresponding coding-category is selected.
9661
9662 Don't modify this variable directly, but use `set-coding-priority'.  */);
9663   {
9664     int i;
9665
9666     Vcoding_category_list = Qnil;
9667     for (i = coding_category_max - 1; i >= 0; i--)
9668       Vcoding_category_list
9669         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
9670                  Vcoding_category_list);
9671   }
9672
9673   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
9674                doc: /* Specify the coding system for read operations.
9675 It is useful to bind this variable with `let', but do not set it globally.
9676 If the value is a coding system, it is used for decoding on read operation.
9677 If not, an appropriate element is used from one of the coding system alists:
9678 There are three such tables, `file-coding-system-alist',
9679 `process-coding-system-alist', and `network-coding-system-alist'.  */);
9680   Vcoding_system_for_read = Qnil;
9681
9682   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
9683                doc: /* Specify the coding system for write operations.
9684 Programs bind this variable with `let', but you should not set it globally.
9685 If the value is a coding system, it is used for encoding of output,
9686 when writing it to a file and when sending it to a file or subprocess.
9687
9688 If this does not specify a coding system, an appropriate element
9689 is used from one of the coding system alists:
9690 There are three such tables, `file-coding-system-alist',
9691 `process-coding-system-alist', and `network-coding-system-alist'.
9692 For output to files, if the above procedure does not specify a coding system,
9693 the value of `buffer-file-coding-system' is used.  */);
9694   Vcoding_system_for_write = Qnil;
9695
9696   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
9697                doc: /*
9698 Coding system used in the latest file or process I/O.  */);
9699   Vlast_coding_system_used = Qnil;
9700
9701   DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
9702                doc: /*
9703 Error status of the last code conversion.
9704
9705 When an error was detected in the last code conversion, this variable
9706 is set to one of the following symbols.
9707   `insufficient-source'
9708   `inconsistent-eol'
9709   `invalid-source'
9710   `interrupted'
9711   `insufficient-memory'
9712 When no error was detected, the value doesn't change.  So, to check
9713 the error status of a code conversion by this variable, you must
9714 explicitly set this variable to nil before performing code
9715 conversion.  */);
9716   Vlast_code_conversion_error = Qnil;
9717
9718   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
9719                doc: /*
9720 *Non-nil means always inhibit code conversion of end-of-line format.
9721 See info node `Coding Systems' and info node `Text and Binary' concerning
9722 such conversion.  */);
9723   inhibit_eol_conversion = 0;
9724
9725   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
9726                doc: /*
9727 Non-nil means process buffer inherits coding system of process output.
9728 Bind it to t if the process output is to be treated as if it were a file
9729 read from some filesystem.  */);
9730   inherit_process_coding_system = 0;
9731
9732   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
9733                doc: /*
9734 Alist to decide a coding system to use for a file I/O operation.
9735 The format is ((PATTERN . VAL) ...),
9736 where PATTERN is a regular expression matching a file name,
9737 VAL is a coding system, a cons of coding systems, or a function symbol.
9738 If VAL is a coding system, it is used for both decoding and encoding
9739 the file contents.
9740 If VAL is a cons of coding systems, the car part is used for decoding,
9741 and the cdr part is used for encoding.
9742 If VAL is a function symbol, the function must return a coding system
9743 or a cons of coding systems which are used as above.  The function is
9744 called with an argument that is a list of the arguments with which
9745 `find-operation-coding-system' was called.  If the function can't decide
9746 a coding system, it can return `undecided' so that the normal
9747 code-detection is performed.
9748
9749 See also the function `find-operation-coding-system'
9750 and the variable `auto-coding-alist'.  */);
9751   Vfile_coding_system_alist = Qnil;
9752
9753   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
9754                doc: /*
9755 Alist to decide a coding system to use for a process I/O operation.
9756 The format is ((PATTERN . VAL) ...),
9757 where PATTERN is a regular expression matching a program name,
9758 VAL is a coding system, a cons of coding systems, or a function symbol.
9759 If VAL is a coding system, it is used for both decoding what received
9760 from the program and encoding what sent to the program.
9761 If VAL is a cons of coding systems, the car part is used for decoding,
9762 and the cdr part is used for encoding.
9763 If VAL is a function symbol, the function must return a coding system
9764 or a cons of coding systems which are used as above.
9765
9766 See also the function `find-operation-coding-system'.  */);
9767   Vprocess_coding_system_alist = Qnil;
9768
9769   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
9770                doc: /*
9771 Alist to decide a coding system to use for a network I/O operation.
9772 The format is ((PATTERN . VAL) ...),
9773 where PATTERN is a regular expression matching a network service name
9774 or is a port number to connect to,
9775 VAL is a coding system, a cons of coding systems, or a function symbol.
9776 If VAL is a coding system, it is used for both decoding what received
9777 from the network stream and encoding what sent to the network stream.
9778 If VAL is a cons of coding systems, the car part is used for decoding,
9779 and the cdr part is used for encoding.
9780 If VAL is a function symbol, the function must return a coding system
9781 or a cons of coding systems which are used as above.
9782
9783 See also the function `find-operation-coding-system'.  */);
9784   Vnetwork_coding_system_alist = Qnil;
9785
9786   DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
9787                doc: /* Coding system to use with system messages.
9788 Also used for decoding keyboard input on X Window system.  */);
9789   Vlocale_coding_system = Qnil;
9790
9791   /* The eol mnemonics are reset in startup.el system-dependently.  */
9792   DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
9793                doc: /*
9794 *String displayed in mode line for UNIX-like (LF) end-of-line format.  */);
9795   eol_mnemonic_unix = build_string (":");
9796
9797   DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
9798                doc: /*
9799 *String displayed in mode line for DOS-like (CRLF) end-of-line format.  */);
9800   eol_mnemonic_dos = build_string ("\\");
9801
9802   DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
9803                doc: /*
9804 *String displayed in mode line for MAC-like (CR) end-of-line format.  */);
9805   eol_mnemonic_mac = build_string ("/");
9806
9807   DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
9808                doc: /*
9809 *String displayed in mode line when end-of-line format is not yet determined.  */);
9810   eol_mnemonic_undecided = build_string (":");
9811
9812   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
9813                doc: /*
9814 *Non-nil enables character translation while encoding and decoding.  */);
9815   Venable_character_translation = Qt;
9816
9817   DEFVAR_LISP ("standard-translation-table-for-decode",
9818                &Vstandard_translation_table_for_decode,
9819                doc: /* Table for translating characters while decoding.  */);
9820   Vstandard_translation_table_for_decode = Qnil;
9821
9822   DEFVAR_LISP ("standard-translation-table-for-encode",
9823                &Vstandard_translation_table_for_encode,
9824                doc: /* Table for translating characters while encoding.  */);
9825   Vstandard_translation_table_for_encode = Qnil;
9826
9827   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
9828                doc: /* Alist of charsets vs revision numbers.
9829 While encoding, if a charset (car part of an element) is found,
9830 designate it with the escape sequence identifying revision (cdr part
9831 of the element).  */);
9832   Vcharset_revision_table = Qnil;
9833
9834   DEFVAR_LISP ("default-process-coding-system",
9835                &Vdefault_process_coding_system,
9836                doc: /* Cons of coding systems used for process I/O by default.
9837 The car part is used for decoding a process output,
9838 the cdr part is used for encoding a text to be sent to a process.  */);
9839   Vdefault_process_coding_system = Qnil;
9840
9841   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
9842                doc: /*
9843 Table of extra Latin codes in the range 128..159 (inclusive).
9844 This is a vector of length 256.
9845 If Nth element is non-nil, the existence of code N in a file
9846 \(or output of subprocess) doesn't prevent it to be detected as
9847 a coding system of ISO 2022 variant which has a flag
9848 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
9849 or reading output of a subprocess.
9850 Only 128th through 159th elements has a meaning.  */);
9851   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
9852
9853   DEFVAR_LISP ("select-safe-coding-system-function",
9854                &Vselect_safe_coding_system_function,
9855                doc: /*
9856 Function to call to select safe coding system for encoding a text.
9857
9858 If set, this function is called to force a user to select a proper
9859 coding system which can encode the text in the case that a default
9860 coding system used in each operation can't encode the text.  The
9861 function should take care that the buffer is not modified while
9862 the coding system is being selected.
9863
9864 The default value is `select-safe-coding-system' (which see).  */);
9865   Vselect_safe_coding_system_function = Qnil;
9866
9867   DEFVAR_BOOL ("coding-system-require-warning",
9868                &coding_system_require_warning,
9869                doc: /* Internal use only.
9870 If non-nil, on writing a file, `select-safe-coding-system-function' is
9871 called even if `coding-system-for-write' is non-nil.  The command
9872 `universal-coding-system-argument' binds this variable to t temporarily.  */);
9873   coding_system_require_warning = 0;
9874
9875
9876   DEFVAR_BOOL ("inhibit-iso-escape-detection",
9877                &inhibit_iso_escape_detection,
9878                doc: /*
9879 If non-nil, Emacs ignores ISO2022's escape sequence on code detection.
9880
9881 By default, on reading a file, Emacs tries to detect how the text is
9882 encoded.  This code detection is sensitive to escape sequences.  If
9883 the sequence is valid as ISO2022, the code is determined as one of
9884 the ISO2022 encodings, and the file is decoded by the corresponding
9885 coding system (e.g. `iso-2022-7bit').
9886
9887 However, there may be a case that you want to read escape sequences in
9888 a file as is.  In such a case, you can set this variable to non-nil.
9889 Then, as the code detection ignores any escape sequences, no file is
9890 detected as encoded in some ISO2022 encoding.  The result is that all
9891 escape sequences become visible in a buffer.
9892
9893 The default value is nil, and it is strongly recommended not to change
9894 it.  That is because many Emacs Lisp source files that contain
9895 non-ASCII characters are encoded by the coding system `iso-2022-7bit'
9896 in Emacs's distribution, and they won't be decoded correctly on
9897 reading if you suppress escape sequence detection.
9898
9899 The other way to read escape sequences in a file without decoding is
9900 to explicitly specify some coding system that doesn't use ISO2022's
9901 escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
9902   inhibit_iso_escape_detection = 0;
9903
9904   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
9905                doc: /* Char table for translating self-inserting characters.
9906 This is applied to the result of input methods, not their input.  See also
9907 `keyboard-translate-table'.  */);
9908     Vtranslation_table_for_input = Qnil;
9909
9910   {
9911     Lisp_Object args[coding_arg_max];
9912     Lisp_Object plist[16];
9913     int i;
9914
9915     for (i = 0; i < coding_arg_max; i++)
9916       args[i] = Qnil;
9917
9918     plist[0] = intern (":name");
9919     plist[1] = args[coding_arg_name] = Qno_conversion;
9920     plist[2] = intern (":mnemonic");
9921     plist[3] = args[coding_arg_mnemonic] = make_number ('=');
9922     plist[4] = intern (":coding-type");
9923     plist[5] = args[coding_arg_coding_type] = Qraw_text;
9924     plist[6] = intern (":ascii-compatible-p");
9925     plist[7] = args[coding_arg_ascii_compatible_p] = Qt;
9926     plist[8] = intern (":default-char");
9927     plist[9] = args[coding_arg_default_char] = make_number (0);
9928     plist[10] = intern (":for-unibyte");
9929     plist[11] = args[coding_arg_for_unibyte] = Qt;
9930     plist[12] = intern (":docstring");
9931     plist[13] = build_string ("Do no conversion.\n\
9932 \n\
9933 When you visit a file with this coding, the file is read into a\n\
9934 unibyte buffer as is, thus each byte of a file is treated as a\n\
9935 character.");
9936     plist[14] = intern (":eol-type");
9937     plist[15] = args[coding_arg_eol_type] = Qunix;
9938     args[coding_arg_plist] = Flist (16, plist);
9939     Fdefine_coding_system_internal (coding_arg_max, args);
9940
9941     plist[1] = args[coding_arg_name] = Qundecided;
9942     plist[3] = args[coding_arg_mnemonic] = make_number ('-');
9943     plist[5] = args[coding_arg_coding_type] = Qundecided;
9944     /* This is already set.
9945        plist[7] = args[coding_arg_ascii_compatible_p] = Qt; */
9946     plist[8] = intern (":charset-list");
9947     plist[9] = args[coding_arg_charset_list] = Fcons (Qascii, Qnil);
9948     plist[11] = args[coding_arg_for_unibyte] = Qnil;
9949     plist[13] = build_string ("No conversion on encoding, automatic conversion on decoding.");
9950     plist[15] = args[coding_arg_eol_type] = Qnil;
9951     args[coding_arg_plist] = Flist (16, plist);
9952     Fdefine_coding_system_internal (coding_arg_max, args);
9953   }
9954
9955   setup_coding_system (Qno_conversion, &safe_terminal_coding);
9956
9957   {
9958     int i;
9959
9960     for (i = 0; i < coding_category_max; i++)
9961       Fset (AREF (Vcoding_category_table, i), Qno_conversion);
9962   }
9963 #if defined (MSDOS) || defined (WINDOWSNT)
9964   system_eol_type = Qdos;
9965 #else
9966   system_eol_type = Qunix;
9967 #endif
9968   staticpro (&system_eol_type);
9969 }
9970
9971 char *
9972 emacs_strerror (error_number)
9973      int error_number;
9974 {
9975   char *str;
9976
9977   synchronize_system_messages_locale ();
9978   str = strerror (error_number);
9979
9980   if (! NILP (Vlocale_coding_system))
9981     {
9982       Lisp_Object dec = code_convert_string_norecord (build_string (str),
9983                                                       Vlocale_coding_system,
9984                                                       0);
9985       str = (char *) SDATA (dec);
9986     }
9987
9988   return str;
9989 }
9990
9991 #endif /* emacs */
9992
9993 /* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
9994    (do not change this comment) */